Skip to content

Commit a6465cc

Browse files
chore: Convert enable probabilistic inference from additional experiments (googleapis#1643)
* chore: Convert enable probabilistic inference from additional experiments. Converts `enable_probabilistic_inference` flag in additional experiments to a boolean field in the API `enableProbabilisticInference`, only adds if True. The flag is removed from the additional experiments to reduce duplication. * chore: Fix linting issues. Fixes extra newline. Co-authored-by: sasha-gitg <[email protected]>
1 parent 2cf9fe6 commit a6465cc

File tree

2 files changed

+122
-0
lines changed

2 files changed

+122
-0
lines changed

google/cloud/aiplatform/training_jobs.py

+17
Original file line numberDiff line numberDiff line change
@@ -2417,6 +2417,9 @@ def _run(
24172417
max_count=window_max_count,
24182418
)
24192419

2420+
# TODO(b/244643824): Replace additional experiments with a new job arg.
2421+
enable_probabilistic_inference = self._convert_enable_probabilistic_inference()
2422+
24202423
training_task_inputs_dict = {
24212424
# required inputs
24222425
"targetColumn": target_column,
@@ -2459,6 +2462,11 @@ def _run(
24592462
if window_config:
24602463
training_task_inputs_dict["windowConfig"] = window_config
24612464

2465+
if enable_probabilistic_inference:
2466+
training_task_inputs_dict[
2467+
"enableProbabilisticInference"
2468+
] = enable_probabilistic_inference
2469+
24622470
final_export_eval_bq_uri = export_evaluated_data_items_bigquery_destination_uri
24632471
if final_export_eval_bq_uri and not final_export_eval_bq_uri.startswith(
24642472
"bq://"
@@ -2541,6 +2549,15 @@ def _add_additional_experiments(self, additional_experiments: List[str]):
25412549
"""
25422550
self._additional_experiments.extend(additional_experiments)
25432551

2552+
def _convert_enable_probabilistic_inference(self) -> bool:
2553+
"""Convert enable probabilistic from additional experiments."""
2554+
key = "enable_probabilistic_inference"
2555+
if self._additional_experiments:
2556+
if key in self._additional_experiments:
2557+
self._additional_experiments.remove(key)
2558+
return True
2559+
return False
2560+
25442561
@staticmethod
25452562
def _create_window_config(
25462563
column: Optional[str] = None,

tests/unit/aiplatform/test_automl_forecasting_training_jobs.py

+105
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,11 @@
8989
_TEST_WINDOW_STRIDE_LENGTH = 1
9090
_TEST_WINDOW_MAX_COUNT = None
9191
_TEST_TRAINING_HOLIDAY_REGIONS = ["GLOBAL"]
92+
_TEST_ADDITIONAL_EXPERIMENTS_PROBABILISTIC_INFERENCE = [
93+
"exp1",
94+
"exp2",
95+
"enable_probabilistic_inference",
96+
]
9297
_TEST_TRAINING_TASK_INPUTS_DICT = {
9398
# required inputs
9499
"targetColumn": _TEST_TRAINING_TARGET_COLUMN,
@@ -134,6 +139,17 @@
134139
struct_pb2.Value(),
135140
)
136141

142+
# Expected trainingTaskInputs when the probabilistic-inference flag arrives via
# additional experiments: the flag becomes the dedicated
# `enableProbabilisticInference` boolean, while the remaining experiments are
# presumably passed through as `additionalExperiments` (the flag-free
# _TEST_ADDITIONAL_EXPERIMENTS list is defined earlier in this file).
_TEST_TRAINING_TASK_INPUTS_WITH_ADDITIONAL_EXPERIMENTS_PROBABILISTIC_INFERENCE = (
    json_format.ParseDict(
        dict(
            _TEST_TRAINING_TASK_INPUTS_DICT,
            additionalExperiments=_TEST_ADDITIONAL_EXPERIMENTS,
            enableProbabilisticInference=True,
        ),
        struct_pb2.Value(),
    )
)
137153
_TEST_TRAINING_TASK_INPUTS = json_format.ParseDict(
138154
_TEST_TRAINING_TASK_INPUTS_DICT,
139155
struct_pb2.Value(),
@@ -1243,3 +1259,92 @@ def test_splits_default(
12431259
training_pipeline=true_training_pipeline,
12441260
timeout=None,
12451261
)
1262+
1263+
@mock.patch.object(training_jobs, "_JOB_WAIT_TIME", 1)
@mock.patch.object(training_jobs, "_LOG_WAIT_TIME", 1)
@pytest.mark.usefixtures("mock_pipeline_service_get")
@pytest.mark.parametrize("sync", [True, False])
@pytest.mark.parametrize(
    "training_job",
    [
        training_jobs.AutoMLForecastingTrainingJob,
        training_jobs.SequenceToSequencePlusForecastingTrainingJob,
    ],
)
def test_run_call_pipeline_if_set_additional_experiments_probabilistic_inference(
    self,
    mock_pipeline_service_create,
    mock_dataset_time_series,
    mock_model_service_get,
    sync,
    training_job,
):
    """Checks conversion of the probabilistic-inference additional experiment.

    The job is given additional experiments that include
    ``enable_probabilistic_inference``; the pipeline-create request is then
    expected to match
    ``_TEST_TRAINING_TASK_INPUTS_WITH_ADDITIONAL_EXPERIMENTS_PROBABILISTIC_INFERENCE``,
    i.e. the flag is carried as the boolean ``enableProbabilisticInference``
    training-task input rather than inside ``additionalExperiments``.
    Parametrized over both forecasting job classes and sync/async execution.
    """
    aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME)

    job = training_job(
        display_name=_TEST_DISPLAY_NAME,
        optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME,
        column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS,
    )

    # The experiments list includes the probabilistic-inference flag alongside
    # ordinary experiment names.
    job._add_additional_experiments(
        _TEST_ADDITIONAL_EXPERIMENTS_PROBABILISTIC_INFERENCE
    )

    model_from_job = job.run(
        dataset=mock_dataset_time_series,
        target_column=_TEST_TRAINING_TARGET_COLUMN,
        time_column=_TEST_TRAINING_TIME_COLUMN,
        time_series_identifier_column=_TEST_TRAINING_TIME_SERIES_IDENTIFIER_COLUMN,
        unavailable_at_forecast_columns=_TEST_TRAINING_UNAVAILABLE_AT_FORECAST_COLUMNS,
        available_at_forecast_columns=_TEST_TRAINING_AVAILABLE_AT_FORECAST_COLUMNS,
        forecast_horizon=_TEST_TRAINING_FORECAST_HORIZON,
        data_granularity_unit=_TEST_TRAINING_DATA_GRANULARITY_UNIT,
        data_granularity_count=_TEST_TRAINING_DATA_GRANULARITY_COUNT,
        weight_column=_TEST_TRAINING_WEIGHT_COLUMN,
        time_series_attribute_columns=_TEST_TRAINING_TIME_SERIES_ATTRIBUTE_COLUMNS,
        context_window=_TEST_TRAINING_CONTEXT_WINDOW,
        budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS,
        export_evaluated_data_items=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS,
        export_evaluated_data_items_bigquery_destination_uri=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_BIGQUERY_DESTINATION_URI,
        export_evaluated_data_items_override_destination=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION,
        quantiles=_TEST_TRAINING_QUANTILES,
        validation_options=_TEST_TRAINING_VALIDATION_OPTIONS,
        hierarchy_group_columns=_TEST_HIERARCHY_GROUP_COLUMNS,
        hierarchy_group_total_weight=_TEST_HIERARCHY_GROUP_TOTAL_WEIGHT,
        hierarchy_temporal_total_weight=_TEST_HIERARCHY_TEMPORAL_TOTAL_WEIGHT,
        hierarchy_group_temporal_total_weight=_TEST_HIERARCHY_GROUP_TEMPORAL_TOTAL_WEIGHT,
        window_column=_TEST_WINDOW_COLUMN,
        window_stride_length=_TEST_WINDOW_STRIDE_LENGTH,
        window_max_count=_TEST_WINDOW_MAX_COUNT,
        sync=sync,
        create_request_timeout=None,
        holiday_regions=_TEST_TRAINING_HOLIDAY_REGIONS,
    )

    # With sync=False, run() returns immediately; block until completion so
    # the create call below has happened.
    if not sync:
        model_from_job.wait()

    # Test that it defaults to the job display name
    true_managed_model = gca_model.Model(
        display_name=_TEST_DISPLAY_NAME,
        version_aliases=["default"],
    )

    true_input_data_config = gca_training_pipeline.InputDataConfig(
        dataset_id=mock_dataset_time_series.name,
    )

    # Expected pipeline: training_task_inputs carries
    # enableProbabilisticInference=True (see the constant's definition).
    true_training_pipeline = gca_training_pipeline.TrainingPipeline(
        display_name=_TEST_DISPLAY_NAME,
        training_task_definition=training_job._training_task_definition,
        training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_ADDITIONAL_EXPERIMENTS_PROBABILISTIC_INFERENCE,
        model_to_upload=true_managed_model,
        input_data_config=true_input_data_config,
    )

    mock_pipeline_service_create.assert_called_once_with(
        parent=initializer.global_config.common_location_path(),
        training_pipeline=true_training_pipeline,
        timeout=None,
    )

0 commit comments

Comments
 (0)