# limitations under the License.
#
def make_parent(parent: str) -> str:
    """Return the resource parent path for an API request.

    Kept as a named helper so the sample has one place to adjust or
    validate the parent string if that ever becomes necessary.

    Args:
        parent: Full resource name of the parent, e.g.
            "projects/{project}/locations/{location}".

    Returns:
        The parent string, unchanged.
    """
    # The original body contained a no-op self-assignment
    # (`parent = parent`); return the value directly instead.
    return parent
def make_training_pipeline(
    display_name: str,
    dataset_id: str,
    model_display_name: str,
    target_column: str,
    time_series_identifier_column: str,
    time_column: str,
    static_columns: str,
    time_variant_past_only_columns: str,
    time_variant_past_and_future_columns: str,
    forecast_window_end: int,
) -> google.cloud.aiplatform_v1alpha1.types.training_pipeline.TrainingPipeline:
    """Build the TrainingPipeline spec for an AutoML forecasting job.

    Assembles the dict-form pipeline definition (converted to proto by the
    client library on submission) for the COVID-19 county-level forecasting
    sample dataset.

    Args:
        display_name: Display name for the training pipeline.
        dataset_id: ID of the managed dataset to train on.
        model_display_name: Display name for the uploaded model.
        target_column: Column the model is trained to predict.
        time_series_identifier_column: Column identifying each time series.
        time_column: Column holding each row's timestamp.
        static_columns: Columns constant over time for a series.
        time_variant_past_only_columns: Columns known only for the past.
        time_variant_past_and_future_columns: Columns known for past and
            future time steps.
        forecast_window_end: End of the forecast window, in `period` units.

    Returns:
        The training pipeline spec, as a dict accepted by
        `create_training_pipeline`.
    """
    # Columns used for training; "auto" lets the service infer each
    # column's transformation/data type.
    transformations = [
        {"auto": {"column_name": "date"}},
        {"auto": {"column_name": "state_name"}},
        {"auto": {"column_name": "county_fips_code"}},
        {"auto": {"column_name": "confirmed_cases"}},
        {"auto": {"column_name": "deaths"}},
    ]

    # Forecast granularity: one step per day.
    period = {"unit": "day", "quantity": 1}

    # The inputs should be formatted according to the
    # training_task_definition yaml file referenced below.
    training_task_inputs_dict = {
        # required inputs
        "targetColumn": target_column,
        "timeSeriesIdentifierColumn": time_series_identifier_column,
        "timeColumn": time_column,
        "transformations": transformations,
        "period": period,
        # Optimization objective; "minimize-rmse" is the service default.
        "optimizationObjective": "minimize-rmse",
        "trainBudgetMilliNodeHours": 8000,
        "staticColumns": static_columns,
        # NOTE(review): the entries below were hidden by a diff-hunk gap in
        # the source; reconstructed from the function parameters and the
        # schema's camelCase key convention — confirm against the
        # training_task_definition yaml schema.
        "timeVariantPastOnlyColumns": time_variant_past_only_columns,
        "timeVariantPastAndFutureColumns": time_variant_past_and_future_columns,
        "forecastWindowEnd": forecast_window_end,
    }

    # Convert the plain dict into a google.protobuf.Value.
    training_task_inputs = to_protobuf_value(training_task_inputs_dict)

    training_pipeline = {
        "display_name": display_name,
        "training_task_definition": "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_forecasting_1.0.0.yaml",
        "training_task_inputs": training_task_inputs,
        "input_data_config": {
            "dataset_id": dataset_id,
            # 80/10/10 train/validation/test split.
            "fraction_split": {
                "training_fraction": 0.8,
                "validation_fraction": 0.1,
                "test_fraction": 0.1,
            },
        },
        "model_to_upload": {"display_name": model_display_name},
    }

    return training_pipeline