45
45
46
46
47
47
@log_adapter .class_logger
48
- class ARIMAPlus (base .SupervisedTrainablePredictor ):
48
+ class ARIMAPlus (base .SupervisedTrainableWithIdColPredictor ):
49
49
"""Time Series ARIMA Plus model.
50
50
51
51
Args:
@@ -183,37 +183,53 @@ def _fit(
183
183
X : utils .ArrayType ,
184
184
y : utils .ArrayType ,
185
185
transforms : Optional [List [str ]] = None ,
186
- ):
186
+ id_col : Optional [utils .ArrayType ] = None ,
187
+ ) -> ARIMAPlus :
187
188
"""Fit the model to training data.
188
189
189
190
Args:
190
- X (bigframes.dataframe.DataFrame or bigframes.series.Series):
191
- A dataframe of training timestamp.
192
-
193
- y (bigframes.dataframe.DataFrame or bigframes.series.Series):
191
+ X (bigframes.dataframe.DataFrame or bigframes.series.Series,
192
+ or pandas.core.frame.DataFrame or pandas.core.series.Series):
193
+ A dataframe or series of training timestamps.
194
+ y (bigframes.dataframe.DataFrame, or bigframes.series.Series,
195
+ or pandas.core.frame.DataFrame, or pandas.core.series.Series):
194
196
Target values for training.
195
197
transforms (Optional[List[str]], default None):
196
198
Do not use. Internal param to be deprecated.
197
199
Use bigframes.ml.pipeline instead.
200
+ id_col (Optional[bigframes.dataframe.DataFrame]
201
+ or Optional[bigframes.series.Series]
202
+ or Optional[pandas.core.frame.DataFrame]
203
+ or Optional[pandas.core.series.Series]
204
+ or None, default None):
205
+ An optional dataframe or series of training id col.
198
206
199
207
Returns:
200
208
ARIMAPlus: Fitted estimator.
201
209
"""
202
210
X , y = utils .batch_convert_to_dataframe (X , y )
203
211
204
212
if X .columns .size != 1 :
205
- raise ValueError (
206
- "Time series timestamp input X must only contain 1 column."
207
- )
213
+ raise ValueError ("Time series timestamp input X contain at least 1 column." )
208
214
if y .columns .size != 1 :
209
215
raise ValueError ("Time series data input y must only contain 1 column." )
210
216
217
+ if id_col is not None :
218
+ (id_col ,) = utils .batch_convert_to_dataframe (id_col )
219
+
220
+ if id_col .columns .size != 1 :
221
+ raise ValueError (
222
+ "Time series id input id_col must only contain 1 column."
223
+ )
224
+
211
225
self ._bqml_model = self ._bqml_model_factory .create_time_series_model (
212
226
X ,
213
227
y ,
228
+ id_col = id_col ,
214
229
transforms = transforms ,
215
230
options = self ._bqml_options ,
216
231
)
232
+ return self
217
233
218
234
def predict (
219
235
self , X = None , * , horizon : int = 3 , confidence_level : float = 0.95
@@ -237,7 +253,7 @@ def predict(
237
253
238
254
Returns:
239
255
bigframes.dataframe.DataFrame: The predicted DataFrames. Which
240
- contains 2 columns: "forecast_timestamp" and "forecast_value".
256
+ contains 2 columns, "forecast_timestamp" and "forecast_value", plus an optional "id" column.
241
257
"""
242
258
if horizon < 1 or horizon > 1000 :
243
259
raise ValueError (f"horizon must be [1, 1000], but is { horizon } ." )
@@ -345,6 +361,7 @@ def score(
345
361
self ,
346
362
X : utils .ArrayType ,
347
363
y : utils .ArrayType ,
364
+ id_col : Optional [utils .ArrayType ] = None ,
348
365
) -> bpd .DataFrame :
349
366
"""Calculate evaluation metrics of the model.
350
367
@@ -355,13 +372,22 @@ def score(
355
372
for the outputs relevant to this model type.
356
373
357
374
Args:
358
- X (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series):
359
- A BigQuery DataFrame only contains 1 column as
375
+ X (bigframes.dataframe.DataFrame or bigframes.series.Series
376
+ or pandas.core.frame.DataFrame or pandas.core.series.Series):
377
+ A dataframe or series that contains only 1 column as
360
378
evaluation timestamp. The timestamp must be within the horizon
361
379
of the model, which by default is 1000 data points.
362
- y (bigframes.dataframe.DataFrame or bigframes.series.Series or pandas.core.frame.DataFrame or pandas.core.series.Series):
363
- A BigQuery DataFrame only contains 1 column as
380
+ y (bigframes.dataframe.DataFrame or bigframes.series.Series
381
+ or pandas.core.frame.DataFrame or pandas.core.series.Series):
382
+ A dataframe or series that contains only 1 column as
364
383
evaluation numeric values.
384
+ id_col (Optional[bigframes.dataframe.DataFrame]
385
+ or Optional[bigframes.series.Series]
386
+ or Optional[pandas.core.frame.DataFrame]
387
+ or Optional[pandas.core.series.Series]
388
+ or None, default None):
389
+ An optional dataframe or series that contains at least 1 column as
390
+ evaluation id column.
365
391
366
392
Returns:
367
393
bigframes.dataframe.DataFrame: A DataFrame as evaluation result.
@@ -371,6 +397,10 @@ def score(
371
397
X , y = utils .batch_convert_to_dataframe (X , y , session = self ._bqml_model .session )
372
398
373
399
input_data = X .join (y , how = "outer" )
400
+ if id_col is not None :
401
+ (id_col ,) = utils .batch_convert_to_dataframe (id_col )
402
+ input_data = input_data .join (id_col , how = "outer" )
403
+
374
404
return self ._bqml_model .evaluate (input_data )
375
405
376
406
def summary (
0 commit comments