From e4efcf758736d815adab211907369c3f0167c2c4 Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Tue, 21 Nov 2023 18:06:23 +0000 Subject: [PATCH 1/2] feat: add the recent api method for ML component --- bigframes/ml/cluster.py | 2 ++ bigframes/ml/compose.py | 2 ++ bigframes/ml/decomposition.py | 2 ++ bigframes/ml/ensemble.py | 5 +++++ bigframes/ml/forecasting.py | 2 ++ bigframes/ml/imported.py | 3 +++ bigframes/ml/linear_model.py | 3 +++ bigframes/ml/llm.py | 4 +++- bigframes/ml/pipeline.py | 2 ++ bigframes/ml/preprocessing.py | 7 +++++++ 10 files changed, 31 insertions(+), 1 deletion(-) diff --git a/bigframes/ml/cluster.py b/bigframes/ml/cluster.py index c9f52ba0b6..6b79d356a2 100644 --- a/bigframes/ml/cluster.py +++ b/bigframes/ml/cluster.py @@ -22,11 +22,13 @@ from google.cloud import bigquery import bigframes +from bigframes.core import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd import third_party.bigframes_vendored.sklearn.cluster._kmeans +@log_adapter.class_logger class KMeans( base.UnsupervisedTrainablePredictor, third_party.bigframes_vendored.sklearn.cluster._kmeans.KMeans, diff --git a/bigframes/ml/compose.py b/bigframes/ml/compose.py index bf046ff691..ace876dd2d 100644 --- a/bigframes/ml/compose.py +++ b/bigframes/ml/compose.py @@ -22,6 +22,7 @@ from typing import List, Optional, Tuple, Union from bigframes import constants +from bigframes.core import log_adapter from bigframes.ml import base, core, globals, preprocessing, utils import bigframes.pandas as bpd import third_party.bigframes_vendored.sklearn.compose._column_transformer @@ -36,6 +37,7 @@ ] +@log_adapter.class_logger class ColumnTransformer( base.Transformer, third_party.bigframes_vendored.sklearn.compose._column_transformer.ColumnTransformer, diff --git a/bigframes/ml/decomposition.py b/bigframes/ml/decomposition.py index 7cda7a6993..ef777cb33a 100644 --- a/bigframes/ml/decomposition.py +++ b/bigframes/ml/decomposition.py @@ -22,11 +22,13 @@ from google.cloud import bigquery import bigframes +from bigframes.core import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd import third_party.bigframes_vendored.sklearn.decomposition._pca +@log_adapter.class_logger class PCA( base.UnsupervisedTrainablePredictor, third_party.bigframes_vendored.sklearn.decomposition._pca.PCA, diff --git a/bigframes/ml/ensemble.py b/bigframes/ml/ensemble.py index fcb3fe5343..1cc9fb3739 100644 --- a/bigframes/ml/ensemble.py +++ b/bigframes/ml/ensemble.py @@ -22,6 +22,7 @@ from google.cloud import bigquery import bigframes +from bigframes.core import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd import third_party.bigframes_vendored.sklearn.ensemble._forest @@ -47,6 +48,7 @@ } +@log_adapter.class_logger class XGBRegressor( base.SupervisedTrainablePredictor, third_party.bigframes_vendored.xgboost.sklearn.XGBRegressor, @@ -202,6 +204,7 @@ def to_gbq(self, model_name: str, replace: bool = False) -> XGBRegressor: return new_model.session.read_gbq_model(model_name) +@log_adapter.class_logger class XGBClassifier( base.SupervisedTrainablePredictor, third_party.bigframes_vendored.xgboost.sklearn.XGBClassifier, @@ -356,6 +359,7 @@ def to_gbq(self, model_name: str, replace: bool = False) -> XGBClassifier: return new_model.session.read_gbq_model(model_name) +@log_adapter.class_logger class RandomForestRegressor( base.SupervisedTrainablePredictor, third_party.bigframes_vendored.sklearn.ensemble._forest.RandomForestRegressor, @@ -521,6 +525,7 @@ def to_gbq(self, model_name: str, replace: bool = False) -> RandomForestRegresso return new_model.session.read_gbq_model(model_name) +@log_adapter.class_logger class RandomForestClassifier( base.SupervisedTrainablePredictor, third_party.bigframes_vendored.sklearn.ensemble._forest.RandomForestClassifier, diff --git a/bigframes/ml/forecasting.py b/bigframes/ml/forecasting.py index cf23854fa0..995201062b 100644 --- a/bigframes/ml/forecasting.py +++ b/bigframes/ml/forecasting.py @@ -21,10 +21,12 @@ from google.cloud import bigquery import bigframes +from bigframes.core import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd +@log_adapter.class_logger class ARIMAPlus(base.SupervisedTrainablePredictor): """Time Series ARIMA Plus model.""" diff --git a/bigframes/ml/imported.py b/bigframes/ml/imported.py index f6afc9aa38..4ae0a8ea4d 100644 --- a/bigframes/ml/imported.py +++ b/bigframes/ml/imported.py @@ -21,10 +21,12 @@ from google.cloud import bigquery import bigframes +from bigframes.core import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd +@log_adapter.class_logger class TensorFlowModel(base.Predictor): """Imported TensorFlow model. @@ -101,6 +103,7 @@ def to_gbq(self, model_name: str, replace: bool = False) -> TensorFlowModel: return new_model.session.read_gbq_model(model_name) +@log_adapter.class_logger class ONNXModel(base.Predictor): """Imported Open Neural Network Exchange (ONNX) model. diff --git a/bigframes/ml/linear_model.py b/bigframes/ml/linear_model.py index 433d9fbc38..5ee87b8850 100644 --- a/bigframes/ml/linear_model.py +++ b/bigframes/ml/linear_model.py @@ -23,6 +23,7 @@ import bigframes import bigframes.constants as constants +from bigframes.core import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd import third_party.bigframes_vendored.sklearn.linear_model._base @@ -46,6 +47,7 @@ } +@log_adapter.class_logger class LinearRegression( base.SupervisedTrainablePredictor, third_party.bigframes_vendored.sklearn.linear_model._base.LinearRegression, @@ -178,6 +180,7 @@ def to_gbq(self, model_name: str, replace: bool = False) -> LinearRegression: return new_model.session.read_gbq_model(model_name) +@log_adapter.class_logger class LogisticRegression( base.SupervisedTrainablePredictor, third_party.bigframes_vendored.sklearn.linear_model._logistic.LogisticRegression, diff --git a/bigframes/ml/llm.py b/bigframes/ml/llm.py index 78f3369daf..5beb54a32d 100644 --- a/bigframes/ml/llm.py +++ b/bigframes/ml/llm.py @@ -21,7 +21,7 @@ import bigframes from bigframes import clients, constants -from bigframes.core import blocks +from bigframes.core import blocks, log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd @@ -43,6 +43,7 @@ _ML_EMBED_TEXT_STATUS = "ml_embed_text_status" +@log_adapter.class_logger class PaLM2TextGenerator(base.Predictor): """PaLM2 text generator LLM model. @@ -200,6 +201,7 @@ def predict( return df +@log_adapter.class_logger class PaLM2TextEmbeddingGenerator(base.Predictor): """PaLM2 text embedding generator LLM model. diff --git a/bigframes/ml/pipeline.py b/bigframes/ml/pipeline.py index ad0b3fae11..4ae2bfe555 100644 --- a/bigframes/ml/pipeline.py +++ b/bigframes/ml/pipeline.py @@ -24,11 +24,13 @@ import bigframes import bigframes.constants as constants +from bigframes.core import log_adapter from bigframes.ml import base, compose, forecasting, loader, preprocessing, utils import bigframes.pandas as bpd import third_party.bigframes_vendored.sklearn.pipeline +@log_adapter.class_logger class Pipeline( base.BaseEstimator, third_party.bigframes_vendored.sklearn.pipeline.Pipeline, diff --git a/bigframes/ml/preprocessing.py b/bigframes/ml/preprocessing.py index 5f44d40218..a403e57e71 100644 --- a/bigframes/ml/preprocessing.py +++ b/bigframes/ml/preprocessing.py @@ -20,6 +20,7 @@ import typing from typing import Any, cast, List, Literal, Optional, Tuple, Union +from bigframes.core import log_adapter from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd import third_party.bigframes_vendored.sklearn.preprocessing._data @@ -28,6 +29,7 @@ import third_party.bigframes_vendored.sklearn.preprocessing._label +@log_adapter.class_logger class StandardScaler( base.Transformer, third_party.bigframes_vendored.sklearn.preprocessing._data.StandardScaler, @@ -111,6 +113,7 @@ def transform(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: ) +@log_adapter.class_logger class MaxAbsScaler( base.Transformer, third_party.bigframes_vendored.sklearn.preprocessing._data.MaxAbsScaler, @@ -194,6 +197,7 @@ def transform(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: ) +@log_adapter.class_logger class MinMaxScaler( base.Transformer, third_party.bigframes_vendored.sklearn.preprocessing._data.MinMaxScaler, @@ -277,6 +281,7 @@ def transform(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: ) +@log_adapter.class_logger class KBinsDiscretizer( base.Transformer, third_party.bigframes_vendored.sklearn.preprocessing._discretization.KBinsDiscretizer, @@ -395,6 +400,7 @@ def transform(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: ) +@log_adapter.class_logger class OneHotEncoder( base.Transformer, third_party.bigframes_vendored.sklearn.preprocessing._encoder.OneHotEncoder, @@ -524,6 +530,7 @@ def transform(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: ) +@log_adapter.class_logger class LabelEncoder( base.LabelTransformer, third_party.bigframes_vendored.sklearn.preprocessing._label.LabelEncoder, From 2d5a85b81afb82d9ec9acad41a91ae8c5bc9a5bb Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Tue, 21 Nov 2023 19:05:35 +0000 Subject: [PATCH 2/2] fix: fix unit test failure --- tests/unit/session/test_io_bigquery.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/unit/session/test_io_bigquery.py b/tests/unit/session/test_io_bigquery.py index e1481d3f05..c87835c412 100644 --- a/tests/unit/session/test_io_bigquery.py +++ b/tests/unit/session/test_io_bigquery.py @@ -59,6 +59,7 @@ def test_create_job_configs_labels_length_limit_not_met(): def test_create_job_configs_labels_log_adaptor_call_method_under_length_limit(): + log_adapter.get_and_reset_api_methods() cur_labels = { "bigframes-api": "read_pandas", "source": "bigquery-dataframes-temp", @@ -87,6 +88,7 @@ def test_create_job_configs_labels_log_adaptor_call_method_under_length_limit(): def test_create_job_configs_labels_length_limit_met_and_labels_is_none(): + log_adapter.get_and_reset_api_methods() df = bpd.DataFrame({"col1": [1, 2], "col2": [3, 4]}) # Test running methods more than the labels' length limit for i in range(66): @@ -102,6 +104,7 @@ def test_create_job_configs_labels_length_limit_met_and_labels_is_none(): def test_create_job_configs_labels_length_limit_met(): + log_adapter.get_and_reset_api_methods() cur_labels = { "bigframes-api": "read_pandas", "source": "bigquery-dataframes-temp",