googleapis · gcf-merge-on-green · Apr 4, 2024 · Apr 2, 2024 · Apr 4, 2024
@@ -77,7 +77,7 @@ def fit_transform(self, x_train: Union[DataFrame, Series], y_train: Union[DataFr
     """
 
     def __repr__(self):
-        """Print the estimator's constructor with all non-default parameter values"""
+        """Print the estimator's constructor with all non-default parameter values."""
 
         # Estimator pretty printer adapted from Sklearn's, which is in turn an adaption of
         # the inbuilt pretty-printer in CPython
@@ -106,13 +106,13 @@ def predict(self, X):
     def register(self: _T, vertex_ai_model_id: Optional[str] = None) -> _T:
         """Register the model to Vertex AI.
 
-        After register, go to Google Cloud Console (https://console.cloud.google.com/vertex-ai/models)
+        After register, go to the Google Cloud console (https://console.cloud.google.com/vertex-ai/models)
         to manage the model registries.
         Refer to https://cloud.google.com/vertex-ai/docs/model-registry/introduction for more options.
 
         Args:
             vertex_ai_model_id (Optional[str], default None):
-                optional string id as model id in Vertex. If not set, will by default to 'bigframes_{bq_model_id}'.
+                Optional string id as model id in Vertex. If not set, will default to 'bigframes_{bq_model_id}'.
                 Vertex AI model id will be truncated to 63 characters due to its limitation.
 
         Returns:
@@ -191,9 +191,9 @@ def to_gbq(self: _T, model_name: str, replace: bool = False) -> _T:
 
         Args:
             model_name (str):
-                the name of the model.
+                The name of the model.
             replace (bool, default False):
-                whether to replace if the model already exists. Default to False.
+                Whether to replace if the model already exists. Default to False.
 
         Returns:
             Saved transformer."""

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""Ensemble models. This module is styled after Scikit-Learn's ensemble module:
+"""Ensemble models. This module is styled after scikit-learn's ensemble module:
 https://scikit-learn.org/stable/modules/ensemble.html"""
 
 from __future__ import annotations
@@ -190,9 +190,9 @@ def to_gbq(self, model_name: str, replace: bool = False) -> XGBRegressor:
 
         Args:
             model_name (str):
-                the name of the model.
+                The name of the model.
             replace (bool, default False):
-                whether to replace if the model already exists. Default to False.
+                Whether to replace if the model already exists. Default to False.
 
         Returns: saved model."""
         if not self._bqml_model:
@@ -343,9 +343,9 @@ def to_gbq(self, model_name: str, replace: bool = False) -> XGBClassifier:
 
         Args:
             model_name (str):
-                the name of the model.
+                The name of the model.
             replace (bool, default False):
-                whether to replace if the model already exists. Default to False.
+                Whether to replace if the model already exists. Default to False.
 
         Returns:
             XGBClassifier: saved model."""
@@ -506,9 +506,9 @@ def to_gbq(self, model_name: str, replace: bool = False) -> RandomForestRegresso
 
         Args:
             model_name (str):
-                the name of the model.
+                The name of the model.
             replace (bool, default False):
-                whether to replace if the model already exists. Default to False.
+                Whether to replace if the model already exists. Default to False.
 
         Returns:
             RandomForestRegressor: saved model."""
@@ -669,9 +669,9 @@ def to_gbq(self, model_name: str, replace: bool = False) -> RandomForestClassifi
 
         Args:
             model_name (str):
-                the name of the model.
+                The name of the model.
             replace (bool, default False):
-                whether to replace if the model already exists. Default to False.
+                Whether to replace if the model already exists. Default to False.
 
         Returns:
             RandomForestClassifier: saved model."""

@@ -248,12 +248,12 @@ def predict(
                 an int value that specifies the number of time points to forecast.
                 The default value is 3, and the maximum value is 1000.
             confidence_level (float, default 0.95):
-                a float value that specifies percentage of the future values that fall in the prediction interval.
+                A float value that specifies percentage of the future values that fall in the prediction interval.
                 The valid input range is [0.0, 1.0).
 
         Returns:
             bigframes.dataframe.DataFrame: The predicted DataFrame, which
-                contains 2 columns "forecast_timestamp" and "forecast_value".
+                contains 2 columns: "forecast_timestamp" and "forecast_value".
         """
         if horizon < 1 or horizon > 1000:
             raise ValueError(f"horizon must be [1, 1000], but is {horizon}.")
@@ -284,7 +284,7 @@ def detect_anomalies(
                 Identifies the custom threshold to use for anomaly detection. The value must be in the range [0, 1), with a default value of 0.95.
 
         Returns:
-            bigframes.dataframe.DataFrame: detected DataFrame."""
+            bigframes.dataframe.DataFrame: Detected DataFrame."""
         if anomaly_prob_threshold < 0.0 or anomaly_prob_threshold >= 1.0:
             raise ValueError(
                 f"anomaly_prob_threshold must be [0.0, 1.0), but is {anomaly_prob_threshold}."

@@ -13,7 +13,7 @@
 # limitations under the License.
 
 """Metrics functions for evaluating models. This module is styled after
-Scikit-Learn's metrics module: https://scikit-learn.org/stable/modules/metrics.html."""
+scikit-learn's metrics module: https://scikit-learn.org/stable/modules/metrics.html."""
 
 import inspect
 import typing

@@ -13,7 +13,7 @@
 # limitations under the License.
 
 """Functions for test/train split and model tuning. This module is styled after
-Scikit-Learn's model_selection module:
+scikit-learn's model_selection module:
 https://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection."""
 
 
@@ -51,7 +51,7 @@ def train_test_split(
         List[Union[bigframes.dataframe.DataFrame, bigframes.series.Series]]: A list of BigQuery DataFrames or Series.
     """
 
-    # TODO(garrettwu): Scikit-Learn throws an error when the dataframes don't have the same
+    # TODO(garrettwu): scikit-learn throws an error when the dataframes don't have the same
     # number of rows. We probably want to do something similar. Now the implementation is based
     # on index. We'll move to based on ordering first.
 

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""For composing estimators together. This module is styled after Scikit-Learn's
+"""For composing estimators together. This module is styled after scikit-learn's
 pipeline module: https://scikit-learn.org/stable/modules/pipeline.html."""
 
 

@@ -13,7 +13,7 @@
 # limitations under the License.
 
 """Transformers that prepare data for other estimators. This module is styled after
-Scikit-Learn's preprocessing module: https://scikit-learn.org/stable/modules/preprocessing.html."""
+scikit-learn's preprocessing module: https://scikit-learn.org/stable/modules/preprocessing.html."""
 
 from __future__ import annotations
 

@@ -1051,7 +1051,7 @@
       "source": [
         "## Estimators\n",
         "\n",
-        "Following Scikit-Learn, all learning components are \"estimators\"; objects that can learn from training data and then apply themselves to new data. Estimators share the following patterns:\n",
+        "Following scikit-learn, all learning components are \"estimators\": objects that can learn from training data and then apply themselves to new data. Estimators share the following patterns:\n",
         "\n",
         "- a constructor that takes a list of parameters\n",
         "- a standard string representation that shows the class name and all non-default parameters, e.g. `LinearRegression(fit_intercept=False)`\n",

@@ -7,7 +7,7 @@
    "source": [
     "# Using ML - SKLearn linear regression\n",
     "\n",
-    "This demo shows how we can implement a linear regression in BigQuery DataFrames ML, with API that is exactly compatible with Scikit-Learn."
+    "This demo shows how we can implement a linear regression in BigQuery DataFrames ML, with an API that is exactly compatible with scikit-learn."
    ]
   },
   {

@@ -59,18 +59,18 @@
     repr_mode (Literal[`head`, `deferred`]):
         `head`:
             Execute, download, and display results (limited to head) from
-            dataframe and series objects during repr.
+            DataFrame and Series objects during repr.
         `deferred`:
-            Prevent executions from repr statements in dataframe and series objects.
-            Instead estimated bytes processed will be shown. Dataframe and Series
+            Prevent executions from repr statements in DataFrame and Series objects.
+            Instead, estimated bytes processed will be shown. DataFrame and Series
             objects can still be computed with methods that explicitly execute and
             download results.
     max_info_columns (int):
         max_info_columns is used in DataFrame.info method to decide if
-        per column information will be printed.
+        information in each column will be printed.
     max_info_rows (int or None):
         df.info() will usually show null-counts for each column.
-        For large frames this can be quite slow. max_info_rows and max_info_cols
+        For large frames, this can be quite slow. max_info_rows and max_info_cols
         limit this null check only to frames with smaller dimensions than
         specified.
     memory_usage (bool):

@@ -1,4 +1,4 @@
-"""Scikit-Learn Wrapper interface for XGBoost."""
+"""scikit-learn Wrapper interface for XGBoost."""
 
 from typing import Any