docs: address more comments from technical writers to meet legal purposes (#571)

ashleyxuu · web-flow · commit 9084df369bc6 · 2024-04-04T17:28:16.000Z
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #<issue_number_goes_here> 🦕
diff --git a/bigframes/ml/base.py b/bigframes/ml/base.py
@@ -77,7 +77,7 @@ def fit_transform(self, x_train: Union[DataFrame, Series], y_train: Union[DataFr
     """
 
     def __repr__(self):
-        """Print the estimator's constructor with all non-default parameter values"""
+        """Print the estimator's constructor with all non-default parameter values."""
 
         # Estimator pretty printer adapted from Sklearn's, which is in turn an adaption of
         # the inbuilt pretty-printer in CPython
@@ -106,13 +106,13 @@ def predict(self, X):
     def register(self: _T, vertex_ai_model_id: Optional[str] = None) -> _T:
         """Register the model to Vertex AI.
 
-        After register, go to Google Cloud Console (https://console.cloud.google.com/vertex-ai/models)
+        After register, go to the Google Cloud console (https://console.cloud.google.com/vertex-ai/models)
         to manage the model registries.
         Refer to https://cloud.google.com/vertex-ai/docs/model-registry/introduction for more options.
 
         Args:
             vertex_ai_model_id (Optional[str], default None):
-                optional string id as model id in Vertex. If not set, will by default to 'bigframes_{bq_model_id}'.
+                Optional string id as model id in Vertex. If not set, will default to 'bigframes_{bq_model_id}'.
                 Vertex Ai model id will be truncated to 63 characters due to its limitation.
 
         Returns:
@@ -191,9 +191,9 @@ def to_gbq(self: _T, model_name: str, replace: bool = False) -> _T:
 
         Args:
             model_name (str):
-                the name of the model.
+                The name of the model.
             replace (bool, default False):
-                whether to replace if the model already exists. Default to False.
+                Whether to replace if the model already exists. Default to False.
 
         Returns:
             Saved transformer."""
diff --git a/bigframes/ml/ensemble.py b/bigframes/ml/ensemble.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""Ensemble models. This module is styled after Scikit-Learn's ensemble module:
+"""Ensemble models. This module is styled after scikit-learn's ensemble module:
 https://scikit-learn.org/stable/modules/ensemble.html"""
 
 from __future__ import annotations
@@ -190,9 +190,9 @@ def to_gbq(self, model_name: str, replace: bool = False) -> XGBRegressor:
 
         Args:
             model_name (str):
-                the name of the model.
+                The name of the model.
             replace (bool, default False):
-                whether to replace if the model already exists. Default to False.
+                Whether to replace if the model already exists. Default to False.
 
         Returns: saved model."""
         if not self._bqml_model:
@@ -343,9 +343,9 @@ def to_gbq(self, model_name: str, replace: bool = False) -> XGBClassifier:
 
         Args:
             model_name (str):
-                the name of the model.
+                The name of the model.
             replace (bool, default False):
-                whether to replace if the model already exists. Default to False.
+                Whether to replace if the model already exists. Default to False.
 
         Returns:
             XGBClassifier: saved model."""
@@ -506,9 +506,9 @@ def to_gbq(self, model_name: str, replace: bool = False) -> RandomForestRegresso
 
         Args:
             model_name (str):
-                the name of the model.
+                The name of the model.
             replace (bool, default False):
-                whether to replace if the model already exists. Default to False.
+                Whether to replace if the model already exists. Default to False.
 
         Returns:
             RandomForestRegressor: saved model."""
@@ -669,9 +669,9 @@ def to_gbq(self, model_name: str, replace: bool = False) -> RandomForestClassifi
 
         Args:
             model_name (str):
-                the name of the model.
+                The name of the model.
             replace (bool, default False):
-                whether to replace if the model already exists. Default to False.
+                Whether to replace if the model already exists. Default to False.
 
         Returns:
             RandomForestClassifier: saved model."""
diff --git a/bigframes/ml/forecasting.py b/bigframes/ml/forecasting.py
@@ -248,12 +248,12 @@ def predict(
                 an int value that specifies the number of time points to forecast.
                 The default value is 3, and the maximum value is 1000.
             confidence_level (float, default 0.95):
-                a float value that specifies percentage of the future values that fall in the prediction interval.
+                A float value that specifies percentage of the future values that fall in the prediction interval.
                 The valid input range is [0.0, 1.0).
 
         Returns:
             bigframes.dataframe.DataFrame: The predicted DataFrames. Which
-                contains 2 columns "forecast_timestamp" and "forecast_value".
+                contains 2 columns: "forecast_timestamp" and "forecast_value".
         """
         if horizon < 1 or horizon > 1000:
             raise ValueError(f"horizon must be [1, 1000], but is {horizon}.")
@@ -284,7 +284,7 @@ def detect_anomalies(
                 Identifies the custom threshold to use for anomaly detection. The value must be in the range [0, 1), with a default value of 0.95.
 
         Returns:
-            bigframes.dataframe.DataFrame: detected DataFrame."""
+            bigframes.dataframe.DataFrame: Detected DataFrame."""
         if anomaly_prob_threshold < 0.0 or anomaly_prob_threshold >= 1.0:
             raise ValueError(
                 f"anomaly_prob_threshold must be [0.0, 1.0), but is {anomaly_prob_threshold}."
diff --git a/bigframes/ml/metrics/_metrics.py b/bigframes/ml/metrics/_metrics.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 """Metrics functions for evaluating models. This module is styled after
-Scikit-Learn's metrics module: https://scikit-learn.org/stable/modules/metrics.html."""
+scikit-learn's metrics module: https://scikit-learn.org/stable/modules/metrics.html."""
 
 import inspect
 import typing
diff --git a/bigframes/ml/model_selection.py b/bigframes/ml/model_selection.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 """Functions for test/train split and model tuning. This module is styled after
-Scikit-Learn's model_selection module:
+scikit-learn's model_selection module:
 https://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection."""
 
 
@@ -51,7 +51,7 @@ def train_test_split(
         List[Union[bigframes.dataframe.DataFrame, bigframes.series.Series]]: A list of BigQuery DataFrames or Series.
     """
 
-    # TODO(garrettwu): Scikit-Learn throws an error when the dataframes don't have the same
+    # TODO(garrettwu): scikit-learn throws an error when the dataframes don't have the same
     # number of rows. We probably want to do something similar. Now the implementation is based
     # on index. We'll move to based on ordering first.
 
diff --git a/bigframes/ml/pipeline.py b/bigframes/ml/pipeline.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""For composing estimators together. This module is styled after Scikit-Learn's
+"""For composing estimators together. This module is styled after scikit-learn's
 pipeline module: https://scikit-learn.org/stable/modules/pipeline.html."""
 
 
diff --git a/bigframes/ml/preprocessing.py b/bigframes/ml/preprocessing.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 """Transformers that prepare data for other estimators. This module is styled after
-Scikit-Learn's preprocessing module: https://scikit-learn.org/stable/modules/preprocessing.html."""
+scikit-learn's preprocessing module: https://scikit-learn.org/stable/modules/preprocessing.html."""
 
 from __future__ import annotations
 
diff --git a/notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb b/notebooks/getting_started/ml_fundamentals_bq_dataframes.ipynb
@@ -1051,7 +1051,7 @@
       "source": [
         "## Estimators\n",
         "\n",
-        "Following Scikit-Learn, all learning components are \"estimators\"; objects that can learn from training data and then apply themselves to new data. Estimators share the following patterns:\n",
+        "Following scikit-learn, all learning components are \"estimators\"; objects that can learn from training data and then apply themselves to new data. Estimators share the following patterns:\n",
         "\n",
         "- a constructor that takes a list of parameters\n",
         "- a standard string representation that shows the class name and all non-default parameters, e.g. `LinearRegression(fit_intercept=False)`\n",
diff --git a/notebooks/regression/sklearn_linear_regression.ipynb b/notebooks/regression/sklearn_linear_regression.ipynb
@@ -7,7 +7,7 @@
    "source": [
     "# Using ML - SKLearn linear regression\n",
     "\n",
-    "This demo shows how we can implement a linear regression in BigQuery DataFrames ML, with API that is exactly compatible with Scikit-Learn."
+    "This demo shows how we can implement a linear regression in BigQuery DataFrames ML, with API that is exactly compatible with scikit-learn."
    ]
   },
   {
diff --git a/third_party/bigframes_vendored/pandas/core/config_init.py b/third_party/bigframes_vendored/pandas/core/config_init.py
@@ -59,18 +59,18 @@
     repr_mode (Literal[`head`, `deferred`]):
         `head`:
             Execute, download, and display results (limited to head) from
-            dataframe and series objects during repr.
+            DataFrame and Series objects during repr.
         `deferred`:
-            Prevent executions from repr statements in dataframe and series objects.
-            Instead estimated bytes processed will be shown. Dataframe and Series
+            Prevent executions from repr statements in DataFrame and Series objects.
+            Instead, estimated bytes processed will be shown. DataFrame and Series
             objects can still be computed with methods that explicitly execute and
             download results.
     max_info_columns (int):
         max_info_columns is used in DataFrame.info method to decide if
-        per column information will be printed.
+        information in each column will be printed.
     max_info_rows (int or None):
         df.info() will usually show null-counts for each column.
-        For large frames this can be quite slow. max_info_rows and max_info_cols
+        For large frames, this can be quite slow. max_info_rows and max_info_cols
         limit this null check only to frames with smaller dimensions than
         specified.
     memory_usage (bool):
diff --git a/third_party/bigframes_vendored/xgboost/sklearn.py b/third_party/bigframes_vendored/xgboost/sklearn.py
@@ -1,4 +1,4 @@
-"""Scikit-Learn Wrapper interface for XGBoost."""
+"""scikit-learn Wrapper interface for XGBoost."""
 
 from typing import Any
 

Original file line number	Diff line number	Diff line change
`@@ -7,7 +7,7 @@`
`7`	`7`	`"source": [`
`8`	`8`	`"# Using ML - SKLearn linear regression\n",`
`9`	`9`	`"\n",`
`10`		`- "This demo shows how we can implement a linear regression in BigQuery DataFrames ML, with API that is exactly compatible with Scikit-Learn."`
	`10`	`+ "This demo shows how we can implement a linear regression in BigQuery DataFrames ML, with API that is exactly compatible with scikit-learn."`
`11`	`11`	`]`
`12`	`12`	`},`
`13`	`13`	`{`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-"""Scikit-Learn Wrapper interface for XGBoost."""`
	`1`	`+"""scikit-learn Wrapper interface for XGBoost."""`
`2`	`2`
`3`	`3`	`from typing import Any`
`4`	`4`