From 66d1839c3e9a3011c7feb13a59d966b64cf8313f Mon Sep 17 00:00:00 2001
From: Ashley Xu <139821907+ashleyxuu@users.noreply.github.com>
Date: Thu, 30 Nov 2023 11:39:47 -0800
Subject: [PATCH 01/20] fix: update the llm_kmeans notebook (#247)

---
 notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb b/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb
index 5f74046fc0..69efb11018 100644
--- a/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb
+++ b/notebooks/generative_ai/bq_dataframes_llm_kmeans.ipynb
@@ -371,7 +371,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "We now have the complaints and their text embeddings as two columns in our combined_df. Recall that complaints with numerically similar text embeddings should have similar meanings semantically. We will now group similar complaints together."
+        "We now have the complaints and their text embeddings as two columns in our predicted_embeddings DataFrame."
       ]
     },
     {
@@ -426,7 +426,7 @@
       "cell_type": "markdown",
       "metadata": {},
       "source": [
-        "Our dataframe combined_clustered_result now has three complaint columns: the content, their text embeddings, and an ID from 1-10 (inclusive) indicating which semantically similar group they belong to."
+        "Our DataFrame clustered_result now has an additional column that includes an ID from 1-10 (inclusive) indicating which semantically similar group they belong to."
       ]
     },
     {
@@ -501,7 +501,7 @@
       "source": [
         "# The plain English request we will make of PaLM 2\n",
         "prompt = (\n",
-        "    \"Please highlight the most obvious difference between\"\n",
+        "    \"Please highlight the most obvious difference between \"\n",
         "    \"the two lists of comments:\\n\" + prompt1 + prompt2\n",
         ")\n",
         "print(prompt)"

From 1737acc51b4fdd9b385bbf91a758efd2e7ead11a Mon Sep 17 00:00:00 2001
From: TrevorBergeron <tbergeron@google.com>
Date: Thu, 30 Nov 2023 20:22:16 -0800
Subject: [PATCH 02/20] feat: add DataFrame.select_dtypes method (#242)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
---
 bigframes/dataframe.py                        | 13 +++++++
 tests/system/small/test_dataframe.py          | 20 +++++++++++
 .../bigframes_vendored/pandas/core/frame.py   | 36 +++++++++++++++++++
 3 files changed, 69 insertions(+)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index f7796291b9..c6b28f1b01 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -434,6 +434,19 @@ def info(
             # TODO: Convert to different units (kb, mb, etc.)
             obuf.write(f"memory usage: {self.memory_usage().sum()} bytes\n")
 
+    def select_dtypes(self, include=None, exclude=None) -> DataFrame:
+        # Create empty pandas dataframe with same schema and then leverage actual pandas implementation
+        as_pandas = pandas.DataFrame(
+            {
+                col_id: pandas.Series([], dtype=dtype)
+                for col_id, dtype in zip(self._block.value_columns, self._block.dtypes)
+            }
+        )
+        selected_columns = tuple(
+            as_pandas.select_dtypes(include=include, exclude=exclude).columns
+        )
+        return DataFrame(self._block.select_columns(selected_columns))
+
     def _set_internal_query_job(self, query_job: bigquery.QueryJob):
         self._query_job = query_job
 
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 9744d3f6e9..5940df590c 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -297,6 +297,26 @@ def test_df_info(scalars_dfs):
     assert expected == bf_result.getvalue()
 
 
+@pytest.mark.parametrize(
+    ("include", "exclude"),
+    [
+        ("Int64", None),
+        (["int"], None),
+        ("number", None),
+        ([pd.Int64Dtype(), pd.BooleanDtype()], None),
+        (None, [pd.Int64Dtype(), pd.BooleanDtype()]),
+        ("Int64", ["boolean"]),
+    ],
+)
+def test_select_dtypes(scalars_dfs, include, exclude):
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    pd_result = scalars_pandas_df.select_dtypes(include=include, exclude=exclude)
+    bf_result = scalars_df.select_dtypes(include=include, exclude=exclude).to_pandas()
+
+    pd.testing.assert_frame_equal(pd_result, bf_result)
+
+
 def test_drop_index(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs
 
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 099d8b8e66..3bd90be2e4 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -158,6 +158,42 @@ def memory_usage(self, index: bool = True):
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
+    def select_dtypes(self, include=None, exclude=None) -> DataFrame:
+        """
+        Return a subset of the DataFrame's columns based on the column dtypes.
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({'col1': [1, 2], 'col2': ["hello", "world"], 'col3': [True, False]})
+            >>> df.select_dtypes(include=['Int64'])
+               col1
+            0     1
+            1     2
+            <BLANKLINE>
+            [2 rows x 1 columns]
+
+            >>> df.select_dtypes(exclude=['Int64'])
+                col2   col3
+            0  hello   True
+            1  world  False
+            <BLANKLINE>
+            [2 rows x 2 columns]
+
+
+        Args:
+            include (scalar or list-like):
+                A selection of dtypes or strings to be included.
+            exclude (scalar or list-like):
+                A selection of dtypes or strings to be excluded.
+
+        Returns:
+            DataFrame: The subset of the frame including the dtypes in ``include`` and excluding the dtypes in ``exclude``.
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
     # ----------------------------------------------------------------------
     # IO methods (to / from other formats)
     def to_numpy(self, dtype=None, copy=False, na_value=None, **kwargs) -> np.ndarray:

From 0523a31fa0b589f88afe0ad5b447634409ddeb86 Mon Sep 17 00:00:00 2001
From: Huan Chen <142538604+Genesis929@users.noreply.github.com>
Date: Fri, 1 Dec 2023 10:06:23 -0800
Subject: [PATCH 03/20] docs: add examples for dataframe.cummin,
 dataframe.cummax, dataframe.cumsum, dataframe.cumprod (#243)

---
 .../bigframes_vendored/pandas/core/frame.py   | 96 ++++++++++++++++++-
 1 file changed, 92 insertions(+), 4 deletions(-)

diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 3bd90be2e4..6b5a580e99 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -3367,40 +3367,128 @@ def nunique(self):
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
     def cummin(self) -> DataFrame:
-        """Return cumulative minimum over a DataFrame axis.
+        """Return cumulative minimum over columns.
 
         Returns a DataFrame of the same size containing the cumulative minimum.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [1, 2, 3]})
+            >>> df
+               A  B
+            0  3  1
+            1  1  2
+            2  2  3
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
+            >>> df.cummin()
+               A  B
+            0  3  1
+            1  1  1
+            2  1  1
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
         Returns:
             bigframes.dataframe.DataFrame: Return cumulative minimum of DataFrame.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
     def cummax(self) -> DataFrame:
-        """Return cumulative maximum over a DataFrame axis.
+        """Return cumulative maximum over columns.
 
         Returns a DataFrame of the same size containing the cumulative maximum.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [1, 2, 3]})
+            >>> df
+               A  B
+            0  3  1
+            1  1  2
+            2  2  3
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
+            >>> df.cummax()
+               A  B
+            0  3  1
+            1  3  2
+            2  3  3
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
         Returns:
             bigframes.dataframe.DataFrame: Return cumulative maximum of DataFrame.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
     def cumsum(self) -> DataFrame:
-        """Return cumulative sum over a DataFrame axis.
+        """Return cumulative sum over columns.
 
         Returns a DataFrame of the same size containing the cumulative sum.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [1, 2, 3]})
+            >>> df
+               A  B
+            0  3  1
+            1  1  2
+            2  2  3
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
+            >>> df.cumsum()
+               A  B
+            0  3  1
+            1  4  3
+            2  6  6
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
         Returns:
             bigframes.dataframe.DataFrame: Return cumulative sum of DataFrame.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
     def cumprod(self) -> DataFrame:
-        """Return cumulative product over a DataFrame axis.
+        """Return cumulative product over columns.
 
         Returns a DataFrame of the same size containing the cumulative product.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [1, 2, 3]})
+            >>> df
+               A  B
+            0  3  1
+            1  1  2
+            2  2  3
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
+            >>> df.cumprod()
+               A  B
+            0  3  1
+            1  3  2
+            2  6  6
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
         Returns:
             bigframes.dataframe.DataFrame: Return cumulative product of DataFrame.
         """

From 8d81e24677613dcf4d275c27a327384b8c17bc85 Mon Sep 17 00:00:00 2001
From: TrevorBergeron <tbergeron@google.com>
Date: Fri, 1 Dec 2023 11:58:10 -0800
Subject: [PATCH 04/20] feat: add DataFrame from_dict and from_records methods
 (#244)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
---
 bigframes/dataframe.py                        | 26 +++++++
 tests/system/small/test_dataframe.py          | 48 ++++++++++++
 .../bigframes_vendored/pandas/core/frame.py   | 75 +++++++++++++++++++
 3 files changed, 149 insertions(+)

diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py
index c6b28f1b01..3b0fd7008a 100644
--- a/bigframes/dataframe.py
+++ b/bigframes/dataframe.py
@@ -2381,6 +2381,32 @@ def _split(
         blocks = self._block._split(ns=ns, fracs=fracs, random_state=random_state)
         return [DataFrame(block) for block in blocks]
 
+    @classmethod
+    def from_dict(
+        cls,
+        data: dict,
+        orient: str = "columns",
+        dtype=None,
+        columns=None,
+    ) -> DataFrame:
+        return cls(pandas.DataFrame.from_dict(data, orient, dtype, columns))  # type: ignore
+
+    @classmethod
+    def from_records(
+        cls,
+        data,
+        index=None,
+        exclude=None,
+        columns=None,
+        coerce_float: bool = False,
+        nrows: int | None = None,
+    ) -> DataFrame:
+        return cls(
+            pandas.DataFrame.from_records(
+                data, index, exclude, columns, coerce_float, nrows
+            )
+        )
+
     def to_csv(
         self, path_or_buf: str, sep=",", *, header: bool = True, index: bool = True
     ) -> None:
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 5940df590c..9318a5d9d2 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -3309,6 +3309,54 @@ def test_df_duplicated(scalars_df_index, scalars_pandas_df_index, keep, subset):
     pd.testing.assert_series_equal(pd_series, bf_series, check_dtype=False)
 
 
+def test_df_from_dict_columns_orient():
+    data = {"a": [1, 2], "b": [3.3, 2.4]}
+    bf_result = dataframe.DataFrame.from_dict(data, orient="columns").to_pandas()
+    pd_result = pd.DataFrame.from_dict(data, orient="columns")
+    assert_pandas_df_equal(
+        pd_result, bf_result, check_dtype=False, check_index_type=False
+    )
+
+
+def test_df_from_dict_index_orient():
+    data = {"a": [1, 2], "b": [3.3, 2.4]}
+    bf_result = dataframe.DataFrame.from_dict(
+        data, orient="index", columns=["col1", "col2"]
+    ).to_pandas()
+    pd_result = pd.DataFrame.from_dict(data, orient="index", columns=["col1", "col2"])
+    assert_pandas_df_equal(
+        pd_result, bf_result, check_dtype=False, check_index_type=False
+    )
+
+
+def test_df_from_dict_tight_orient():
+    data = {
+        "index": [("i1", "i2"), ("i3", "i4")],
+        "columns": ["col1", "col2"],
+        "data": [[1, 2.6], [3, 4.5]],
+        "index_names": ["in1", "in2"],
+        "column_names": ["column_axis"],
+    }
+
+    bf_result = dataframe.DataFrame.from_dict(data, orient="tight").to_pandas()
+    pd_result = pd.DataFrame.from_dict(data, orient="tight")
+    assert_pandas_df_equal(
+        pd_result, bf_result, check_dtype=False, check_index_type=False
+    )
+
+
+def test_df_from_records():
+    records = ((1, "a"), (2.5, "b"), (3.3, "c"), (4.9, "d"))
+
+    bf_result = dataframe.DataFrame.from_records(
+        records, columns=["c1", "c2"]
+    ).to_pandas()
+    pd_result = pd.DataFrame.from_records(records, columns=["c1", "c2"])
+    assert_pandas_df_equal(
+        pd_result, bf_result, check_dtype=False, check_index_type=False
+    )
+
+
 def test_df_to_dict(scalars_df_index, scalars_pandas_df_index):
     unsupported = ["numeric_col"]  # formatted differently
     bf_result = scalars_df_index.drop(columns=unsupported).to_dict()
diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 6b5a580e99..08fe8e2de0 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -196,6 +196,81 @@ def select_dtypes(self, include=None, exclude=None) -> DataFrame:
 
     # ----------------------------------------------------------------------
     # IO methods (to / from other formats)
+    @classmethod
+    def from_dict(
+        cls,
+        data: dict,
+        orient="columns",
+        dtype=None,
+        columns=None,
+    ) -> DataFrame:
+        """
+        Construct DataFrame from dict of array-like or dicts.
+
+        Creates DataFrame object from dictionary by columns or by index
+        allowing dtype specification.
+
+        Args:
+            data (dict):
+                Of the form {field : array-like} or {field : dict}.
+            orient ({'columns', 'index', 'tight'}, default 'columns'):
+                The "orientation" of the data. If the keys of the passed dict
+                should be the columns of the resulting DataFrame, pass 'columns'
+                (default). Otherwise if the keys should be rows, pass 'index'.
+                If 'tight', assume a dict with keys ['index', 'columns', 'data',
+                'index_names', 'column_names'].
+            dtype (dtype, default None):
+                Data type to force after DataFrame construction, otherwise infer.
+            columns (list, default None):
+                Column labels to use when ``orient='index'``. Raises a ValueError
+                if used with ``orient='columns'`` or ``orient='tight'``.
+
+        Returns:
+            DataFrame
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    @classmethod
+    def from_records(
+        cls,
+        data,
+        index=None,
+        exclude=None,
+        columns=None,
+        coerce_float: bool = False,
+        nrows: int | None = None,
+    ) -> DataFrame:
+        """
+        Convert structured or record ndarray to DataFrame.
+
+        Creates a DataFrame object from a structured ndarray, sequence of
+        tuples or dicts, or DataFrame.
+
+        Args:
+            data (structured ndarray, sequence of tuples or dicts):
+                Structured input data.
+            index (str, list of fields, array-like):
+                Field of array to use as the index, alternately a specific set of
+                input labels to use.
+            exclude (sequence, default None):
+                Columns or fields to exclude.
+            columns (sequence, default None):
+                Column names to use. If the passed data do not have names
+                associated with them, this argument provides names for the
+                columns. Otherwise this argument indicates the order of the columns
+                in the result (any names not found in the data will become all-NA
+                columns).
+            coerce_float (bool, default False):
+                Attempt to convert values of non-string, non-numeric objects (like
+                decimal.Decimal) to floating point, useful for SQL result sets.
+            nrows (int, default None):
+                Number of rows to read if data is an iterator.
+
+        Returns:
+            DataFrame
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
     def to_numpy(self, dtype=None, copy=False, na_value=None, **kwargs) -> np.ndarray:
         """
         Convert the DataFrame to a NumPy array.

From c2829e3d976a43c53251c9288266e3a8ec5304c5 Mon Sep 17 00:00:00 2001
From: Ashley Xu <139821907+ashleyxuu@users.noreply.github.com>
Date: Mon, 4 Dec 2023 16:18:16 -0800
Subject: [PATCH 05/20] docs: correct the params rendering for `ml.remote` and
 `ml.ensemble` modules (#248)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [x] Appropriate docs were updated (if necessary)
  - `ensemble.RandomForestClassifier`: https://screenshot.googleplex.com/4Q88xgdm5hkaYXu
  - `ensemble.RandomForestRegressor`: https://screenshot.googleplex.com/3CU6pJBjYHQvnDo
  - `remote.VertexAIModel`: https://screenshot.googleplex.com/8SL2max6GfPMwFe

Fixes internal issue 314150462 🦕
---
 bigframes/ml/remote.py                        |  8 +--
 docs/templates/toc.yml                        | 12 ++--
 .../sklearn/ensemble/_forest.py               | 72 +++++++++----------
 3 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/bigframes/ml/remote.py b/bigframes/ml/remote.py
index d4c34bbd0d..8da073802d 100644
--- a/bigframes/ml/remote.py
+++ b/bigframes/ml/remote.py
@@ -47,10 +47,10 @@ class VertexAIModel(base.BaseEstimator):
     Args:
         endpoint (str):
             Vertex AI https endpoint.
-        input ({column_name: column_type}):
-            Input schema. Supported types are "bool", "string", "int64", "float64", "array<bool>", "array<string>", "array<int64>", "array<float64>".
-        output ({column_name: column_type}):
-            Output label schema. Supported the same types as the input.
+        input (Mapping):
+            Input schema: `{column_name: column_type}`. Supported types are "bool", "string", "int64", "float64", "array<bool>", "array<string>", "array<int64>", "array<float64>".
+        output (Mapping):
+            Output label schema: `{column_name: column_type}`. Supports the same types as the input.
         session (bigframes.Session or None):
             BQ session to create the model. If None, use the global default session.
         connection_name (str or None):
diff --git a/docs/templates/toc.yml b/docs/templates/toc.yml
index 58ac1c0efe..b680a5fc1a 100644
--- a/docs/templates/toc.yml
+++ b/docs/templates/toc.yml
@@ -108,12 +108,6 @@
       - name: PaLM2TextEmbeddingGenerator
         uid: bigframes.ml.llm.PaLM2TextEmbeddingGenerator
       name: llm
-    - items:
-      - name: Overview
-        uid: bigframes.ml.remote
-      - name: VertexAIModel
-        uid: bigframes.ml.remote.VertexAIModel
-      name: remote
     - items:
       - name: metrics
         uid: bigframes.ml.metrics
@@ -144,6 +138,12 @@
       - name: OneHotEncoder
         uid: bigframes.ml.preprocessing.OneHotEncoder
       name: preprocessing
+    - items:
+      - name: Overview
+        uid: bigframes.ml.remote
+      - name: VertexAIModel
+        uid: bigframes.ml.remote.VertexAIModel
+      name: remote
     name: bigframes.ml
   name: BigQuery DataFrames
   status: beta
diff --git a/third_party/bigframes_vendored/sklearn/ensemble/_forest.py b/third_party/bigframes_vendored/sklearn/ensemble/_forest.py
index 6be41bf9aa..63c62274fd 100644
--- a/third_party/bigframes_vendored/sklearn/ensemble/_forest.py
+++ b/third_party/bigframes_vendored/sklearn/ensemble/_forest.py
@@ -47,16 +47,16 @@ def fit(self, X, y):
         """Build a forest of trees from the training set (X, y).
 
         Args:
-            X:
+            X (bigframes.dataframe.DataFrame or bigframes.series.Series):
                 Series or DataFrame of shape (n_samples, n_features). Training data.
 
-            y:
+            y (bigframes.dataframe.DataFrame or bigframes.series.Series):
                 Series or DataFrame of shape (n_samples,) or (n_samples, n_targets).
                 Target values. Will be cast to X's dtype if necessary.
 
 
         Returns:
-            Fitted Estimator.
+            ForestModel: Fitted Estimator.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -73,12 +73,12 @@ def predict(self, X):
         mean predicted regression targets of the trees in the forest.
 
         Args:
-            X:
+            X (bigframes.dataframe.DataFrame or bigframes.series.Series):
                 Series or DataFrame of shape (n_samples, n_features). The data matrix for
                 which we want to get the predictions.
 
         Returns:
-            The predicted values.
+            bigframes.dataframe.DataFrame: The predicted values.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -91,38 +91,38 @@ class RandomForestRegressor(ForestRegressor):
     to improve the predictive accuracy and control over-fitting.
 
     Args:
-        num_parallel_tree: Optional[int]
+        num_parallel_tree (Optional[int]):
             Number of parallel trees constructed during each iteration. Default to 100. Minimum value is 2.
-        tree_method: Optional[str]
+        tree_method (Optional[str]):
             Specify which tree method to use. Default to "auto". If this parameter is set to
             default, XGBoost will choose the most conservative option available. Possible values: ""exact", "approx",
             "hist".
-        min_child_weight : Optional[float]
+        min_child_weight (Optional[float]):
             Minimum sum of instance weight(hessian) needed in a child. Default to 1.
-        colsample_bytree : Optional[float]
+        colsample_bytree (Optional[float]):
             Subsample ratio of columns when constructing each tree. Default to 1.0. The value should be between 0 and 1.
-        colsample_bylevel : Optional[float]
+        colsample_bylevel (Optional[float]):
             Subsample ratio of columns for each level. Default to 1.0. The value should be between 0 and 1.
-        colsample_bynode : Optional[float]
+        colsample_bynode (Optional[float]):
             Subsample ratio of columns for each split. Default to 0.8. The value should be between 0 and 1.
-        gamma : Optional[float]
+        gamma (Optional[float]):
             (min_split_loss) Minimum loss reduction required to make a further partition on a
             leaf node of the tree. Default to 0.0.
-        max_depth :  Optional[int]
+        max_depth (Optional[int]):
             Maximum tree depth for base learners. Default to 15. The value should be greater than 0 and less than 1.
-        subsample : Optional[float]
+        subsample (Optional[float]):
             Subsample ratio of the training instance. Default to 0.8. The value should be greater than 0 and less than 1.
-        reg_alpha : Optional[float]
+        reg_alpha (Optional[float]):
             L1 regularization term on weights (xgb's alpha). Default to 0.0.
-        reg_lambda : Optional[float]
+        reg_lambda (Optional[float]):
             L2 regularization term on weights (xgb's lambda). Default to 1.0.
-        early_stop: Optional[bool]
+        early_stop (Optional[bool]):
             Whether training should stop after the first iteration. Default to True.
-        min_rel_progress: Optional[float]
+        min_rel_progress (Optional[float]):
             Minimum relative loss improvement necessary to continue training when early_stop is set to True. Default to 0.01.
-        enable_global_explain: Optional[bool]
+        enable_global_explain (Optional[bool]):
             Whether to compute global explanations using explainable AI to evaluate global feature importance to the model. Default to False.
-        xgboost_version: Optional[str]
+        xgboost_version (Optional[str]):
             Specifies the Xgboost version for model training.  Default to "0.9". Possible values: "0.9", "1.1".
     """
 
@@ -144,7 +144,7 @@ def predict(self, X):
                 which we want to get the predictions.
 
         Returns:
-            The predicted values.
+            bigframes.dataframe.DataFrame: The predicted values.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -158,37 +158,37 @@ class RandomForestClassifier(ForestClassifier):
     improve the predictive accuracy and control over-fitting.
 
     Args:
-        num_parallel_tree: Optional[int]
+        num_parallel_tree (Optional[int]):
             Number of parallel trees constructed during each iteration. Default to 100. Minimum value is 2.
-        tree_method: Optional[str]
+        tree_method (Optional[str]):
             Specify which tree method to use. Default to "auto". If this parameter is set to
             default, XGBoost will choose the most conservative option available. Possible values: ""exact", "approx",
             "hist".
-        min_child_weight : Optional[float]
+        min_child_weight (Optional[float]):
             Minimum sum of instance weight(hessian) needed in a child. Default to 1.
-        colsample_bytree : Optional[float]
+        colsample_bytree (Optional[float]):
             Subsample ratio of columns when constructing each tree. Default to 1.0. The value should be between 0 and 1.
-        colsample_bylevel : Optional[float]
+        colsample_bylevel (Optional[float]):
             Subsample ratio of columns for each level. Default to 1.0. The value should be between 0 and 1.
-        colsample_bynode : Optional[float]
+        colsample_bynode (Optional[float]):
             Subsample ratio of columns for each split. Default to 0.8. The value should be between 0 and 1.
-        gamma : Optional[float]
+        gamma (Optional[float]):
             (min_split_loss) Minimum loss reduction required to make a further partition on a
             leaf node of the tree. Default to 0.0.
-        max_depth :  Optional[int]
+        max_depth (Optional[int]):
             Maximum tree depth for base learners. Default to 15. The value should be greater than 0 and less than 1.
-        subsample : Optional[float]
+        subsample (Optional[float]):
             Subsample ratio of the training instance. Default to 0.8. The value should be greater than 0 and less than 1.
-        reg_alpha : Optional[float]
+        reg_alpha (Optional[float]):
             L1 regularization term on weights (xgb's alpha). Default to 0.0.
-        reg_lambda : Optional[float]
+        reg_lambda (Optional[float]):
             L2 regularization term on weights (xgb's lambda). Default to 1.0.
-        early_stop: Optional[bool]
+        early_stop (Optional[bool]):
             Whether training should stop after the first iteration. Default to True.
-        min_rel_progress: Optional[float]
+        min_rel_progress (Optional[float]):
             Minimum relative loss improvement necessary to continue training when early_stop is set to True. Default to 0.01.
-        enable_global_explain: Optional[bool]
+        enable_global_explain (Optional[bool]):
             Whether to compute global explanations using explainable AI to evaluate global feature importance to the model. Default to False.
-        xgboost_version: Optional[str]
+        xgboost_version (Optional[str]):
+            Specifies the Xgboost version for model training.  Default to "0.9". Possible values: "0.9", "1.1".
     """

From 77074ecbe7f52d1d7d1d1dc537fbe4062b407672 Mon Sep 17 00:00:00 2001
From: Huan Chen <142538604+Genesis929@users.noreply.github.com>
Date: Mon, 4 Dec 2023 17:15:14 -0800
Subject: [PATCH 06/20] =?UTF-8?q?docs:=20add=20examples=20for=20dataframe.?=
 =?UTF-8?q?nunique,=20dataframe.diff,=20dataframe.a=E2=80=A6=20(#251)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* docs: add examples for dataframe.nunique, dataframe.diff, dataframe.agg, dataframe.describe

* update spacing

* update ordering
---
 .../bigframes_vendored/pandas/core/frame.py   | 114 +++++++++++++++++-
 1 file changed, 112 insertions(+), 2 deletions(-)

diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 08fe8e2de0..174ab069f6 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -3434,7 +3434,26 @@ def melt(self, id_vars, value_vars, var_name, value_name):
 
     def nunique(self):
         """
-        Count number of distinct elements in specified axis.
+        Count number of distinct elements in each column.
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [1, 2, 2]})
+            >>> df
+                A	B
+            0	3	1
+            1	1	2
+            2	2	2
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
+            >>> df.nunique()
+            A    3.0
+            B    2.0
+            dtype: Float64
 
         Returns:
             bigframes.series.Series: Series with number of distinct elements.
@@ -3578,6 +3597,40 @@ def diff(
         Calculates the difference of a DataFrame element compared with another
         element in the DataFrame (default is element in previous row).
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [1, 2, 3]})
+            >>> df
+                A	B
+            0	3	1
+            1	1	2
+            2	2	3
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
+        Calculating difference with default periods=1:
+
+            >>> df.diff()
+                   A	   B
+            0	<NA>	<NA>
+            1	  -2	   1
+            2	   1	   1
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
+        Calculating difference with periods=-1:
+
+            >>> df.diff(periods=-1)
+                   A	   B
+            0	   2	  -1
+            1	  -1	  -1
+            2	<NA>	<NA>
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
         Args:
             periods (int, default 1):
                 Periods to shift for calculating difference, accepts negative
@@ -3590,7 +3643,37 @@ def diff(
 
     def agg(self, func):
         """
-        Aggregate using one or more operations over the specified axis.
+        Aggregate using one or more operations over columns.
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [1, 2, 3]})
+            >>> df
+                A	B
+            0	3	1
+            1	1	2
+            2	2	3
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
+        Using a single function:
+
+            >>> df.agg('sum')
+            A    6.0
+            B    6.0
+            dtype: Float64
+
+        Using a list of functions:
+
+            >>> df.agg(['sum', 'mean'])
+                      A	  B
+            sum	    6.0	6.0
+            mean	2.0	2.0
+            <BLANKLINE>
+            [2 rows x 2 columns]
 
         Args:
             func (function):
@@ -3623,6 +3706,33 @@ def describe(self):
             upper percentile is ``75``. The ``50`` percentile is the
             same as the median.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [0, 2, 8]})
+            >>> df
+                A	B
+            0	3	0
+            1	1	2
+            2	2	8
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
+            >>> df.describe()
+                          A	          B
+            count       3.0	        3.0
+            mean        2.0	   3.333333
+            std	        1.0	   4.163332
+            min	        1.0	        0.0
+            25%	        1.0	        0.0
+            50%	        2.0	        2.0
+            75%	        3.0	        8.0
+            max	        3.0	        8.0
+            <BLANKLINE>
+            [8 rows x 2 columns]
+
         Returns:
             bigframes.dataframe.DataFrame: Summary statistics of the Series or Dataframe provided.
         """

From 89a1c67fa5cbb76c1cc6ae24d5f919e22514705c Mon Sep 17 00:00:00 2001
From: Shobhit Singh <shobs@google.com>
Date: Tue, 5 Dec 2023 02:14:14 +0000
Subject: [PATCH 07/20] docs: Fix return annotation in API docstrings (#253)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes internal issue 314367409 🦕
---
 .../bigframes_vendored/pandas/core/frame.py   | 30 +++++++--------
 .../bigframes_vendored/pandas/core/series.py  | 38 +++++++++----------
 2 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 174ab069f6..7168572705 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -226,7 +226,7 @@ def from_dict(
                 if used with ``orient='columns'`` or ``orient='tight'``.
 
         Returns:
-            DataFrame
+            DataFrame: DataFrame.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -267,7 +267,7 @@ def from_records(
                 Number of rows to read if data is an iterator.
 
         Returns:
-            DataFrame
+            DataFrame: DataFrame.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -717,7 +717,7 @@ def to_markdown(
                 These parameters will be passed to `tabulate <https://pypi.org/project/tabulate>`_.
 
         Returns:
-            DataFrame in Markdown-friendly format.
+            DataFrame: DataFrame in Markdown-friendly format.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -1272,7 +1272,7 @@ def sort_values(
              if `first`; `last` puts NaNs at the end.
 
         Returns:
-            DataFrame with sorted values.
+            DataFrame: DataFrame with sorted values.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -1282,7 +1282,7 @@ def sort_index(
         """Sort object by labels (along an axis).
 
         Returns:
-            The original DataFrame sorted by the labels.
+            DataFrame: The original DataFrame sorted by the labels.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -1330,7 +1330,7 @@ def eq(self, other, axis: str | int = "columns") -> DataFrame:
                 (1 or 'columns').
 
         Returns:
-            Result of the comparison.
+            DataFrame: Result of the comparison.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -1861,7 +1861,7 @@ def rtruediv(self, other, axis: str | int = "columns") -> DataFrame:
                 (1 or 'columns'). For Series input, axis to match Series index on.
 
         Returns:
-            DataFrame result of the arithmetic operation.
+            DataFrame: DataFrame result of the arithmetic operation.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -2796,7 +2796,7 @@ def any(self, *, axis=0, bool_only: bool = False):
                 Include only boolean columns.
 
         Returns:
-            Series
+            bigframes.series.Series: Series indicating if any element is True per column.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -2843,7 +2843,7 @@ def all(self, axis=0, *, bool_only: bool = False):
                 Include only boolean columns.
 
         Returns:
-            bigframes.series.Series: Series if all elements are True.
+            bigframes.series.Series: Series indicating if all elements are True per column.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -3184,7 +3184,7 @@ def skew(self, *, numeric_only: bool = False):
                 Include only float, int, boolean columns.
 
         Returns:
-            Series
+            Series: Series.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -3225,7 +3225,7 @@ def kurt(self, *, numeric_only: bool = False):
                 Include only float, int, boolean columns.
 
         Returns:
-            Series
+            Series: Series.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -3770,7 +3770,7 @@ def pivot(self, *, columns, index=None, values=None):
                 have hierarchically indexed columns.
 
         Returns:
-            Returns reshaped DataFrame.
+            DataFrame: Returns reshaped DataFrame.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -3808,7 +3808,7 @@ def unstack(self):
         (the analogue of stack when the columns are not a MultiIndex).
 
         Returns:
-            DataFrame or Series
+            DataFrame or Series: DataFrame or Series.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -3866,7 +3866,7 @@ def index(self):
                 dtype=object)
 
         Returns:
-            The index labels of the DataFrame.
+            Index: The index object of the DataFrame.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -4115,7 +4115,7 @@ def dot(self, other):
                 The other object to compute the matrix product with.
 
         Returns:
-            Series or DataFrame
+            Series or DataFrame:
                 If `other` is a Series, return the matrix product between self and
                 other as a Series. If other is a DataFrame, return
                 the matrix product of self and other in a DataFrame.
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index 1b751ed83b..6b8dd1d64d 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -90,7 +90,7 @@ def index(self):
                 dtype=object)
 
         Returns:
-            The index labels of the Series.
+            Index: The index object of the Series.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -138,7 +138,7 @@ def transpose(self) -> Series:
         Return the transpose, which is by definition self.
 
         Returns:
-            Series
+            Series: Series.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -593,7 +593,7 @@ def corr(self, other, method="pearson", min_periods=None) -> float:
                 are not yet supported, so a result will be returned for at least two observations.
 
         Returns:
-            float;  Will return NaN if there are fewer than two numeric pairs, either series has a
+            float:  Will return NaN if there are fewer than two numeric pairs, either series has a
                 variance or covariance of zero, or any input value is infinite.
         """
         raise NotImplementedError("abstract method")
@@ -611,7 +611,7 @@ def diff(self) -> Series:
                 values.
 
         Returns:
-            {klass}: First differences of the Series.
+            Series: First differences of the Series.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -1317,7 +1317,7 @@ def le(self, other) -> Series:
             other: Series, or scalar value
 
         Returns:
-            bigframes.series.Series. The result of the comparison.
+            bigframes.series.Series: The result of the comparison.
 
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
@@ -1331,7 +1331,7 @@ def lt(self, other) -> Series:
         Args:
              other (Series, or scalar value):
 
-         Returns:
+        Returns:
             bigframes.series.Series: The result of the operation.
 
         """
@@ -1588,7 +1588,7 @@ def divmod(self, other) -> Series:
             other: Series, or scalar value
 
         Returns:
-            2-Tuple of Series. The result of the operation. The result is always
+            2-Tuple of Series: The result of the operation. The result is always
             consistent with (floordiv, mod) (though pandas may not).
 
         """
@@ -1603,7 +1603,7 @@ def rdivmod(self, other) -> Series:
             other: Series, or scalar value
 
         Returns:
-            2-Tuple of Series. The result of the operation. The result is always
+            2-Tuple of Series: The result of the operation. The result is always
             consistent with (rfloordiv, rmod) (though pandas may not).
 
         """
@@ -1650,7 +1650,7 @@ def max(
 
 
         Returns:
-            scalar or scalar
+            scalar: Scalar.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -1664,7 +1664,7 @@ def min(
         of the ``numpy.ndarray`` method ``argmin``.
 
         Returns:
-            scalar or scalar
+            scalar: Scalar.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -1692,7 +1692,7 @@ def var(
         Normalized by N-1 by default.
 
         Returns:
-            scalar or Series (if level specified)
+            scalar or Series (if level specified): Variance.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -1702,7 +1702,7 @@ def sum(self):
         This is equivalent to the method ``numpy.sum``.
 
         Returns:
-            scalar
+            scalar: Scalar.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -1710,7 +1710,7 @@ def mean(self):
         """Return the mean of the values over the requested axis.
 
         Returns:
-            scalar
+            scalar: Scalar.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -1723,7 +1723,7 @@ def median(self, *, exact: bool = False):
                 one. Note: ``exact=True`` not yet supported.
 
         Returns:
-            scalar
+            scalar: Scalar.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -1731,7 +1731,7 @@ def prod(self):
         """Return the product of the values over the requested axis.
 
         Returns:
-            scalar
+            scalar: Scalar.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -1741,7 +1741,7 @@ def skew(self):
         Normalized by N-1.
 
         Returns:
-            scalar
+            scalar: Scalar.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -1932,7 +1932,7 @@ def clip(self):
                 Maximum threshold value. All values above this threshold will be set to it. A missing threshold (e.g NA) will not clip the value.
 
         Returns:
-            Series.
+            Series: Series.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -2083,7 +2083,7 @@ def is_monotonic_increasing(self) -> bool:
         Return boolean if values in the object are monotonically increasing.
 
         Returns:
-            bool
+            bool: Boolean.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
@@ -2093,7 +2093,7 @@ def is_monotonic_decreasing(self) -> bool:
         Return boolean if values in the object are monotonically decreasing.
 
         Returns:
-            bool
+            bool: Boolean.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 

From c8ec245070402aa0770bc9b2375693de674ca925 Mon Sep 17 00:00:00 2001
From: TrevorBergeron <tbergeron@google.com>
Date: Tue, 5 Dec 2023 11:34:15 -0800
Subject: [PATCH 08/20] feat: add nunique method to Series/DataFrameGroupby
 (#256)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
---
 bigframes/core/groupby/__init__.py             |  6 ++++++
 tests/system/small/test_groupby.py             |  2 ++
 .../pandas/core/groupby/__init__.py            | 18 ++++++++++++++++++
 3 files changed, 26 insertions(+)

diff --git a/bigframes/core/groupby/__init__.py b/bigframes/core/groupby/__init__.py
index 18cb83fa18..a8b8afdae7 100644
--- a/bigframes/core/groupby/__init__.py
+++ b/bigframes/core/groupby/__init__.py
@@ -179,6 +179,9 @@ def any(self) -> df.DataFrame:
     def count(self) -> df.DataFrame:
         return self._aggregate_all(agg_ops.count_op)
 
+    def nunique(self) -> df.DataFrame:
+        return self._aggregate_all(agg_ops.nunique_op)
+
     def cumsum(self, *args, numeric_only: bool = False, **kwargs) -> df.DataFrame:
         if not numeric_only:
             self._raise_on_non_numeric("cumsum")
@@ -442,6 +445,9 @@ def max(self, *args) -> series.Series:
     def count(self) -> series.Series:
         return self._aggregate(agg_ops.count_op)
 
+    def nunique(self) -> series.Series:
+        return self._aggregate(agg_ops.nunique_op)
+
     def sum(self, *args) -> series.Series:
         return self._aggregate(agg_ops.sum_op)
 
diff --git a/tests/system/small/test_groupby.py b/tests/system/small/test_groupby.py
index a24713c2b3..5214905186 100644
--- a/tests/system/small/test_groupby.py
+++ b/tests/system/small/test_groupby.py
@@ -69,11 +69,13 @@ def test_dataframe_groupby_median(scalars_df_index, scalars_pandas_df_index):
     ("operator"),
     [
         (lambda x: x.count()),
+        (lambda x: x.nunique()),
         (lambda x: x.any()),
         (lambda x: x.all()),
     ],
     ids=[
         "count",
+        "nunique",
         "any",
         "all",
     ],
diff --git a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py
index b05319b4f7..8730cf0007 100644
--- a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py
+++ b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py
@@ -363,6 +363,15 @@ def agg(self, func):
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
+    def nunique(self):
+        """
+        Return number of unique elements in the group.
+
+        Returns:
+            Series: Number of unique values within each group.
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
 
 class DataFrameGroupBy(GroupBy):
     def agg(self, func, **kwargs):
@@ -391,3 +400,12 @@ def agg(self, func, **kwargs):
             DataFrame
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    def nunique(self):
+        """
+        Return DataFrame with counts of unique elements in each position.
+
+        Returns:
+            DataFrame
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

From d3fa6f26931d5d0f0ae3fa49baccfc148f870417 Mon Sep 17 00:00:00 2001
From: TrevorBergeron <tbergeron@google.com>
Date: Wed, 6 Dec 2023 11:04:15 -0800
Subject: [PATCH 09/20] fix: fix value_counts column label for normalize=True
 (#245)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
---
 bigframes/core/block_transforms.py   |  4 +++-
 tests/system/small/test_dataframe.py |  6 ++----
 tests/system/small/test_series.py    | 11 ++++-------
 3 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/bigframes/core/block_transforms.py b/bigframes/core/block_transforms.py
index ce0fdd219a..df84f70859 100644
--- a/bigframes/core/block_transforms.py
+++ b/bigframes/core/block_transforms.py
@@ -353,7 +353,9 @@ def value_counts(
                 )
             ]
         )
-    return block.select_column(count_id).with_column_labels(["count"])
+    return block.select_column(count_id).with_column_labels(
+        ["proportion" if normalize else "count"]
+    )
 
 
 def pct_change(block: blocks.Block, periods: int = 1) -> blocks.Block:
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 9318a5d9d2..45490e00ca 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -3453,6 +3453,8 @@ def test_df_to_orc(scalars_df_index, scalars_pandas_df_index):
     ],
 )
 def test_df_value_counts(scalars_dfs, subset, normalize, ascending, dropna):
+    if pd.__version__.startswith("1."):
+        pytest.skip("pandas 1.x produces different column labels.")
     scalars_df, scalars_pandas_df = scalars_dfs
 
     bf_result = (
@@ -3464,10 +3466,6 @@ def test_df_value_counts(scalars_dfs, subset, normalize, ascending, dropna):
         subset, normalize=normalize, ascending=ascending, dropna=dropna
     )
 
-    # Older pandas version may not have these values, bigframes tries to emulate 2.0+
-    pd_result.name = "count"
-    pd_result.index.names = bf_result.index.names
-
     pd.testing.assert_series_equal(
         bf_result, pd_result, check_dtype=False, check_index_type=False
     )
diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py
index d9fc23fad0..92a7b6f099 100644
--- a/tests/system/small/test_series.py
+++ b/tests/system/small/test_series.py
@@ -1940,16 +1940,14 @@ def test_cummax_int(scalars_df_index, scalars_pandas_df_index):
 
 
 def test_value_counts(scalars_dfs):
+    if pd.__version__.startswith("1."):
+        pytest.skip("pandas 1.x produces different column labels.")
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "int64_too"
 
     bf_result = scalars_df[col_name].value_counts().to_pandas()
     pd_result = scalars_pandas_df[col_name].value_counts()
 
-    # Older pandas version may not have these values, bigframes tries to emulate 2.0+
-    pd_result.name = "count"
-    pd_result.index.name = col_name
-
     pd.testing.assert_series_equal(
         bf_result,
         pd_result,
@@ -1957,6 +1955,8 @@ def test_value_counts(scalars_dfs):
 
 
 def test_value_counts_w_cut(scalars_dfs):
+    if pd.__version__.startswith("1."):
+        pytest.skip("value_counts results different in pandas 1.x.")
     scalars_df, scalars_pandas_df = scalars_dfs
     col_name = "int64_col"
 
@@ -1965,9 +1965,6 @@ def test_value_counts_w_cut(scalars_dfs):
 
     bf_result = bf_cut.value_counts().to_pandas()
     pd_result = pd_cut.value_counts()
-    # Older pandas version may not have these values, bigframes tries to emulate 2.0+
-    pd_result.name = "count"
-    pd_result.index.name = col_name
     pd_result.index = pd_result.index.astype(pd.Int64Dtype())
 
     pd.testing.assert_series_equal(

From 5bdcc6594ef2e99e96636341d286ea70420858fe Mon Sep 17 00:00:00 2001
From: Shobhit Singh <shobs@google.com>
Date: Thu, 7 Dec 2023 08:24:14 +0000
Subject: [PATCH 10/20] docs: add code samples for `shape` and `head` (#257)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [x] Appropriate docs were updated (if necessary)
  - `DataFrame.head`: https://screenshot.googleplex.com/BmM7jPxCk3iLuay
  - `Series.head`: https://screenshot.googleplex.com/7hANtzZCw8SbEKL
  - `Series.shape`: https://screenshot.googleplex.com/8AJ2xvLY6dmQUZe

Fixes internal issue 314875595 🦕
---
 .../bigframes_vendored/pandas/core/generic.py | 60 ++++++++++++++++++-
 .../bigframes_vendored/pandas/core/series.py  | 15 ++++-
 2 files changed, 72 insertions(+), 3 deletions(-)

diff --git a/third_party/bigframes_vendored/pandas/core/generic.py b/third_party/bigframes_vendored/pandas/core/generic.py
index 127efe6a3d..607243f844 100644
--- a/third_party/bigframes_vendored/pandas/core/generic.py
+++ b/third_party/bigframes_vendored/pandas/core/generic.py
@@ -272,17 +272,73 @@ def head(self, n: int = 5):
         on position. It is useful for quickly testing if your object
         has the right type of data in it.
 
-        **Not yet supported** For negative values of `n`, this function returns
+        For negative values of `n`, this function returns
         all rows except the last `|n|` rows, equivalent to ``df[:n]``.
 
         If n is larger than the number of rows, this function returns all rows.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion',
+            ...                     'monkey', 'parrot', 'shark', 'whale', 'zebra']})
+            >>> df
+                animal
+            0  alligator
+            1        bee
+            2     falcon
+            3       lion
+            4     monkey
+            5     parrot
+            6      shark
+            7      whale
+            8      zebra
+            <BLANKLINE>
+            [9 rows x 1 columns]
+
+        Viewing the first 5 lines:
+
+            >>> df.head()
+                animal
+            0  alligator
+            1        bee
+            2     falcon
+            3       lion
+            4     monkey
+            <BLANKLINE>
+            [5 rows x 1 columns]
+
+        Viewing the first `n` lines (three in this case):
+
+            >>> df.head(3)
+                animal
+            0  alligator
+            1        bee
+            2     falcon
+            <BLANKLINE>
+            [3 rows x 1 columns]
+
+        For negative values of `n`:
+
+            >>> df.head(-3)
+                animal
+            0  alligator
+            1        bee
+            2     falcon
+            3       lion
+            4     monkey
+            5     parrot
+            <BLANKLINE>
+            [6 rows x 1 columns]
+
         Args:
             n (int, default 5):
                 Default 5. Number of rows to select.
 
         Returns:
-            The first `n` rows of the caller object.
+            same type as caller: The first ``n`` rows of the caller object.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py
index 6b8dd1d64d..e6af1648fd 100644
--- a/third_party/bigframes_vendored/pandas/core/series.py
+++ b/third_party/bigframes_vendored/pandas/core/series.py
@@ -96,7 +96,20 @@ def index(self):
 
     @property
     def shape(self):
-        """Return a tuple of the shape of the underlying data."""
+        """Return a tuple of the shape of the underlying data.
+
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> s = bpd.Series([1, 4, 9, 16])
+            >>> s.shape
+            (4,)
+            >>> s = bpd.Series(['Alice', 'Bob', bpd.NA])
+            >>> s.shape
+            (3,)
+        """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
     @property

From 68c6fdf78af8b87fa4ef4f832631f24d7433a4d8 Mon Sep 17 00:00:00 2001
From: Garrett Wu <6505921+GarrettWu@users.noreply.github.com>
Date: Thu, 7 Dec 2023 11:30:15 -0800
Subject: [PATCH 11/20] fix: ml.sql logic (#262)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
---
 bigframes/ml/sql.py | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/bigframes/ml/sql.py b/bigframes/ml/sql.py
index 1c88eda4ab..5fb40624dd 100644
--- a/bigframes/ml/sql.py
+++ b/bigframes/ml/sql.py
@@ -153,14 +153,12 @@ def create_model(
     ) -> str:
         """Encode the CREATE OR REPLACE MODEL statement for BQML"""
         source_sql = source_df.sql
-        transform_sql = self.transform(*transforms) if transforms is not None else None
-        options_sql = self.options(**options)
 
         parts = [f"CREATE OR REPLACE MODEL {self._model_id_sql(model_ref)}"]
-        if transform_sql:
-            parts.append(transform_sql)
-        if options_sql:
-            parts.append(options_sql)
+        if transforms:
+            parts.append(self.transform(*transforms))
+        if options:
+            parts.append(self.options(**options))
         parts.append(f"AS {source_sql}")
         return "\n".join(parts)
 
@@ -189,11 +187,10 @@ def create_imported_model(
         options: Mapping[str, Union[str, int, float, Iterable[str]]] = {},
     ) -> str:
         """Encode the CREATE OR REPLACE MODEL statement for BQML remote model."""
-        options_sql = self.options(**options)
 
         parts = [f"CREATE OR REPLACE MODEL {self._model_id_sql(model_ref)}"]
-        if options_sql:
-            parts.append(options_sql)
+        if options:
+            parts.append(self.options(**options))
         return "\n".join(parts)
 
 

From d21c6dd26eadd64c526b0fd35b977a74b8334562 Mon Sep 17 00:00:00 2001
From: Ashley Xu <139821907+ashleyxuu@users.noreply.github.com>
Date: Fri, 8 Dec 2023 11:52:17 -0800
Subject: [PATCH 12/20] docs: correct the docs for `option_context` (#263)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes internal issue #315382764 🦕
---
 .../bigframes_vendored/pandas/_config/config.py       | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/third_party/bigframes_vendored/pandas/_config/config.py b/third_party/bigframes_vendored/pandas/_config/config.py
index 8abaca76c7..1b73e649c8 100644
--- a/third_party/bigframes_vendored/pandas/_config/config.py
+++ b/third_party/bigframes_vendored/pandas/_config/config.py
@@ -11,11 +11,12 @@ class option_context(contextlib.ContextDecorator):
 
     You need to invoke as ``option_context(pat, val, [(pat, val), ...])``.
 
-    Examples
-    --------
-    >>> import bigframes
-    >>> with bigframes.option_context('display.max_rows', 10, 'display.max_columns', 5):
-    ...     pass
+    **Examples:**
+
+        >>> import bigframes
+
+        >>> with bigframes.option_context('display.max_rows', 10, 'display.max_columns', 5):
+        ...     pass
     """
 
     def __init__(self, *args) -> None:

From 9dd63f6dcb6234e1f3aebd63c59e1e5c717099dc Mon Sep 17 00:00:00 2001
From: Shobhit Singh <shobs@google.com>
Date: Sat, 9 Dec 2023 02:10:15 +0000
Subject: [PATCH 13/20] fix: enforce pandas version requirement <2.1.4 (#265)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There seems to be a breaking change in pandas release 2.1.4 that is failing tests using `pandas.read_json`. This change pins the pandas dependency version to <2.1.4 until a proper fix is available.

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes internal issue 315539920 🦕
---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index abf165b3df..3351542985 100644
--- a/setup.py
+++ b/setup.py
@@ -45,7 +45,7 @@
     "google-cloud-storage >=2.0.0",
     # TODO: Relax upper bound once we have fixed `system_prerelease` tests.
     "ibis-framework[bigquery] >=6.2.0,<7.0.0dev",
-    "pandas >=1.5.0",
+    "pandas >=1.5.0,<2.1.4",
     "pydata-google-auth >=1.8.2",
     "requests >=2.27.1",
     "scikit-learn >=1.2.2",

From 99598c7d359f1d1e0671dcf27a5c77094f3c7f67 Mon Sep 17 00:00:00 2001
From: Garrett Wu <6505921+GarrettWu@users.noreply.github.com>
Date: Sun, 10 Dec 2023 22:02:15 -0800
Subject: [PATCH 14/20] feat: add ARIMAPlus.predict parameters (#264)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
---
 bigframes/ml/core.py                      |  4 +--
 bigframes/ml/forecasting.py               | 21 +++++++++--
 bigframes/ml/sql.py                       |  6 ++--
 tests/system/small/ml/test_core.py        |  9 ++---
 tests/system/small/ml/test_forecasting.py | 43 +++++++++++++++++++++--
 tests/unit/ml/test_sql.py                 | 16 +++++++++
 6 files changed, 86 insertions(+), 13 deletions(-)

diff --git a/bigframes/ml/core.py b/bigframes/ml/core.py
index 5aad77a394..1e2224c9bc 100644
--- a/bigframes/ml/core.py
+++ b/bigframes/ml/core.py
@@ -126,8 +126,8 @@ def generate_text_embedding(
             ),
         )
 
-    def forecast(self) -> bpd.DataFrame:
-        sql = self._model_manipulation_sql_generator.ml_forecast()
+    def forecast(self, options: Mapping[str, int | float]) -> bpd.DataFrame:
+        sql = self._model_manipulation_sql_generator.ml_forecast(struct_options=options)
         return self._session.read_gbq(sql, index_col="forecast_timestamp").reset_index()
 
     def evaluate(self, input_data: Optional[bpd.DataFrame] = None):
diff --git a/bigframes/ml/forecasting.py b/bigframes/ml/forecasting.py
index 995201062b..03b9857cc5 100644
--- a/bigframes/ml/forecasting.py
+++ b/bigframes/ml/forecasting.py
@@ -86,21 +86,38 @@ def _fit(
             options=self._bqml_options,
         )
 
-    def predict(self, X=None) -> bpd.DataFrame:
+    def predict(
+        self, X=None, horizon: int = 3, confidence_level: float = 0.95
+    ) -> bpd.DataFrame:
         """Predict the closest cluster for each sample in X.
 
         Args:
             X (default None):
                 ignored, to be compatible with other APIs.
+            horizon (int, default 3):
+                an int value that specifies the number of time points to forecast.
+                The default value is 3, and the maximum value is 1000.
+            confidence_level (float, default 0.95):
+                a float value that specifies the percentage of the future values that fall in the prediction interval.
+                The valid input range is [0.0, 1.0).
 
         Returns:
             bigframes.dataframe.DataFrame: The predicted DataFrames. Which
                 contains 2 columns "forecast_timestamp" and "forecast_value".
         """
+        if horizon < 1 or horizon > 1000:
+            raise ValueError(f"horizon must be [1, 1000], but is {horizon}.")
+        if confidence_level < 0.0 or confidence_level >= 1.0:
+            raise ValueError(
+                f"confidence_level must be [0.0, 1.0), but is {confidence_level}."
+            )
+
         if not self._bqml_model:
             raise RuntimeError("A model must be fitted before predict")
 
-        return self._bqml_model.forecast()
+        return self._bqml_model.forecast(
+            options={"horizon": horizon, "confidence_level": confidence_level}
+        )
 
     def score(
         self,
diff --git a/bigframes/ml/sql.py b/bigframes/ml/sql.py
index 5fb40624dd..25caaf1ac6 100644
--- a/bigframes/ml/sql.py
+++ b/bigframes/ml/sql.py
@@ -223,9 +223,11 @@ def ml_predict(self, source_df: bpd.DataFrame) -> str:
         return f"""SELECT * FROM ML.PREDICT(MODEL `{self._model_name}`,
   ({self._source_sql(source_df)}))"""
 
-    def ml_forecast(self) -> str:
+    def ml_forecast(self, struct_options: Mapping[str, Union[int, float]]) -> str:
         """Encode ML.FORECAST for BQML"""
-        return f"""SELECT * FROM ML.FORECAST(MODEL `{self._model_name}`)"""
+        struct_options_sql = self.struct_options(**struct_options)
+        return f"""SELECT * FROM ML.FORECAST(MODEL `{self._model_name}`,
+  {struct_options_sql})"""
 
     def ml_generate_text(
         self, source_df: bpd.DataFrame, struct_options: Mapping[str, Union[int, float]]
diff --git a/tests/system/small/ml/test_core.py b/tests/system/small/ml/test_core.py
index 22cbbb1932..915c4aa444 100644
--- a/tests/system/small/ml/test_core.py
+++ b/tests/system/small/ml/test_core.py
@@ -336,17 +336,18 @@ def test_model_generate_text(
 
 def test_model_forecast(time_series_bqml_arima_plus_model: core.BqmlModel):
     utc = pytz.utc
-    forecast = time_series_bqml_arima_plus_model.forecast().to_pandas()[
-        ["forecast_timestamp", "forecast_value"]
-    ]
+    forecast = time_series_bqml_arima_plus_model.forecast(
+        {"horizon": 4, "confidence_level": 0.8}
+    ).to_pandas()[["forecast_timestamp", "forecast_value"]]
     expected = pd.DataFrame(
         {
             "forecast_timestamp": [
                 datetime(2017, 8, 2, tzinfo=utc),
                 datetime(2017, 8, 3, tzinfo=utc),
                 datetime(2017, 8, 4, tzinfo=utc),
+                datetime(2017, 8, 5, tzinfo=utc),
             ],
-            "forecast_value": [2724.472284, 2593.368389, 2353.613034],
+            "forecast_value": [2724.472284, 2593.368389, 2353.613034, 1781.623071],
         }
     )
     expected["forecast_value"] = expected["forecast_value"].astype(pd.Float64Dtype())
diff --git a/tests/system/small/ml/test_forecasting.py b/tests/system/small/ml/test_forecasting.py
index 948db59650..be8d9c2bac 100644
--- a/tests/system/small/ml/test_forecasting.py
+++ b/tests/system/small/ml/test_forecasting.py
@@ -18,8 +18,10 @@
 import pyarrow as pa
 import pytz
 
+from bigframes.ml import forecasting
 
-def test_model_predict(time_series_arima_plus_model):
+
+def test_model_predict_default(time_series_arima_plus_model: forecasting.ARIMAPlus):
     utc = pytz.utc
     predictions = time_series_arima_plus_model.predict().to_pandas()
     assert predictions.shape == (3, 8)
@@ -47,7 +49,40 @@ def test_model_predict(time_series_arima_plus_model):
     )
 
 
-def test_model_score(time_series_arima_plus_model, new_time_series_df):
+def test_model_predict_params(time_series_arima_plus_model: forecasting.ARIMAPlus):
+    utc = pytz.utc
+    predictions = time_series_arima_plus_model.predict(
+        horizon=4, confidence_level=0.9
+    ).to_pandas()
+    assert predictions.shape == (4, 8)
+    result = predictions[["forecast_timestamp", "forecast_value"]]
+    expected = pd.DataFrame(
+        {
+            "forecast_timestamp": [
+                datetime(2017, 8, 2, tzinfo=utc),
+                datetime(2017, 8, 3, tzinfo=utc),
+                datetime(2017, 8, 4, tzinfo=utc),
+                datetime(2017, 8, 5, tzinfo=utc),
+            ],
+            "forecast_value": [2724.472284, 2593.368389, 2353.613034, 1781.623071],
+        }
+    )
+    expected["forecast_value"] = expected["forecast_value"].astype(pd.Float64Dtype())
+    expected["forecast_timestamp"] = expected["forecast_timestamp"].astype(
+        pd.ArrowDtype(pa.timestamp("us", tz="UTC"))
+    )
+
+    pd.testing.assert_frame_equal(
+        result,
+        expected,
+        rtol=0.1,
+        check_index_type=False,
+    )
+
+
+def test_model_score(
+    time_series_arima_plus_model: forecasting.ARIMAPlus, new_time_series_df
+):
     result = time_series_arima_plus_model.score(
         new_time_series_df[["parsed_date"]], new_time_series_df[["total_visits"]]
     ).to_pandas()
@@ -69,7 +104,9 @@ def test_model_score(time_series_arima_plus_model, new_time_series_df):
     )
 
 
-def test_model_score_series(time_series_arima_plus_model, new_time_series_df):
+def test_model_score_series(
+    time_series_arima_plus_model: forecasting.ARIMAPlus, new_time_series_df
+):
     result = time_series_arima_plus_model.score(
         new_time_series_df["parsed_date"], new_time_series_df["total_visits"]
     ).to_pandas()
diff --git a/tests/unit/ml/test_sql.py b/tests/unit/ml/test_sql.py
index 9223058540..73d19cc0bb 100644
--- a/tests/unit/ml/test_sql.py
+++ b/tests/unit/ml/test_sql.py
@@ -293,6 +293,22 @@ def test_ml_centroids_produces_correct_sql(
     )
 
 
+def test_forecast_correct_sql(
+    model_manipulation_sql_generator: ml_sql.ModelManipulationSqlGenerator,
+    mock_df: bpd.DataFrame,
+):
+    sql = model_manipulation_sql_generator.ml_forecast(
+        struct_options={"option_key1": 1, "option_key2": 2.2},
+    )
+    assert (
+        sql
+        == """SELECT * FROM ML.FORECAST(MODEL `my_project_id.my_dataset_id.my_model_id`,
+  STRUCT(
+  1 AS option_key1,
+  2.2 AS option_key2))"""
+    )
+
+
 def test_ml_generate_text_produces_correct_sql(
     model_manipulation_sql_generator: ml_sql.ModelManipulationSqlGenerator,
     mock_df: bpd.DataFrame,

From 3febea99358d10f823d43c3af83ea30458e579a2 Mon Sep 17 00:00:00 2001
From: Huan Chen <142538604+Genesis929@users.noreply.github.com>
Date: Mon, 11 Dec 2023 11:57:30 -0800
Subject: [PATCH 15/20] feat: support dataframe.loc with conditional columns
 selection (#233)

Co-authored-by: Tim Swast <swast@google.com>
---
 bigframes/core/indexers.py           |  7 ++++++-
 tests/system/small/test_dataframe.py | 11 +++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py
index 69048b6845..12a1303d29 100644
--- a/bigframes/core/indexers.py
+++ b/bigframes/core/indexers.py
@@ -156,7 +156,12 @@ def __getitem__(self, key):
                 bigframes.dataframe.DataFrame,
                 _loc_getitem_series_or_dataframe(self._dataframe, key[0]),
             )
-            return df[key[1]]
+
+            columns = key[1]
+            if isinstance(columns, pd.Series) and columns.dtype == "bool":
+                columns = df.columns[columns]
+
+            return df[columns]
 
         return typing.cast(
             bigframes.dataframe.DataFrame,
diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py
index 45490e00ca..57115335dc 100644
--- a/tests/system/small/test_dataframe.py
+++ b/tests/system/small/test_dataframe.py
@@ -2474,6 +2474,17 @@ def test_loc_select_column(scalars_df_index, scalars_pandas_df_index):
     )
 
 
+def test_loc_select_with_column_condition(scalars_df_index, scalars_pandas_df_index):
+    bf_result = scalars_df_index.loc[:, scalars_df_index.dtypes == "Int64"].to_pandas()
+    pd_result = scalars_pandas_df_index.loc[
+        :, scalars_pandas_df_index.dtypes == "Int64"
+    ]
+    pd.testing.assert_frame_equal(
+        bf_result,
+        pd_result,
+    )
+
+
 def test_loc_single_index_with_duplicate(scalars_df_index, scalars_pandas_df_index):
     scalars_df_index = scalars_df_index.set_index("string_col", drop=False)
     scalars_pandas_df_index = scalars_pandas_df_index.set_index(

From 8c636978f4a21eda2856862100b7a8272797fe42 Mon Sep 17 00:00:00 2001
From: Huan Chen <142538604+Genesis929@users.noreply.github.com>
Date: Mon, 11 Dec 2023 14:34:31 -0800
Subject: [PATCH 16/20] =?UTF-8?q?docs:=20add=20example=20for=20dataframe.m?=
 =?UTF-8?q?elt,=20dataframe.pivot,=20dataframe.stac=E2=80=A6=20(#252)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* docs: add example for dataframe.melt, dataframe.pivot, dataframe.stack, dataframe.unstack

* remove empty line

* docstring fix

* spacing update

* docs: correct the params rendering for `ml.remote` and `ml.ensemble` modules (#248)

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [x] Appropriate docs were updated (if necessary)
  - `ensemble.RandomForestClassifier`: https://screenshot.googleplex.com/4Q88xgdm5hkaYXu
  - `ensemble.RandomForestRegressor`: https://screenshot.googleplex.com/3CU6pJBjYHQvnDo
  - `remote.VertexAIModel`: https://screenshot.googleplex.com/8SL2max6GfPMwFe

Fixes internal issue 314150462 🦕

* docs: add examples for dataframe.nunique, dataframe.diff, dataframe.a… (#251)

* docs: add examples for dataframe.nunique, dataframe.diff, dataframe.agg, dataframe.describe

* update spacing

* update ordering

* docs: Fix return annotation in API docstrings (#253)

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes internal issue 314367409 🦕

* feat: add nunique method to Series/DataFrameGroupby (#256)

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕

* docs: add example for dataframe.melt, dataframe.pivot, dataframe.stack, dataframe.unstack

* docstring fix

---------

Co-authored-by: Ashley Xu <139821907+ashleyxuu@users.noreply.github.com>
Co-authored-by: Shobhit Singh <shobs@google.com>
Co-authored-by: TrevorBergeron <tbergeron@google.com>
---
 .../bigframes_vendored/pandas/core/frame.py   | 179 ++++++++++++++++--
 1 file changed, 165 insertions(+), 14 deletions(-)

diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 7168572705..5b00385eb8 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -3414,18 +3414,75 @@ def melt(self, id_vars, value_vars, var_name, value_name):
         the row axis, leaving just two non-identifier columns, 'variable' and
         'value'.
 
-        Parameters
-        ----------
-        id_vars (tuple, list, or ndarray, optional):
-            Column(s) to use as identifier variables.
-        value_vars (tuple, list, or ndarray, optional):
-            Column(s) to unpivot. If not specified, uses all columns that
-            are not set as `id_vars`.
-        var_name (scalar):
-            Name to use for the 'variable' column. If None it uses
-            ``frame.columns.name`` or 'variable'.
-        value_name (scalar, default 'value'):
-            Name to use for the 'value' column.
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({"A": [1, None, 3, 4, 5],
+            ...                     "B": [1, 2, 3, 4, 5],
+            ...                     "C": [None, 3.5, None, 4.5, 5.0]})
+            >>> df
+                    A	    B	   C
+            0	  1.0	    1	<NA>
+            1	 <NA>	    2	 3.5
+            2     3.0	    3	<NA>
+            3	  4.0	    4	 4.5
+            4	  5.0	    5	 5.0
+            <BLANKLINE>
+            [5 rows x 3 columns]
+
+        Using `melt` without optional arguments:
+
+            >>> df.melt()
+                variable    value
+            0	       A      1.0
+            1	       A     <NA>
+            2	       A      3.0
+            3	       A      4.0
+            4	       A      5.0
+            5	       B      1.0
+            6	       B      2.0
+            7	       B      3.0
+            8	       B      4.0
+            9	       B      5.0
+            10	       C     <NA>
+            11	       C      3.5
+            12	       C     <NA>
+            13	       C      4.5
+            14	       C      5.0
+            <BLANKLINE>
+            [15 rows x 2 columns]
+
+        Using `melt` with `id_vars` and `value_vars`:
+
+            >>> df.melt(id_vars='A', value_vars=['B', 'C'])
+                   A	variable	value
+            0	 1.0	       B	    1
+            1	<NA>	       B	    2
+            2	 3.0	       B	    3
+            3	 4.0	       B	    4
+            4	 5.0	       B	    5
+            5	 1.0	       C	 <NA>
+            6	 <NA>	       C	    3
+            7	 3.0	       C	 <NA>
+            8	 4.0	       C	    4
+            9	 5.0	       C	    5
+            <BLANKLINE>
+            [10 rows x 3 columns]
+
+
+        Args:
+            id_vars (tuple, list, or ndarray, optional):
+                Column(s) to use as identifier variables.
+            value_vars (tuple, list, or ndarray, optional):
+                Column(s) to unpivot. If not specified, uses all columns that
+                are not set as `id_vars`.
+            var_name (scalar):
+                Name to use for the 'variable' column. If None it uses
+                ``frame.columns.name`` or 'variable'.
+            value_name (scalar, default 'value'):
+                Name to use for the 'value' column.
 
         Returns:
             DataFrame: Unpivoted DataFrame.
@@ -3757,6 +3814,52 @@ def pivot(self, *, columns, index=None, values=None):
             do not together uniquely identify input rows, the output will be
             silently non-deterministic.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({
+            ...     "foo": ["one", "one", "one", "two", "two"],
+            ...     "bar": ["A", "B", "C", "A", "B"],
+            ...     "baz": [1, 2, 3, 4, 5],
+            ...     "zoo": ['x', 'y', 'z', 'q', 'w']
+            ... })
+
+            >>> df
+                foo	bar	baz	zoo
+            0	one	  A	  1	  x
+            1	one	  B	  2	  y
+            2	one	  C	  3	  z
+            3	two	  A	  4	  q
+            4	two	  B	  5	  w
+            <BLANKLINE>
+            [5 rows x 4 columns]
+
+        Using `pivot` without optional arguments:
+
+            >>> df.pivot(columns='foo')
+                    bar	            baz	            zoo
+            foo	 one	 two	 one	 two	 one	 two
+            0	   A	<NA>	   1	<NA>	   x	<NA>
+            1	   B	<NA>	   2	<NA>	   y	<NA>
+            2	   C	<NA>	   3	<NA>	   z	<NA>
+            3	<NA>	   A	<NA>	   4	<NA>	   q
+            4	<NA>	   B	<NA>	   5	<NA>	   w
+            <BLANKLINE>
+            [5 rows x 6 columns]
+
+        Using `pivot` with `index` and `values`:
+
+            >>> df.pivot(columns='foo', index='bar', values='baz')
+            foo	    one     two
+            bar
+            A	    1         4
+            B	    2	      5
+            C	    3	   <NA>
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
         Args:
             columns (str or object or a list of str):
                 Column to use to make new frame's columns.
@@ -3774,7 +3877,7 @@ def pivot(self, *, columns, index=None, values=None):
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
-    def stack(self):
+    def stack(self, level=-1):
         """
         Stack the prescribed level(s) from columns to index.
 
@@ -3792,12 +3895,36 @@ def stack(self):
             BigQuery DataFrames does not support stack operations that would
             combine columns of different dtypes.
 
+        **Example:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({'A': [1, 3], 'B': [2, 4]}, index=['foo', 'bar'])
+            >>> df
+                    A	B
+            foo	    1	2
+            bar	    3	4
+            <BLANKLINE>
+            [2 rows x 2 columns]
+
+            >>> df.stack()
+            foo  A    1
+                 B    2
+            bar  A    3
+                 B    4
+            dtype: Int64
+
+        Args:
+            level (int, str, or list of these, default -1 (last level)):
+                Level(s) to stack from the column axis onto the index axis.
+
         Returns:
             DataFrame or Series: Stacked dataframe or series.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
-    def unstack(self):
+    def unstack(self, level=-1):
         """
         Pivot a level of the (necessarily hierarchical) index labels.
 
@@ -3807,6 +3934,30 @@ def unstack(self):
         If the index is not a MultiIndex, the output will be a Series
         (the analogue of stack when the columns are not a MultiIndex).
 
+        **Example:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({'A': [1, 3], 'B': [2, 4]}, index=['foo', 'bar'])
+            >>> df
+                    A	B
+            foo	    1	2
+            bar	    3	4
+            <BLANKLINE>
+            [2 rows x 2 columns]
+
+            >>> df.unstack()
+            A   foo    1
+                bar    3
+            B   foo    2
+                bar    4
+            dtype: Int64
+
+        Args:
+            level (int, str, or list of these, default -1 (last level)):
+                Level(s) of index to unstack, can pass level name.
+
         Returns:
             DataFrame or Series: DataFrame or Series.
         """

From e735412fdc52d034df92dd5462d6956bdc0167be Mon Sep 17 00:00:00 2001
From: Huan Chen <142538604+Genesis929@users.noreply.github.com>
Date: Mon, 11 Dec 2023 15:54:55 -0800
Subject: [PATCH 17/20] =?UTF-8?q?docs:=20add=20example=20to=20dataframe.nl?=
 =?UTF-8?q?argest,=20dataframe.nsmallest,=20datafra=E2=80=A6=20(#234)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* docs: add example to dataframe.nlargest, dataframe.nsmallest, dataframe.idxmin, dataframe
.idxmax

* update example output
---
 .../bigframes_vendored/pandas/core/frame.py   | 151 +++++++++++++++++-
 1 file changed, 147 insertions(+), 4 deletions(-)

diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py
index 5b00385eb8..4753bfc589 100644
--- a/third_party/bigframes_vendored/pandas/core/frame.py
+++ b/third_party/bigframes_vendored/pandas/core/frame.py
@@ -3324,6 +3324,58 @@ def nlargest(self, n: int, columns, keep: str = "first"):
         ``df.sort_values(columns, ascending=False).head(n)``, but more
         performant.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({"A": [1, 1, 3, 3, 5, 5],
+            ...                     "B": [5, 6, 3, 4, 1, 2],
+            ...                     "C": ['a', 'b', 'a', 'b', 'a', 'b']})
+            >>> df
+                A	B	C
+            0	1	5	a
+            1	1	6	b
+            2	3	3	a
+            3	3	4	b
+            4	5	1	a
+            5	5	2	b
+            <BLANKLINE>
+            [6 rows x 3 columns]
+
+        Returns rows with the largest value in 'A', including all ties:
+
+            >>> df.nlargest(1, 'A', keep = "all")
+                A	B	C
+            4	5	1	a
+            5	5	2	b
+            <BLANKLINE>
+            [2 rows x 3 columns]
+
+        Returns the first row with the largest value in 'A', default behavior in case of ties:
+
+            >>> df.nlargest(1, 'A')
+                A	B	C
+            4	5	1	a
+            <BLANKLINE>
+            [1 rows x 3 columns]
+
+        Returns the last row with the largest value in 'A' in case of ties:
+
+            >>> df.nlargest(1, 'A', keep = "last")
+                A	B	C
+            5	5	2	b
+            <BLANKLINE>
+            [1 rows x 3 columns]
+
+        Returns the row with the largest combined values in both 'A' and 'C':
+
+            >>> df.nlargest(1, ['A', 'C'])
+                A	B	C
+            5	5	2	b
+            <BLANKLINE>
+            [1 rows x 3 columns]
+
         Args:
             n (int):
                 Number of rows to return.
@@ -3359,6 +3411,59 @@ def nsmallest(self, n: int, columns, keep: str = "first"):
         ``df.sort_values(columns, ascending=True).head(n)``, but more
         performant.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({"A": [1, 1, 3, 3, 5, 5],
+            ...                     "B": [5, 6, 3, 4, 1, 2],
+            ...                     "C": ['a', 'b', 'a', 'b', 'a', 'b']})
+            >>> df
+                A	B	C
+            0	1	5	a
+            1	1	6	b
+            2	3	3	a
+            3	3	4	b
+            4	5	1	a
+            5	5	2	b
+            <BLANKLINE>
+            [6 rows x 3 columns]
+
+        Returns rows with the smallest value in 'A', including all ties:
+
+            >>> df.nsmallest(1, 'A', keep = "all")
+                A	B	C
+            0	1	5	a
+            1	1	6	b
+            <BLANKLINE>
+            [2 rows x 3 columns]
+
+        Returns the first row with the smallest value in 'A', default behavior in case of ties:
+
+            >>> df.nsmallest(1, 'A')
+                A	B	C
+            0  	1	5	a
+            <BLANKLINE>
+            [1 rows x 3 columns]
+
+        Returns the last row with the smallest value in 'A' in case of ties:
+
+            >>> df.nsmallest(1, 'A', keep = "last")
+                A	B	C
+            1	1	6	b
+            <BLANKLINE>
+            [1 rows x 3 columns]
+
+        Returns rows with the smallest values in 'A' and 'C':
+
+            >>> df.nsmallest(1, ['A', 'C'])
+                A	B	C
+            0	1	5	a
+            <BLANKLINE>
+            [1 rows x 3 columns]
+
+
         Args:
             n (int):
                 Number of rows to return.
@@ -3384,23 +3489,61 @@ def nsmallest(self, n: int, columns, keep: str = "first"):
 
     def idxmin(self):
         """
-        Return index of first occurrence of minimum over requested axis.
+        Return index of first occurrence of minimum over columns.
 
         NA/null values are excluded.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [1, 2, 3]})
+            >>> df
+                A	B
+            0	3	1
+            1	1	2
+            2	2	3
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
+            >>> df.idxmin()
+            A    1
+            B    0
+            dtype: Int64
+
         Returns:
-            Series: Indexes of minima along the specified axis.
+            Series: Indexes of minima along the columns.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 
     def idxmax(self):
         """
-        Return index of first occurrence of maximum over requested axis.
+        Return index of first occurrence of maximum over columns.
 
         NA/null values are excluded.
 
+        **Examples:**
+
+            >>> import bigframes.pandas as bpd
+            >>> bpd.options.display.progress_bar = None
+
+            >>> df = bpd.DataFrame({"A": [3, 1, 2], "B": [1, 2, 3]})
+            >>> df
+                A	B
+            0	3	1
+            1	1	2
+            2	2	3
+            <BLANKLINE>
+            [3 rows x 2 columns]
+
+            >>> df.idxmax()
+            A    0
+            B    2
+            dtype: Int64
+
         Returns:
-            Series: Indexes of maxima along the specified axis.
+            Series: Indexes of maxima along the columns.
         """
         raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
 

From b02fc2c1843e18d3a8d6894c64763f53e6af1b73 Mon Sep 17 00:00:00 2001
From: Shobhit Singh <shobs@google.com>
Date: Tue, 12 Dec 2023 02:34:27 +0000
Subject: [PATCH 18/20] fix: pin prerelease tests to pandas 2.1.3 to unblock
 e2e tests (#268)

* fix: pin prerelease tests to pandas 2.1.3 to unblock e2e tests

* specify excluded pandas version differently, to automatically test on a
release
---
 noxfile.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/noxfile.py b/noxfile.py
index 3b10a37fc7..2174e27529 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -518,7 +518,9 @@ def prerelease(session: nox.sessions.Session, tests_path):
         "--prefer-binary",
         "--pre",
         "--upgrade",
-        "pandas",
+        # TODO(shobs): Remove the exclusion of version 2.1.4 after
+        # https://github.com/pandas-dev/pandas/issues/56463 is resolved
+        "pandas!=2.1.4",
     )
     already_installed.add("pandas")
 

From 8766ac63f501929577f71e6bd2b523e92c43ba66 Mon Sep 17 00:00:00 2001
From: Shobhit Singh <shobs@google.com>
Date: Tue, 12 Dec 2023 03:22:15 +0000
Subject: [PATCH 19/20] test: migrate e2e presubmit tests to
 bigframes-load-testing project (#160)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BEGIN_COMMIT_OVERRIDE
fix: migrate e2e tests to bigframes-load-testing project
END_COMMIT_OVERRIDE

Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code!  That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes internal issue 307809767 🦕
---
 .kokoro/continuous/e2e.cfg                    |   10 +
 .kokoro/presubmit/e2e.cfg                     |   10 +
 CONTRIBUTING.rst                              |   39 +-
 bigframes/remote_function.py                  |   24 +-
 .../generative_ai/large_language_models.ipynb |    6 +-
 .../regression/easy_linear_regression.ipynb   | 1116 ++++++++---------
 owlbot.py                                     |    1 +
 samples/snippets/gen_ai_model_test.py         |    9 +-
 .../load_data_from_biquery_job_test.py        |   11 +-
 samples/snippets/quickstart_test.py           |    9 +-
 samples/snippets/remote_function_test.py      |    9 +-
 scripts/create_test_model_vertex.py           |   71 ++
 scripts/setup-project-for-testing.sh          |  256 ++++
 tests/system/conftest.py                      |    1 -
 tests/system/large/ml/test_decomposition.py   |    8 +-
 tests/system/large/ml/test_pipeline.py        |   21 +-
 tests/system/large/test_remote_function.py    |    6 +-
 tests/system/small/ml/conftest.py             |   12 +-
 tests/system/small/ml/test_core.py            |    4 +-
 tests/system/small/ml/test_decomposition.py   |   11 +-
 tests/system/small/ml/test_llm.py             |   28 +-
 tests/system/small/test_remote_function.py    |   34 +-
 tests/system/utils.py                         |   74 ++
 23 files changed, 1130 insertions(+), 640 deletions(-)
 create mode 100644 scripts/create_test_model_vertex.py
 create mode 100755 scripts/setup-project-for-testing.sh

diff --git a/.kokoro/continuous/e2e.cfg b/.kokoro/continuous/e2e.cfg
index 2f93a58212..7479346590 100644
--- a/.kokoro/continuous/e2e.cfg
+++ b/.kokoro/continuous/e2e.cfg
@@ -5,3 +5,13 @@ env_vars: {
     key: "NOX_SESSION"
     value: "unit_prerelease system_prerelease system_noextras e2e notebook samples"
 }
+
+env_vars: {
+    key: "GOOGLE_CLOUD_PROJECT"
+    value: "bigframes-load-testing"
+}
+
+env_vars: {
+    key: "BIGFRAMES_TEST_MODEL_VERTEX_ENDPOINT"
+    value: "https://us-central1-aiplatform.googleapis.com/v1/projects/272725758477/locations/us-central1/endpoints/590545496255234048"
+}
diff --git a/.kokoro/presubmit/e2e.cfg b/.kokoro/presubmit/e2e.cfg
index 2f93a58212..7479346590 100644
--- a/.kokoro/presubmit/e2e.cfg
+++ b/.kokoro/presubmit/e2e.cfg
@@ -5,3 +5,13 @@ env_vars: {
     key: "NOX_SESSION"
     value: "unit_prerelease system_prerelease system_noextras e2e notebook samples"
 }
+
+env_vars: {
+    key: "GOOGLE_CLOUD_PROJECT"
+    value: "bigframes-load-testing"
+}
+
+env_vars: {
+    key: "BIGFRAMES_TEST_MODEL_VERTEX_ENDPOINT"
+    value: "https://us-central1-aiplatform.googleapis.com/v1/projects/272725758477/locations/us-central1/endpoints/590545496255234048"
+}
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index f9103bfa72..5146b4bc7e 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -155,7 +155,44 @@ Running System Tests
   auth settings and change some configuration in your project to
   run all the tests.
 
-- System tests will be run against an actual project. You should use local credentials from gcloud when possible. See `Best practices for application authentication <https://cloud.google.com/docs/authentication/best-practices-applications#local_development_and_testing_with_the>`__. Some tests require a service account. For those tests see `Authenticating as a service account <https://cloud.google.com/docs/authentication/production>`__.
+- System tests will be run against an actual project. A project can be set in
+  the environment variable ``$GOOGLE_CLOUD_PROJECT``. If it is not set, the project
+  property configured in the `Google Cloud CLI <https://cloud.google.com/sdk/gcloud/reference/config/get>`__
+  is used; you can view it via ``gcloud config get project``
+  or change it via ``gcloud config set project <project-name>``. The following roles
+  carry the permissions to run the system tests in the project:
+
+  - `BigQuery User <https://cloud.google.com/bigquery/docs/access-control#bigquery.user>`__
+    to be able to create test datasets and run BigQuery jobs in the project.
+
+  - `BigQuery Connection Admin <https://cloud.google.com/bigquery/docs/access-control#bigquery.connectionAdmin>`__
+    to be able to use BigQuery connections in the project.
+
+  - `BigQuery Data Editor <https://cloud.google.com/bigquery/docs/access-control#bigquery.dataEditor>`__
+    to be able to create BigQuery remote functions in the project.
+
+  - `Browser <https://cloud.google.com/resource-manager/docs/access-control-proj#browser>`__
+    to be able to get the current IAM policy for the service accounts of the BigQuery connections in the project.
+
+  - `Cloud Functions Developer <https://cloud.google.com/functions/docs/reference/iam/roles#cloudfunctions.developer>`__
+    to be able to create cloud functions to support BigQuery DataFrames remote functions.
+
+  - `Service Account User <https://cloud.google.com/iam/docs/service-account-permissions#user-role>`__
+    to be able to use the project's service accounts.
+
+  - `Vertex AI User <https://cloud.google.com/vertex-ai/docs/general/access-control#aiplatform.user>`__
+    to be able to use the BigQuery DataFrames' ML integration with Vertex AI.
+
+- You can run the script ``scripts/setup-project-for-testing.sh <project-id> [<principal>]``
+  to set up a project for running system tests and optionally set up necessary
+  IAM roles for a principal (user/group/service-account). You need to have the following
+  IAM permissions to be able to run the setup script successfully:
+
+  - ``serviceusage.services.enable``
+  - ``bigquery.connections.create``
+  - ``resourcemanager.projects.setIamPolicy``
+
+- You should use local credentials from gcloud when possible. See `Best practices for application authentication <https://cloud.google.com/docs/authentication/best-practices-applications#local_development_and_testing_with_the>`__. Some tests require a service account. For those tests see `Authenticating as a service account <https://cloud.google.com/docs/authentication/production>`__.
 
 *************
 Test Coverage
diff --git a/bigframes/remote_function.py b/bigframes/remote_function.py
index 7280ac7d42..a899ebd371 100644
--- a/bigframes/remote_function.py
+++ b/bigframes/remote_function.py
@@ -411,13 +411,23 @@ def create_cloud_function(self, def_, cf_name, package_requirements=None):
             create_function_request.function = function
 
             # Create the cloud function and wait for it to be ready to use
-            operation = self._cloud_functions_client.create_function(
-                request=create_function_request
-            )
-            operation.result()
-
-            # Cleanup
-            os.remove(archive_path)
+            try:
+                operation = self._cloud_functions_client.create_function(
+                    request=create_function_request
+                )
+                operation.result()
+
+                # Cleanup
+                os.remove(archive_path)
+            except google.api_core.exceptions.AlreadyExists:
+                # If a cloud function with the same name already exists, let's
+                # update it
+                update_function_request = functions_v2.UpdateFunctionRequest()
+                update_function_request.function = function
+                operation = self._cloud_functions_client.update_function(
+                    request=update_function_request
+                )
+                operation.result()
 
         # Fetch the endpoint of the just created function
         endpoint = self.get_cloud_function_endpoint(cf_name)
diff --git a/notebooks/generative_ai/large_language_models.ipynb b/notebooks/generative_ai/large_language_models.ipynb
index 45a46c44af..2695ee9dc0 100644
--- a/notebooks/generative_ai/large_language_models.ipynb
+++ b/notebooks/generative_ai/large_language_models.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -22,12 +22,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
     "session = bigframes.pandas.get_global_session()\n",
-    "connection = \"bigframes-dev.us.bigframes-ml\""
+    "connection = f\"{session.bqclient.project}.us.bigframes-default-connection\""
    ]
   },
   {
diff --git a/notebooks/regression/easy_linear_regression.ipynb b/notebooks/regression/easy_linear_regression.ipynb
index c441a966ec..fdabd82a4b 100644
--- a/notebooks/regression/easy_linear_regression.ipynb
+++ b/notebooks/regression/easy_linear_regression.ipynb
@@ -26,48 +26,86 @@
     "## 1. Init & load data"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Import `bigframes.pandas` module and get the default session"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import bigframes.pandas\n",
+    "session = bigframes.pandas.get_global_session()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Define a dataset for storing BQML model, and create it if it does not exist."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "ffc6d6c7815a4a92903a08a11af6db11",
-       "version_major": 2,
-       "version_minor": 0
-      },
       "text/plain": [
-       "HTML(value='Query job d1e085ba-66d8-4631-bb51-50a17d0a6e51 is RUNNING. <a target=\"_blank\" href=\"https://consol…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "460f64979bc544a7bcdd2206ca1d3067",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HTML(value='Query job 4468d93d-c22c-43f4-a09b-262b5b830c0e is RUNNING. <a target=\"_blank\" href=\"https://consol…"
+       "Dataset(DatasetReference('shobs-test', 'bqml_tutorial'))"
       ]
      },
+     "execution_count": 23,
      "metadata": {},
-     "output_type": "display_data"
-    },
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dataset = f\"{session.bqclient.project}.bqml_tutorial\"\n",
+    "session.bqclient.create_dataset(dataset, exists_ok=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Define a model path"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "penguins_model = f\"{dataset}.penguins_model\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Read the penguins data."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
     {
      "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "9f6d3b4d6af3424f9ff2c434f0c3d00e",
-       "version_major": 2,
-       "version_minor": 0
-      },
+      "text/html": [
+       "Query job 525fc879-1f59-45e8-96b4-f9c67d244d06 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=shobs-test&j=bq:US:525fc879-1f59-45e8-96b4-f9c67d244d06&page=queryresults\">Open Job</a>"
+      ],
       "text/plain": [
-       "HTML(value='Query job 8fe1dc50-9d32-4466-9c2b-76d32cbde7c5 is DONE. 0 Bytes processed. <a target=\"_blank\" href…"
+       "<IPython.core.display.HTML object>"
       ]
      },
      "metadata": {},
@@ -75,13 +113,11 @@
     },
     {
      "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "042c351aa0944eeeab8b36254f88c072",
-       "version_major": 2,
-       "version_minor": 0
-      },
+      "text/html": [
+       "Query job 91aa1b30-2b0e-41eb-9bfb-4f6232913b31 is DONE. 28.9 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=shobs-test&j=bq:US:91aa1b30-2b0e-41eb-9bfb-4f6232913b31&page=queryresults\">Open Job</a>"
+      ],
       "text/plain": [
-       "HTML(value='Query job e40d99ae-1b3a-4a12-b4be-e264af8b22e5 is RUNNING. <a target=\"_blank\" href=\"https://consol…"
+       "<IPython.core.display.HTML object>"
       ]
      },
      "metadata": {},
@@ -121,250 +157,250 @@
        "    <tr>\n",
        "      <th>0</th>\n",
        "      <td>Adelie Penguin (Pygoscelis adeliae)</td>\n",
-       "      <td>Dream</td>\n",
-       "      <td>36.6</td>\n",
-       "      <td>18.4</td>\n",
-       "      <td>184.0</td>\n",
-       "      <td>3475.0</td>\n",
-       "      <td>FEMALE</td>\n",
+       "      <td>Biscoe</td>\n",
+       "      <td>40.1</td>\n",
+       "      <td>18.9</td>\n",
+       "      <td>188.0</td>\n",
+       "      <td>4300.0</td>\n",
+       "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
        "      <td>Adelie Penguin (Pygoscelis adeliae)</td>\n",
-       "      <td>Dream</td>\n",
-       "      <td>39.8</td>\n",
-       "      <td>19.1</td>\n",
-       "      <td>184.0</td>\n",
-       "      <td>4650.0</td>\n",
+       "      <td>Torgersen</td>\n",
+       "      <td>39.1</td>\n",
+       "      <td>18.7</td>\n",
+       "      <td>181.0</td>\n",
+       "      <td>3750.0</td>\n",
        "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
-       "      <td>Adelie Penguin (Pygoscelis adeliae)</td>\n",
-       "      <td>Dream</td>\n",
-       "      <td>40.9</td>\n",
-       "      <td>18.9</td>\n",
-       "      <td>184.0</td>\n",
-       "      <td>3900.0</td>\n",
-       "      <td>MALE</td>\n",
+       "      <td>Gentoo penguin (Pygoscelis papua)</td>\n",
+       "      <td>Biscoe</td>\n",
+       "      <td>47.4</td>\n",
+       "      <td>14.6</td>\n",
+       "      <td>212.0</td>\n",
+       "      <td>4725.0</td>\n",
+       "      <td>FEMALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
        "      <td>Chinstrap penguin (Pygoscelis antarctica)</td>\n",
        "      <td>Dream</td>\n",
-       "      <td>46.5</td>\n",
-       "      <td>17.9</td>\n",
-       "      <td>192.0</td>\n",
-       "      <td>3500.0</td>\n",
+       "      <td>42.5</td>\n",
+       "      <td>16.7</td>\n",
+       "      <td>187.0</td>\n",
+       "      <td>3350.0</td>\n",
        "      <td>FEMALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
        "      <td>Adelie Penguin (Pygoscelis adeliae)</td>\n",
-       "      <td>Dream</td>\n",
-       "      <td>37.3</td>\n",
-       "      <td>16.8</td>\n",
-       "      <td>192.0</td>\n",
-       "      <td>3000.0</td>\n",
-       "      <td>FEMALE</td>\n",
+       "      <td>Biscoe</td>\n",
+       "      <td>43.2</td>\n",
+       "      <td>19.0</td>\n",
+       "      <td>197.0</td>\n",
+       "      <td>4775.0</td>\n",
+       "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
-       "      <td>Adelie Penguin (Pygoscelis adeliae)</td>\n",
-       "      <td>Dream</td>\n",
-       "      <td>43.2</td>\n",
-       "      <td>18.5</td>\n",
-       "      <td>192.0</td>\n",
-       "      <td>4100.0</td>\n",
+       "      <td>Gentoo penguin (Pygoscelis papua)</td>\n",
+       "      <td>Biscoe</td>\n",
+       "      <td>46.7</td>\n",
+       "      <td>15.3</td>\n",
+       "      <td>219.0</td>\n",
+       "      <td>5200.0</td>\n",
        "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
-       "      <td>Chinstrap penguin (Pygoscelis antarctica)</td>\n",
-       "      <td>Dream</td>\n",
-       "      <td>46.9</td>\n",
-       "      <td>16.6</td>\n",
-       "      <td>192.0</td>\n",
-       "      <td>2700.0</td>\n",
-       "      <td>FEMALE</td>\n",
+       "      <td>Adelie Penguin (Pygoscelis adeliae)</td>\n",
+       "      <td>Biscoe</td>\n",
+       "      <td>41.3</td>\n",
+       "      <td>21.1</td>\n",
+       "      <td>195.0</td>\n",
+       "      <td>4400.0</td>\n",
+       "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
-       "      <td>Chinstrap penguin (Pygoscelis antarctica)</td>\n",
-       "      <td>Dream</td>\n",
-       "      <td>50.5</td>\n",
-       "      <td>18.4</td>\n",
-       "      <td>200.0</td>\n",
-       "      <td>3400.0</td>\n",
+       "      <td>Gentoo penguin (Pygoscelis papua)</td>\n",
+       "      <td>Biscoe</td>\n",
+       "      <td>45.2</td>\n",
+       "      <td>13.8</td>\n",
+       "      <td>215.0</td>\n",
+       "      <td>4750.0</td>\n",
        "      <td>FEMALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
-       "      <td>Chinstrap penguin (Pygoscelis antarctica)</td>\n",
-       "      <td>Dream</td>\n",
-       "      <td>49.5</td>\n",
-       "      <td>19.0</td>\n",
-       "      <td>200.0</td>\n",
-       "      <td>3800.0</td>\n",
-       "      <td>MALE</td>\n",
+       "      <td>Gentoo penguin (Pygoscelis papua)</td>\n",
+       "      <td>Biscoe</td>\n",
+       "      <td>46.5</td>\n",
+       "      <td>13.5</td>\n",
+       "      <td>210.0</td>\n",
+       "      <td>4550.0</td>\n",
+       "      <td>FEMALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
-       "      <td>Adelie Penguin (Pygoscelis adeliae)</td>\n",
-       "      <td>Dream</td>\n",
-       "      <td>40.2</td>\n",
-       "      <td>20.1</td>\n",
-       "      <td>200.0</td>\n",
-       "      <td>3975.0</td>\n",
-       "      <td>MALE</td>\n",
+       "      <td>Gentoo penguin (Pygoscelis papua)</td>\n",
+       "      <td>Biscoe</td>\n",
+       "      <td>50.5</td>\n",
+       "      <td>15.2</td>\n",
+       "      <td>216.0</td>\n",
+       "      <td>5000.0</td>\n",
+       "      <td>FEMALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>10</th>\n",
-       "      <td>Adelie Penguin (Pygoscelis adeliae)</td>\n",
-       "      <td>Dream</td>\n",
-       "      <td>40.8</td>\n",
-       "      <td>18.9</td>\n",
-       "      <td>208.0</td>\n",
-       "      <td>4300.0</td>\n",
+       "      <td>Gentoo penguin (Pygoscelis papua)</td>\n",
+       "      <td>Biscoe</td>\n",
+       "      <td>48.2</td>\n",
+       "      <td>15.6</td>\n",
+       "      <td>221.0</td>\n",
+       "      <td>5100.0</td>\n",
        "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>11</th>\n",
        "      <td>Adelie Penguin (Pygoscelis adeliae)</td>\n",
        "      <td>Dream</td>\n",
-       "      <td>39.0</td>\n",
-       "      <td>18.7</td>\n",
-       "      <td>185.0</td>\n",
-       "      <td>3650.0</td>\n",
-       "      <td>MALE</td>\n",
+       "      <td>38.1</td>\n",
+       "      <td>18.6</td>\n",
+       "      <td>190.0</td>\n",
+       "      <td>3700.0</td>\n",
+       "      <td>FEMALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>12</th>\n",
-       "      <td>Adelie Penguin (Pygoscelis adeliae)</td>\n",
-       "      <td>Dream</td>\n",
-       "      <td>37.0</td>\n",
-       "      <td>16.9</td>\n",
-       "      <td>185.0</td>\n",
-       "      <td>3000.0</td>\n",
-       "      <td>FEMALE</td>\n",
+       "      <td>Gentoo penguin (Pygoscelis papua)</td>\n",
+       "      <td>Biscoe</td>\n",
+       "      <td>50.7</td>\n",
+       "      <td>15.0</td>\n",
+       "      <td>223.0</td>\n",
+       "      <td>5550.0</td>\n",
+       "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>13</th>\n",
-       "      <td>Chinstrap penguin (Pygoscelis antarctica)</td>\n",
-       "      <td>Dream</td>\n",
-       "      <td>47.0</td>\n",
-       "      <td>17.3</td>\n",
-       "      <td>185.0</td>\n",
-       "      <td>3700.0</td>\n",
-       "      <td>FEMALE</td>\n",
+       "      <td>Adelie Penguin (Pygoscelis adeliae)</td>\n",
+       "      <td>Biscoe</td>\n",
+       "      <td>37.8</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>190.0</td>\n",
+       "      <td>4250.0</td>\n",
+       "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>14</th>\n",
        "      <td>Adelie Penguin (Pygoscelis adeliae)</td>\n",
-       "      <td>Dream</td>\n",
-       "      <td>34.0</td>\n",
-       "      <td>17.1</td>\n",
-       "      <td>185.0</td>\n",
-       "      <td>3400.0</td>\n",
+       "      <td>Biscoe</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>17.9</td>\n",
+       "      <td>190.0</td>\n",
+       "      <td>3450.0</td>\n",
        "      <td>FEMALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>15</th>\n",
-       "      <td>Adelie Penguin (Pygoscelis adeliae)</td>\n",
-       "      <td>Dream</td>\n",
-       "      <td>37.0</td>\n",
-       "      <td>16.5</td>\n",
-       "      <td>185.0</td>\n",
-       "      <td>3400.0</td>\n",
-       "      <td>FEMALE</td>\n",
+       "      <td>Gentoo penguin (Pygoscelis papua)</td>\n",
+       "      <td>Biscoe</td>\n",
+       "      <td>48.7</td>\n",
+       "      <td>15.7</td>\n",
+       "      <td>208.0</td>\n",
+       "      <td>5350.0</td>\n",
+       "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>16</th>\n",
-       "      <td>Chinstrap penguin (Pygoscelis antarctica)</td>\n",
-       "      <td>Dream</td>\n",
-       "      <td>45.7</td>\n",
-       "      <td>17.3</td>\n",
-       "      <td>193.0</td>\n",
-       "      <td>3600.0</td>\n",
-       "      <td>FEMALE</td>\n",
+       "      <td>Adelie Penguin (Pygoscelis adeliae)</td>\n",
+       "      <td>Torgersen</td>\n",
+       "      <td>34.6</td>\n",
+       "      <td>21.1</td>\n",
+       "      <td>198.0</td>\n",
+       "      <td>4400.0</td>\n",
+       "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>17</th>\n",
-       "      <td>Chinstrap penguin (Pygoscelis antarctica)</td>\n",
-       "      <td>Dream</td>\n",
-       "      <td>50.6</td>\n",
-       "      <td>19.4</td>\n",
-       "      <td>193.0</td>\n",
-       "      <td>3800.0</td>\n",
+       "      <td>Gentoo penguin (Pygoscelis papua)</td>\n",
+       "      <td>Biscoe</td>\n",
+       "      <td>46.8</td>\n",
+       "      <td>15.4</td>\n",
+       "      <td>215.0</td>\n",
+       "      <td>5150.0</td>\n",
        "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>18</th>\n",
-       "      <td>Adelie Penguin (Pygoscelis adeliae)</td>\n",
+       "      <td>Chinstrap penguin (Pygoscelis antarctica)</td>\n",
        "      <td>Dream</td>\n",
-       "      <td>39.7</td>\n",
-       "      <td>17.9</td>\n",
-       "      <td>193.0</td>\n",
-       "      <td>4250.0</td>\n",
+       "      <td>50.3</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>197.0</td>\n",
+       "      <td>3300.0</td>\n",
        "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>19</th>\n",
        "      <td>Adelie Penguin (Pygoscelis adeliae)</td>\n",
        "      <td>Dream</td>\n",
-       "      <td>37.8</td>\n",
+       "      <td>37.2</td>\n",
        "      <td>18.1</td>\n",
-       "      <td>193.0</td>\n",
-       "      <td>3750.0</td>\n",
+       "      <td>178.0</td>\n",
+       "      <td>3900.0</td>\n",
        "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>20</th>\n",
        "      <td>Chinstrap penguin (Pygoscelis antarctica)</td>\n",
        "      <td>Dream</td>\n",
-       "      <td>46.6</td>\n",
-       "      <td>17.8</td>\n",
-       "      <td>193.0</td>\n",
-       "      <td>3800.0</td>\n",
-       "      <td>FEMALE</td>\n",
+       "      <td>51.0</td>\n",
+       "      <td>18.8</td>\n",
+       "      <td>203.0</td>\n",
+       "      <td>4100.0</td>\n",
+       "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>21</th>\n",
-       "      <td>Chinstrap penguin (Pygoscelis antarctica)</td>\n",
-       "      <td>Dream</td>\n",
-       "      <td>51.3</td>\n",
-       "      <td>19.2</td>\n",
-       "      <td>193.0</td>\n",
-       "      <td>3650.0</td>\n",
-       "      <td>MALE</td>\n",
+       "      <td>Adelie Penguin (Pygoscelis adeliae)</td>\n",
+       "      <td>Biscoe</td>\n",
+       "      <td>40.5</td>\n",
+       "      <td>17.9</td>\n",
+       "      <td>187.0</td>\n",
+       "      <td>3200.0</td>\n",
+       "      <td>FEMALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>22</th>\n",
-       "      <td>Adelie Penguin (Pygoscelis adeliae)</td>\n",
-       "      <td>Dream</td>\n",
-       "      <td>40.2</td>\n",
-       "      <td>17.1</td>\n",
-       "      <td>193.0</td>\n",
-       "      <td>3400.0</td>\n",
+       "      <td>Gentoo penguin (Pygoscelis papua)</td>\n",
+       "      <td>Biscoe</td>\n",
+       "      <td>45.5</td>\n",
+       "      <td>13.9</td>\n",
+       "      <td>210.0</td>\n",
+       "      <td>4200.0</td>\n",
        "      <td>FEMALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>23</th>\n",
        "      <td>Adelie Penguin (Pygoscelis adeliae)</td>\n",
        "      <td>Dream</td>\n",
-       "      <td>36.8</td>\n",
+       "      <td>42.2</td>\n",
        "      <td>18.5</td>\n",
-       "      <td>193.0</td>\n",
-       "      <td>3500.0</td>\n",
+       "      <td>180.0</td>\n",
+       "      <td>3550.0</td>\n",
        "      <td>FEMALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>24</th>\n",
        "      <td>Chinstrap penguin (Pygoscelis antarctica)</td>\n",
        "      <td>Dream</td>\n",
-       "      <td>49.6</td>\n",
-       "      <td>18.2</td>\n",
-       "      <td>193.0</td>\n",
+       "      <td>51.7</td>\n",
+       "      <td>20.3</td>\n",
+       "      <td>194.0</td>\n",
        "      <td>3775.0</td>\n",
        "      <td>MALE</td>\n",
        "    </tr>\n",
@@ -374,74 +410,72 @@
        "</div>[344 rows x 7 columns in total]"
       ],
       "text/plain": [
-       "                                      species island  culmen_length_mm  \\\n",
-       "0         Adelie Penguin (Pygoscelis adeliae)  Dream              36.6   \n",
-       "1         Adelie Penguin (Pygoscelis adeliae)  Dream              39.8   \n",
-       "2         Adelie Penguin (Pygoscelis adeliae)  Dream              40.9   \n",
-       "3   Chinstrap penguin (Pygoscelis antarctica)  Dream              46.5   \n",
-       "4         Adelie Penguin (Pygoscelis adeliae)  Dream              37.3   \n",
-       "5         Adelie Penguin (Pygoscelis adeliae)  Dream              43.2   \n",
-       "6   Chinstrap penguin (Pygoscelis antarctica)  Dream              46.9   \n",
-       "7   Chinstrap penguin (Pygoscelis antarctica)  Dream              50.5   \n",
-       "8   Chinstrap penguin (Pygoscelis antarctica)  Dream              49.5   \n",
-       "9         Adelie Penguin (Pygoscelis adeliae)  Dream              40.2   \n",
-       "10        Adelie Penguin (Pygoscelis adeliae)  Dream              40.8   \n",
-       "11        Adelie Penguin (Pygoscelis adeliae)  Dream              39.0   \n",
-       "12        Adelie Penguin (Pygoscelis adeliae)  Dream              37.0   \n",
-       "13  Chinstrap penguin (Pygoscelis antarctica)  Dream              47.0   \n",
-       "14        Adelie Penguin (Pygoscelis adeliae)  Dream              34.0   \n",
-       "15        Adelie Penguin (Pygoscelis adeliae)  Dream              37.0   \n",
-       "16  Chinstrap penguin (Pygoscelis antarctica)  Dream              45.7   \n",
-       "17  Chinstrap penguin (Pygoscelis antarctica)  Dream              50.6   \n",
-       "18        Adelie Penguin (Pygoscelis adeliae)  Dream              39.7   \n",
-       "19        Adelie Penguin (Pygoscelis adeliae)  Dream              37.8   \n",
-       "20  Chinstrap penguin (Pygoscelis antarctica)  Dream              46.6   \n",
-       "21  Chinstrap penguin (Pygoscelis antarctica)  Dream              51.3   \n",
-       "22        Adelie Penguin (Pygoscelis adeliae)  Dream              40.2   \n",
-       "23        Adelie Penguin (Pygoscelis adeliae)  Dream              36.8   \n",
-       "24  Chinstrap penguin (Pygoscelis antarctica)  Dream              49.6   \n",
+       "                                      species     island  culmen_length_mm  \\\n",
+       "0         Adelie Penguin (Pygoscelis adeliae)     Biscoe              40.1   \n",
+       "1         Adelie Penguin (Pygoscelis adeliae)  Torgersen              39.1   \n",
+       "2           Gentoo penguin (Pygoscelis papua)     Biscoe              47.4   \n",
+       "3   Chinstrap penguin (Pygoscelis antarctica)      Dream              42.5   \n",
+       "4         Adelie Penguin (Pygoscelis adeliae)     Biscoe              43.2   \n",
+       "5           Gentoo penguin (Pygoscelis papua)     Biscoe              46.7   \n",
+       "6         Adelie Penguin (Pygoscelis adeliae)     Biscoe              41.3   \n",
+       "7           Gentoo penguin (Pygoscelis papua)     Biscoe              45.2   \n",
+       "8           Gentoo penguin (Pygoscelis papua)     Biscoe              46.5   \n",
+       "9           Gentoo penguin (Pygoscelis papua)     Biscoe              50.5   \n",
+       "10          Gentoo penguin (Pygoscelis papua)     Biscoe              48.2   \n",
+       "11        Adelie Penguin (Pygoscelis adeliae)      Dream              38.1   \n",
+       "12          Gentoo penguin (Pygoscelis papua)     Biscoe              50.7   \n",
+       "13        Adelie Penguin (Pygoscelis adeliae)     Biscoe              37.8   \n",
+       "14        Adelie Penguin (Pygoscelis adeliae)     Biscoe              35.0   \n",
+       "15          Gentoo penguin (Pygoscelis papua)     Biscoe              48.7   \n",
+       "16        Adelie Penguin (Pygoscelis adeliae)  Torgersen              34.6   \n",
+       "17          Gentoo penguin (Pygoscelis papua)     Biscoe              46.8   \n",
+       "18  Chinstrap penguin (Pygoscelis antarctica)      Dream              50.3   \n",
+       "19        Adelie Penguin (Pygoscelis adeliae)      Dream              37.2   \n",
+       "20  Chinstrap penguin (Pygoscelis antarctica)      Dream              51.0   \n",
+       "21        Adelie Penguin (Pygoscelis adeliae)     Biscoe              40.5   \n",
+       "22          Gentoo penguin (Pygoscelis papua)     Biscoe              45.5   \n",
+       "23        Adelie Penguin (Pygoscelis adeliae)      Dream              42.2   \n",
+       "24  Chinstrap penguin (Pygoscelis antarctica)      Dream              51.7   \n",
        "\n",
        "    culmen_depth_mm  flipper_length_mm  body_mass_g     sex  \n",
-       "0              18.4              184.0       3475.0  FEMALE  \n",
-       "1              19.1              184.0       4650.0    MALE  \n",
-       "2              18.9              184.0       3900.0    MALE  \n",
-       "3              17.9              192.0       3500.0  FEMALE  \n",
-       "4              16.8              192.0       3000.0  FEMALE  \n",
-       "5              18.5              192.0       4100.0    MALE  \n",
-       "6              16.6              192.0       2700.0  FEMALE  \n",
-       "7              18.4              200.0       3400.0  FEMALE  \n",
-       "8              19.0              200.0       3800.0    MALE  \n",
-       "9              20.1              200.0       3975.0    MALE  \n",
-       "10             18.9              208.0       4300.0    MALE  \n",
-       "11             18.7              185.0       3650.0    MALE  \n",
-       "12             16.9              185.0       3000.0  FEMALE  \n",
-       "13             17.3              185.0       3700.0  FEMALE  \n",
-       "14             17.1              185.0       3400.0  FEMALE  \n",
-       "15             16.5              185.0       3400.0  FEMALE  \n",
-       "16             17.3              193.0       3600.0  FEMALE  \n",
-       "17             19.4              193.0       3800.0    MALE  \n",
-       "18             17.9              193.0       4250.0    MALE  \n",
-       "19             18.1              193.0       3750.0    MALE  \n",
-       "20             17.8              193.0       3800.0  FEMALE  \n",
-       "21             19.2              193.0       3650.0    MALE  \n",
-       "22             17.1              193.0       3400.0  FEMALE  \n",
-       "23             18.5              193.0       3500.0  FEMALE  \n",
-       "24             18.2              193.0       3775.0    MALE  \n",
+       "0              18.9              188.0       4300.0    MALE  \n",
+       "1              18.7              181.0       3750.0    MALE  \n",
+       "2              14.6              212.0       4725.0  FEMALE  \n",
+       "3              16.7              187.0       3350.0  FEMALE  \n",
+       "4              19.0              197.0       4775.0    MALE  \n",
+       "5              15.3              219.0       5200.0    MALE  \n",
+       "6              21.1              195.0       4400.0    MALE  \n",
+       "7              13.8              215.0       4750.0  FEMALE  \n",
+       "8              13.5              210.0       4550.0  FEMALE  \n",
+       "9              15.2              216.0       5000.0  FEMALE  \n",
+       "10             15.6              221.0       5100.0    MALE  \n",
+       "11             18.6              190.0       3700.0  FEMALE  \n",
+       "12             15.0              223.0       5550.0    MALE  \n",
+       "13             20.0              190.0       4250.0    MALE  \n",
+       "14             17.9              190.0       3450.0  FEMALE  \n",
+       "15             15.7              208.0       5350.0    MALE  \n",
+       "16             21.1              198.0       4400.0    MALE  \n",
+       "17             15.4              215.0       5150.0    MALE  \n",
+       "18             20.0              197.0       3300.0    MALE  \n",
+       "19             18.1              178.0       3900.0    MALE  \n",
+       "20             18.8              203.0       4100.0    MALE  \n",
+       "21             17.9              187.0       3200.0  FEMALE  \n",
+       "22             13.9              210.0       4200.0  FEMALE  \n",
+       "23             18.5              180.0       3550.0  FEMALE  \n",
+       "24             20.3              194.0       3775.0    MALE  \n",
        "...\n",
        "\n",
        "[344 rows x 7 columns]"
       ]
      },
-     "execution_count": 20,
+     "execution_count": 25,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "import bigframes.pandas\n",
-    "\n",
     "# read a BigQuery table to a BigQuery DataFrame\n",
-    "df = bigframes.pandas.read_gbq(\"bigframes-dev.bqml_tutorial.penguins\")\n",
+    "df = bigframes.pandas.read_gbq(f\"bigquery-public-data.ml_datasets.penguins\")\n",
     "\n",
     "# take a peek at the dataframe\n",
     "df"
@@ -457,18 +491,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "0ddb322731fe4b80b2904e1610862c31",
-       "version_major": 2,
-       "version_minor": 0
-      },
+      "text/html": [
+       "Query job d2bd7c5e-2652-4c0d-8495-8ef65e89031b is DONE. 28.9 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=shobs-test&j=bq:US:d2bd7c5e-2652-4c0d-8495-8ef65e89031b&page=queryresults\">Open Job</a>"
+      ],
       "text/plain": [
-       "HTML(value='Query job 7d289291-5c60-4d8f-b476-e46cb2ab06a7 is DONE. 28.9 kB processed. <a target=\"_blank\" href…"
+       "<IPython.core.display.HTML object>"
       ]
      },
      "metadata": {},
@@ -476,13 +508,11 @@
     },
     {
      "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "9f91e7a3d7ed416096d7660a110e0eab",
-       "version_major": 2,
-       "version_minor": 0
-      },
+      "text/html": [
+       "Query job 92f0a5e5-bc61-426f-a9ef-213a1c376851 is DONE. 28.9 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=shobs-test&j=bq:US:92f0a5e5-bc61-426f-a9ef-213a1c376851&page=queryresults\">Open Job</a>"
+      ],
       "text/plain": [
-       "HTML(value='Query job 8411db98-9ec3-4655-a40f-f9bf272e2403 is RUNNING. <a target=\"_blank\" href=\"https://consol…"
+       "<IPython.core.display.HTML object>"
       ]
      },
      "metadata": {},
@@ -520,294 +550,294 @@
        "  <tbody>\n",
        "    <tr>\n",
        "      <th>0</th>\n",
-       "      <td>Dream</td>\n",
-       "      <td>36.6</td>\n",
-       "      <td>18.4</td>\n",
-       "      <td>184.0</td>\n",
-       "      <td>3475.0</td>\n",
-       "      <td>FEMALE</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>Dream</td>\n",
-       "      <td>39.8</td>\n",
-       "      <td>19.1</td>\n",
-       "      <td>184.0</td>\n",
-       "      <td>4650.0</td>\n",
+       "      <td>Biscoe</td>\n",
+       "      <td>40.1</td>\n",
+       "      <td>18.9</td>\n",
+       "      <td>188.0</td>\n",
+       "      <td>4300.0</td>\n",
        "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>Dream</td>\n",
-       "      <td>40.9</td>\n",
-       "      <td>18.9</td>\n",
-       "      <td>184.0</td>\n",
-       "      <td>3900.0</td>\n",
+       "      <th>1</th>\n",
+       "      <td>Torgersen</td>\n",
+       "      <td>39.1</td>\n",
+       "      <td>18.7</td>\n",
+       "      <td>181.0</td>\n",
+       "      <td>3750.0</td>\n",
        "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
-       "      <td>Dream</td>\n",
-       "      <td>37.3</td>\n",
-       "      <td>16.8</td>\n",
-       "      <td>192.0</td>\n",
-       "      <td>3000.0</td>\n",
-       "      <td>FEMALE</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>Dream</td>\n",
+       "      <td>Biscoe</td>\n",
        "      <td>43.2</td>\n",
-       "      <td>18.5</td>\n",
-       "      <td>192.0</td>\n",
-       "      <td>4100.0</td>\n",
-       "      <td>MALE</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>Dream</td>\n",
-       "      <td>40.2</td>\n",
-       "      <td>20.1</td>\n",
-       "      <td>200.0</td>\n",
-       "      <td>3975.0</td>\n",
+       "      <td>19.0</td>\n",
+       "      <td>197.0</td>\n",
+       "      <td>4775.0</td>\n",
        "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>10</th>\n",
-       "      <td>Dream</td>\n",
-       "      <td>40.8</td>\n",
-       "      <td>18.9</td>\n",
-       "      <td>208.0</td>\n",
-       "      <td>4300.0</td>\n",
+       "      <th>6</th>\n",
+       "      <td>Biscoe</td>\n",
+       "      <td>41.3</td>\n",
+       "      <td>21.1</td>\n",
+       "      <td>195.0</td>\n",
+       "      <td>4400.0</td>\n",
        "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>11</th>\n",
        "      <td>Dream</td>\n",
-       "      <td>39.0</td>\n",
-       "      <td>18.7</td>\n",
-       "      <td>185.0</td>\n",
-       "      <td>3650.0</td>\n",
-       "      <td>MALE</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>12</th>\n",
-       "      <td>Dream</td>\n",
-       "      <td>37.0</td>\n",
-       "      <td>16.9</td>\n",
-       "      <td>185.0</td>\n",
-       "      <td>3000.0</td>\n",
+       "      <td>38.1</td>\n",
+       "      <td>18.6</td>\n",
+       "      <td>190.0</td>\n",
+       "      <td>3700.0</td>\n",
        "      <td>FEMALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>14</th>\n",
-       "      <td>Dream</td>\n",
-       "      <td>34.0</td>\n",
-       "      <td>17.1</td>\n",
-       "      <td>185.0</td>\n",
-       "      <td>3400.0</td>\n",
-       "      <td>FEMALE</td>\n",
+       "      <th>13</th>\n",
+       "      <td>Biscoe</td>\n",
+       "      <td>37.8</td>\n",
+       "      <td>20.0</td>\n",
+       "      <td>190.0</td>\n",
+       "      <td>4250.0</td>\n",
+       "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>15</th>\n",
-       "      <td>Dream</td>\n",
-       "      <td>37.0</td>\n",
-       "      <td>16.5</td>\n",
-       "      <td>185.0</td>\n",
-       "      <td>3400.0</td>\n",
+       "      <th>14</th>\n",
+       "      <td>Biscoe</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>17.9</td>\n",
+       "      <td>190.0</td>\n",
+       "      <td>3450.0</td>\n",
        "      <td>FEMALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>18</th>\n",
-       "      <td>Dream</td>\n",
-       "      <td>39.7</td>\n",
-       "      <td>17.9</td>\n",
-       "      <td>193.0</td>\n",
-       "      <td>4250.0</td>\n",
+       "      <th>16</th>\n",
+       "      <td>Torgersen</td>\n",
+       "      <td>34.6</td>\n",
+       "      <td>21.1</td>\n",
+       "      <td>198.0</td>\n",
+       "      <td>4400.0</td>\n",
        "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>19</th>\n",
        "      <td>Dream</td>\n",
-       "      <td>37.8</td>\n",
+       "      <td>37.2</td>\n",
        "      <td>18.1</td>\n",
-       "      <td>193.0</td>\n",
-       "      <td>3750.0</td>\n",
+       "      <td>178.0</td>\n",
+       "      <td>3900.0</td>\n",
        "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>22</th>\n",
-       "      <td>Dream</td>\n",
-       "      <td>40.2</td>\n",
-       "      <td>17.1</td>\n",
-       "      <td>193.0</td>\n",
-       "      <td>3400.0</td>\n",
+       "      <th>21</th>\n",
+       "      <td>Biscoe</td>\n",
+       "      <td>40.5</td>\n",
+       "      <td>17.9</td>\n",
+       "      <td>187.0</td>\n",
+       "      <td>3200.0</td>\n",
        "      <td>FEMALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>23</th>\n",
        "      <td>Dream</td>\n",
-       "      <td>36.8</td>\n",
+       "      <td>42.2</td>\n",
        "      <td>18.5</td>\n",
-       "      <td>193.0</td>\n",
-       "      <td>3500.0</td>\n",
+       "      <td>180.0</td>\n",
+       "      <td>3550.0</td>\n",
        "      <td>FEMALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>26</th>\n",
+       "      <th>30</th>\n",
        "      <td>Dream</td>\n",
-       "      <td>41.5</td>\n",
-       "      <td>18.5</td>\n",
-       "      <td>201.0</td>\n",
-       "      <td>4000.0</td>\n",
+       "      <td>39.2</td>\n",
+       "      <td>21.1</td>\n",
+       "      <td>196.0</td>\n",
+       "      <td>4150.0</td>\n",
        "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>31</th>\n",
-       "      <td>Dream</td>\n",
-       "      <td>33.1</td>\n",
-       "      <td>16.1</td>\n",
-       "      <td>178.0</td>\n",
-       "      <td>2900.0</td>\n",
-       "      <td>FEMALE</td>\n",
+       "      <th>32</th>\n",
+       "      <td>Torgersen</td>\n",
+       "      <td>42.9</td>\n",
+       "      <td>17.6</td>\n",
+       "      <td>196.0</td>\n",
+       "      <td>4700.0</td>\n",
+       "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>32</th>\n",
+       "      <th>38</th>\n",
        "      <td>Dream</td>\n",
-       "      <td>37.2</td>\n",
-       "      <td>18.1</td>\n",
-       "      <td>178.0</td>\n",
+       "      <td>41.1</td>\n",
+       "      <td>17.5</td>\n",
+       "      <td>190.0</td>\n",
        "      <td>3900.0</td>\n",
        "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>33</th>\n",
-       "      <td>Dream</td>\n",
-       "      <td>39.5</td>\n",
-       "      <td>16.7</td>\n",
-       "      <td>178.0</td>\n",
-       "      <td>3250.0</td>\n",
+       "      <th>40</th>\n",
+       "      <td>Torgersen</td>\n",
+       "      <td>38.6</td>\n",
+       "      <td>21.2</td>\n",
+       "      <td>191.0</td>\n",
+       "      <td>3800.0</td>\n",
+       "      <td>MALE</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>42</th>\n",
+       "      <td>Biscoe</td>\n",
+       "      <td>35.5</td>\n",
+       "      <td>16.2</td>\n",
+       "      <td>195.0</td>\n",
+       "      <td>3350.0</td>\n",
        "      <td>FEMALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>35</th>\n",
+       "      <th>44</th>\n",
        "      <td>Dream</td>\n",
-       "      <td>36.0</td>\n",
-       "      <td>18.5</td>\n",
+       "      <td>39.2</td>\n",
+       "      <td>18.6</td>\n",
+       "      <td>190.0</td>\n",
+       "      <td>4250.0</td>\n",
+       "      <td>MALE</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>45</th>\n",
+       "      <td>Torgersen</td>\n",
+       "      <td>35.2</td>\n",
+       "      <td>15.9</td>\n",
        "      <td>186.0</td>\n",
-       "      <td>3100.0</td>\n",
+       "      <td>3050.0</td>\n",
        "      <td>FEMALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>36</th>\n",
+       "      <th>46</th>\n",
        "      <td>Dream</td>\n",
+       "      <td>43.2</td>\n",
+       "      <td>18.5</td>\n",
+       "      <td>192.0</td>\n",
+       "      <td>4100.0</td>\n",
+       "      <td>MALE</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>49</th>\n",
+       "      <td>Biscoe</td>\n",
        "      <td>39.6</td>\n",
-       "      <td>18.1</td>\n",
+       "      <td>17.7</td>\n",
        "      <td>186.0</td>\n",
-       "      <td>4450.0</td>\n",
-       "      <td>MALE</td>\n",
+       "      <td>3500.0</td>\n",
+       "      <td>FEMALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>38</th>\n",
-       "      <td>Dream</td>\n",
-       "      <td>41.3</td>\n",
+       "      <th>53</th>\n",
+       "      <td>Biscoe</td>\n",
+       "      <td>45.6</td>\n",
        "      <td>20.3</td>\n",
-       "      <td>194.0</td>\n",
-       "      <td>3550.0</td>\n",
+       "      <td>191.0</td>\n",
+       "      <td>4600.0</td>\n",
        "      <td>MALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>41</th>\n",
-       "      <td>Dream</td>\n",
-       "      <td>35.7</td>\n",
-       "      <td>18.0</td>\n",
-       "      <td>202.0</td>\n",
-       "      <td>3550.0</td>\n",
+       "      <th>58</th>\n",
+       "      <td>Torgersen</td>\n",
+       "      <td>40.9</td>\n",
+       "      <td>16.8</td>\n",
+       "      <td>191.0</td>\n",
+       "      <td>3700.0</td>\n",
        "      <td>FEMALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>51</th>\n",
-       "      <td>Dream</td>\n",
-       "      <td>38.1</td>\n",
-       "      <td>17.6</td>\n",
-       "      <td>187.0</td>\n",
-       "      <td>3425.0</td>\n",
+       "      <th>60</th>\n",
+       "      <td>Torgersen</td>\n",
+       "      <td>40.3</td>\n",
+       "      <td>18.0</td>\n",
+       "      <td>195.0</td>\n",
+       "      <td>3250.0</td>\n",
        "      <td>FEMALE</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>53</th>\n",
+       "      <th>62</th>\n",
        "      <td>Dream</td>\n",
        "      <td>36.0</td>\n",
-       "      <td>17.1</td>\n",
-       "      <td>187.0</td>\n",
-       "      <td>3700.0</td>\n",
+       "      <td>18.5</td>\n",
+       "      <td>186.0</td>\n",
+       "      <td>3100.0</td>\n",
        "      <td>FEMALE</td>\n",
        "    </tr>\n",
+       "    <tr>\n",
+       "      <th>63</th>\n",
+       "      <td>Torgersen</td>\n",
+       "      <td>39.3</td>\n",
+       "      <td>20.6</td>\n",
+       "      <td>190.0</td>\n",
+       "      <td>3650.0</td>\n",
+       "      <td>MALE</td>\n",
+       "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "<p>25 rows × 6 columns</p>\n",
        "</div>[146 rows x 6 columns in total]"
       ],
       "text/plain": [
-       "   island  culmen_length_mm  culmen_depth_mm  flipper_length_mm  body_mass_g  \\\n",
-       "0   Dream              36.6             18.4              184.0       3475.0   \n",
-       "1   Dream              39.8             19.1              184.0       4650.0   \n",
-       "2   Dream              40.9             18.9              184.0       3900.0   \n",
-       "4   Dream              37.3             16.8              192.0       3000.0   \n",
-       "5   Dream              43.2             18.5              192.0       4100.0   \n",
-       "9   Dream              40.2             20.1              200.0       3975.0   \n",
-       "10  Dream              40.8             18.9              208.0       4300.0   \n",
-       "11  Dream              39.0             18.7              185.0       3650.0   \n",
-       "12  Dream              37.0             16.9              185.0       3000.0   \n",
-       "14  Dream              34.0             17.1              185.0       3400.0   \n",
-       "15  Dream              37.0             16.5              185.0       3400.0   \n",
-       "18  Dream              39.7             17.9              193.0       4250.0   \n",
-       "19  Dream              37.8             18.1              193.0       3750.0   \n",
-       "22  Dream              40.2             17.1              193.0       3400.0   \n",
-       "23  Dream              36.8             18.5              193.0       3500.0   \n",
-       "26  Dream              41.5             18.5              201.0       4000.0   \n",
-       "31  Dream              33.1             16.1              178.0       2900.0   \n",
-       "32  Dream              37.2             18.1              178.0       3900.0   \n",
-       "33  Dream              39.5             16.7              178.0       3250.0   \n",
-       "35  Dream              36.0             18.5              186.0       3100.0   \n",
-       "36  Dream              39.6             18.1              186.0       4450.0   \n",
-       "38  Dream              41.3             20.3              194.0       3550.0   \n",
-       "41  Dream              35.7             18.0              202.0       3550.0   \n",
-       "51  Dream              38.1             17.6              187.0       3425.0   \n",
-       "53  Dream              36.0             17.1              187.0       3700.0   \n",
+       "       island  culmen_length_mm  culmen_depth_mm  flipper_length_mm  \\\n",
+       "0      Biscoe              40.1             18.9              188.0   \n",
+       "1   Torgersen              39.1             18.7              181.0   \n",
+       "4      Biscoe              43.2             19.0              197.0   \n",
+       "6      Biscoe              41.3             21.1              195.0   \n",
+       "11      Dream              38.1             18.6              190.0   \n",
+       "13     Biscoe              37.8             20.0              190.0   \n",
+       "14     Biscoe              35.0             17.9              190.0   \n",
+       "16  Torgersen              34.6             21.1              198.0   \n",
+       "19      Dream              37.2             18.1              178.0   \n",
+       "21     Biscoe              40.5             17.9              187.0   \n",
+       "23      Dream              42.2             18.5              180.0   \n",
+       "30      Dream              39.2             21.1              196.0   \n",
+       "32  Torgersen              42.9             17.6              196.0   \n",
+       "38      Dream              41.1             17.5              190.0   \n",
+       "40  Torgersen              38.6             21.2              191.0   \n",
+       "42     Biscoe              35.5             16.2              195.0   \n",
+       "44      Dream              39.2             18.6              190.0   \n",
+       "45  Torgersen              35.2             15.9              186.0   \n",
+       "46      Dream              43.2             18.5              192.0   \n",
+       "49     Biscoe              39.6             17.7              186.0   \n",
+       "53     Biscoe              45.6             20.3              191.0   \n",
+       "58  Torgersen              40.9             16.8              191.0   \n",
+       "60  Torgersen              40.3             18.0              195.0   \n",
+       "62      Dream              36.0             18.5              186.0   \n",
+       "63  Torgersen              39.3             20.6              190.0   \n",
        "\n",
-       "       sex  \n",
-       "0   FEMALE  \n",
-       "1     MALE  \n",
-       "2     MALE  \n",
-       "4   FEMALE  \n",
-       "5     MALE  \n",
-       "9     MALE  \n",
-       "10    MALE  \n",
-       "11    MALE  \n",
-       "12  FEMALE  \n",
-       "14  FEMALE  \n",
-       "15  FEMALE  \n",
-       "18    MALE  \n",
-       "19    MALE  \n",
-       "22  FEMALE  \n",
-       "23  FEMALE  \n",
-       "26    MALE  \n",
-       "31  FEMALE  \n",
-       "32    MALE  \n",
-       "33  FEMALE  \n",
-       "35  FEMALE  \n",
-       "36    MALE  \n",
-       "38    MALE  \n",
-       "41  FEMALE  \n",
-       "51  FEMALE  \n",
-       "53  FEMALE  \n",
+       "    body_mass_g     sex  \n",
+       "0        4300.0    MALE  \n",
+       "1        3750.0    MALE  \n",
+       "4        4775.0    MALE  \n",
+       "6        4400.0    MALE  \n",
+       "11       3700.0  FEMALE  \n",
+       "13       4250.0    MALE  \n",
+       "14       3450.0  FEMALE  \n",
+       "16       4400.0    MALE  \n",
+       "19       3900.0    MALE  \n",
+       "21       3200.0  FEMALE  \n",
+       "23       3550.0  FEMALE  \n",
+       "30       4150.0    MALE  \n",
+       "32       4700.0    MALE  \n",
+       "38       3900.0    MALE  \n",
+       "40       3800.0    MALE  \n",
+       "42       3350.0  FEMALE  \n",
+       "44       4250.0    MALE  \n",
+       "45       3050.0  FEMALE  \n",
+       "46       4100.0    MALE  \n",
+       "49       3500.0  FEMALE  \n",
+       "53       4600.0    MALE  \n",
+       "58       3700.0  FEMALE  \n",
+       "60       3250.0  FEMALE  \n",
+       "62       3100.0  FEMALE  \n",
+       "63       3650.0    MALE  \n",
        "...\n",
        "\n",
        "[146 rows x 6 columns]"
       ]
      },
-     "execution_count": 21,
+     "execution_count": 26,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -828,7 +858,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 27,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -850,18 +880,40 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "cf14ebed505a4a92b4c72f51c82efe55",
-       "version_major": 2,
-       "version_minor": 0
-      },
+      "text/html": [
+       "Query job 43c8fdc2-0bc3-4607-a36d-5bee87c894d8 is DONE. 28.9 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=shobs-test&j=bq:US:43c8fdc2-0bc3-4607-a36d-5bee87c894d8&page=queryresults\">Open Job</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Query job 97e0c84d-aa6a-4197-9377-740d973ea44d is DONE. 28.9 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=shobs-test&j=bq:US:97e0c84d-aa6a-4197-9377-740d973ea44d&page=queryresults\">Open Job</a>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/html": [
+       "Query job 726b9a5e-48a1-4ced-ac34-fa028dcb2bf4 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=shobs-test&j=bq:US:726b9a5e-48a1-4ced-ac34-fa028dcb2bf4&page=queryresults\">Open Job</a>"
+      ],
       "text/plain": [
-       "HTML(value='Query job dcef36e5-4bd6-40f8-88c6-72e84360533f is RUNNING. <a target=\"_blank\" href=\"https://consol…"
+       "<IPython.core.display.HTML object>"
       ]
      },
      "metadata": {},
@@ -873,7 +925,7 @@
        "LinearRegression()"
       ]
      },
-     "execution_count": 23,
+     "execution_count": 28,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -890,60 +942,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 29,
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "66af192d9a784994b9d4a48a49c70721",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HTML(value='Query job 87895ee3-81d0-4267-8a50-ab00e04664a7 is RUNNING. <a target=\"_blank\" href=\"https://consol…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "23ae404428394a5e8a42e12e26ad52b8",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HTML(value='Query job f00512e0-983a-4e93-b209-58205ebad99f is RUNNING. <a target=\"_blank\" href=\"https://consol…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "f168bfb949b6422192c61c5c8fa0fd94",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HTML(value='Query job 51ba4529-5be9-4e9f-aae6-3c80dd4b36b8 is RUNNING. <a target=\"_blank\" href=\"https://consol…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "6daeb6a96f834a60a839cc9144c6d3a9",
-       "version_major": 2,
-       "version_minor": 0
-      },
+      "text/html": [
+       "Query job 28975567-2526-40f7-a7be-9dee6f782b4e is DONE. 9.5 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=shobs-test&j=bq:US:28975567-2526-40f7-a7be-9dee6f782b4e&page=queryresults\">Open Job</a>"
+      ],
       "text/plain": [
-       "HTML(value='Query job 2e3a6603-9f0e-44ff-9086-2e14ad50bd25 is RUNNING. <a target=\"_blank\" href=\"https://consol…"
+       "<IPython.core.display.HTML object>"
       ]
      },
      "metadata": {},
@@ -951,13 +959,11 @@
     },
     {
      "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "163d87d9a2274142b31f5aafa145357a",
-       "version_major": 2,
-       "version_minor": 0
-      },
+      "text/html": [
+       "Query job 5c71d3d9-0e1c-45bd-866f-1f98f056260d is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=shobs-test&j=bq:US:5c71d3d9-0e1c-45bd-866f-1f98f056260d&page=queryresults\">Open Job</a>"
+      ],
       "text/plain": [
-       "HTML(value='Query job 357878f9-b705-4a03-aeeb-818a51873724 is DONE. 0 Bytes processed. <a target=\"_blank\" href…"
+       "<IPython.core.display.HTML object>"
       ]
      },
      "metadata": {},
@@ -965,13 +971,11 @@
     },
     {
      "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "3d307bbbd60e431a8d5bbd2ef7c41e2b",
-       "version_major": 2,
-       "version_minor": 0
-      },
+      "text/html": [
+       "Query job 890767f7-a83b-469a-9f3e-abd5667f8202 is DONE. 48 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=shobs-test&j=bq:US:890767f7-a83b-469a-9f3e-abd5667f8202&page=queryresults\">Open Job</a>"
+      ],
       "text/plain": [
-       "HTML(value='Query job 7d6c2e32-56e7-43ef-9b21-ccd2a25930ea is RUNNING. <a target=\"_blank\" href=\"https://consol…"
+       "<IPython.core.display.HTML object>"
       ]
      },
      "metadata": {},
@@ -1031,7 +1035,7 @@
        "[1 rows x 6 columns]"
       ]
      },
-     "execution_count": 24,
+     "execution_count": 29,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1043,32 +1047,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 30,
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "3966d9ee16b346cf943305112ce60fb6",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "HTML(value='Query job a25c445d-9b60-4a8d-a325-1bfacd32bc8d is RUNNING. <a target=\"_blank\" href=\"https://consol…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "f8fa7ce4a2f14c49addcb73f15f06db9",
-       "version_major": 2,
-       "version_minor": 0
-      },
+      "text/html": [
+       "Query job d59df3e8-cf87-4340-a4c7-a27c3abfcc50 is DONE. 29.1 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=shobs-test&j=bq:US:d59df3e8-cf87-4340-a4c7-a27c3abfcc50&page=queryresults\">Open Job</a>"
+      ],
       "text/plain": [
-       "HTML(value='Query job b881b602-abfa-4c19-a385-2480b3e8b2bd is RUNNING. <a target=\"_blank\" href=\"https://consol…"
+       "<IPython.core.display.HTML object>"
       ]
      },
      "metadata": {},
@@ -1076,13 +1064,11 @@
     },
     {
      "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "2084e1cd66ba449081eda92350f72fd0",
-       "version_major": 2,
-       "version_minor": 0
-      },
+      "text/html": [
+       "Query job 5af493aa-96f9-434f-a101-ec855f4de694 is DONE. 8 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=shobs-test&j=bq:US:5af493aa-96f9-434f-a101-ec855f4de694&page=queryresults\">Open Job</a>"
+      ],
       "text/plain": [
-       "HTML(value='Query job 03249520-93d3-4b2e-8976-f49cc4efe520 is RUNNING. <a target=\"_blank\" href=\"https://consol…"
+       "<IPython.core.display.HTML object>"
       ]
      },
      "metadata": {},
@@ -1090,13 +1076,11 @@
     },
     {
      "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "68049943e6ad477988b9e65a962ecdf2",
-       "version_major": 2,
-       "version_minor": 0
-      },
+      "text/html": [
+       "Query job e2076bc3-3966-4c45-8265-c461756a7782 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=shobs-test&j=bq:US:e2076bc3-3966-4c45-8265-c461756a7782&page=queryresults\">Open Job</a>"
+      ],
       "text/plain": [
-       "HTML(value='Query job 31094013-70ea-415f-8b96-85c1af7ee9c8 is DONE. 0 Bytes processed. <a target=\"_blank\" href…"
+       "<IPython.core.display.HTML object>"
       ]
      },
      "metadata": {},
@@ -1104,13 +1088,11 @@
     },
     {
      "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "341e6796def340cb9e0681ddeb40ff9d",
-       "version_major": 2,
-       "version_minor": 0
-      },
+      "text/html": [
+       "Query job e9cdfca7-30f6-4e93-95fb-244896e7c2ab is DONE. 16 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=shobs-test&j=bq:US:e9cdfca7-30f6-4e93-95fb-244896e7c2ab&page=queryresults\">Open Job</a>"
+      ],
       "text/plain": [
-       "HTML(value='Query job 0e456f33-4cb7-45a0-88e6-29324175b5a6 is RUNNING. <a target=\"_blank\" href=\"https://consol…"
+       "<IPython.core.display.HTML object>"
       ]
      },
      "metadata": {},
@@ -1142,8 +1124,8 @@
        "  </thead>\n",
        "  <tbody>\n",
        "    <tr>\n",
-       "      <th>292</th>\n",
-       "      <td>3459.735118</td>\n",
+       "      <th>334</th>\n",
+       "      <td>5891.735118</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -1152,12 +1134,12 @@
       ],
       "text/plain": [
        "     predicted_body_mass_g\n",
-       "292            3459.735118\n",
+       "334            5891.735118\n",
        "\n",
        "[1 rows x 1 columns]"
       ]
      },
-     "execution_count": 25,
+     "execution_count": 30,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1177,18 +1159,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 31,
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "256ff43296a9405f890e78511acc38e5",
-       "version_major": 2,
-       "version_minor": 0
-      },
+      "text/html": [
+       "Copy job cb4ef454-10df-4325-b9cb-6084df3ac9d5 is DONE. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=shobs-test&j=bq:US:cb4ef454-10df-4325-b9cb-6084df3ac9d5&page=queryresults\">Open Job</a>"
+      ],
       "text/plain": [
-       "HTML(value='Copy job 1a273ccd-212a-4750-a3c1-615256af6d48 is RUNNING. <a target=\"_blank\" href=\"https://console…"
+       "<IPython.core.display.HTML object>"
       ]
      },
      "metadata": {},
@@ -1200,14 +1180,14 @@
        "LinearRegression(optimize_strategy='NORMAL_EQUATION')"
       ]
      },
-     "execution_count": 26,
+     "execution_count": 31,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
     "# save the model to a permanent location in BigQuery, so we can use it in future sessions (and elsewhere in BQ)\n",
-    "model.to_gbq(\"bigframes-dev.bqml_tutorial.penguins_model\", replace=True)"
+    "model.to_gbq(penguins_model, replace=True)"
    ]
   },
   {
@@ -1219,7 +1199,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 32,
    "metadata": {},
    "outputs": [
     {
@@ -1228,7 +1208,7 @@
        "LinearRegression(optimize_strategy='NORMAL_EQUATION')"
       ]
      },
-     "execution_count": 27,
+     "execution_count": 32,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1236,7 +1216,7 @@
    "source": [
     "# WARNING - until b/281709360 is fixed & pipeline is updated, pipelines will load as models,\n",
     "# and details of their transform steps will be lost (the loaded model will behave the same)\n",
-    "bigframes.pandas.read_gbq_model(\"bigframes-dev.bqml_tutorial.penguins_model\")"
+    "bigframes.pandas.read_gbq_model(penguins_model)"
    ]
   }
  ],
diff --git a/owlbot.py b/owlbot.py
index 082970018d..dc84de7d8f 100644
--- a/owlbot.py
+++ b/owlbot.py
@@ -46,6 +46,7 @@
         "noxfile.py",
         ".pre-commit-config.yaml",
         "README.rst",
+        "CONTRIBUTING.rst",
         ".github/release-trigger.yml",
         # BigQuery DataFrames manages its own Kokoro cluster for presubmit & continuous tests.
         ".kokoro/build.sh",
diff --git a/samples/snippets/gen_ai_model_test.py b/samples/snippets/gen_ai_model_test.py
index 7cbc90d4c0..e4bead0e46 100644
--- a/samples/snippets/gen_ai_model_test.py
+++ b/samples/snippets/gen_ai_model_test.py
@@ -14,9 +14,14 @@
 
 
 def test_llm_model():
-    PROJECT_ID = "bigframes-dev"
+    # Determine project id, in this case prefer the one set in the environment
+    # variable GOOGLE_CLOUD_PROJECT (if any)
+    import os
+
+    PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT", "bigframes-dev")
     REGION = "us"
-    CONN_NAME = "bigframes-ml"
+    CONN_NAME = "bigframes-default-connection"
+
     # [START bigquery_dataframes_gen_ai_model]
     from bigframes.ml.llm import PaLM2TextGenerator
     import bigframes.pandas as bpd
diff --git a/samples/snippets/load_data_from_biquery_job_test.py b/samples/snippets/load_data_from_biquery_job_test.py
index 5271574a49..9a7793a7e5 100644
--- a/samples/snippets/load_data_from_biquery_job_test.py
+++ b/samples/snippets/load_data_from_biquery_job_test.py
@@ -14,10 +14,16 @@
 
 
 def test_bigquery_dataframes_load_data_from_bigquery_job():
-    from google.cloud import bigquery
+    # Determine project id, in this case prefer the one set in the environment
+    # variable GOOGLE_CLOUD_PROJECT (if any)
+    import os
+
+    your_project_id = os.getenv("GOOGLE_CLOUD_PROJECT", "bigframes-dev")
 
     # Construct a BigQuery client object.
-    client = bigquery.Client(project="bigframes-dev", location="us")
+    from google.cloud import bigquery
+
+    client = bigquery.Client(project=your_project_id, location="us")
 
     query = """
         SELECT *
@@ -26,7 +32,6 @@ def test_bigquery_dataframes_load_data_from_bigquery_job():
     """
     query_job = client.query(query)
     JOB_ID = query_job.job_id
-    your_project_id = "bigframes-dev"
 
     # [START bigquery_dataframes_load_data_from_bigquery_job]
     from google.cloud import bigquery
diff --git a/samples/snippets/quickstart_test.py b/samples/snippets/quickstart_test.py
index bbe4a8b3c4..4abc87d011 100644
--- a/samples/snippets/quickstart_test.py
+++ b/samples/snippets/quickstart_test.py
@@ -25,7 +25,12 @@ def test_quickstart(
     # We need a fresh session since we're modifying connection options.
     bigframes.pandas.close_session()
 
-    # TODO(swast): Get project from environment so contributors can run tests.
-    quickstart.run_quickstart("bigframes-dev")
+    # Determine project id, in this case prefer the one set in the environment
+    # variable GOOGLE_CLOUD_PROJECT (if any)
+    import os
+
+    your_project_id = os.getenv("GOOGLE_CLOUD_PROJECT", "bigframes-dev")
+
+    quickstart.run_quickstart(your_project_id)
     out, _ = capsys.readouterr()
     assert "average_body_mass (df_session):" in out
diff --git a/samples/snippets/remote_function_test.py b/samples/snippets/remote_function_test.py
index e1317c6ac0..8f891274de 100644
--- a/samples/snippets/remote_function_test.py
+++ b/samples/snippets/remote_function_test.py
@@ -25,8 +25,13 @@ def test_remote_function_and_read_gbq_function(
     # We need a fresh session since we're modifying connection options.
     bigframes.pandas.close_session()
 
-    # TODO(swast): Get project from environment so contributors can run tests.
-    remote_function.run_remote_function_and_read_gbq_function("bigframes-dev")
+    # Determine project id, in this case prefer the one set in the environment
+    # variable GOOGLE_CLOUD_PROJECT (if any)
+    import os
+
+    your_project_id = os.getenv("GOOGLE_CLOUD_PROJECT", "bigframes-dev")
+
+    remote_function.run_remote_function_and_read_gbq_function(your_project_id)
     out, _ = capsys.readouterr()
     assert "Created BQ remote function:" in out
     assert "Created cloud function:" in out
diff --git a/scripts/create_test_model_vertex.py b/scripts/create_test_model_vertex.py
new file mode 100644
index 0000000000..946e54773e
--- /dev/null
+++ b/scripts/create_test_model_vertex.py
@@ -0,0 +1,71 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import sys
+
+import bigframes.ml.linear_model
+import bigframes.pandas
+
+
+def create_vertex_model(vertex_model_name):
+    """Fit a linear model on Adelie penguin data and register it in Vertex."""
+    df = bigframes.pandas.read_gbq("bigquery-public-data.ml_datasets.penguins")
+    # keep only the Adelie rows of the public penguins table
+    adelie_data = df[df.species == "Adelie Penguin (Pygoscelis adeliae)"]
+
+    # species is constant after the filter above, so drop it
+    adelie_data = adelie_data.drop(columns=["species"])
+
+    # drop rows with nulls to get our training data
+    training_data = adelie_data.dropna()
+    # one feature (culmen length) is used to predict one label (body mass)
+    feature_columns = training_data["culmen_length_mm"]
+    label_columns = training_data[["body_mass_g"]]
+
+    # create and train the model
+    model = bigframes.ml.linear_model.LinearRegression()
+    model.fit(feature_columns, label_columns)
+
+    # register the trained model in the Vertex registry as `vertex_model_name`
+    model.register(vertex_model_name)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Create a test linear regression model and register it in Vertex."
+    )
+    parser.add_argument(
+        "-m",
+        "--model-name",
+        type=str,
+        required=True,
+        action="store",
+        help="Name of the model in Vertex.",
+    )
+    parser.add_argument(
+        "-p",
+        "--project-id",
+        type=str,
+        required=False,
+        action="store",
+        help="Project id in which the model should be created. "
+        "By default, a project will be resolved as per https://cloud.google.com/python/docs/reference/google-cloud-core/latest/config#overview.",
+    )
+
+    args = parser.parse_args(sys.argv[1:])
+    if args.project_id:  # only override the project when one was given explicitly
+        bigframes.pandas.options.bigquery.project = args.project_id
+
+    create_vertex_model(args.model_name)
diff --git a/scripts/setup-project-for-testing.sh b/scripts/setup-project-for-testing.sh
new file mode 100755
index 0000000000..a160784c12
--- /dev/null
+++ b/scripts/setup-project-for-testing.sh
@@ -0,0 +1,256 @@
+#!/bin/bash
+
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+if [ $# -lt 1 ]; then
+  echo "USAGE: `basename $0` <project-id> [<principal>]"
+  echo "EXAMPLES:"
+  echo "       `basename $0` my-project"
+  echo "       `basename $0` my-project user:user_id@example.com"
+  echo "       `basename $0` my-project group:group_id@example.com"
+  echo "       `basename $0` my-project serviceAccount:service_account_id@example.com"
+  exit 1
+fi
+
+PROJECT_ID=$1
+PRINCIPAL=$2
+BIGFRAMES_DEFAULT_CONNECTION_NAME=bigframes-default-connection
+BIGFRAMES_RF_CONNECTION_NAME=bigframes-rf-conn
+
+if [ "$PRINCIPAL" != "" ]; then
+  echo "$PRINCIPAL" | grep -E "^(user|group|serviceAccount):" >/dev/null  # anchored: must be a prefix
+  if [ $? -ne 0 ]; then
+    echo "principal must have prefix 'user:', 'group:' or 'serviceAccount:'"
+    exit 1
+  fi
+fi
+
+if ! command -v gcloud >/dev/null 2>&1; then
+  echo "gcloud CLI is not installed. Install it from https://cloud.google.com/sdk/docs/install." >&2
+  exit 1
+fi
+
+################################################################################
+# Log and execute a command
+################################################################################
+function log_and_execute() {
+  echo "Running command: $*"
+  "$@"  # run with the caller's argument boundaries intact (no re-splitting or globbing)
+}
+
+
+################################################################################
+# Enable APIs
+################################################################################
+function enable_apis() {  # enable every API listed below on $PROJECT_ID, one call per service
+  for service in aiplatform.googleapis.com \
+                 bigquery.googleapis.com \
+                 bigqueryconnection.googleapis.com \
+                 bigquerystorage.googleapis.com \
+                 cloudbuild.googleapis.com \
+                 cloudfunctions.googleapis.com \
+                 cloudresourcemanager.googleapis.com \
+                 run.googleapis.com \
+    ; do
+    log_and_execute gcloud --project=$PROJECT_ID services enable $service
+    if [ $? -ne 0 ]; then  # stop the whole script on the first failure
+      echo "Failed to enable service $service, exiting..."
+      exit 1
+    fi
+  done
+}
+
+
+################################################################################
+# Ensure a BQ connection exists with desired IAM roles
+################################################################################
+function ensure_bq_connection_with_iam() {
+  if [ $# -ne 2 ]; then
+    echo "USAGE: ensure_bq_connection_with_iam <location> <connection-name>"
+    echo "EXAMPLES:"
+    echo "       ensure_bq_connection_with_iam us my-connection"
+    exit 1
+  fi
+
+  location=$1
+  connection_name=$2
+
+  log_and_execute bq show \
+                    --connection \
+                    --project_id=$PROJECT_ID \
+                    --location=$location \
+                    $connection_name >/dev/null 2>&1  # existence probe: only the exit status matters
+  if [ $? -ne 0 ]; then
+    echo "Connection $connection_name doesn't exist in location \"$location\", creating..."
+    log_and_execute bq mk \
+                      --connection \
+                      --project_id=$PROJECT_ID \
+                      --location=$location \
+                      --connection_type=CLOUD_RESOURCE \
+                      $connection_name
+    if [ $? -ne 0 ]; then
+      echo "Failed creating connection, exiting."
+      exit 1
+    fi
+  else
+    echo "Connection $connection_name already exists in location $location."
+  fi
+
+  compact_json_info_cmd="bq show --connection \
+                          --project_id=$PROJECT_ID \
+                          --location=$location \
+                          --format=json \
+                          $connection_name"
+  compact_json_info_cmd_output=`$compact_json_info_cmd`
+  if [ $? -ne 0 ]; then
+    echo "Failed to fetch connection info: $compact_json_info_cmd_output"
+    exit 1
+  fi
+  # Pull the cloudResource.serviceAccountId value out of the JSON output with sed
+  connection_service_account=`echo $compact_json_info_cmd_output | sed -e 's/.*"cloudResource":{"serviceAccountId":"//' -e 's/".*//'`
+
+  # Grant run.invoker and aiplatform.user to the connection's service account
+  for role in run.invoker aiplatform.user; do
+    log_and_execute gcloud projects add-iam-policy-binding $PROJECT_ID \
+                      --member=serviceAccount:$connection_service_account \
+                      --role=roles/$role
+    if [ $? -ne 0 ]; then
+      echo "Failed to set IAM, exiting..."
+      exit 1
+    fi
+  done
+}
+
+
+################################################################################
+# Create the default BQ connection in US location
+################################################################################
+function ensure_bq_connections_with_iam() {  # takes no arguments; uses the connection names defined earlier
+  ensure_bq_connection_with_iam "us" "$BIGFRAMES_DEFAULT_CONNECTION_NAME"
+
+  # Ensure the $BIGFRAMES_RF_CONNECTION_NAME connection exists in each of these locations
+  for location in asia-southeast1 \
+                  eu \
+                  europe-west4 \
+                  southamerica-west1 \
+                  us \
+                  us-central1 \
+    ; do
+    ensure_bq_connection_with_iam "$location" "$BIGFRAMES_RF_CONNECTION_NAME"
+  done
+}
+
+
+################################################################################
+# Set up IAM roles for principal
+################################################################################
+function setup_iam_roles () {  # grant the roles listed below to $PRINCIPAL on $PROJECT_ID
+  if [ "$PRINCIPAL" != "" ]; then  # principal is the optional 2nd script argument; no-op without it
+    for role in aiplatform.user \
+                bigquery.user \
+                bigquery.connectionAdmin \
+                bigquery.dataEditor \
+                browser \
+                cloudfunctions.developer \
+                iam.serviceAccountUser \
+      ; do
+      log_and_execute gcloud projects add-iam-policy-binding $PROJECT_ID \
+                        --member=$PRINCIPAL \
+                        --role=roles/$role
+      if [ $? -ne 0 ]; then  # stop the whole script on the first failed binding
+        echo "Failed to set IAM, exiting..."
+        exit 1
+      fi
+    done
+  fi
+}
+
+
+################################################################################
+# Create vertex endpoint for test ML model
+################################################################################
+function create_bq_model_vertex_endpoint () {
+  vertex_region=us-central1
+  model_name=bigframes-test-linreg2
+  endpoint_name=$model_name-endpoint
+
+  # Create vertex model (trains it with BigQuery DataFrames and registers it in Vertex)
+  log_and_execute python scripts/create_test_model_vertex.py \
+                    -m $model_name \
+                    -p $PROJECT_ID
+  if [ $? -ne 0 ]; then
+    echo "Failed to create model, exiting..."
+    exit 1
+  fi
+
+  # Create vertex endpoint
+  log_and_execute gcloud ai endpoints create \
+                    --project=$PROJECT_ID \
+                    --region=$vertex_region \
+                    --display-name=$endpoint_name
+  if [ $? -ne 0 ]; then
+    echo "Failed to create vertex endpoint, exiting..."
+    exit 1
+  fi
+
+  # Fetch endpoint id: first column of the last row listed for this display name
+  endpoint_id=`gcloud ai endpoints list \
+                --project=$PROJECT_ID \
+                --region=$vertex_region \
+                --filter=display_name=$endpoint_name 2>/dev/null \
+                | tail -n1 | cut -d' '  -f 1`
+  if [ "$endpoint_id" = "" ]; then
+    echo "Failed to fetch vertex endpoint id, exiting..."
+    exit 1
+  fi
+
+  # Deploy the model to the vertex endpoint
+  log_and_execute gcloud ai endpoints deploy-model $endpoint_id \
+                    --project=$PROJECT_ID \
+                    --region=$vertex_region \
+                    --model=$model_name \
+                    --display-name=$model_name
+  if [ $? -ne 0 ]; then
+    echo "Failed to deploy model to vertex endpoint, exiting..."
+    exit 1
+  fi
+
+  # Build the endpoint URL from the `name:` field of the endpoint description
+  endpoint_rel_path=`gcloud ai endpoints describe \
+                      --project=$PROJECT_ID \
+                      --region=$vertex_region \
+                      $endpoint_id 2>/dev/null \
+                      | grep "^name:" | cut -d' ' -f2`
+  if [ "$endpoint_rel_path" = "" ]; then
+    echo "Failed to fetch vertex endpoint relative path, exiting..."
+    exit 1
+  fi
+  endpoint_path=https://$vertex_region-aiplatform.googleapis.com/v1/$endpoint_rel_path
+
+  # Print the endpoint configuration to be used in tests
+  echo
+  echo Run following command to set test model vertex endpoint:
+  echo export BIGFRAMES_TEST_MODEL_VERTEX_ENDPOINT=$endpoint_path
+}
+
+
+################################################################################
+# Set the things up
+################################################################################
+enable_apis
+ensure_bq_connections_with_iam
+setup_iam_roles
+create_bq_model_vertex_endpoint
diff --git a/tests/system/conftest.py b/tests/system/conftest.py
index f9f69c6c8e..0ad4280497 100644
--- a/tests/system/conftest.py
+++ b/tests/system/conftest.py
@@ -120,7 +120,6 @@ def session() -> bigframes.Session:
 def session_tokyo(tokyo_location: str) -> bigframes.Session:
     context = bigframes.BigQueryOptions(
         location=tokyo_location,
-        use_regional_endpoints=True,
     )
     return bigframes.Session(context=context)
 
diff --git a/tests/system/large/ml/test_decomposition.py b/tests/system/large/ml/test_decomposition.py
index a7049d4c18..953287def2 100644
--- a/tests/system/large/ml/test_decomposition.py
+++ b/tests/system/large/ml/test_decomposition.py
@@ -15,6 +15,7 @@
 import pandas as pd
 
 from bigframes.ml import decomposition
+import tests.system.utils
 
 
 def test_decomposition_configure_fit_score_predict(
@@ -66,9 +67,10 @@ def test_decomposition_configure_fit_score_predict(
         dtype="Float64",
         index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"),
     )
-    pd.testing.assert_frame_equal(
-        abs(result.sort_index()),  # results may differ by a minus sign
-        abs(expected),
+
+    tests.system.utils.assert_pandas_df_equal_pca(
+        result,
+        expected,
         check_exact=False,
         rtol=0.1,
     )
diff --git a/tests/system/large/ml/test_pipeline.py b/tests/system/large/ml/test_pipeline.py
index 2929baf3f7..c128469bd2 100644
--- a/tests/system/large/ml/test_pipeline.py
+++ b/tests/system/large/ml/test_pipeline.py
@@ -24,7 +24,7 @@
     pipeline,
     preprocessing,
 )
-from tests.system.utils import assert_pandas_df_equal
+from tests.system.utils import assert_pandas_df_equal, assert_pandas_df_equal_pca
 
 
 def test_pipeline_linear_regression_fit_score_predict(
@@ -430,17 +430,16 @@ def test_pipeline_PCA_fit_score_predict(session, penguins_df_default_index):
         dtype="Float64",
         index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"),
     )
-    pd.testing.assert_frame_equal(
-        abs(  # results may differ by a minus sign
-            predictions[
-                [
-                    "principal_component_1",
-                    "principal_component_2",
-                    "principal_component_3",
-                ]
+
+    assert_pandas_df_equal_pca(
+        predictions[
+            [
+                "principal_component_1",
+                "principal_component_2",
+                "principal_component_3",
             ]
-        ),
-        abs(expected),
+        ],
+        expected,
         check_exact=False,
         rtol=0.1,
     )
diff --git a/tests/system/large/test_remote_function.py b/tests/system/large/test_remote_function.py
index 5cb4df188c..4b4c794a05 100644
--- a/tests/system/large/test_remote_function.py
+++ b/tests/system/large/test_remote_function.py
@@ -161,8 +161,10 @@ def make_uniq_udf(udf):
 
 @pytest.fixture(scope="module")
 def bq_cf_connection() -> str:
-    """Pre-created BQ connection to invoke cloud function for bigframes-dev
-    $ bq show --connection --location=us --project_id=bigframes-dev bigframes-rf-conn
+    """Pre-created BQ connection in the test project in US location, used to
+    invoke cloud function.
+
+    $ bq show --connection --location=us --project_id=PROJECT_ID bigframes-rf-conn
     """
     return "bigframes-rf-conn"
 
diff --git a/tests/system/small/ml/conftest.py b/tests/system/small/ml/conftest.py
index c4a1272e44..e3180d2892 100644
--- a/tests/system/small/ml/conftest.py
+++ b/tests/system/small/ml/conftest.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import os
 from typing import cast
 import uuid
 
@@ -34,8 +35,8 @@
 
 
 @pytest.fixture(scope="session")
-def bq_connection() -> str:
-    return "bigframes-dev.us.bigframes-rf-conn"
+def bq_connection(bigquery_client) -> str:
+    return f"{bigquery_client.project}.us.bigframes-rf-conn"
 
 
 @pytest.fixture(scope="session")
@@ -252,10 +253,15 @@ def palm2_embedding_generator_multilingual_model(
 def linear_remote_model_params() -> dict:
     # Pre-deployed endpoint of linear reg model in Vertex.
     # bigframes-test-linreg2 -> bigframes-test-linreg-endpoint2
+    model_vertex_endpoint = os.environ.get(
+        "BIGFRAMES_TEST_MODEL_VERTEX_ENDPOINT",
+        "https://us-central1-aiplatform.googleapis.com/v1/projects/1084210331973/locations/us-central1/endpoints/3193318217619603456",
+    )
+
     return {
         "input": {"culmen_length_mm": "float64"},
         "output": {"predicted_body_mass_g": "array<float64>"},
-        "endpoint": "https://us-central1-aiplatform.googleapis.com/v1/projects/1084210331973/locations/us-central1/endpoints/3193318217619603456",
+        "endpoint": model_vertex_endpoint,
     }
 
 
diff --git a/tests/system/small/ml/test_core.py b/tests/system/small/ml/test_core.py
index 915c4aa444..eece5ef21d 100644
--- a/tests/system/small/ml/test_core.py
+++ b/tests/system/small/ml/test_core.py
@@ -210,12 +210,12 @@ def test_pca_model_principal_components(penguins_bqml_pca_model: core.BqmlModel)
         .sort_values(["principal_component_id", "feature"])
         .reset_index(drop=True)
     )
-    pd.testing.assert_frame_equal(
+
+    tests.system.utils.assert_pandas_df_equal_pca_components(
         result,
         expected,
         check_exact=False,
         rtol=0.1,
-        # int64 Index by default in pandas versus Int64 (nullable) Index in BigQuery DataFrame
         check_index_type=False,
         check_dtype=False,
     )
diff --git a/tests/system/small/ml/test_decomposition.py b/tests/system/small/ml/test_decomposition.py
index 42fea66cf8..9565b8f7a8 100644
--- a/tests/system/small/ml/test_decomposition.py
+++ b/tests/system/small/ml/test_decomposition.py
@@ -29,11 +29,9 @@ def test_pca_predict(penguins_pca_model, new_penguins_df):
         dtype="Float64",
         index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"),
     )
-    pd.testing.assert_frame_equal(
-        predictions.sort_index(),
-        expected,
-        check_exact=False,
-        rtol=0.1,
+
+    tests.system.utils.assert_pandas_df_equal_pca(
+        predictions, expected, check_exact=False, rtol=0.1
     )
 
 
@@ -115,7 +113,8 @@ def test_pca_components_(penguins_pca_model: decomposition.PCA):
         .sort_values(["principal_component_id", "feature"])
         .reset_index(drop=True)
     )
-    pd.testing.assert_frame_equal(
+
+    tests.system.utils.assert_pandas_df_equal_pca_components(
         result,
         expected,
         check_exact=False,
diff --git a/tests/system/small/ml/test_llm.py b/tests/system/small/ml/test_llm.py
index 306098548e..267a2ed9c1 100644
--- a/tests/system/small/ml/test_llm.py
+++ b/tests/system/small/ml/test_llm.py
@@ -24,14 +24,10 @@ def test_create_text_generator_model(palm2_text_generator_model):
     assert palm2_text_generator_model._bqml_model is not None
 
 
-def test_create_text_generator_32k_model(palm2_text_generator_32k_model):
-    # Model creation doesn't return error
-    assert palm2_text_generator_32k_model is not None
-    assert palm2_text_generator_32k_model._bqml_model is not None
-
-
 @pytest.mark.flaky(retries=2, delay=120)
-def test_create_text_generator_model_default_session(bq_connection, llm_text_pandas_df):
+def test_create_text_generator_model_default_session(
+    bq_connection, llm_text_pandas_df, bigquery_client
+):
     import bigframes.pandas as bpd
 
     bpd.close_session()
@@ -41,7 +37,10 @@ def test_create_text_generator_model_default_session(bq_connection, llm_text_pan
     model = llm.PaLM2TextGenerator()
     assert model is not None
     assert model._bqml_model is not None
-    assert model.connection_name.casefold() == "bigframes-dev.us.bigframes-rf-conn"
+    assert (
+        model.connection_name.casefold()
+        == f"{bigquery_client.project}.us.bigframes-rf-conn"
+    )
 
     llm_text_df = bpd.read_pandas(llm_text_pandas_df)
 
@@ -54,7 +53,7 @@ def test_create_text_generator_model_default_session(bq_connection, llm_text_pan
 
 @pytest.mark.flaky(retries=2, delay=120)
 def test_create_text_generator_32k_model_default_session(
-    bq_connection, llm_text_pandas_df
+    bq_connection, llm_text_pandas_df, bigquery_client
 ):
     import bigframes.pandas as bpd
 
@@ -65,7 +64,10 @@ def test_create_text_generator_32k_model_default_session(
     model = llm.PaLM2TextGenerator(model_name="text-bison-32k")
     assert model is not None
     assert model._bqml_model is not None
-    assert model.connection_name.casefold() == "bigframes-dev.us.bigframes-rf-conn"
+    assert (
+        model.connection_name.casefold()
+        == f"{bigquery_client.project}.us.bigframes-rf-conn"
+    )
 
     llm_text_df = bpd.read_pandas(llm_text_pandas_df)
 
@@ -77,7 +79,9 @@ def test_create_text_generator_32k_model_default_session(
 
 
 @pytest.mark.flaky(retries=2, delay=120)
-def test_create_text_generator_model_default_connection(llm_text_pandas_df):
+def test_create_text_generator_model_default_connection(
+    llm_text_pandas_df, bigquery_client
+):
     from bigframes import _config
     import bigframes.pandas as bpd
 
@@ -91,7 +95,7 @@ def test_create_text_generator_model_default_connection(llm_text_pandas_df):
     assert model._bqml_model is not None
     assert (
         model.connection_name.casefold()
-        == "bigframes-dev.us.bigframes-default-connection"
+        == f"{bigquery_client.project}.us.bigframes-default-connection"
     )
 
     df = model.predict(llm_text_df).to_pandas()
diff --git a/tests/system/small/test_remote_function.py b/tests/system/small/test_remote_function.py
index 960a384126..a98056d82a 100644
--- a/tests/system/small/test_remote_function.py
+++ b/tests/system/small/test_remote_function.py
@@ -23,40 +23,50 @@
 
 @pytest.fixture(scope="module")
 def bq_cf_connection() -> str:
-    """Pre-created BQ connection to invoke cloud function for bigframes-dev
-    $ bq show --connection --location=us --project_id=bigframes-dev bigframes-rf-conn
+    """Pre-created BQ connection in the test project in US location, used to
+    invoke cloud function.
+
+    $ bq show --connection --location=us --project_id=PROJECT_ID bigframes-rf-conn
     """
     return "bigframes-rf-conn"
 
 
 @pytest.fixture(scope="module")
 def bq_cf_connection_location() -> str:
-    """Pre-created BQ connection to invoke cloud function for bigframes-dev
-    $ bq show --connection --location=us --project_id=bigframes-dev bigframes-rf-conn
+    """Pre-created BQ connection in the test project in US location, in format
+    LOCATION.CONNECTION_NAME, used to invoke cloud function.
+
+    $ bq show --connection --location=us --project_id=PROJECT_ID bigframes-rf-conn
     """
     return "us.bigframes-rf-conn"
 
 
 @pytest.fixture(scope="module")
 def bq_cf_connection_location_mismatched() -> str:
-    """Pre-created BQ connection to invoke cloud function for bigframes-dev
-    $ bq show --connection --location=eu --project_id=bigframes-dev bigframes-rf-conn
+    """Pre-created BQ connection in the test project in EU location, in format
+    LOCATION.CONNECTION_NAME, used to invoke cloud function.
+
+    $ bq show --connection --location=eu --project_id=PROJECT_ID bigframes-rf-conn
     """
     return "eu.bigframes-rf-conn"
 
 
 @pytest.fixture(scope="module")
-def bq_cf_connection_location_project() -> str:
-    """Pre-created BQ connection to invoke cloud function for bigframes-dev
-    $ bq show --connection --location=us --project_id=bigframes-dev bigframes-rf-conn
+def bq_cf_connection_location_project(bigquery_client) -> str:
+    """Pre-created BQ connection in the test project in US location, in format
+    PROJECT_ID.LOCATION.CONNECTION_NAME, used to invoke cloud function.
+
+    $ bq show --connection --location=us --project_id=PROJECT_ID bigframes-rf-conn
     """
-    return "bigframes-dev.us.bigframes-rf-conn"
+    return f"{bigquery_client.project}.us.bigframes-rf-conn"
 
 
 @pytest.fixture(scope="module")
 def bq_cf_connection_location_project_mismatched() -> str:
-    """Pre-created BQ connection to invoke cloud function for bigframes-dev
-    $ bq show --connection --location=eu --project_id=bigframes-metrics bigframes-rf-conn
+    """Pre-created BQ connection in the bigframes-metrics project in EU location,
+    in format PROJECT_ID.LOCATION.CONNECTION_NAME, used to invoke cloud function.
+
+    $ bq show --connection --location=eu --project_id=bigframes-metrics bigframes-rf-conn
     """
     return "bigframes-metrics.eu.bigframes-rf-conn"
 
diff --git a/tests/system/utils.py b/tests/system/utils.py
index f7831972b8..f49b5ece31 100644
--- a/tests/system/utils.py
+++ b/tests/system/utils.py
@@ -143,3 +143,77 @@ def convert_pandas_dtypes(df: pd.DataFrame, bytes_col: bool):
     df["numeric_col"] = df["numeric_col"].apply(
         lambda value: decimal.Decimal(str(value)) if value else None  # type: ignore
     )
+
+
+def assert_pandas_df_equal_pca_components(actual, expected, **kwargs):
+    """Compare two pandas dataframes representing PCA components. The columns
+    required to be present in the dataframes are:
+        numerical_value: numeric,
+        categorical_value: List[object(category, value)]
+
+    The index types of `actual` and `expected` are ignored in the comparison.
+
+    Args:
+        actual: Actual Pandas DataFrame
+
+        expected: Expected Pandas DataFrame
+
+        kwargs: kwargs to use in `pandas.testing.assert_series_equal` per column
+    """
+    # Compare the index, columns and values separately, as the polarity of the
+    # PCA vectors can be arbitrary
+    pd.testing.assert_index_equal(
+        actual.index, expected.index.astype(actual.index.dtype)
+    )  # dtype agnostic index comparison
+    pd.testing.assert_index_equal(actual.columns, expected.columns)
+    for column in expected.columns:
+        try:
+            pd.testing.assert_series_equal(actual[column], expected[column], **kwargs)
+        except AssertionError:
+            if column not in {"numerical_value", "categorical_value"}:
+                raise
+
+            # Allow for sign difference per numeric/categorical column
+            if column == "numerical_value":
+                actual_ = -actual[column]
+                expected_ = expected[column]
+            else:
+                # In this column each element is an array of objects, where the
+                # object has attributes "category" and "value". For the sake of
+                # comparison let's normalize by flipping the polarity of "value".
+                def normalize_array_of_objects(arr, reverse_polarity=False):
+                    newarr = []
+                    for element in arr:
+                        newelement = dict(element)
+                        if reverse_polarity:
+                            newelement["value"] = -newelement["value"]
+                        newarr.append(newelement)
+                    return sorted(newarr, key=lambda d: d["category"])
+
+                actual_ = actual[column].apply(normalize_array_of_objects, args=(True,))
+                expected_ = expected[column].apply(normalize_array_of_objects)
+
+            pd.testing.assert_series_equal(actual_, expected_, **kwargs)
+
+
+def assert_pandas_df_equal_pca(actual, expected, **kwargs):
+    """Compare two pandas dataframes representing PCA predictions. The columns
+    in the dataframes are expected to be numeric.
+
+    Args:
+        actual: Actual Pandas DataFrame
+
+        expected: Expected Pandas DataFrame
+
+        kwargs: kwargs to use in `pandas.testing.assert_series_equal` per column
+    """
+    # Compare the index, columns and values separately, as the polarity of the
+    # PCA vector can be arbitrary
+    pd.testing.assert_index_equal(actual.index, expected.index)
+    pd.testing.assert_index_equal(actual.columns, expected.columns)
+    for column in expected.columns:
+        try:
+            pd.testing.assert_series_equal(actual[column], expected[column], **kwargs)
+        except AssertionError:
+            # Allow for sign difference per column
+            pd.testing.assert_series_equal(-actual[column], expected[column], **kwargs)

From 9cde708bb4a94d3ba35ecdf298cc80bc5680e7b4 Mon Sep 17 00:00:00 2001
From: "release-please[bot]"
 <55107282+release-please[bot]@users.noreply.github.com>
Date: Tue, 12 Dec 2023 04:59:17 +0000
Subject: [PATCH 20/20] chore(main): release 0.16.0 (#250)

Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com>
---
 CHANGELOG.md         | 33 +++++++++++++++++++++++++++++++++
 bigframes/version.py |  2 +-
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ef75a017e0..68ea51707c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,39 @@
 
 [1]: https://pypi.org/project/bigframes/#history
 
+## [0.16.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v0.15.0...v0.16.0) (2023-12-12)
+
+
+### Features
+
+* Add ARIMAPlus.predict parameters ([#264](https://github.com/googleapis/python-bigquery-dataframes/issues/264)) ([99598c7](https://github.com/googleapis/python-bigquery-dataframes/commit/99598c7d359f1d1e0671dcf27a5c77094f3c7f67))
+* Add DataFrame from_dict and from_records methods ([#244](https://github.com/googleapis/python-bigquery-dataframes/issues/244)) ([8d81e24](https://github.com/googleapis/python-bigquery-dataframes/commit/8d81e24677613dcf4d275c27a327384b8c17bc85))
+* Add DataFrame.select_dtypes method ([#242](https://github.com/googleapis/python-bigquery-dataframes/issues/242)) ([1737acc](https://github.com/googleapis/python-bigquery-dataframes/commit/1737acc51b4fdd9b385bbf91a758efd2e7ead11a))
+* Add nunique method to Series/DataFrameGroupby ([#256](https://github.com/googleapis/python-bigquery-dataframes/issues/256)) ([c8ec245](https://github.com/googleapis/python-bigquery-dataframes/commit/c8ec245070402aa0770bc9b2375693de674ca925))
+* Support dataframe.loc with conditional columns selection ([#233](https://github.com/googleapis/python-bigquery-dataframes/issues/233)) ([3febea9](https://github.com/googleapis/python-bigquery-dataframes/commit/3febea99358d10f823d43c3af83ea30458e579a2))
+
+
+### Bug Fixes
+
+* Enforce pandas version requirement &lt;2.1.4 ([#265](https://github.com/googleapis/python-bigquery-dataframes/issues/265)) ([9dd63f6](https://github.com/googleapis/python-bigquery-dataframes/commit/9dd63f6dcb6234e1f3aebd63c59e1e5c717099dc))
+* Exclude pandas 2.1.4 from prerelease tests to unblock e2e tests ([b02fc2c](https://github.com/googleapis/python-bigquery-dataframes/commit/b02fc2c1843e18d3a8d6894c64763f53e6af1b73))
+* Fix value_counts column label for normalize=True ([#245](https://github.com/googleapis/python-bigquery-dataframes/issues/245)) ([d3fa6f2](https://github.com/googleapis/python-bigquery-dataframes/commit/d3fa6f26931d5d0f0ae3fa49baccfc148f870417))
+* Migrate e2e tests to bigframes-load-testing project ([8766ac6](https://github.com/googleapis/python-bigquery-dataframes/commit/8766ac63f501929577f71e6bd2b523e92c43ba66))
+* Ml.sql logic ([#262](https://github.com/googleapis/python-bigquery-dataframes/issues/262)) ([68c6fdf](https://github.com/googleapis/python-bigquery-dataframes/commit/68c6fdf78af8b87fa4ef4f832631f24d7433a4d8))
+* Update the llm_kmeans notebook ([#247](https://github.com/googleapis/python-bigquery-dataframes/issues/247)) ([66d1839](https://github.com/googleapis/python-bigquery-dataframes/commit/66d1839c3e9a3011c7feb13a59d966b64cf8313f))
+
+
+### Documentation
+
+* Add code samples for `shape` and `head` ([#257](https://github.com/googleapis/python-bigquery-dataframes/issues/257)) ([5bdcc65](https://github.com/googleapis/python-bigquery-dataframes/commit/5bdcc6594ef2e99e96636341d286ea70420858fe))
+* Add example for dataframe.melt, dataframe.pivot, dataframe.stac… ([#252](https://github.com/googleapis/python-bigquery-dataframes/issues/252)) ([8c63697](https://github.com/googleapis/python-bigquery-dataframes/commit/8c636978f4a21eda2856862100b7a8272797fe42))
+* Add example to dataframe.nlargest, dataframe.nsmallest, datafra… ([#234](https://github.com/googleapis/python-bigquery-dataframes/issues/234)) ([e735412](https://github.com/googleapis/python-bigquery-dataframes/commit/e735412fdc52d034df92dd5462d6956bdc0167be))
+* Add examples for dataframe.cummin, dataframe.cummax, dataframe.cumsum, dataframe.cumprod ([#243](https://github.com/googleapis/python-bigquery-dataframes/issues/243)) ([0523a31](https://github.com/googleapis/python-bigquery-dataframes/commit/0523a31fa0b589f88afe0ad5b447634409ddeb86))
+* Add examples for dataframe.nunique, dataframe.diff, dataframe.a… ([#251](https://github.com/googleapis/python-bigquery-dataframes/issues/251)) ([77074ec](https://github.com/googleapis/python-bigquery-dataframes/commit/77074ecbe7f52d1d7d1d1dc537fbe4062b407672))
+* Correct the docs for `option_context` ([#263](https://github.com/googleapis/python-bigquery-dataframes/issues/263)) ([d21c6dd](https://github.com/googleapis/python-bigquery-dataframes/commit/d21c6dd26eadd64c526b0fd35b977a74b8334562))
+* Correct the params rendering for `ml.remote` and `ml.ensemble` modules ([#248](https://github.com/googleapis/python-bigquery-dataframes/issues/248)) ([c2829e3](https://github.com/googleapis/python-bigquery-dataframes/commit/c2829e3d976a43c53251c9288266e3a8ec5304c5))
+* Fix return annotation in API docstrings ([#253](https://github.com/googleapis/python-bigquery-dataframes/issues/253)) ([89a1c67](https://github.com/googleapis/python-bigquery-dataframes/commit/89a1c67fa5cbb76c1cc6ae24d5f919e22514705c))
+
 ## [0.15.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v0.14.1...v0.15.0) (2023-11-29)
 
 
diff --git a/bigframes/version.py b/bigframes/version.py
index 920cb95c3d..3ddf7e0f79 100644
--- a/bigframes/version.py
+++ b/bigframes/version.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "0.15.0"
+__version__ = "0.16.0"