diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index a3da1b0d4c..a9bdb1b7ac 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:3e3800bb100af5d7f9e810d48212b37812c1856d20ffeafb99ebe66461b61fc7 -# created: 2023-08-02T10:53:29.114535628Z + digest: sha256:fac304457974bb530cc5396abd4ab25d26a469cd3bc97cbfb18c8d4324c584eb +# created: 2023-10-02T21:31:03.517640371Z diff --git a/.gitignore b/.gitignore index b4243ced74..d083ea1ddc 100644 --- a/.gitignore +++ b/.gitignore @@ -50,6 +50,7 @@ docs.metadata # Virtual environment env/ +venv/ # Test logs coverage.xml diff --git a/.kokoro/build.sh b/.kokoro/build.sh index f91c541c6c..58eaa7fedf 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -15,11 +15,7 @@ set -eo pipefail -if [[ -z "${KOKORO_GOB_COMMIT}" ]]; then - PROJECT_SCM="github/python-bigquery-dataframes" -else - PROJECT_SCM="git/bigframes" -fi +PROJECT_SCM="github/python-bigquery-dataframes" if [[ -z "${PROJECT_ROOT:-}" ]]; then PROJECT_ROOT="${KOKORO_ARTIFACTS_DIR}/${PROJECT_SCM}" @@ -30,6 +26,9 @@ cd "${PROJECT_ROOT}" # Disable buffering, so that the logs stream through. export PYTHONUNBUFFERED=1 +# Workaround https://github.com/pytest-dev/pytest/issues/9567 +export PY_IGNORE_IMPORTMISMATCH=1 + # Debug: show build environment env | grep KOKORO diff --git a/.kokoro/continuous/common.cfg b/.kokoro/continuous/common.cfg index 5d40578ac7..97e0651aa9 100644 --- a/.kokoro/continuous/common.cfg +++ b/.kokoro/continuous/common.cfg @@ -7,4 +7,4 @@ action { } } -build_file: "bigframes/.kokoro/build.sh" +build_file: "python-bigquery-dataframes/.kokoro/build.sh" diff --git a/.kokoro/continuous/nightly.cfg b/.kokoro/continuous/nightly.cfg index 63c3f51d05..2b7111664f 100644 --- a/.kokoro/continuous/nightly.cfg +++ b/.kokoro/continuous/nightly.cfg @@ -1,3 +1,3 @@ # Format: //devtools/kokoro/config/proto/build.proto -build_file: "bigframes/.kokoro/release-nightly.sh" +build_file: "python-bigquery-dataframes/.kokoro/release-nightly.sh" diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 029bd342de..96d593c8c8 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -113,30 +113,30 @@ commonmark==0.9.1 \ --hash=sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60 \ --hash=sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9 # via rich -cryptography==41.0.3 \ - --hash=sha256:0d09fb5356f975974dbcb595ad2d178305e5050656affb7890a1583f5e02a306 \ - --hash=sha256:23c2d778cf829f7d0ae180600b17e9fceea3c2ef8b31a99e3c694cbbf3a24b84 \ - --hash=sha256:3fb248989b6363906827284cd20cca63bb1a757e0a2864d4c1682a985e3dca47 \ - --hash=sha256:41d7aa7cdfded09b3d73a47f429c298e80796c8e825ddfadc84c8a7f12df212d \ - --hash=sha256:42cb413e01a5d36da9929baa9d70ca90d90b969269e5a12d39c1e0d475010116 \ - --hash=sha256:4c2f0d35703d61002a2bbdcf15548ebb701cfdd83cdc12471d2bae80878a4207 \ - --hash=sha256:4fd871184321100fb400d759ad0cddddf284c4b696568204d281c902fc7b0d81 \ - --hash=sha256:5259cb659aa43005eb55a0e4ff2c825ca111a0da1814202c64d28a985d33b087 \ - --hash=sha256:57a51b89f954f216a81c9d057bf1a24e2f36e764a1ca9a501a6964eb4a6800dd \ - --hash=sha256:652627a055cb52a84f8c448185922241dd5217443ca194d5739b44612c5e6507 \ - --hash=sha256:67e120e9a577c64fe1f611e53b30b3e69744e5910ff3b6e97e935aeb96005858 \ - 
--hash=sha256:6af1c6387c531cd364b72c28daa29232162010d952ceb7e5ca8e2827526aceae \ - --hash=sha256:6d192741113ef5e30d89dcb5b956ef4e1578f304708701b8b73d38e3e1461f34 \ - --hash=sha256:7efe8041897fe7a50863e51b77789b657a133c75c3b094e51b5e4b5cec7bf906 \ - --hash=sha256:84537453d57f55a50a5b6835622ee405816999a7113267739a1b4581f83535bd \ - --hash=sha256:8f09daa483aedea50d249ef98ed500569841d6498aa9c9f4b0531b9964658922 \ - --hash=sha256:95dd7f261bb76948b52a5330ba5202b91a26fbac13ad0e9fc8a3ac04752058c7 \ - --hash=sha256:a74fbcdb2a0d46fe00504f571a2a540532f4c188e6ccf26f1f178480117b33c4 \ - --hash=sha256:a983e441a00a9d57a4d7c91b3116a37ae602907a7618b882c8013b5762e80574 \ - --hash=sha256:ab8de0d091acbf778f74286f4989cf3d1528336af1b59f3e5d2ebca8b5fe49e1 \ - --hash=sha256:aeb57c421b34af8f9fe830e1955bf493a86a7996cc1338fe41b30047d16e962c \ - --hash=sha256:ce785cf81a7bdade534297ef9e490ddff800d956625020ab2ec2780a556c313e \ - --hash=sha256:d0d651aa754ef58d75cec6edfbd21259d93810b73f6ec246436a21b7841908de +cryptography==41.0.4 \ + --hash=sha256:004b6ccc95943f6a9ad3142cfabcc769d7ee38a3f60fb0dddbfb431f818c3a67 \ + --hash=sha256:047c4603aeb4bbd8db2756e38f5b8bd7e94318c047cfe4efeb5d715e08b49311 \ + --hash=sha256:0d9409894f495d465fe6fda92cb70e8323e9648af912d5b9141d616df40a87b8 \ + --hash=sha256:23a25c09dfd0d9f28da2352503b23e086f8e78096b9fd585d1d14eca01613e13 \ + --hash=sha256:2ed09183922d66c4ec5fdaa59b4d14e105c084dd0febd27452de8f6f74704143 \ + --hash=sha256:35c00f637cd0b9d5b6c6bd11b6c3359194a8eba9c46d4e875a3660e3b400005f \ + --hash=sha256:37480760ae08065437e6573d14be973112c9e6dcaf5f11d00147ee74f37a3829 \ + --hash=sha256:3b224890962a2d7b57cf5eeb16ccaafba6083f7b811829f00476309bce2fe0fd \ + --hash=sha256:5a0f09cefded00e648a127048119f77bc2b2ec61e736660b5789e638f43cc397 \ + --hash=sha256:5b72205a360f3b6176485a333256b9bcd48700fc755fef51c8e7e67c4b63e3ac \ + --hash=sha256:7e53db173370dea832190870e975a1e09c86a879b613948f09eb49324218c14d \ + --hash=sha256:7febc3094125fc126a7f6fb1f420d0da639f3f32cb15c8ff0dc3997c4549f51a \ + --hash=sha256:80907d3faa55dc5434a16579952ac6da800935cd98d14dbd62f6f042c7f5e839 \ + --hash=sha256:86defa8d248c3fa029da68ce61fe735432b047e32179883bdb1e79ed9bb8195e \ + --hash=sha256:8ac4f9ead4bbd0bc8ab2d318f97d85147167a488be0e08814a37eb2f439d5cf6 \ + --hash=sha256:93530900d14c37a46ce3d6c9e6fd35dbe5f5601bf6b3a5c325c7bffc030344d9 \ + --hash=sha256:9eeb77214afae972a00dee47382d2591abe77bdae166bda672fb1e24702a3860 \ + --hash=sha256:b5f4dfe950ff0479f1f00eda09c18798d4f49b98f4e2006d644b3301682ebdca \ + --hash=sha256:c3391bd8e6de35f6f1140e50aaeb3e2b3d6a9012536ca23ab0d9c35ec18c8a91 \ + --hash=sha256:c880eba5175f4307129784eca96f4e70b88e57aa3f680aeba3bab0e980b0f37d \ + --hash=sha256:cecfefa17042941f94ab54f769c8ce0fe14beff2694e9ac684176a2535bf9714 \ + --hash=sha256:e40211b4923ba5a6dc9769eab704bdb3fbb58d56c5b336d30996c24fcf12aadb \ + --hash=sha256:efc8ad4e6fc4f1752ebfb58aefece8b4e3c4cae940b0994d43649bdfce8d0d4f # via # gcp-releasetool # secretstorage @@ -382,6 +382,7 @@ protobuf==3.20.3 \ # gcp-docuploader # gcp-releasetool # google-api-core + # googleapis-common-protos pyasn1==0.4.8 \ --hash=sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d \ --hash=sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba diff --git a/CHANGELOG.md b/CHANGELOG.md index e4b2bff3c7..880f791625 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,25 @@ [1]: https://pypi.org/project/bigframes/#history +## 
[0.6.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v0.5.0...v0.6.0) (2023-10-04) + + +### Features + +* Add df.unstack ([#63](https://github.com/googleapis/python-bigquery-dataframes/issues/63)) ([4a84714](https://github.com/googleapis/python-bigquery-dataframes/commit/4a84714e2fb07f70c70c79f8b8da9fcb41096e33)) +* Add idxmin, idxmax to series, dataframe ([#74](https://github.com/googleapis/python-bigquery-dataframes/issues/74)) ([781307e](https://github.com/googleapis/python-bigquery-dataframes/commit/781307ec22d31a7657f8ee5c6eedc0e419450ccd)) +* Add ml.preprocessing.KBinsDiscretizer ([#81](https://github.com/googleapis/python-bigquery-dataframes/issues/81)) ([24c6256](https://github.com/googleapis/python-bigquery-dataframes/commit/24c625638984f6a84191c7a4c8ac9fb6c3cf1dca)) +* Add multi-column dataframe merge ([#73](https://github.com/googleapis/python-bigquery-dataframes/issues/73)) ([c9fa85c](https://github.com/googleapis/python-bigquery-dataframes/commit/c9fa85cc338be5e9a8dde59b255690aedbbc1127)) +* Add update and align methods to dataframe ([#57](https://github.com/googleapis/python-bigquery-dataframes/issues/57)) ([bf050cf](https://github.com/googleapis/python-bigquery-dataframes/commit/bf050cf475ad8a9e3e0ca3f896ddaf96dbe13ae3)) +* Support STRUCT data type with `Series.struct.field` to extract child fields ([#71](https://github.com/googleapis/python-bigquery-dataframes/issues/71)) ([17afac9](https://github.com/googleapis/python-bigquery-dataframes/commit/17afac9ff70a2b93ed70dc7bcce7beb9a53c2ece)) + + +### Bug Fixes + +* Avoid `403 response too large to return` error with `read_gbq` and large query results ([#77](https://github.com/googleapis/python-bigquery-dataframes/issues/77)) ([8f3b5b2](https://github.com/googleapis/python-bigquery-dataframes/commit/8f3b5b240f0f28fef92465abc53504e875d7335a)) +* Change return type of `Series.loc[scalar]` ([#40](https://github.com/googleapis/python-bigquery-dataframes/issues/40)) ([fff3d45](https://github.com/googleapis/python-bigquery-dataframes/commit/fff3d45f03ffbc7bb23143a1572e3dd157463ca9)) +* Fix df/series.iloc by list with multiindex ([#79](https://github.com/googleapis/python-bigquery-dataframes/issues/79)) ([971d091](https://github.com/googleapis/python-bigquery-dataframes/commit/971d091cac9ad662145a3d43d8f9a785eb0ccc23)) + ## [0.5.0](https://github.com/googleapis/python-bigquery-dataframes/compare/v0.4.0...v0.5.0) (2023-09-28) diff --git a/bigframes/core/__init__.py b/bigframes/core/__init__.py index c529f83351..8008c1189a 100644 --- a/bigframes/core/__init__.py +++ b/bigframes/core/__init__.py @@ -963,10 +963,11 @@ def unpivot( ], *, passthrough_columns: typing.Sequence[str] = (), - index_col_id: str = "index", + index_col_ids: typing.Sequence[str] = ["index"], dtype: typing.Union[ bigframes.dtypes.Dtype, typing.Sequence[bigframes.dtypes.Dtype] ] = pandas.Float64Dtype(), + how="left", ) -> ArrayValue: """ Unpivot ArrayValue columns. 
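# The signature hunk above replaces the single ``index_col_id`` with a
# sequence of ``index_col_ids`` and adds a ``how`` flag. A minimal plain-pandas
# sketch (an analogy, not bigframes internals) of what a multi-id unpivot
# models: stacking two column levels yields two new index columns at once.
import pandas as pd

wide = pd.DataFrame(
    {("a", "x"): [1, 2], ("a", "y"): [3, 4]},
    index=pd.Index([10, 20], name="row"),
)
stacked = wide.stack([0, 1])  # two stacked levels -> two new index levels
print(stacked)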
@@ -981,8 +982,11 @@ def unpivot( Returns: ArrayValue: The unpivoted ArrayValue """ - table = self._to_ibis_expr(ordering_mode="offset_col") + if how not in ("left", "right"): + raise ValueError("'how' must be 'left' or 'right'") + table = self._to_ibis_expr(ordering_mode="unordered", expose_hidden_cols=True) row_n = len(row_labels) + hidden_col_ids = self._hidden_ordering_column_names.keys() if not all( len(source_columns) == row_n for _, source_columns in unpivot_columns ): @@ -992,33 +996,44 @@ def unpivot( unpivot_table = table.cross_join( ibis.memtable({unpivot_offset_id: range(row_n)}) ) - unpivot_offsets_value = ( - ( - (unpivot_table[ORDER_ID_COLUMN] * row_n) - + unpivot_table[unpivot_offset_id] - ) - .cast(ibis_dtypes.int64) - .name(ORDER_ID_COLUMN), - ) - # Use ibis memtable to infer type of rowlabels (if possible) # TODO: Allow caller to specify dtype - labels_ibis_type = ibis.memtable({"col": row_labels})["col"].type() - labels_dtype = bigframes.dtypes.ibis_dtype_to_bigframes_dtype(labels_ibis_type) - cases = [ - ( - i, - bigframes.dtypes.literal_to_ibis_scalar( - row_labels[i], force_dtype=labels_dtype # type:ignore - ), - ) - for i in range(len(row_labels)) + if isinstance(row_labels[0], tuple): + labels_table = ibis.memtable(row_labels) + labels_ibis_types = [ + labels_table[col].type() for col in labels_table.columns + ] + else: + labels_ibis_types = [ibis.memtable({"col": row_labels})["col"].type()] + labels_dtypes = [ + bigframes.dtypes.ibis_dtype_to_bigframes_dtype(ibis_type) + for ibis_type in labels_ibis_types ] - labels_value = ( - typing.cast(ibis_types.IntegerColumn, unpivot_table[unpivot_offset_id]) - .cases(cases, default=None) # type:ignore - .name(index_col_id) - ) + + label_columns = [] + for label_part, (col_id, label_dtype) in enumerate( + zip(index_col_ids, labels_dtypes) + ): + # interpret as tuples even if it wasn't originally so can apply same logic for multi-column labels + labels_as_tuples = [ + label if isinstance(label, tuple) else (label,) for label in row_labels + ] + cases = [ + ( + i, + bigframes.dtypes.literal_to_ibis_scalar( + label_tuple[label_part], # type:ignore + force_dtype=label_dtype, # type:ignore + ), + ) + for i, label_tuple in enumerate(labels_as_tuples) + ] + labels_value = ( + typing.cast(ibis_types.IntegerColumn, unpivot_table[unpivot_offset_id]) + .cases(cases, default=None) # type:ignore + .name(col_id) + ) + label_columns.append(labels_value) unpivot_values = [] for j in range(len(unpivot_columns)): @@ -1042,23 +1057,53 @@ def unpivot( unpivot_values.append(unpivot_value.name(result_col)) unpivot_table = unpivot_table.select( - passthrough_columns, labels_value, *unpivot_values, unpivot_offsets_value + passthrough_columns, + *label_columns, + *unpivot_values, + *hidden_col_ids, + unpivot_offset_id, ) + # Extend the original ordering using unpivot_offset_id + old_ordering = self._ordering + if how == "left": + new_ordering = ExpressionOrdering( + ordering_value_columns=[ + *old_ordering.ordering_value_columns, + OrderingColumnReference(unpivot_offset_id), + ], + total_ordering_columns=frozenset( + [*old_ordering.total_ordering_columns, unpivot_offset_id] + ), + ) + else: # how=="right" + new_ordering = ExpressionOrdering( + ordering_value_columns=[ + OrderingColumnReference(unpivot_offset_id), + *old_ordering.ordering_value_columns, + ], + total_ordering_columns=frozenset( + [*old_ordering.total_ordering_columns, unpivot_offset_id] + ), + ) value_columns = [ unpivot_table[value_col_id] for value_col_id, _ in unpivot_columns ] 
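# The ``how`` flag handled above chooses which key dominates the resulting
# ordering: "left" keeps the original rows as the primary sort key (row-major),
# while "right" makes the new unpivot labels primary (label-major). A rough
# plain-pandas analogy of the two orderings:
import pandas as pd

wide = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=["r0", "r1"])
row_major = wide.stack()                             # r0/a, r0/b, r1/a, r1/b
label_major = wide.stack().swaplevel().sort_index()  # a/r0, a/r1, b/r0, b/r1
print(row_major)
print(label_major)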
passthrough_values = [unpivot_table[col] for col in passthrough_columns] + hidden_ordering_columns = [ + unpivot_table[unpivot_offset_id], + *[unpivot_table[hidden_col] for hidden_col in hidden_col_ids], + ] return ArrayValue( session=self._session, table=unpivot_table, - columns=[unpivot_table[index_col_id], *value_columns, *passthrough_values], - hidden_ordering_columns=[unpivot_table[ORDER_ID_COLUMN]], - ordering=ExpressionOrdering( - ordering_value_columns=[OrderingColumnReference(ORDER_ID_COLUMN)], - integer_encoding=IntegerEncoding(is_encoded=True, is_sequential=True), - total_ordering_columns=frozenset([ORDER_ID_COLUMN]), - ), + columns=[ + *[unpivot_table[col_id] for col_id in index_col_ids], + *value_columns, + *passthrough_values, + ], + hidden_ordering_columns=hidden_ordering_columns, + ordering=new_ordering, ) def assign(self, source_id: str, destination_id: str) -> ArrayValue: @@ -1153,8 +1198,8 @@ def cached(self, cluster_cols: typing.Sequence[str]) -> ArrayValue: destination = self._session._ibis_to_session_table( ibis_expr, cluster_cols=cluster_cols, api_name="cache" ) - table_expression = self._session.ibis_client.sql( - f"SELECT * FROM `_SESSION`.`{destination.table_id}`" + table_expression = self._session.ibis_client.table( + f"{destination.project}.{destination.dataset_id}.{destination.table_id}" ) new_columns = [table_expression[column] for column in self.column_names] new_hidden_columns = [ diff --git a/bigframes/core/block_transforms.py b/bigframes/core/block_transforms.py index d22112417c..30c7902981 100644 --- a/bigframes/core/block_transforms.py +++ b/bigframes/core/block_transforms.py @@ -17,6 +17,7 @@ import pandas as pd +import bigframes.constants as constants import bigframes.core as core import bigframes.core.blocks as blocks import bigframes.core.ordering as ordering @@ -504,3 +505,125 @@ def _kurt_from_moments_and_count( kurt_id, na_cond_id, ops.partial_arg3(ops.where_op, None) ) return block, kurt_id + + +def align( + left_block: blocks.Block, + right_block: blocks.Block, + join: str = "outer", + axis: typing.Union[str, int, None] = None, +) -> typing.Tuple[blocks.Block, blocks.Block]: + axis_n = core.utils.get_axis_number(axis) if axis is not None else None + # Must align columns first as other way will likely create extra joins + if (axis_n is None) or axis_n == 1: + left_block, right_block = align_columns(left_block, right_block, join=join) + if (axis_n is None) or axis_n == 0: + left_block, right_block = align_rows(left_block, right_block, join=join) + return left_block, right_block + + +def align_rows( + left_block: blocks.Block, + right_block: blocks.Block, + join: str = "outer", +): + joined_index, (get_column_left, get_column_right) = left_block.index.join( + right_block.index, how=join + ) + left_columns = [get_column_left(col) for col in left_block.value_columns] + right_columns = [get_column_right(col) for col in right_block.value_columns] + + left_block = joined_index._block.select_columns(left_columns) + right_block = joined_index._block.select_columns(right_columns) + return left_block, right_block + + +def align_columns( + left_block: blocks.Block, + right_block: blocks.Block, + join: str = "outer", +): + columns, lcol_indexer, rcol_indexer = left_block.column_labels.join( + right_block.column_labels, how=join, return_indexers=True + ) + column_indices = zip( + lcol_indexer if (lcol_indexer is not None) else range(len(columns)), + rcol_indexer if (rcol_indexer is not None) else range(len(columns)), + ) + left_column_ids = [] + 
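# ``align_rows`` above and ``align_columns`` here mirror pandas'
# ``DataFrame.align``: outer-join the row index and the column labels, and
# back-fill columns missing on one side with null constants (columns are
# aligned before rows to avoid extra joins, per the comment in ``align``).
# The pandas behaviour being reproduced:
import pandas as pd

left = pd.DataFrame({"a": [1]}, index=[0])
right = pd.DataFrame({"b": [2]}, index=[1])
l_aligned, r_aligned = left.align(right, join="outer")
print(l_aligned)   # columns a and b, rows 0 and 1, missing cells are NA
print(r_aligned)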
right_column_ids = [] + + original_left_block = left_block + original_right_block = right_block + + for left_index, right_index in column_indices: + if left_index >= 0: + left_col_id = original_left_block.value_columns[left_index] + else: + dtype = right_block.dtypes[right_index] + left_block, left_col_id = left_block.create_constant( + None, dtype=dtype, label=original_right_block.column_labels[right_index] + ) + left_column_ids.append(left_col_id) + + if right_index >= 0: + right_col_id = original_right_block.value_columns[right_index] + else: + dtype = original_left_block.dtypes[left_index] + right_block, right_col_id = right_block.create_constant( + None, dtype=dtype, label=left_block.column_labels[left_index] + ) + right_column_ids.append(right_col_id) + left_final = left_block.select_columns(left_column_ids) + right_final = right_block.select_columns(right_column_ids) + return left_final, right_final + + +def idxmin(block: blocks.Block) -> blocks.Block: + return _idx_extrema(block, "min") + + +def idxmax(block: blocks.Block) -> blocks.Block: + return _idx_extrema(block, "max") + + +def _idx_extrema( + block: blocks.Block, min_or_max: typing.Literal["min", "max"] +) -> blocks.Block: + if len(block.index_columns) != 1: + # TODO: Need support for tuple dtype + raise NotImplementedError( + f"idxmin not support for multi-index. {constants.FEEDBACK_LINK}" + ) + + original_block = block + result_cols = [] + for value_col in original_block.value_columns: + direction = ( + ordering.OrderingDirection.ASC + if min_or_max == "min" + else ordering.OrderingDirection.DESC + ) + # Have to find the min for each + order_refs = [ + ordering.OrderingColumnReference(value_col, direction), + *[ + ordering.OrderingColumnReference(idx_col) + for idx_col in original_block.index_columns + ], + ] + window_spec = core.WindowSpec(ordering=order_refs) + idx_col = original_block.index_columns[0] + block, result_col = block.apply_window_op( + idx_col, agg_ops.first_op, window_spec + ) + result_cols.append(result_col) + + block = block.select_columns(result_cols).with_column_labels( + original_block.column_labels + ) + # Stack the entire column axis to produce single-column result + # Assumption: uniform dtype for stackability + return block.aggregate_all_and_stack( + agg_ops.AnyValueOp(), dtype=block.dtypes[0] + ).with_column_labels([original_block.index.name]) diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py index b53c2212c1..0161d17361 100644 --- a/bigframes/core/blocks.py +++ b/bigframes/core/blocks.py @@ -38,6 +38,7 @@ import bigframes.core as core import bigframes.core.guid as guid import bigframes.core.indexes as indexes +import bigframes.core.joins as joins import bigframes.core.ordering as ordering import bigframes.core.utils import bigframes.core.utils as utils @@ -838,7 +839,7 @@ def aggregate_all_and_stack( ] result_expr = self.expr.aggregate(aggregations, dropna=dropna).unpivot( row_labels=self.column_labels.to_list(), - index_col_id="index", + index_col_ids=["index"], unpivot_columns=[(value_col_id, self.value_columns)], dtype=dtype, ) @@ -849,7 +850,7 @@ def aggregate_all_and_stack( expr_with_offsets, offset_col = self.expr.promote_offsets() stacked_expr = expr_with_offsets.unpivot( row_labels=self.column_labels.to_list(), - index_col_id=guid.generate_guid(), + index_col_ids=[guid.generate_guid()], unpivot_columns=[(value_col_id, self.value_columns)], passthrough_columns=[*self.index_columns, offset_col], dtype=dtype, @@ -1041,7 +1042,7 @@ def summarize( expr = 
self.expr.aggregate(aggregations).unpivot( labels, unpivot_columns=columns, - index_col_id=label_col_id, + index_col_ids=[label_col_id], ) labels = self._get_labels_for_columns(column_ids) return Block(expr, column_labels=labels, index_columns=[label_col_id]) @@ -1225,116 +1226,83 @@ def pivot( return result_block.with_column_labels(column_index) - def stack(self): + def stack(self, how="left", dropna=True, sort=True, levels: int = 1): """Unpivot last column axis level into row axis""" - if isinstance(self.column_labels, pd.MultiIndex): - return self._stack_multi() - else: - return self._stack_mono() - - def _stack_mono(self): - if isinstance(self.column_labels, pd.MultiIndex): - raise ValueError("Expected single level index") - # These are the values that will be turned into rows - stack_values = self.column_labels.drop_duplicates().sort_values() - # Get matching columns - unpivot_columns: List[Tuple[str, List[str]]] = [] - dtypes: List[bigframes.dtypes.Dtype] = [] - col_id = guid.generate_guid("unpivot_") - dtype = None - input_columns: Sequence[Optional[str]] = [] - for uvalue in stack_values: - matching_ids = self.label_to_col_id.get(uvalue, []) - input_id = matching_ids[0] if len(matching_ids) > 0 else None - if input_id: - if dtype and dtype != self._column_type(input_id): - raise NotImplementedError( - "Cannot stack columns with non-matching dtypes." - ) - else: - dtype = self._column_type(input_id) - input_columns.append(input_id) - unpivot_columns.append((col_id, input_columns)) - if dtype: - dtypes.append(dtype or pd.Float64Dtype()) + col_labels, row_labels = utils.split_index(self.column_labels, levels=levels) + if dropna: + row_labels = row_labels.drop_duplicates() + if sort: + row_labels = row_labels.sort_values() - added_index_column = col_id = guid.generate_guid() - unpivot_expr = self._expr.unpivot( - row_labels=stack_values, - passthrough_columns=self.index_columns, - unpivot_columns=unpivot_columns, - index_col_id=added_index_column, - dtype=dtypes, - ) - block = Block( - unpivot_expr, - index_columns=[*self.index_columns, added_index_column], - column_labels=[None], - index_labels=[*self._index_labels, self.column_labels.names[-1]], - ) - return block + row_label_tuples = utils.index_as_tuples(row_labels) - def _stack_multi(self): - if not isinstance(self.column_labels, pd.MultiIndex): - raise ValueError("Expected multi-index") - - # These are the values that will be turned into rows - stack_values = ( - self.column_labels.get_level_values(-1).drop_duplicates().sort_values() - ) - - result_col_labels = ( - self.column_labels.droplevel(-1) - .drop_duplicates() - .sort_values() - .dropna(how="all") - ) + if col_labels is not None: + result_index = col_labels.drop_duplicates().sort_values().dropna(how="all") + result_col_labels = utils.index_as_tuples(result_index) + else: + result_index = pd.Index([None]) + result_col_labels = list([()]) # Get matching columns unpivot_columns: List[Tuple[str, List[str]]] = [] dtypes = [] for val in result_col_labels: col_id = guid.generate_guid("unpivot_") - dtype = None - input_columns: Sequence[Optional[str]] = [] - for uvalue in stack_values: - # Need to unpack if still a multi-index after dropping 1 level - label_to_match = ( - (val, uvalue) if result_col_labels.nlevels == 1 else (*val, uvalue) - ) - matching_ids = self.label_to_col_id.get(label_to_match, []) - input_id = matching_ids[0] if len(matching_ids) > 0 else None - if input_id: - if dtype and dtype != self._column_type(input_id): - raise NotImplementedError( - "Cannot stack 
columns with non-matching dtypes." - ) - else: - dtype = self._column_type(input_id) - input_columns.append(input_id) - # Input column i is the first one that + input_columns, dtype = self._create_stack_column(val, row_label_tuples) unpivot_columns.append((col_id, input_columns)) if dtype: dtypes.append(dtype or pd.Float64Dtype()) - added_index_column = col_id = guid.generate_guid() + added_index_columns = [guid.generate_guid() for _ in range(row_labels.nlevels)] unpivot_expr = self._expr.unpivot( - row_labels=stack_values, + row_labels=row_label_tuples, passthrough_columns=self.index_columns, unpivot_columns=unpivot_columns, - index_col_id=added_index_column, + index_col_ids=added_index_columns, dtype=dtypes, + how=how, ) + new_index_level_names = self.column_labels.names[-levels:] + if how == "left": + index_columns = [*self.index_columns, *added_index_columns] + index_labels = [*self._index_labels, *new_index_level_names] + else: + index_columns = [*added_index_columns, *self.index_columns] + index_labels = [*new_index_level_names, *self._index_labels] + block = Block( unpivot_expr, - index_columns=[*self.index_columns, added_index_column], - column_labels=result_col_labels, - index_labels=[*self._index_labels, self.column_labels.names[-1]], + index_columns=index_columns, + column_labels=result_index, + index_labels=index_labels, ) return block + def _create_stack_column( + self, col_label: typing.Tuple, stack_labels: typing.Sequence[typing.Tuple] + ): + dtype = None + input_columns: list[Optional[str]] = [] + for uvalue in stack_labels: + label_to_match = (*col_label, *uvalue) + label_to_match = ( + label_to_match[0] if len(label_to_match) == 1 else label_to_match + ) + matching_ids = self.label_to_col_id.get(label_to_match, []) + input_id = matching_ids[0] if len(matching_ids) > 0 else None + if input_id: + if dtype and dtype != self._column_type(input_id): + raise NotImplementedError( + "Cannot stack columns with non-matching dtypes." 
+ ) + else: + dtype = self._column_type(input_id) + input_columns.append(input_id) + # Input column i is the first one that + return input_columns, dtype or pd.Float64Dtype() + def _column_type(self, col_id: str) -> bigframes.dtypes.Dtype: col_offset = self.value_columns.index(col_id) dtype = self.dtypes[col_offset] @@ -1436,6 +1404,78 @@ def concat( result_block = result_block.reset_index() return result_block + def merge( + self, + other: Block, + how: typing.Literal[ + "inner", + "left", + "outer", + "right", + ], + left_col_ids: typing.Sequence[str], + right_col_ids: typing.Sequence[str], + sort: bool, + suffixes: tuple[str, str] = ("_x", "_y"), + ) -> Block: + ( + joined_expr, + coalesced_join_cols, + (get_column_left, get_column_right), + ) = joins.join_by_column( + self.expr, + left_col_ids, + other.expr, + right_col_ids, + how=how, + sort=sort, + ) + + # which join key parts should be coalesced + merge_join_key_mask = [ + str(self.col_id_to_label[left_id]) == str(other.col_id_to_label[right_id]) + for left_id, right_id in zip(left_col_ids, right_col_ids) + ] + labels_to_coalesce = [ + self.col_id_to_label[col_id] + for i, col_id in enumerate(left_col_ids) + if merge_join_key_mask[i] + ] + + def left_col_mapping(col_id: str) -> str: + if col_id in left_col_ids: + join_key_part = left_col_ids.index(col_id) + if merge_join_key_mask[join_key_part]: + return coalesced_join_cols[join_key_part] + return get_column_left(col_id) + + def right_col_mapping(col_id: str) -> typing.Optional[str]: + if col_id in right_col_ids: + join_key_part = right_col_ids.index(col_id) + if merge_join_key_mask[join_key_part]: + return None + return get_column_right(col_id) + + left_columns = [left_col_mapping(col_id) for col_id in self.value_columns] + + right_columns = [ + typing.cast(str, right_col_mapping(col_id)) + for col_id in other.value_columns + if right_col_mapping(col_id) + ] + + expr = joined_expr.select_columns([*left_columns, *right_columns]) + labels = utils.merge_column_labels( + self.column_labels, + other.column_labels, + coalesce_labels=labels_to_coalesce, + suffixes=suffixes, + ) + + # Constructs default index + expr, offset_index_id = expr.promote_offsets() + return Block(expr, index_columns=[offset_index_id], column_labels=labels) + def _force_reproject(self) -> Block: """Forces a reprojection of the underlying tables expression. 
Used to force predicate/order application before subsequent operations.""" return Block( diff --git a/bigframes/core/indexers.py b/bigframes/core/indexers.py index a538c80711..1a88b2abd6 100644 --- a/bigframes/core/indexers.py +++ b/bigframes/core/indexers.py @@ -15,7 +15,7 @@ from __future__ import annotations import typing -from typing import Tuple +from typing import Tuple, Union import ibis import pandas as pd @@ -29,20 +29,19 @@ import bigframes.series if typing.TYPE_CHECKING: - LocSingleKey = typing.Union[bigframes.series.Series, indexes.Index, slice] + LocSingleKey = Union[ + bigframes.series.Series, indexes.Index, slice, bigframes.core.scalar.Scalar + ] class LocSeriesIndexer: def __init__(self, series: bigframes.series.Series): self._series = series - def __getitem__(self, key) -> bigframes.series.Series: - """ - Only indexing by a boolean bigframes.series.Series or list of index entries is currently supported - """ - return typing.cast( - bigframes.series.Series, _loc_getitem_series_or_dataframe(self._series, key) - ) + def __getitem__( + self, key + ) -> Union[bigframes.core.scalar.Scalar, bigframes.series.Series]: + return _loc_getitem_series_or_dataframe(self._series, key) def __setitem__(self, key, value) -> None: # TODO(swast): support MultiIndex @@ -84,7 +83,7 @@ def __init__(self, series: bigframes.series.Series): def __getitem__( self, key - ) -> bigframes.core.scalar.Scalar | bigframes.series.Series: + ) -> Union[bigframes.core.scalar.Scalar, bigframes.series.Series]: """ Index series using integer offsets. Currently supports index by key type: @@ -103,13 +102,17 @@ def __init__(self, dataframe: bigframes.dataframe.DataFrame): self._dataframe = dataframe @typing.overload - def __getitem__(self, key: LocSingleKey) -> bigframes.dataframe.DataFrame: + def __getitem__( + self, key: LocSingleKey + ) -> Union[bigframes.dataframe.DataFrame, pd.Series]: ... # Technically this is wrong since we can have duplicate column labels, but # this is expected to be rare. @typing.overload - def __getitem__(self, key: Tuple[LocSingleKey, str]) -> bigframes.series.Series: + def __getitem__( + self, key: Tuple[LocSingleKey, str] + ) -> Union[bigframes.series.Series, bigframes.core.scalar.Scalar]: ... def __getitem__(self, key): @@ -173,7 +176,7 @@ class ILocDataFrameIndexer: def __init__(self, dataframe: bigframes.dataframe.DataFrame): self._dataframe = dataframe - def __getitem__(self, key) -> bigframes.dataframe.DataFrame | pd.Series: + def __getitem__(self, key) -> Union[bigframes.dataframe.DataFrame, pd.Series]: """ Index dataframe using integer offsets. Currently supports index by key type: @@ -188,21 +191,26 @@ def __getitem__(self, key) -> bigframes.dataframe.DataFrame | pd.Series: @typing.overload def _loc_getitem_series_or_dataframe( series_or_dataframe: bigframes.series.Series, key -) -> bigframes.series.Series: +) -> Union[bigframes.core.scalar.Scalar, bigframes.series.Series]: ... @typing.overload def _loc_getitem_series_or_dataframe( series_or_dataframe: bigframes.dataframe.DataFrame, key -) -> bigframes.dataframe.DataFrame: +) -> Union[bigframes.dataframe.DataFrame, pd.Series]: ... 
def _loc_getitem_series_or_dataframe( - series_or_dataframe: bigframes.dataframe.DataFrame | bigframes.series.Series, + series_or_dataframe: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], key: LocSingleKey, -) -> bigframes.dataframe.DataFrame | bigframes.series.Series: +) -> Union[ + bigframes.dataframe.DataFrame, + bigframes.series.Series, + pd.Series, + bigframes.core.scalar.Scalar, +]: if isinstance(key, bigframes.series.Series) and key.dtype == "boolean": return series_or_dataframe[key] elif isinstance(key, bigframes.series.Series): @@ -222,7 +230,7 @@ def _loc_getitem_series_or_dataframe( # TODO(henryjsolberg): support MultiIndex if len(key) == 0: # type: ignore return typing.cast( - typing.Union[bigframes.dataframe.DataFrame, bigframes.series.Series], + Union[bigframes.dataframe.DataFrame, bigframes.series.Series], series_or_dataframe.iloc[0:0], ) @@ -258,11 +266,22 @@ def _loc_getitem_series_or_dataframe( ) keys_df = keys_df.set_index(index_name, drop=True) keys_df.index.name = None - return _perform_loc_list_join(series_or_dataframe, keys_df) + result = _perform_loc_list_join(series_or_dataframe, keys_df) + pandas_result = result.to_pandas() + # although loc[scalar_key] returns multiple results when scalar_key + # is not unique, we download the results here and return the computed + # individual result (as a scalar or pandas series) when the key is unique, + # since we expect unique index keys to be more common. loc[[scalar_key]] + # can be used to retrieve one-item DataFrames or Series. + if len(pandas_result) == 1: + return pandas_result.iloc[0] + # when the key is not unique, we return a bigframes data type + # as usual for methods that return dataframes/series + return result else: raise TypeError( - "Invalid argument type. loc currently only supports indexing with a " - "boolean bigframes Series, a list of index entries or a single index entry. " + "Invalid argument type. Expected bigframes.Series, bigframes.Index, " + "list, : (empty slice), or scalar. " f"{constants.FEEDBACK_LINK}" ) @@ -284,9 +303,9 @@ def _perform_loc_list_join( def _perform_loc_list_join( - series_or_dataframe: bigframes.dataframe.DataFrame | bigframes.series.Series, + series_or_dataframe: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], keys_df: bigframes.dataframe.DataFrame, -) -> bigframes.series.Series | bigframes.dataframe.DataFrame: +) -> Union[bigframes.series.Series, bigframes.dataframe.DataFrame]: # right join based on the old index so that the matching rows from the user's # original dataframe will be duplicated and reordered appropriately original_index_names = series_or_dataframe.index.names @@ -309,20 +328,26 @@ def _perform_loc_list_join( @typing.overload def _iloc_getitem_series_or_dataframe( series_or_dataframe: bigframes.series.Series, key -) -> bigframes.series.Series | bigframes.core.scalar.Scalar: +) -> Union[bigframes.series.Series, bigframes.core.scalar.Scalar]: ... @typing.overload def _iloc_getitem_series_or_dataframe( series_or_dataframe: bigframes.dataframe.DataFrame, key -) -> bigframes.dataframe.DataFrame | pd.Series: +) -> Union[bigframes.dataframe.DataFrame, pd.Series]: ... 
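# The ``loc`` path above downloads the result for a scalar key and mirrors
# pandas: a unique key returns a scalar (or a pandas Series for a DataFrame),
# a duplicated key keeps the bigframes container, and a list key always keeps
# the container. Plain-pandas illustration of the behaviour being matched:
import pandas as pd

s = pd.Series([1, 2, 3], index=["a", "b", "b"])
print(s.loc["a"])    # scalar: key is unique
print(s.loc["b"])    # two-row Series: key is duplicated
print(s.loc[["a"]])  # one-row Series: list keys always return a Series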
def _iloc_getitem_series_or_dataframe( - series_or_dataframe: bigframes.dataframe.DataFrame | bigframes.series.Series, key -) -> bigframes.dataframe.DataFrame | bigframes.series.Series | bigframes.core.scalar.Scalar | pd.Series: + series_or_dataframe: Union[bigframes.dataframe.DataFrame, bigframes.series.Series], + key, +) -> Union[ + bigframes.dataframe.DataFrame, + bigframes.series.Series, + bigframes.core.scalar.Scalar, + pd.Series, +]: if isinstance(key, int): internal_slice_result = series_or_dataframe._slice(key, key + 1, 1) result_pd_df = internal_slice_result.to_pandas() @@ -332,11 +357,9 @@ def _iloc_getitem_series_or_dataframe( elif isinstance(key, slice): return series_or_dataframe._slice(key.start, key.stop, key.step) elif pd.api.types.is_list_like(key): - # TODO(henryjsolberg): support MultiIndex - if len(key) == 0: return typing.cast( - typing.Union[bigframes.dataframe.DataFrame, bigframes.series.Series], + Union[bigframes.dataframe.DataFrame, bigframes.series.Series], series_or_dataframe.iloc[0:0], ) df = series_or_dataframe @@ -346,15 +369,18 @@ def _iloc_getitem_series_or_dataframe( original_series_name if original_series_name is not None else "0" ) df = series_or_dataframe.to_frame() - original_index_name = df.index.name - temporary_index_name = guid.generate_guid(prefix="temp_iloc_index_") - df = df.rename_axis(temporary_index_name) + original_index_names = df.index.names + temporary_index_names = [ + guid.generate_guid(prefix="temp_iloc_index_") + for _ in range(len(df.index.names)) + ] + df = df.rename_axis(temporary_index_names) # set to offset index and use regular loc, then restore index df = df.reset_index(drop=False) result = df.loc[key] - result = result.set_index(temporary_index_name) - result = result.rename_axis(original_index_name) + result = result.set_index(temporary_index_names) + result = result.rename_axis(original_index_names) if isinstance(series_or_dataframe, bigframes.series.Series): result = result[series_name] diff --git a/bigframes/core/io.py b/bigframes/core/io.py index 3c2e5a25f5..d47efbdddc 100644 --- a/bigframes/core/io.py +++ b/bigframes/core/io.py @@ -16,7 +16,8 @@ import datetime import textwrap -from typing import Dict, Union +import types +from typing import Dict, Iterable, Union import google.cloud.bigquery as bigquery @@ -89,6 +90,48 @@ def create_snapshot_sql( ) +# BigQuery REST API returns types in Legacy SQL format +# https://cloud.google.com/bigquery/docs/data-types but we use Standard SQL +# names +# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types +BQ_STANDARD_TYPES = types.MappingProxyType( + { + "BOOLEAN": "BOOL", + "INTEGER": "INT64", + "FLOAT": "FLOAT64", + } +) + + +def bq_field_to_type_sql(field: bigquery.SchemaField): + if field.mode == "REPEATED": + nested_type = bq_field_to_type_sql( + bigquery.SchemaField( + field.name, field.field_type, mode="NULLABLE", fields=field.fields + ) + ) + return f"ARRAY<{nested_type}>" + + if field.field_type == "RECORD": + nested_fields_sql = ", ".join( + bq_field_to_sql(child_field) for child_field in field.fields + ) + return f"STRUCT<{nested_fields_sql}>" + + type_ = field.field_type + return BQ_STANDARD_TYPES.get(type_, type_) + + +def bq_field_to_sql(field: bigquery.SchemaField): + name = field.name + type_ = bq_field_to_type_sql(field) + return f"`{name}` {type_}" + + +def bq_schema_to_sql(schema: Iterable[bigquery.SchemaField]): + return ", ".join(bq_field_to_sql(field) for field in schema) + + def 
format_option(key: str, value: Union[bool, str]) -> str: if isinstance(value, bool): return f"{key}=true" if value else f"{key}=false" diff --git a/bigframes/core/joins/single_column.py b/bigframes/core/joins/single_column.py index 8a9825cf0b..2d616fc3f0 100644 --- a/bigframes/core/joins/single_column.py +++ b/bigframes/core/joins/single_column.py @@ -44,7 +44,6 @@ def join_by_column( "right", ], sort: bool = False, - coalesce_join_keys: bool = True, allow_row_identity_join: bool = True, ) -> Tuple[ core.ArrayValue, @@ -59,8 +58,6 @@ def join_by_column( right: Expression for right table to join. right_column_ids: Column IDs (not label) to join by. how: The type of join to perform. - coalesce_join_keys: if set to False, returned column ids will contain - both left and right join key columns. allow_row_identity_join (bool): If True, allow matching by row identity. Set to False to always perform a true JOIN in generated SQL. @@ -71,8 +68,6 @@ def join_by_column( * Sequence[str]: Column IDs of the coalesced join columns. Sometimes either the left/right table will have missing rows. This column pulls the non-NULL value from either left/right. - If coalesce_join_keys is False, will return uncombined left and - right key columns. * Tuple[Callable, Callable]: For a given column ID from left or right, respectively, return the new column id from the combined expression. """ @@ -100,9 +95,7 @@ def join_by_column( right_join_keys = [ combined_expr.get_column(get_column_right(col)) for col in right_column_ids ] - join_key_cols = get_join_cols( - left_join_keys, right_join_keys, how, coalesce_join_keys - ) + join_key_cols = get_coalesced_join_cols(left_join_keys, right_join_keys, how) join_key_ids = [col.get_name() for col in join_key_cols] combined_expr = combined_expr.projection( [*join_key_cols, *combined_expr.columns] @@ -182,9 +175,7 @@ def get_column_right(col_id): right_join_keys = [ combined_table[get_column_right(col)] for col in right_column_ids ] - join_key_cols = get_join_cols( - left_join_keys, right_join_keys, how, coalesce_join_keys - ) + join_key_cols = get_coalesced_join_cols(left_join_keys, right_join_keys, how) # We could filter out the original join columns, but predicates/ordering # might still reference them in implicit joins. columns = ( @@ -226,46 +217,35 @@ def get_column_right(col_id): ) -def get_join_cols( +def get_coalesced_join_cols( left_join_cols: typing.Iterable[ibis_types.Value], right_join_cols: typing.Iterable[ibis_types.Value], how: str, - coalesce_join_keys: bool = True, ) -> typing.List[ibis_types.Value]: join_key_cols: list[ibis_types.Value] = [] for left_col, right_col in zip(left_join_cols, right_join_cols): - if not coalesce_join_keys: + if how == "left" or how == "inner": join_key_cols.append(left_col.name(guid.generate_guid(prefix="index_"))) + elif how == "right": join_key_cols.append(right_col.name(guid.generate_guid(prefix="index_"))) - else: - if how == "left" or how == "inner": + elif how == "outer": + # The left index and the right index might contain null values, for + # example due to an outer join with different numbers of rows. Coalesce + # these to take the index value from either column. + # Use a random name in case the left index and the right index have the + # same name. In such a case, _x and _y suffixes will already be used. + # Don't need to coalesce if they are exactly the same column. 
+ if left_col.name("index").equals(right_col.name("index")): join_key_cols.append(left_col.name(guid.generate_guid(prefix="index_"))) - elif how == "right": - join_key_cols.append( - right_col.name(guid.generate_guid(prefix="index_")) - ) - elif how == "outer": - # The left index and the right index might contain null values, for - # example due to an outer join with different numbers of rows. Coalesce - # these to take the index value from either column. - # Use a random name in case the left index and the right index have the - # same name. In such a case, _x and _y suffixes will already be used. - # Don't need to coalesce if they are exactly the same column. - if left_col.name("index").equals(right_col.name("index")): - join_key_cols.append( - left_col.name(guid.generate_guid(prefix="index_")) - ) - else: - join_key_cols.append( - ibis.coalesce( - left_col, - right_col, - ).name(guid.generate_guid(prefix="index_")) - ) else: - raise ValueError( - f"Unexpected join type: {how}. {constants.FEEDBACK_LINK}" + join_key_cols.append( + ibis.coalesce( + left_col, + right_col, + ).name(guid.generate_guid(prefix="index_")) ) + else: + raise ValueError(f"Unexpected join type: {how}. {constants.FEEDBACK_LINK}") return join_key_cols diff --git a/bigframes/core/utils.py b/bigframes/core/utils.py index 75175690ce..dc7c709011 100644 --- a/bigframes/core/utils.py +++ b/bigframes/core/utils.py @@ -49,6 +49,26 @@ def combine_indices(index1: pd.Index, index2: pd.Index) -> pd.MultiIndex: return multi_index +def index_as_tuples(index: pd.Index) -> typing.Sequence[typing.Tuple]: + if isinstance(index, pd.MultiIndex): + return [label for label in index] + else: + return [(label,) for label in index] + + +def split_index( + index: pd.Index, levels: int = 1 +) -> typing.Tuple[typing.Optional[pd.Index], pd.Index]: + nlevels = index.nlevels + remaining = nlevels - levels + if remaining > 0: + return index.droplevel(list(range(remaining, nlevels))), index.droplevel( + list(range(0, remaining)) + ) + else: + return (None, index) + + def get_standardized_ids( col_labels: Iterable[Hashable], idx_labels: Iterable[Hashable] = () ) -> tuple[list[str], list[str]]: @@ -84,3 +104,36 @@ def get_standardized_ids( idx_ids, col_ids = ids[: len(idx_ids)], ids[len(idx_ids) :] return col_ids, idx_ids + + +def merge_column_labels( + left_labels: pd.Index, + right_labels: pd.Index, + coalesce_labels: typing.Sequence, + suffixes: tuple[str, str] = ("_x", "_y"), +) -> pd.Index: + result_labels = [] + + for col_label in left_labels: + if col_label in right_labels: + if col_label in coalesce_labels: + # Merging on the same column only returns 1 key column from coalesce both. + # Take the left key column. + result_labels.append(col_label) + else: + result_labels.append(str(col_label) + suffixes[0]) + else: + result_labels.append(col_label) + + for col_label in right_labels: + if col_label in left_labels: + if col_label in coalesce_labels: + # Merging on the same column only returns 1 key column from coalesce both. + # Pass the right key column. 
+ pass + else: + result_labels.append(str(col_label) + suffixes[1]) + else: + result_labels.append(col_label) + + return pd.Index(result_labels) diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index 113355589b..eea8beb130 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -46,7 +46,6 @@ import bigframes.core.indexers as indexers import bigframes.core.indexes as indexes import bigframes.core.io -import bigframes.core.joins as joins import bigframes.core.ordering as order import bigframes.core.utils as utils import bigframes.core.window @@ -161,7 +160,15 @@ def __init__( columns=columns, # type:ignore dtype=dtype, # type:ignore ) - if pd_dataframe.size < MAX_INLINE_DF_SIZE: + if ( + pd_dataframe.size < MAX_INLINE_DF_SIZE + # TODO(swast): Workaround data types limitation in inline data. + and not any( + dt.pyarrow_dtype + for dt in pd_dataframe.dtypes + if isinstance(dt, pandas.ArrowDtype) + ) + ): self._block = blocks.block_from_local( pd_dataframe, session or bigframes.pandas.get_global_session() ) @@ -745,6 +752,55 @@ def rpow( __rpow__ = rpow + def align( + self, + other: typing.Union[DataFrame, bigframes.series.Series], + join: str = "outer", + axis: typing.Union[str, int, None] = None, + ) -> typing.Tuple[ + typing.Union[DataFrame, bigframes.series.Series], + typing.Union[DataFrame, bigframes.series.Series], + ]: + axis_n = utils.get_axis_number(axis) if axis else None + if axis_n == 1 and isinstance(other, bigframes.series.Series): + raise NotImplementedError( + f"align with series and axis=1 not supported. {constants.FEEDBACK_LINK}" + ) + left_block, right_block = block_ops.align( + self._block, other._block, join=join, axis=axis + ) + return DataFrame(left_block), other.__class__(right_block) + + def update(self, other, join: str = "left", overwrite=True, filter_func=None): + other = other if isinstance(other, DataFrame) else DataFrame(other) + if join != "left": + raise ValueError("Only 'left' join supported for update") + + if filter_func is not None: # Will always take other if possible + + def update_func( + left: bigframes.series.Series, right: bigframes.series.Series + ) -> bigframes.series.Series: + return left.mask(right.notna() & filter_func(left), right) + + elif overwrite: + + def update_func( + left: bigframes.series.Series, right: bigframes.series.Series + ) -> bigframes.series.Series: + return left.mask(right.notna(), right) + + else: + + def update_func( + left: bigframes.series.Series, right: bigframes.series.Series + ) -> bigframes.series.Series: + return left.mask(left.isna(), right) + + result = self.combine(other, update_func, how=join) + + self._set_block(result._block) + def combine( self, other: DataFrame, @@ -753,56 +809,31 @@ def combine( ], fill_value=None, overwrite: bool = True, + *, + how: str = "outer", ) -> DataFrame: - # Join rows - joined_index, (get_column_left, get_column_right) = self._block.index.join( - other._block.index, how="outer" - ) - columns, lcol_indexer, rcol_indexer = self.columns.join( - other.columns, how="outer", return_indexers=True - ) + l_aligned, r_aligned = block_ops.align(self._block, other._block, join=how) - column_indices = zip( - lcol_indexer if (lcol_indexer is not None) else range(len(columns)), - rcol_indexer if (lcol_indexer is not None) else range(len(columns)), + other_missing_labels = self._block.column_labels.difference( + other._block.column_labels ) - block = joined_index._block + l_frame = DataFrame(l_aligned) + r_frame = DataFrame(r_aligned) results = [] - for left_index, 
right_index in column_indices: - if left_index >= 0 and right_index >= 0: # -1 indices indicate missing - left_col_id = get_column_left(self._block.value_columns[left_index]) - right_col_id = get_column_right(other._block.value_columns[right_index]) - left_series = bigframes.series.Series(block.select_column(left_col_id)) - right_series = bigframes.series.Series( - block.select_column(right_col_id) - ) + for (label, lseries), (_, rseries) in zip(l_frame.items(), r_frame.items()): + if not ((label in other_missing_labels) and not overwrite): if fill_value is not None: - left_series = left_series.fillna(fill_value) - right_series = right_series.fillna(fill_value) - results.append(func(left_series, right_series)) - elif left_index >= 0: - # Does not exist in other - if overwrite: - dtype = self.dtypes[left_index] - block, null_col_id = block.create_constant(None, dtype=dtype) - result = bigframes.series.Series(block.select_column(null_col_id)) - results.append(result) + result = func( + lseries.fillna(fill_value), rseries.fillna(fill_value) + ) else: - left_col_id = get_column_left(self._block.value_columns[left_index]) - result = bigframes.series.Series(block.select_column(left_col_id)) - if fill_value is not None: - result = result.fillna(fill_value) - results.append(result) - elif right_index >= 0: - right_col_id = get_column_right(other._block.value_columns[right_index]) - result = bigframes.series.Series(block.select_column(right_col_id)) - if fill_value is not None: - result = result.fillna(fill_value) - results.append(result) + result = func(lseries, rseries) else: - # Should not be possible - raise ValueError("No right or left index.") + result = ( + lseries.fillna(fill_value) if fill_value is not None else lseries + ) + results.append(result) if all([isinstance(val, bigframes.series.Series) for val in results]): import bigframes.core.reshape as rs @@ -1611,6 +1642,12 @@ def agg( aggregate = agg + def idxmin(self) -> bigframes.series.Series: + return bigframes.series.Series(block_ops.idxmin(self._block)) + + def idxmax(self) -> bigframes.series.Series: + return bigframes.series.Series(block_ops.idxmax(self._block)) + def describe(self) -> DataFrame: df_numeric = self._drop_non_numeric(keep_bool=False) if len(df_numeric.columns) == 0: @@ -1682,6 +1719,27 @@ def stack(self): return bigframes.series.Series(result_block) return DataFrame(result_block) + def unstack(self): + block = self._block + # Special case, unstack with mono-index transpose into a series + if self.index.nlevels == 1: + block = block.stack( + how="right", dropna=False, sort=False, levels=self.columns.nlevels + ) + return bigframes.series.Series(block) + + # Pivot by last level of index + index_ids = block.index_columns + block = block.reset_index(drop=False) + block = block.set_index(index_ids[:-1]) + + pivot_block = block.pivot( + columns=[index_ids[-1]], + values=self._block.value_columns, + values_in_index=True, + ) + return DataFrame(pivot_block) + def _drop_non_numeric(self, keep_bool=True) -> DataFrame: types_to_keep = set(bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES) if not keep_bool: @@ -1734,12 +1792,10 @@ def merge( ] = "inner", # TODO(garrettwu): Currently can take inner, outer, left and right. To support # cross joins - # TODO(garrettwu): Support "on" list of columns and None. 
Currently a single - # column must be provided - on: Optional[str] = None, + on: Union[blocks.Label, Sequence[blocks.Label], None] = None, *, - left_on: Optional[str] = None, - right_on: Optional[str] = None, + left_on: Union[blocks.Label, Sequence[blocks.Label], None] = None, + right_on: Union[blocks.Label, Sequence[blocks.Label], None] = None, sort: bool = False, suffixes: tuple[str, str] = ("_x", "_y"), ) -> DataFrame: @@ -1753,97 +1809,41 @@ def merge( ) left_on, right_on = on, on - left = self - left_on_sql = self._sql_names(left_on) - # 0 elements already throws an exception - if len(left_on_sql) > 1: - raise ValueError(f"The column label {left_on} is not unique.") - left_on_sql = left_on_sql[0] - - right_on_sql = right._sql_names(right_on) - if len(right_on_sql) > 1: - raise ValueError(f"The column label {right_on} is not unique.") - right_on_sql = right_on_sql[0] - - ( - joined_expr, - join_key_ids, - (get_column_left, get_column_right), - ) = joins.join_by_column( - left._block.expr, - [left_on_sql], - right._block.expr, - [right_on_sql], - how=how, - sort=sort, - # In merging on the same column, it only returns 1 key column from coalesced both. - # While if 2 different columns, both will be presented in the result. - coalesce_join_keys=(left_on == right_on), - ) - # TODO(swast): Add suffixes to the column labels instead of reusing the - # column IDs as the new labels. - # Drop the index column(s) to be consistent with pandas. - left_columns = [ - join_key_ids[0] if (col_id == left_on_sql) else get_column_left(col_id) - for col_id in left._block.value_columns - ] - - right_columns = [] - for col_id in right._block.value_columns: - if col_id == right_on_sql: - # When left_on == right_on - if len(join_key_ids) > 1: - right_columns.append(join_key_ids[1]) - else: - right_columns.append(get_column_right(col_id)) - - expr = joined_expr.select_columns([*left_columns, *right_columns]) - labels = self._get_merged_col_labels( - right, left_on=left_on, right_on=right_on, suffixes=suffixes - ) + if utils.is_list_like(left_on): + left_on = list(left_on) # type: ignore + else: + left_on = [left_on] - # Constructs default index - expr, offset_index_id = expr.promote_offsets() - block = blocks.Block( - expr, index_columns=[offset_index_id], column_labels=labels + if utils.is_list_like(right_on): + right_on = list(right_on) # type: ignore + else: + right_on = [right_on] + + left_join_ids = [] + for label in left_on: # type: ignore + left_col_id = self._resolve_label_exact(label) + # 0 elements already throws an exception + if not left_col_id: + raise ValueError(f"No column {label} found in self.") + left_join_ids.append(left_col_id) + + right_join_ids = [] + for label in right_on: # type: ignore + right_col_id = right._resolve_label_exact(label) + if not right_col_id: + raise ValueError(f"No column {label} found in other.") + right_join_ids.append(right_col_id) + + block = self._block.merge( + right._block, + how, + left_join_ids, + right_join_ids, + sort=sort, + suffixes=suffixes, ) return DataFrame(block) - def _get_merged_col_labels( - self, - right: DataFrame, - left_on: str, - right_on: str, - suffixes: tuple[str, str] = ("_x", "_y"), - ) -> List[blocks.Label]: - on_col_equal = left_on == right_on - - left_col_labels: list[blocks.Label] = [] - for col_label in self._block.column_labels: - if col_label in right._block.column_labels: - if on_col_equal and col_label == left_on: - # Merging on the same column only returns 1 key column from coalesce both. - # Take the left key column. 
- left_col_labels.append(col_label) - else: - left_col_labels.append(str(col_label) + suffixes[0]) - else: - left_col_labels.append(col_label) - - right_col_labels: list[blocks.Label] = [] - for col_label in right._block.column_labels: - if col_label in self._block.column_labels: - if on_col_equal and col_label == left_on: - # Merging on the same column only returns 1 key column from coalesce both. - # Pass the right key column. - pass - else: - right_col_labels.append(str(col_label) + suffixes[1]) - else: - right_col_labels.append(col_label) - - return left_col_labels + right_col_labels - def join( self, other: DataFrame, *, on: Optional[str] = None, how: str = "left" ) -> DataFrame: diff --git a/bigframes/dtypes.py b/bigframes/dtypes.py index 59d3007fab..46a7a1cb50 100644 --- a/bigframes/dtypes.py +++ b/bigframes/dtypes.py @@ -84,10 +84,10 @@ BIDIRECTIONAL_MAPPINGS: Iterable[Tuple[IbisDtype, Dtype]] = ( (ibis_dtypes.boolean, pd.BooleanDtype()), + (ibis_dtypes.date, pd.ArrowDtype(pa.date32())), (ibis_dtypes.float64, pd.Float64Dtype()), (ibis_dtypes.int64, pd.Int64Dtype()), (ibis_dtypes.string, pd.StringDtype(storage="pyarrow")), - (ibis_dtypes.date, pd.ArrowDtype(pa.date32())), (ibis_dtypes.time, pd.ArrowDtype(pa.time64("us"))), (ibis_dtypes.Timestamp(timezone=None), pd.ArrowDtype(pa.timestamp("us"))), ( @@ -100,6 +100,19 @@ pandas: ibis for ibis, pandas in BIDIRECTIONAL_MAPPINGS } +IBIS_TO_ARROW: Dict[ibis_dtypes.DataType, pa.DataType] = { + ibis_dtypes.boolean: pa.bool_(), + ibis_dtypes.date: pa.date32(), + ibis_dtypes.float64: pa.float64(), + ibis_dtypes.int64: pa.int64(), + ibis_dtypes.string: pa.string(), + ibis_dtypes.time: pa.time64("us"), + ibis_dtypes.Timestamp(timezone=None): pa.timestamp("us"), + ibis_dtypes.Timestamp(timezone="UTC"): pa.timestamp("us", tz="UTC"), +} + +ARROW_TO_IBIS = {arrow: ibis for ibis, arrow in IBIS_TO_ARROW.items()} + IBIS_TO_BIGFRAMES: Dict[ibis_dtypes.DataType, Union[Dtype, np.dtype[Any]]] = { ibis: pandas for ibis, pandas in BIDIRECTIONAL_MAPPINGS } @@ -148,11 +161,12 @@ def ibis_dtype_to_bigframes_dtype( # Special cases: Ibis supports variations on these types, but currently # our IO returns them as objects. Eventually, we should support them as # ArrowDType (and update the IO accordingly) - if isinstance(ibis_dtype, ibis_dtypes.Array) or isinstance( - ibis_dtype, ibis_dtypes.Struct - ): + if isinstance(ibis_dtype, ibis_dtypes.Array): return np.dtype("O") + if isinstance(ibis_dtype, ibis_dtypes.Struct): + return pd.ArrowDtype(ibis_dtype_to_arrow_dtype(ibis_dtype)) + if ibis_dtype in IBIS_TO_BIGFRAMES: return IBIS_TO_BIGFRAMES[ibis_dtype] elif isinstance(ibis_dtype, ibis_dtypes.Null): @@ -164,6 +178,26 @@ def ibis_dtype_to_bigframes_dtype( ) +def ibis_dtype_to_arrow_dtype(ibis_dtype: ibis_dtypes.DataType) -> pa.DataType: + if isinstance(ibis_dtype, ibis_dtypes.Array): + return pa.list_(ibis_dtype_to_arrow_dtype(ibis_dtype.value_type)) + + if isinstance(ibis_dtype, ibis_dtypes.Struct): + return pa.struct( + [ + (name, ibis_dtype_to_arrow_dtype(dtype)) + for name, dtype in ibis_dtype.fields.items() + ] + ) + + if ibis_dtype in IBIS_TO_ARROW: + return IBIS_TO_ARROW[ibis_dtype] + else: + raise ValueError( + f"Unexpected Ibis data type {ibis_dtype}. {constants.FEEDBACK_LINK}" + ) + + def ibis_value_to_canonical_type(value: ibis_types.Value) -> ibis_types.Value: """Converts an Ibis expression to canonical type. 
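# With the dtype hunks above, STRUCT values surface as ``pd.ArrowDtype``
# columns instead of plain objects, and (per the changelog entry earlier in
# this diff) child fields can be pulled out with ``Series.struct.field``. A
# small sketch of the dtype in question, pure pandas/pyarrow, no BigQuery
# involved:
import pandas as pd
import pyarrow as pa

struct_type = pa.struct([("name", pa.string()), ("age", pa.int64())])
s = pd.Series(
    [{"name": "ada", "age": 36}, {"name": "bob", "age": 41}],
    dtype=pd.ArrowDtype(struct_type),
)
print(s.dtype)  # struct<name: string, age: int64>[pyarrow]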
@@ -187,6 +221,24 @@ def ibis_table_to_canonical_types(table: ibis_types.Table) -> ibis_types.Table: return table.select(*casted_columns) +def arrow_dtype_to_ibis_dtype(arrow_dtype: pa.DataType) -> ibis_dtypes.DataType: + if pa.types.is_struct(arrow_dtype): + struct_dtype = typing.cast(pa.StructType, arrow_dtype) + return ibis_dtypes.Struct.from_tuples( + [ + (field.name, arrow_dtype_to_ibis_dtype(field.type)) + for field in struct_dtype + ] + ) + + if arrow_dtype in ARROW_TO_IBIS: + return ARROW_TO_IBIS[arrow_dtype] + else: + raise ValueError( + f"Unexpected Arrow data type {arrow_dtype}. {constants.FEEDBACK_LINK}" + ) + + def bigframes_dtype_to_ibis_dtype( bigframes_dtype: Union[DtypeString, Dtype, np.dtype[Any]] ) -> ibis_dtypes.DataType: @@ -202,6 +254,9 @@ def bigframes_dtype_to_ibis_dtype( Raises: ValueError: If passed a dtype not supported by BigQuery DataFrames. """ + if isinstance(bigframes_dtype, pd.ArrowDtype): + return arrow_dtype_to_ibis_dtype(bigframes_dtype.pyarrow_dtype) + type_string = str(bigframes_dtype) if type_string in BIGFRAMES_STRING_TO_BIGFRAMES: bigframes_dtype = BIGFRAMES_STRING_TO_BIGFRAMES[ diff --git a/bigframes/ml/compose.py b/bigframes/ml/compose.py index 9effbf1968..bf046ff691 100644 --- a/bigframes/ml/compose.py +++ b/bigframes/ml/compose.py @@ -31,6 +31,7 @@ preprocessing.StandardScaler, preprocessing.MaxAbsScaler, preprocessing.MinMaxScaler, + preprocessing.KBinsDiscretizer, preprocessing.LabelEncoder, ] @@ -91,18 +92,24 @@ def transformers_( return result - def _compile_to_sql(self, columns: List[str]) -> List[Tuple[str, str]]: + def _compile_to_sql( + self, + columns: List[str], + X: bpd.DataFrame, + ) -> List[Tuple[str, str]]: """Compile this transformer to a list of SQL expressions that can be included in a BQML TRANSFORM clause Args: columns (List[str]): a list of column names to transform + X (bpd.DataFrame): + The Dataframe with training data. Returns: a list of tuples of (sql_expression, output_name)""" return [ - transformer._compile_to_sql([column])[0] + transformer._compile_to_sql([column], X=X)[0] for column in columns for _, transformer, target_column in self.transformers_ if column == target_column @@ -115,7 +122,7 @@ def fit( ) -> ColumnTransformer: (X,) = utils.convert_to_dataframe(X) - compiled_transforms = self._compile_to_sql(X.columns.tolist()) + compiled_transforms = self._compile_to_sql(X.columns.tolist(), X) transform_sqls = [transform_sql for transform_sql, _ in compiled_transforms] self._bqml_model = self._bqml_model_factory.create_model( diff --git a/bigframes/ml/model_selection.py b/bigframes/ml/model_selection.py index 110cbcf493..443b9e7be6 100644 --- a/bigframes/ml/model_selection.py +++ b/bigframes/ml/model_selection.py @@ -17,6 +17,7 @@ https://scikit-learn.org/stable/modules/classes.html#module-sklearn.model_selection.""" +import typing from typing import List, Union from bigframes.ml import utils @@ -79,9 +80,10 @@ def train_test_split( train_index = split_dfs[0].index test_index = split_dfs[1].index - split_dfs += [ - df.loc[index] for df in dfs[1:] for index in (train_index, test_index) - ] + split_dfs += typing.cast( + List[bpd.DataFrame], + [df.loc[index] for df in dfs[1:] for index in (train_index, test_index)], + ) # convert back to Series. 
results: List[Union[bpd.DataFrame, bpd.Series]] = [] diff --git a/bigframes/ml/pipeline.py b/bigframes/ml/pipeline.py index ac02c39112..ad0b3fae11 100644 --- a/bigframes/ml/pipeline.py +++ b/bigframes/ml/pipeline.py @@ -52,6 +52,7 @@ def __init__(self, steps: List[Tuple[str, base.BaseEstimator]]): preprocessing.OneHotEncoder, preprocessing.MaxAbsScaler, preprocessing.MinMaxScaler, + preprocessing.KBinsDiscretizer, preprocessing.LabelEncoder, ), ): @@ -93,7 +94,7 @@ def fit( ) -> Pipeline: (X,) = utils.convert_to_dataframe(X) - compiled_transforms = self._transform._compile_to_sql(X.columns.tolist()) + compiled_transforms = self._transform._compile_to_sql(X.columns.tolist(), X=X) transform_sqls = [transform_sql for transform_sql, _ in compiled_transforms] if y is not None: @@ -151,6 +152,7 @@ def _extract_as_column_transformer( preprocessing.StandardScaler, preprocessing.MaxAbsScaler, preprocessing.MinMaxScaler, + preprocessing.KBinsDiscretizer, preprocessing.LabelEncoder, ], Union[str, List[str]], @@ -190,6 +192,13 @@ def _extract_as_column_transformer( *preprocessing.MinMaxScaler._parse_from_sql(transform_sql), ) ) + elif transform_sql.startswith("ML.BUCKETIZE"): + transformers.append( + ( + "k_bins_discretizer", + *preprocessing.KBinsDiscretizer._parse_from_sql(transform_sql), + ) + ) elif transform_sql.startswith("ML.LABEL_ENCODER"): transformers.append( ( @@ -213,6 +222,7 @@ def _merge_column_transformer( preprocessing.OneHotEncoder, preprocessing.MaxAbsScaler, preprocessing.MinMaxScaler, + preprocessing.KBinsDiscretizer, preprocessing.LabelEncoder, ]: """Try to merge the column transformer to a simple transformer.""" diff --git a/bigframes/ml/preprocessing.py b/bigframes/ml/preprocessing.py index caf4657a63..5f44d40218 100644 --- a/bigframes/ml/preprocessing.py +++ b/bigframes/ml/preprocessing.py @@ -23,6 +23,7 @@ from bigframes.ml import base, core, globals, utils import bigframes.pandas as bpd import third_party.bigframes_vendored.sklearn.preprocessing._data +import third_party.bigframes_vendored.sklearn.preprocessing._discretization import third_party.bigframes_vendored.sklearn.preprocessing._encoder import third_party.bigframes_vendored.sklearn.preprocessing._label @@ -44,12 +45,15 @@ def __init__(self): def __eq__(self, other: Any) -> bool: return type(other) is StandardScaler and self._bqml_model == other._bqml_model - def _compile_to_sql(self, columns: List[str]) -> List[Tuple[str, str]]: + def _compile_to_sql(self, columns: List[str], X=None) -> List[Tuple[str, str]]: """Compile this transformer to a list of SQL expressions that can be included in a BQML TRANSFORM clause Args: - columns: a list of column names to transform + columns: + a list of column names to transform. + X (default None): + Ignored. Returns: a list of tuples of (sql_expression, output_name)""" return [ @@ -124,12 +128,15 @@ def __init__(self): def __eq__(self, other: Any) -> bool: return type(other) is MaxAbsScaler and self._bqml_model == other._bqml_model - def _compile_to_sql(self, columns: List[str]) -> List[Tuple[str, str]]: + def _compile_to_sql(self, columns: List[str], X=None) -> List[Tuple[str, str]]: """Compile this transformer to a list of SQL expressions that can be included in a BQML TRANSFORM clause Args: - columns: a list of column names to transform + columns: + a list of column names to transform. + X (default None): + Ignored. 
Returns: a list of tuples of (sql_expression, output_name)""" return [ @@ -204,12 +211,15 @@ def __init__(self): def __eq__(self, other: Any) -> bool: return type(other) is MinMaxScaler and self._bqml_model == other._bqml_model - def _compile_to_sql(self, columns: List[str]) -> List[Tuple[str, str]]: + def _compile_to_sql(self, columns: List[str], X=None) -> List[Tuple[str, str]]: """Compile this transformer to a list of SQL expressions that can be included in a BQML TRANSFORM clause Args: - columns: a list of column names to transform + columns: + a list of column names to transform. + X (default None): + Ignored. Returns: a list of tuples of (sql_expression, output_name)""" return [ @@ -267,6 +277,124 @@ def transform(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: ) +class KBinsDiscretizer( + base.Transformer, + third_party.bigframes_vendored.sklearn.preprocessing._discretization.KBinsDiscretizer, +): + __doc__ = ( + third_party.bigframes_vendored.sklearn.preprocessing._discretization.KBinsDiscretizer.__doc__ + ) + + def __init__( + self, + n_bins: int = 5, + strategy: Literal["uniform", "quantile"] = "quantile", + ): + if strategy != "uniform": + raise NotImplementedError( + f"Only strategy = 'uniform' is supported now, input is {strategy}." + ) + if n_bins < 2: + raise ValueError( + f"n_bins has to be larger than or equal to 2, input is {n_bins}." + ) + self.n_bins = n_bins + self.strategy = strategy + self._bqml_model: Optional[core.BqmlModel] = None + self._bqml_model_factory = globals.bqml_model_factory() + self._base_sql_generator = globals.base_sql_generator() + + # TODO(garrettwu): implement __hash__ + def __eq__(self, other: Any) -> bool: + return ( + type(other) is KBinsDiscretizer + and self.n_bins == other.n_bins + and self._bqml_model == other._bqml_model + ) + + def _compile_to_sql( + self, + columns: List[str], + X: bpd.DataFrame, + ) -> List[Tuple[str, str]]: + """Compile this transformer to a list of SQL expressions that can be included in + a BQML TRANSFORM clause + + Args: + columns: + a list of column names to transform + X: + The Dataframe with training data. + + Returns: a list of tuples of (sql_expression, output_name)""" + array_split_points = {} + if self.strategy == "uniform": + for column in columns: + min_value = X[column].min() + max_value = X[column].max() + bin_size = (max_value - min_value) / self.n_bins + array_split_points[column] = [ + min_value + i * bin_size for i in range(self.n_bins - 1) + ] + + return [ + ( + self._base_sql_generator.ml_bucketize( + column, array_split_points[column], f"kbinsdiscretizer_{column}" + ), + f"kbinsdiscretizer_{column}", + ) + for column in columns + ] + + @classmethod + def _parse_from_sql(cls, sql: str) -> tuple[KBinsDiscretizer, str]: + """Parse SQL to tuple(KBinsDiscretizer, column_label). 
+ + Args: + sql: SQL string of format "ML.BUCKETIZE({col_label}, array_split_points, FALSE) OVER()" + + Returns: + tuple(KBinsDiscretizer, column_label)""" + s = sql[sql.find("(") + 1 : sql.find(")")] + array_split_points = s[s.find("[") + 1 : s.find("]")] + col_label = s[: s.find(",")] + n_bins = array_split_points.count(",") + 2 + return cls(n_bins, "uniform"), col_label + + def fit( + self, + X: Union[bpd.DataFrame, bpd.Series], + y=None, # ignored + ) -> KBinsDiscretizer: + (X,) = utils.convert_to_dataframe(X) + + compiled_transforms = self._compile_to_sql(X.columns.tolist(), X) + transform_sqls = [transform_sql for transform_sql, _ in compiled_transforms] + + self._bqml_model = self._bqml_model_factory.create_model( + X, + options={"model_type": "transform_only"}, + transforms=transform_sqls, + ) + + # The schema of TRANSFORM output is not available in the model API, so save it during fitting + self._output_names = [name for _, name in compiled_transforms] + return self + + def transform(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: + if not self._bqml_model: + raise RuntimeError("Must be fitted before transform") + + (X,) = utils.convert_to_dataframe(X) + + df = self._bqml_model.transform(X) + return typing.cast( + bpd.DataFrame, + df[self._output_names], + ) + + class OneHotEncoder( base.Transformer, third_party.bigframes_vendored.sklearn.preprocessing._encoder.OneHotEncoder, @@ -308,13 +436,15 @@ def __eq__(self, other: Any) -> bool: and self.max_categories == other.max_categories ) - def _compile_to_sql(self, columns: List[str]) -> List[Tuple[str, str]]: + def _compile_to_sql(self, columns: List[str], X=None) -> List[Tuple[str, str]]: """Compile this transformer to a list of SQL expressions that can be included in a BQML TRANSFORM clause Args: columns: - a list of column names to transform + a list of column names to transform. + X (default None): + Ignored. Returns: a list of tuples of (sql_expression, output_name)""" @@ -432,13 +562,15 @@ def __eq__(self, other: Any) -> bool: and self.max_categories == other.max_categories ) - def _compile_to_sql(self, columns: List[str]) -> List[Tuple[str, str]]: + def _compile_to_sql(self, columns: List[str], X=None) -> List[Tuple[str, str]]: """Compile this transformer to a list of SQL expressions that can be included in a BQML TRANSFORM clause Args: columns: - a list of column names to transform + a list of column names to transform. + X (default None): + Ignored. 
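The new `KBinsDiscretizer` supports only `strategy="uniform"` and requires `n_bins >= 2`; bin boundaries are computed from each column's min and max in the training frame, which is why `_compile_to_sql` now receives `X`. A usage sketch combining it with `ColumnTransformer`, using the public Iris table purely as sample data:

```
import bigframes.pandas as bpd
from bigframes.ml.compose import ColumnTransformer
from bigframes.ml.preprocessing import KBinsDiscretizer, StandardScaler

df = bpd.read_gbq("bigquery-public-data.ml_datasets.iris")
features = df[["sepal_length", "sepal_width", "petal_length"]]

transformer = ColumnTransformer(
    [
        ("scale", StandardScaler(), ["sepal_length", "sepal_width"]),
        ("bin", KBinsDiscretizer(n_bins=4, strategy="uniform"), ["petal_length"]),
    ]
)
transformer.fit(features)
binned = transformer.transform(features)
```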
Returns: a list of tuples of (sql_expression, output_name)""" diff --git a/bigframes/ml/sql.py b/bigframes/ml/sql.py index 57c8ba672a..601b271099 100644 --- a/bigframes/ml/sql.py +++ b/bigframes/ml/sql.py @@ -85,6 +85,15 @@ def ml_min_max_scaler(self, numeric_expr_sql: str, name: str) -> str: """Encode ML.MIN_MAX_SCALER for BQML""" return f"""ML.MIN_MAX_SCALER({numeric_expr_sql}) OVER() AS {name}""" + def ml_bucketize( + self, + numeric_expr_sql: str, + array_split_points: Iterable[Union[int, float]], + name: str, + ) -> str: + """Encode ML.MIN_MAX_SCALER for BQML""" + return f"""ML.BUCKETIZE({numeric_expr_sql}, {array_split_points}, FALSE) AS {name}""" + def ml_one_hot_encoder( self, numeric_expr_sql: str, diff --git a/bigframes/operations/base.py b/bigframes/operations/base.py index add6af57f4..51eaad18b9 100644 --- a/bigframes/operations/base.py +++ b/bigframes/operations/base.py @@ -86,7 +86,15 @@ def __init__( if pd_series.name is None: # to_frame will set default numeric column label if unnamed, but we do not support int column label, so must rename pd_dataframe = pd_dataframe.set_axis(["unnamed_col"], axis=1) - if pd_dataframe.size < MAX_INLINE_SERIES_SIZE: + if ( + pd_dataframe.size < MAX_INLINE_SERIES_SIZE + # TODO(swast): Workaround data types limitation in inline data. + and not any( + dt.pyarrow_dtype + for dt in pd_dataframe.dtypes + if isinstance(dt, pd.ArrowDtype) + ) + ): self._block = blocks.block_from_local( pd_dataframe, session or bigframes.pandas.get_global_session() ) diff --git a/bigframes/operations/structs.py b/bigframes/operations/structs.py new file mode 100644 index 0000000000..80d51115d0 --- /dev/null +++ b/bigframes/operations/structs.py @@ -0,0 +1,61 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
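The `ML.BUCKETIZE` expression used by `KBinsDiscretizer` comes from the `ml_bucketize` helper above. The SQL generator objects are internal, so this is only a sketch of the string it produces:

```
from bigframes.ml import globals

sql_generator = globals.base_sql_generator()
sql = sql_generator.ml_bucketize(
    "sepal_length", [4.5, 5.5, 6.5], "kbinsdiscretizer_sepal_length"
)
# -> "ML.BUCKETIZE(sepal_length, [4.5, 5.5, 6.5], FALSE) AS kbinsdiscretizer_sepal_length"
```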
+ +from __future__ import annotations + +import typing + +import ibis.expr.types as ibis_types + +import bigframes.dataframe +import bigframes.operations +import bigframes.operations.base +import bigframes.series +import third_party.bigframes_vendored.pandas.core.arrays.arrow.accessors as vendoracessors + + +class StructField(bigframes.operations.UnaryOp): + def __init__(self, name_or_index: str | int): + self._name_or_index = name_or_index + + def _as_ibis(self, x: ibis_types.Value): + struct_value = typing.cast(ibis_types.StructValue, x) + if isinstance(self._name_or_index, str): + name = self._name_or_index + else: + name = struct_value.names[self._name_or_index] + return struct_value[name].name(name) + + +class StructAccessor( + bigframes.operations.base.SeriesMethods, vendoracessors.StructAccessor +): + __doc__ = vendoracessors.StructAccessor.__doc__ + + def field(self, name_or_index: str | int) -> bigframes.series.Series: + series = self._apply_unary_op(StructField(name_or_index)) + if isinstance(name_or_index, str): + name = name_or_index + else: + struct_field = self._dtype.pyarrow_dtype[name_or_index] + name = struct_field.name + return series.rename(name) + + def explode(self) -> bigframes.dataframe.DataFrame: + import bigframes.pandas + + pa_type = self._dtype.pyarrow_dtype + return bigframes.pandas.concat( + [self.field(i) for i in range(pa_type.num_fields)], axis="columns" + ) diff --git a/bigframes/series.py b/bigframes/series.py index 47298d59f5..8815a6abde 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -51,6 +51,7 @@ import bigframes.operations.base import bigframes.operations.datetimes as dt import bigframes.operations.strings as strings +import bigframes.operations.structs as structs import third_party.bigframes_vendored.pandas.core.series as vendored_pandas_series LevelType = typing.Union[str, int] @@ -118,6 +119,10 @@ def query_job(self) -> Optional[bigquery.QueryJob]: self._set_internal_query_job(self._compute_dry_run()) return self._query_job + @property + def struct(self) -> structs.StructAccessor: + return structs.StructAccessor(self._block) + def _set_internal_query_job(self, query_job: bigquery.QueryJob): self._query_job = query_job @@ -882,6 +887,34 @@ def argmin(self) -> int: scalars.Scalar, Series(block.select_column(row_nums)).iloc[0] ) + def idxmax(self) -> blocks.Label: + block = self._block.order_by( + [ + OrderingColumnReference( + self._value_column, direction=OrderingDirection.DESC + ), + *[ + OrderingColumnReference(idx_col) + for idx_col in self._block.index_columns + ], + ] + ) + block = block.slice(0, 1) + return indexes.Index._from_block(block).to_pandas()[0] + + def idxmin(self) -> blocks.Label: + block = self._block.order_by( + [ + OrderingColumnReference(self._value_column), + *[ + OrderingColumnReference(idx_col) + for idx_col in self._block.index_columns + ], + ] + ) + block = block.slice(0, 1) + return indexes.Index._from_block(block).to_pandas()[0] + @property def is_monotonic_increasing(self) -> bool: return typing.cast( diff --git a/bigframes/session.py b/bigframes/session.py index 7b827c7dcf..ac48c977cb 100644 --- a/bigframes/session.py +++ b/bigframes/session.py @@ -449,13 +449,6 @@ def _query_to_destination( index_cols: List[str], api_name: str, ) -> Tuple[Optional[bigquery.TableReference], Optional[bigquery.QueryJob]]: - # If there are no index columns, then there's no reason to cache to a - # (clustered) session table, as we'll just have to query it again to - # create a default index & ordering. 
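The `Series.struct` accessor and the new `idxmax`/`idxmin` methods above can be exercised as in this sketch; the inline query is a made-up example of a STRUCT column:

```
import bigframes.pandas as bpd

df = bpd.read_gbq(
    "SELECT 1 AS id, STRUCT('Seattle' AS city, 737015 AS population) AS place"
)
city = df["place"].struct.field("city")   # Series named "city"
flat = df["place"].struct.explode()       # one column per struct sub-field

lengths = bpd.read_gbq("bigquery-public-data.ml_datasets.iris")["sepal_length"]
largest_label = lengths.idxmax()          # index label of the largest value
```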
- if not index_cols: - _, query_job = self._start_query(query) - return query_job.destination, query_job - # If a dry_run indicates this is not a query type job, then don't # bother trying to do a CREATE TEMP TABLE ... AS SELECT ... statement. dry_run_config = bigquery.QueryJobConfig() @@ -465,15 +458,24 @@ def _query_to_destination( _, query_job = self._start_query(query) return query_job.destination, query_job - # Make sure we cluster by the index column(s) so that subsequent - # operations are as speedy as they can be. + # Create a table to workaround BigQuery 10 GB query results limit. See: + # internal issue 303057336. + # Since we have a `statement_type == 'SELECT'`, schema should be populated. + schema = typing.cast(Iterable[bigquery.SchemaField], dry_run_job.schema) + temp_table = self._create_session_table_empty(api_name, schema, index_cols) + + job_config = bigquery.QueryJobConfig() + job_config.destination = temp_table + try: - ibis_expr = self.ibis_client.sql(query) - return self._ibis_to_session_table(ibis_expr, index_cols, api_name), None + # Write to temp table to workaround BigQuery 10 GB query results + # limit. See: internal issue 303057336. + _, query_job = self._start_query(query, job_config=job_config) + return query_job.destination, query_job except google.api_core.exceptions.BadRequest: - # Some SELECT statements still aren't compatible with CREATE TEMP - # TABLE ... AS SELECT ... statements. For example, if the query has - # a top-level ORDER BY, this conflicts with our ability to cluster + # Some SELECT statements still aren't compatible with cluster + # tables as the destination. For example, if the query has a + # top-level ORDER BY, this conflicts with our ability to cluster # the table by the index column(s). _, query_job = self._start_query(query) return query_job.destination, query_job @@ -1231,6 +1233,54 @@ def _create_session_table(self) -> bigquery.TableReference: ) return dataset.table(table_name) + def _create_session_table_empty( + self, + api_name: str, + schema: Iterable[bigquery.SchemaField], + cluster_cols: List[str], + ) -> bigquery.TableReference: + # Can't set a table in _SESSION as destination via query job API, so we + # run DDL, instead. + table = self._create_session_table() + schema_sql = bigframes_io.bq_schema_to_sql(schema) + + clusterable_cols = [ + col.name + for col in schema + if col.name in cluster_cols and _can_cluster_bq(col) + ][:_MAX_CLUSTER_COLUMNS] + + if clusterable_cols: + cluster_cols_sql = ", ".join( + f"`{cluster_col}`" for cluster_col in clusterable_cols + ) + cluster_sql = f"CLUSTER BY {cluster_cols_sql}" + else: + cluster_sql = "" + + ddl_text = f""" + CREATE TEMP TABLE + `_SESSION`.`{table.table_id}` + ({schema_sql}) + {cluster_sql} + """ + + job_config = bigquery.QueryJobConfig() + + # Include a label so that Dataplex Lineage can identify temporary + # tables that BigQuery DataFrames creates. Googlers: See internal issue + # 296779699. We're labeling the job instead of the table because + # otherwise we get `BadRequest: 400 OPTIONS on temporary tables are not + # supported`. + job_config.labels = {"source": "bigquery-dataframes-temp"} + job_config.labels["bigframes-api"] = api_name + + _, query_job = self._start_query(ddl_text, job_config=job_config) + + # Use fully-qualified name instead of `_SESSION` name so that the + # created table can be used as the destination table. 
+ return query_job.destination + def _create_sequential_ordering( self, table: ibis_types.Table, @@ -1249,7 +1299,9 @@ def _create_sequential_ordering( cluster_cols=list(index_cols) + [default_ordering_name], api_name=api_name, ) - table = self.ibis_client.sql(f"SELECT * FROM `{table_ref.table_id}`") + table = self.ibis_client.table( + f"{table_ref.project}.{table_ref.dataset_id}.{table_ref.table_id}" + ) ordering_reference = core.OrderingColumnReference(default_ordering_name) ordering = core.ExpressionOrdering( ordering_value_columns=[ordering_reference], @@ -1264,55 +1316,13 @@ def _ibis_to_session_table( cluster_cols: Iterable[str], api_name: str, ) -> bigquery.TableReference: - clusterable_cols = [ - col for col in cluster_cols if _can_cluster(table[col].type()) - ][:_MAX_CLUSTER_COLUMNS] - return self._query_to_session_table( + desination, _ = self._query_to_destination( self.ibis_client.compile(table), - cluster_cols=clusterable_cols, + index_cols=list(cluster_cols), api_name=api_name, ) - - def _query_to_session_table( - self, - query_text: str, - cluster_cols: Iterable[str], - api_name: str, - ) -> bigquery.TableReference: - if len(list(cluster_cols)) > _MAX_CLUSTER_COLUMNS: - raise ValueError( - f"Too many cluster columns: {list(cluster_cols)}, max {_MAX_CLUSTER_COLUMNS} allowed." - ) - # Can't set a table in _SESSION as destination via query job API, so we - # run DDL, instead. - table = self._create_session_table() - cluster_cols_sql = ", ".join(f"`{cluster_col}`" for cluster_col in cluster_cols) - - # TODO(swast): This might not support multi-statement SQL queries (scripts). - ddl_text = f""" - CREATE TEMP TABLE `_SESSION`.`{table.table_id}` - CLUSTER BY {cluster_cols_sql} - AS {query_text} - """ - - job_config = bigquery.QueryJobConfig() - - # Include a label so that Dataplex Lineage can identify temporary - # tables that BigQuery DataFrames creates. Googlers: See internal issue - # 296779699. We're labeling the job instead of the table because - # otherwise we get `BadRequest: 400 OPTIONS on temporary tables are not - # supported`. - job_config.labels = {"source": "bigquery-dataframes-temp"} - job_config.labels["bigframes-api"] = api_name - - try: - self._start_query( - ddl_text, job_config=job_config - ) # Wait for the job to complete - except google.api_core.exceptions.Conflict: - # Allow query retry to succeed. - pass - return table + # There should always be a destination table for this query type. + return typing.cast(bigquery.TableReference, desination) def remote_function( self, @@ -1494,14 +1504,21 @@ def connect(context: Optional[bigquery_options.BigQueryOptions] = None) -> Sessi return Session(context) -def _can_cluster(ibis_type: ibis_dtypes.DataType): +def _can_cluster_bq(field: bigquery.SchemaField): # https://cloud.google.com/bigquery/docs/clustered-tables # Notably, float is excluded - return ( - ibis_type.is_integer() - or ibis_type.is_string() - or ibis_type.is_decimal() - or ibis_type.is_date() - or ibis_type.is_timestamp() - or ibis_type.is_boolean() + type_ = field.field_type + return type_ in ( + "INTEGER", + "INT64", + "STRING", + "NUMERIC", + "DECIMAL", + "BIGNUMERIC", + "BIGDECIMAL", + "DATE", + "DATETIME", + "TIMESTAMP", + "BOOL", + "BOOLEAN", ) diff --git a/bigframes/version.py b/bigframes/version.py index ad3c3082c5..238b64473a 100644 --- a/bigframes/version.py +++ b/bigframes/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
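`_query_to_destination` now materializes query results into a session-owned temporary table, clustered by the index columns where their types allow, to work around the 10 GB query-results limit. Clustering eligibility is decided by `_can_cluster_bq`; a sketch against this private helper, for illustration only:

```
from google.cloud import bigquery

from bigframes.session import _can_cluster_bq

_can_cluster_bq(bigquery.SchemaField("user_id", "INT64"))   # True: integer columns can be clustered
_can_cluster_bq(bigquery.SchemaField("score", "FLOAT64"))   # False: float columns cannot be clustered
```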
-__version__ = "0.5.0" +__version__ = "0.6.0" diff --git a/notebooks/vertex_sdk/sdk2_bigframes_pytorch.ipynb b/notebooks/vertex_sdk/sdk2_bigframes_pytorch.ipynb new file mode 100644 index 0000000000..598d958f0c --- /dev/null +++ b/notebooks/vertex_sdk/sdk2_bigframes_pytorch.ipynb @@ -0,0 +1,723 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ur8xi4C7S06n" + }, + "outputs": [], + "source": [ + "# Copyright 2023 Google LLC\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JAPoU8Sm5E6e" + }, + "source": [ + "# Train a pytorch model with Vertex AI SDK 2.0 and Bigframes\n", + "\n", + "\n", + " \n", + " \n", + "
\n", + " \n", + " \"Colab Run in Colab\n", + " \n", + " \n", + " \n", + " \"GitHub\n", + " View on GitHub\n", + " \n", + " \n", + " \n", + " \"VertexOpen in Vertex AI Workbench\n", + " \n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tvgnzT1CKxrO" + }, + "source": [ + "## Overview\n", + "\n", + "This tutorial demonstrates how to train a pytorch model using Vertex AI local-to-remote training with Vertex AI SDK 2.0 and BigQuery Bigframes as the data source.\n", + "\n", + "Learn more about [bigframes](https://cloud.google.com/bigquery/docs/)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d975e698c9a4" + }, + "source": [ + "### Objective\n", + "\n", + "In this tutorial, you learn to use `Vertex AI SDK 2.0` with Bigframes as input data source.\n", + "\n", + "\n", + "This tutorial uses the following Google Cloud ML services:\n", + "\n", + "- `Vertex AI Training`\n", + "- `Vertex AI Remote Training`\n", + "\n", + "\n", + "The steps performed include:\n", + "\n", + "- Initialize a dataframe from a BigQuery table and split the dataset\n", + "- Perform transformations as a Vertex AI remote training.\n", + "- Train the model remotely and evaluate the model locally\n", + "\n", + "**Local-to-remote training**\n", + "\n", + "```\n", + "import vertexai\n", + "from my_module import MyModelClass\n", + "\n", + "vertexai.preview.init(remote=True, project=\"my-project\", location=\"my-location\", staging_bucket=\"gs://my-bucket\")\n", + "\n", + "# Wrap the model class with `vertex_ai.preview.remote`\n", + "MyModelClass = vertexai.preview.remote(MyModelClass)\n", + "\n", + "# Instantiate the class\n", + "model = MyModelClass(...)\n", + "\n", + "# Optional set remote config\n", + "model.fit.vertex.remote_config.display_name = \"MyModelClass-remote-training\"\n", + "model.fit.vertex.remote_config.staging_bucket = \"gs://my-bucket\"\n", + "\n", + "# This `fit` call will be executed remotely\n", + "model.fit(...)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "08d289fa873f" + }, + "source": [ + "### Dataset\n", + "\n", + "This tutorial uses the IRIS dataset, which predicts the iris species." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aed92deeb4a0" + }, + "source": [ + "### Costs\n", + "\n", + "This tutorial uses billable components of Google Cloud:\n", + "\n", + "* Vertex AI\n", + "* BigQuery\n", + "* Cloud Storage\n", + "\n", + "Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing),\n", + "[BigQuery pricing](https://cloud.google.com/bigquery/pricing),\n", + "and [Cloud Storage pricing](https://cloud.google.com/storage/pricing), \n", + "and use the [Pricing Calculator](https://cloud.google.com/products/calculator/)\n", + "to generate a cost estimate based on your projected usage." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "i7EUnXsZhAGF" + }, + "source": [ + "## Installation\n", + "\n", + "Install the following packages required to execute this notebook. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2b4ef9b72d43" + }, + "outputs": [], + "source": [ + "# Install the packages\n", + "! pip3 install --upgrade --quiet google-cloud-aiplatform[preview]\n", + "! pip3 install --upgrade --quiet bigframes\n", + "! pip3 install --upgrade --quiet torch" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "58707a750154" + }, + "source": [ + "### Colab only: Uncomment the following cell to restart the kernel." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "f200f10a1da3" + }, + "outputs": [], + "source": [ + "# Automatically restart kernel after installs so that your environment can access the new packages\n", + "# import IPython\n", + "\n", + "# app = IPython.Application.instance()\n", + "# app.kernel.do_shutdown(True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BF1j6f9HApxa" + }, + "source": [ + "## Before you begin\n", + "\n", + "### Set up your Google Cloud project\n", + "\n", + "**The following steps are required, regardless of your notebook environment.**\n", + "\n", + "1. [Select or create a Google Cloud project](https://console.cloud.google.com/cloud-resource-manager). When you first create an account, you get a $300 free credit towards your compute/storage costs.\n", + "\n", + "2. [Make sure that billing is enabled for your project](https://cloud.google.com/billing/docs/how-to/modify-project).\n", + "\n", + "3. [Enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).\n", + "\n", + "4. If you are running this notebook locally, you need to install the [Cloud SDK](https://cloud.google.com/sdk)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WReHDGG5g0XY" + }, + "source": [ + "#### Set your project ID\n", + "\n", + "**If you don't know your project ID**, try the following:\n", + "* Run `gcloud config list`.\n", + "* Run `gcloud projects list`.\n", + "* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oM1iC_MfAts1" + }, + "outputs": [], + "source": [ + "PROJECT_ID = \"[your-project-id]\" # @param {type:\"string\"}\n", + "\n", + "# Set the project id\n", + "! gcloud config set project {PROJECT_ID}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "region" + }, + "source": [ + "#### Region\n", + "\n", + "You can also change the `REGION` variable used by Vertex AI. Learn more about [Vertex AI regions](https://cloud.google.com/vertex-ai/docs/general/locations)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "region" + }, + "outputs": [], + "source": [ + "REGION = \"us-central1\" # @param {type: \"string\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sBCra4QMA2wR" + }, + "source": [ + "### Authenticate your Google Cloud account\n", + "\n", + "Depending on your Jupyter environment, you may have to manually authenticate. Follow the relevant instructions below." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "74ccc9e52986" + }, + "source": [ + "**1. Vertex AI Workbench**\n", + "* Do nothing as you are already authenticated." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "de775a3773ba" + }, + "source": [ + "**2. Local JupyterLab instance, uncomment and run:**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "254614fa0c46" + }, + "outputs": [], + "source": [ + "# ! gcloud auth login" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ef21552ccea8" + }, + "source": [ + "**3. 
Colab, uncomment and run:**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "603adbbf0532" + }, + "outputs": [], + "source": [ + "# from google.colab import auth\n", + "# auth.authenticate_user()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f6b2ccc891ed" + }, + "source": [ + "**4. Service account or other**\n", + "* See how to grant Cloud Storage permissions to your service account at https://cloud.google.com/storage/docs/gsutil/commands/iam#ch-examples." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zgPO1eR3CYjk" + }, + "source": [ + "### Create a Cloud Storage bucket\n", + "\n", + "Create a storage bucket to store intermediate artifacts such as datasets." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MzGDU7TWdts_" + }, + "outputs": [], + "source": [ + "BUCKET_URI = f\"gs://your-bucket-name-{PROJECT_ID}-unique\" # @param {type:\"string\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-EcIXiGsCePi" + }, + "source": [ + "**Only if your bucket doesn't already exist**: Run the following cell to create your Cloud Storage bucket." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NIq7R4HZCfIc" + }, + "outputs": [], + "source": [ + "! gsutil mb -l {REGION} -p {PROJECT_ID} {BUCKET_URI}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "960505627ddf" + }, + "source": [ + "### Import libraries and define constants" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PyQmSRbKA8r-" + }, + "outputs": [], + "source": [ + "import bigframes.pandas as bf\n", + "import torch\n", + "import vertexai\n", + "from vertexai.preview import VertexModel\n", + "\n", + "bf.options.bigquery.location = \"us\" # Dataset is in 'us' not 'us-central1'\n", + "bf.options.bigquery.project = PROJECT_ID\n", + "\n", + "from bigframes.ml.model_selection import \\\n", + " train_test_split as bf_train_test_split" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "init_aip:mbsdk,all" + }, + "source": [ + "## Initialize Vertex AI SDK for Python\n", + "\n", + "Initialize the Vertex AI SDK for Python for your project and corresponding bucket." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "init_aip:mbsdk,all" + }, + "outputs": [], + "source": [ + "vertexai.init(\n", + " project=PROJECT_ID,\n", + " location=REGION,\n", + " staging_bucket=BUCKET_URI,\n", + ")\n", + "\n", + "REMOTE_JOB_NAME = \"sdk2-bigframes-pytorch\"\n", + "REMOTE_JOB_BUCKET = f\"{BUCKET_URI}/{REMOTE_JOB_NAME}\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "105334524e96" + }, + "source": [ + "## Prepare the dataset\n", + "\n", + "Now load the Iris dataset and split the data into train and test sets." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "b44cdc4e03f1" + }, + "outputs": [], + "source": [ + "df = bf.read_gbq(\"bigquery-public-data.ml_datasets.iris\")\n", + "\n", + "species_categories = {\n", + " \"versicolor\": 0,\n", + " \"virginica\": 1,\n", + " \"setosa\": 2,\n", + "}\n", + "df[\"species\"] = df[\"species\"].map(species_categories)\n", + "\n", + "# Assign an index column name\n", + "index_col = \"index\"\n", + "df.index.name = index_col" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9cb8616b1997" + }, + "outputs": [], + "source": [ + "feature_columns = df[[\"sepal_length\", \"sepal_width\", \"petal_length\", \"petal_width\"]]\n", + "label_columns = df[[\"species\"]]\n", + "train_X, test_X, train_y, test_y = bf_train_test_split(\n", + " feature_columns, label_columns, test_size=0.2\n", + ")\n", + "\n", + "print(\"X_train size: \", train_X.size)\n", + "print(\"X_test size: \", test_X.size)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "23fe7b734b08" + }, + "outputs": [], + "source": [ + "# Switch to remote mode for training\n", + "vertexai.preview.init(remote=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "5904a0f1bb03" + }, + "source": [ + "## PyTorch remote training with CPU (Custom PyTorch model)\n", + "\n", + "First, train a PyTorch model as a remote training job:\n", + "\n", + "- Reinitialize Vertex AI for remote training.\n", + "- Set TorchLogisticRegression for the remote training job.\n", + "- Invoke TorchLogisticRegression locally which will launch the remote training job." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2a1b85195a17" + }, + "outputs": [], + "source": [ + "# define the custom model\n", + "class TorchLogisticRegression(VertexModel, torch.nn.Module):\n", + " def __init__(self, input_size: int, output_size: int):\n", + " torch.nn.Module.__init__(self)\n", + " VertexModel.__init__(self)\n", + " self.linear = torch.nn.Linear(input_size, output_size)\n", + " self.softmax = torch.nn.Softmax(dim=1)\n", + "\n", + " def forward(self, x):\n", + " return self.softmax(self.linear(x))\n", + "\n", + " @vertexai.preview.developer.mark.train()\n", + " def train(self, X, y, num_epochs, lr):\n", + " X = X.to(torch.float32)\n", + " y = torch.flatten(y) # necessary to get 1D tensor\n", + " dataloader = torch.utils.data.DataLoader(\n", + " torch.utils.data.TensorDataset(X, y),\n", + " batch_size=10,\n", + " shuffle=True,\n", + " generator=torch.Generator(device=X.device),\n", + " )\n", + "\n", + " criterion = torch.nn.CrossEntropyLoss()\n", + " optimizer = torch.optim.SGD(self.parameters(), lr=lr)\n", + "\n", + " for t in range(num_epochs):\n", + " for batch, (X, y) in enumerate(dataloader):\n", + " optimizer.zero_grad()\n", + " pred = self(X)\n", + " loss = criterion(pred, y)\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " @vertexai.preview.developer.mark.predict()\n", + " def predict(self, X):\n", + " X = torch.tensor(X).to(torch.float32)\n", + " with torch.no_grad():\n", + " pred = torch.argmax(self(X), dim=1)\n", + " return pred" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4e35593f520a" + }, + "outputs": [], + "source": [ + "# Switch to remote mode for training\n", + "vertexai.preview.init(remote=True)\n", + "\n", + "# Instantiate model\n", + "model = TorchLogisticRegression(4, 3)\n", + "\n", + "# Set training config\n", + 
"model.train.vertex.remote_config.custom_commands = [\n", + " \"pip install torchdata\",\n", + " \"pip install torcharrow\",\n", + "]\n", + "model.train.vertex.remote_config.display_name = REMOTE_JOB_NAME + \"-torch-model\"\n", + "model.train.vertex.remote_config.staging_bucket = REMOTE_JOB_BUCKET\n", + "\n", + "# Train model on Vertex\n", + "model.train(train_X, train_y, num_epochs=200, lr=0.05)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "edf4d0708f02" + }, + "source": [ + "## Remote prediction\n", + "\n", + "Obtain predictions from the trained model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "42dfbff0ca15" + }, + "outputs": [], + "source": [ + "vertexai.preview.init(remote=True)\n", + "\n", + "# Set remote config\n", + "model.predict.vertex.remote_config.custom_commands = [\n", + " \"pip install torchdata\",\n", + " \"pip install torcharrow\",\n", + "]\n", + "model.predict.vertex.remote_config.display_name = REMOTE_JOB_NAME + \"-torch-predict\"\n", + "model.predict.vertex.remote_config.staging_bucket = REMOTE_JOB_BUCKET\n", + "\n", + "predictions = model.predict(test_X)\n", + "\n", + "print(f\"Remote predictions: {predictions}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4340ed8316cd" + }, + "source": [ + "## Local evaluation\n", + "\n", + "Evaluate model results locally." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "eb27a31cec6f" + }, + "outputs": [], + "source": [ + "# User must convert bigframes to torch tensor for local evaluation\n", + "train_X_tensor = torch.from_numpy(\n", + " train_X.to_pandas().reset_index().drop(columns=[\"index\"]).values.astype(float)\n", + ")\n", + "train_y_tensor = torch.from_numpy(\n", + " train_y.to_pandas().reset_index().drop(columns=[\"index\"]).values.astype(float)\n", + ")\n", + "\n", + "test_X_tensor = torch.from_numpy(\n", + " test_X.to_pandas().reset_index().drop(columns=[\"index\"]).values.astype(float)\n", + ")\n", + "test_y_tensor = torch.from_numpy(\n", + " test_y.to_pandas().reset_index().drop(columns=[\"index\"]).values.astype(float)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "7db44ad81389" + }, + "outputs": [], + "source": [ + "from sklearn.metrics import accuracy_score\n", + "\n", + "# Switch to local mode for evaluation\n", + "vertexai.preview.init(remote=False)\n", + "\n", + "# Evaluate model's accuracy score\n", + "print(\n", + " f\"Train accuracy: {accuracy_score(train_y_tensor, model.predict(train_X_tensor))}\"\n", + ")\n", + "\n", + "print(f\"Test accuracy: {accuracy_score(test_y_tensor, model.predict(test_X_tensor))}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TpV-iwP9qw9c" + }, + "source": [ + "## Cleaning up\n", + "\n", + "To clean up all Google Cloud resources used in this project, you can [delete the Google Cloud\n", + "project](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects) you used for the tutorial.\n", + "\n", + "Otherwise, you can delete the individual resources you created in this tutorial:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sx_vKniMq9ZX" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "# Delete Cloud Storage objects that were created\n", + "delete_bucket = False\n", + "if delete_bucket or os.getenv(\"IS_TESTING\"):\n", + " ! 
gsutil -m rm -r $BUCKET_URI" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "sdk2_bigframes_pytorch.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/vertex_sdk/sdk2_bigframes_sklearn.ipynb b/notebooks/vertex_sdk/sdk2_bigframes_sklearn.ipynb new file mode 100644 index 0000000000..021c070753 --- /dev/null +++ b/notebooks/vertex_sdk/sdk2_bigframes_sklearn.ipynb @@ -0,0 +1,727 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ur8xi4C7S06n" + }, + "outputs": [], + "source": [ + "# Copyright 2023 Google LLC\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JAPoU8Sm5E6e" + }, + "source": [ + "# Train a scikit-learn model with Vertex AI SDK 2.0 and Bigframes\n", + "\n", + "\n", + " \n", + " \n", + "
\n", + " \n", + " \"Colab Run in Colab\n", + " \n", + " \n", + " \n", + " \"GitHub\n", + " View on GitHub\n", + " \n", + " \n", + " \n", + " \"VertexOpen in Vertex AI Workbench\n", + " \n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tvgnzT1CKxrO" + }, + "source": [ + "## Overview\n", + "\n", + "This tutorial demonstrates how to train a scikit-learn model using Vertex AI local-to-remote training with Vertex AI SDK 2.0 and BigQuery Bigframes as the data source.\n", + "\n", + "Learn more about [bigframes](https://cloud.google.com/bigquery/docs/)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d975e698c9a4" + }, + "source": [ + "### Objective\n", + "\n", + "In this tutorial, you learn to use `Vertex AI SDK 2.0` with Bigframes as input data source.\n", + "\n", + "\n", + "This tutorial uses the following Google Cloud ML services:\n", + "\n", + "- `Vertex AI Training`\n", + "- `Vertex AI Remote Training`\n", + "\n", + "\n", + "The steps performed include:\n", + "\n", + "- Initialize a dataframe from a BigQuery table and split the dataset\n", + "- Perform transformations as a Vertex AI remote training.\n", + "- Train the model remotely and evaluate the model locally\n", + "\n", + "**Local-to-remote training**\n", + "\n", + "```\n", + "import vertexai\n", + "from my_module import MyModelClass\n", + "\n", + "vertexai.preview.init(remote=True, project=\"my-project\", location=\"my-location\", staging_bucket=\"gs://my-bucket\")\n", + "\n", + "# Wrap the model class with `vertex_ai.preview.remote`\n", + "MyModelClass = vertexai.preview.remote(MyModelClass)\n", + "\n", + "# Instantiate the class\n", + "model = MyModelClass(...)\n", + "\n", + "# Optional set remote config\n", + "model.fit.vertex.remote_config.display_name = \"MyModelClass-remote-training\"\n", + "model.fit.vertex.remote_config.staging_bucket = \"gs://my-bucket\"\n", + "\n", + "# This `fit` call will be executed remotely\n", + "model.fit(...)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "08d289fa873f" + }, + "source": [ + "### Dataset\n", + "\n", + "This tutorial uses the IRIS dataset, which predicts the iris species." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aed92deeb4a0" + }, + "source": [ + "### Costs\n", + "\n", + "This tutorial uses billable components of Google Cloud:\n", + "\n", + "* Vertex AI\n", + "* BigQuery\n", + "* Cloud Storage\n", + "\n", + "Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing),\n", + "[BigQuery pricing](https://cloud.google.com/bigquery/pricing),\n", + "and [Cloud Storage pricing](https://cloud.google.com/storage/pricing), \n", + "and use the [Pricing Calculator](https://cloud.google.com/products/calculator/)\n", + "to generate a cost estimate based on your projected usage." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "i7EUnXsZhAGF" + }, + "source": [ + "## Installation\n", + "\n", + "Install the following packages required to execute this notebook. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2b4ef9b72d43" + }, + "outputs": [], + "source": [ + "# Install the packages\n", + "! pip3 install --upgrade --quiet google-cloud-aiplatform[preview]\n", + "! pip3 install --upgrade --quiet bigframes" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "58707a750154" + }, + "source": [ + "### Colab only: Uncomment the following cell to restart the kernel." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "f200f10a1da3" + }, + "outputs": [], + "source": [ + "# Automatically restart kernel after installs so that your environment can access the new packages\n", + "# import IPython\n", + "\n", + "# app = IPython.Application.instance()\n", + "# app.kernel.do_shutdown(True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BF1j6f9HApxa" + }, + "source": [ + "## Before you begin\n", + "\n", + "### Set up your Google Cloud project\n", + "\n", + "**The following steps are required, regardless of your notebook environment.**\n", + "\n", + "1. [Select or create a Google Cloud project](https://console.cloud.google.com/cloud-resource-manager). When you first create an account, you get a $300 free credit towards your compute/storage costs.\n", + "\n", + "2. [Make sure that billing is enabled for your project](https://cloud.google.com/billing/docs/how-to/modify-project).\n", + "\n", + "3. [Enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).\n", + "\n", + "4. If you are running this notebook locally, you need to install the [Cloud SDK](https://cloud.google.com/sdk)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WReHDGG5g0XY" + }, + "source": [ + "#### Set your project ID\n", + "\n", + "**If you don't know your project ID**, try the following:\n", + "* Run `gcloud config list`.\n", + "* Run `gcloud projects list`.\n", + "* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oM1iC_MfAts1" + }, + "outputs": [], + "source": [ + "PROJECT_ID = \"[your-project-id]\" # @param {type:\"string\"}\n", + "\n", + "# Set the project id\n", + "! gcloud config set project {PROJECT_ID}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "region" + }, + "source": [ + "#### Region\n", + "\n", + "You can also change the `REGION` variable used by Vertex AI. Learn more about [Vertex AI regions](https://cloud.google.com/vertex-ai/docs/general/locations)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "region" + }, + "outputs": [], + "source": [ + "REGION = \"us-central1\" # @param {type: \"string\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sBCra4QMA2wR" + }, + "source": [ + "### Authenticate your Google Cloud account\n", + "\n", + "Depending on your Jupyter environment, you may have to manually authenticate. Follow the relevant instructions below." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "74ccc9e52986" + }, + "source": [ + "**1. Vertex AI Workbench**\n", + "* Do nothing as you are already authenticated." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "de775a3773ba" + }, + "source": [ + "**2. Local JupyterLab instance, uncomment and run:**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "254614fa0c46" + }, + "outputs": [], + "source": [ + "# ! gcloud auth login" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ef21552ccea8" + }, + "source": [ + "**3. 
Colab, uncomment and run:**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "603adbbf0532" + }, + "outputs": [], + "source": [ + "# from google.colab import auth\n", + "# auth.authenticate_user()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f6b2ccc891ed" + }, + "source": [ + "**4. Service account or other**\n", + "* See how to grant Cloud Storage permissions to your service account at https://cloud.google.com/storage/docs/gsutil/commands/iam#ch-examples." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zgPO1eR3CYjk" + }, + "source": [ + "### Create a Cloud Storage bucket\n", + "\n", + "Create a storage bucket to store intermediate artifacts such as datasets." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MzGDU7TWdts_" + }, + "outputs": [], + "source": [ + "BUCKET_URI = f\"gs://your-bucket-name-{PROJECT_ID}-unique\" # @param {type:\"string\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-EcIXiGsCePi" + }, + "source": [ + "**Only if your bucket doesn't already exist**: Run the following cell to create your Cloud Storage bucket." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NIq7R4HZCfIc" + }, + "outputs": [], + "source": [ + "! gsutil mb -l {REGION} -p {PROJECT_ID} {BUCKET_URI}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "960505627ddf" + }, + "source": [ + "### Import libraries and define constants" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PyQmSRbKA8r-" + }, + "outputs": [], + "source": [ + "import bigframes.pandas as bf\n", + "import vertexai\n", + "\n", + "bf.options.bigquery.location = \"us\" # Dataset is in 'us' not 'us-central1'\n", + "bf.options.bigquery.project = PROJECT_ID\n", + "\n", + "from bigframes.ml.model_selection import \\\n", + " train_test_split as bf_train_test_split\n", + "\n", + "REMOTE_JOB_NAME = \"sdk2-bigframes-sklearn\"\n", + "REMOTE_JOB_BUCKET = f\"{BUCKET_URI}/{REMOTE_JOB_NAME}\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "init_aip:mbsdk,all" + }, + "source": [ + "## Initialize Vertex AI SDK for Python\n", + "\n", + "Initialize the Vertex AI SDK for Python for your project and corresponding bucket." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "init_aip:mbsdk,all" + }, + "outputs": [], + "source": [ + "vertexai.init(\n", + " project=PROJECT_ID,\n", + " location=REGION,\n", + " staging_bucket=BUCKET_URI,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "105334524e96" + }, + "source": [ + "## Prepare the dataset\n", + "\n", + "Now load the Iris dataset and split the data into train and test sets." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "b44cdc4e03f1" + }, + "outputs": [], + "source": [ + "df = bf.read_gbq(\"bigquery-public-data.ml_datasets.iris\")\n", + "\n", + "species_categories = {\n", + " \"versicolor\": 0,\n", + " \"virginica\": 1,\n", + " \"setosa\": 2,\n", + "}\n", + "df[\"species\"] = df[\"species\"].map(species_categories)\n", + "\n", + "# Assign an index column name\n", + "index_col = \"index\"\n", + "df.index.name = index_col" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "9cb8616b1997" + }, + "outputs": [], + "source": [ + "feature_columns = df[[\"sepal_length\", \"sepal_width\", \"petal_length\", \"petal_width\"]]\n", + "label_columns = df[[\"species\"]]\n", + "train_X, test_X, train_y, test_y = bf_train_test_split(\n", + " feature_columns, label_columns, test_size=0.2\n", + ")\n", + "\n", + "print(\"X_train size: \", train_X.size)\n", + "print(\"X_test size: \", test_X.size)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8306545fcc57" + }, + "source": [ + "## Feature transformation\n", + "\n", + "Next, you do feature transformations on the data using the Vertex AI remote training service.\n", + "\n", + "First, you re-initialize Vertex AI to enable remote training." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "55e701c31036" + }, + "outputs": [], + "source": [ + "# Switch to remote mode for training\n", + "vertexai.preview.init(remote=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4a0e9d59b273" + }, + "source": [ + "### Execute remote job for fit_transform() on training data\n", + "\n", + "Next, indicate that the `StandardScalar` class is to be executed remotely. Then set up the data transform and call the `fit_transform()` method is executed remotely." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "90333089d362" + }, + "outputs": [], + "source": [ + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "# Wrap classes to enable Vertex remote execution\n", + "StandardScaler = vertexai.preview.remote(StandardScaler)\n", + "\n", + "# Instantiate transformer\n", + "transformer = StandardScaler()\n", + "\n", + "# Set training config\n", + "transformer.fit_transform.vertex.remote_config.display_name = (\n", + " f\"{REMOTE_JOB_NAME}-fit-transformer-bigframes\"\n", + ")\n", + "transformer.fit_transform.vertex.remote_config.staging_bucket = REMOTE_JOB_BUCKET\n", + "\n", + "# Execute transformer on Vertex (train_X is bigframes.dataframe.DataFrame, X_train is np.array)\n", + "X_train = transformer.fit_transform(train_X)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "6bf95574c907" + }, + "source": [ + "### Remote transform on test data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "da6eea22a89a" + }, + "outputs": [], + "source": [ + "# Transform test dataset before calculate test score\n", + "transformer.transform.vertex.remote_config.display_name = (\n", + " REMOTE_JOB_NAME + \"-transformer\"\n", + ")\n", + "transformer.transform.vertex.remote_config.staging_bucket = REMOTE_JOB_BUCKET\n", + "\n", + "# Execute transformer on Vertex (test_X is bigframes.dataframe.DataFrame, X_test is np.array)\n", + "X_test = transformer.transform(test_X)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ddf906c886e4" + }, + "source": [ + "## Remote training\n", + "\n", + "First, train the scikit-learn model as a remote training job:\n", + "\n", + "- Set LogisticRegression for the remote training job.\n", + "- Invoke LogisticRegression locally which will launch the remote training job." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "c7b0116fa60c" + }, + "outputs": [], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "\n", + "# Wrap classes to enable Vertex remote execution\n", + "LogisticRegression = vertexai.preview.remote(LogisticRegression)\n", + "\n", + "# Instantiate model, warm_start=True for uptraining\n", + "model = LogisticRegression(warm_start=True)\n", + "\n", + "# Set training config\n", + "model.fit.vertex.remote_config.display_name = REMOTE_JOB_NAME + \"-sklearn-model\"\n", + "model.fit.vertex.remote_config.staging_bucket = REMOTE_JOB_BUCKET\n", + "\n", + "# Train model on Vertex\n", + "model.fit(train_X, train_y)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ffe1d5903bcb" + }, + "source": [ + "## Remote prediction\n", + "\n", + "Obtain predictions from the trained model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "d00ce35920fa" + }, + "outputs": [], + "source": [ + "# Remote evaluation\n", + "vertexai.preview.init(remote=True)\n", + "\n", + "# Evaluate model's accuracy score\n", + "predictions = model.predict(test_X)\n", + "\n", + "print(f\"Remote predictions: {predictions}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a8cd6cbd4403" + }, + "source": [ + "## Local evaluation\n", + "\n", + "Score model results locally." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dc105dafdfb9" + }, + "outputs": [], + "source": [ + "# User must convert bigframes to pandas dataframe for local evaluation\n", + "train_X_pd = train_X.to_pandas().reset_index(drop=True)\n", + "train_y_pd = train_y.to_pandas().reset_index(drop=True)\n", + "\n", + "test_X_pd = test_X.to_pandas().reset_index(drop=True)\n", + "test_y_pd = test_y.to_pandas().reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "25fec549de69" + }, + "outputs": [], + "source": [ + "# Switch to local mode for testing\n", + "vertexai.preview.init(remote=False)\n", + "\n", + "# Evaluate model's accuracy score\n", + "print(f\"Train accuracy: {model.score(train_X_pd, train_y_pd)}\")\n", + "\n", + "print(f\"Test accuracy: {model.score(test_X_pd, test_y_pd)}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TpV-iwP9qw9c" + }, + "source": [ + "## Cleaning up\n", + "\n", + "To clean up all Google Cloud resources used in this project, you can [delete the Google Cloud\n", + "project](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects) you used for the tutorial.\n", + "\n", + "Otherwise, you can delete the individual resources you created in this tutorial:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sx_vKniMq9ZX" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "# Delete Cloud Storage objects that were created\n", + "delete_bucket = False\n", + "if delete_bucket or os.getenv(\"IS_TESTING\"):\n", + " ! gsutil -m rm -r $BUCKET_URI" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "sdk2_bigframes_sklearn.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/vertex_sdk/sdk2_bigframes_tensorflow.ipynb b/notebooks/vertex_sdk/sdk2_bigframes_tensorflow.ipynb new file mode 100644 index 0000000000..e6843b66b5 --- /dev/null +++ b/notebooks/vertex_sdk/sdk2_bigframes_tensorflow.ipynb @@ -0,0 +1,646 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ur8xi4C7S06n" + }, + "outputs": [], + "source": [ + "# Copyright 2023 Google LLC\n", + "#\n", + "# Licensed under the Apache License, Version 2.0 (the \"License\");\n", + "# you may not use this file except in compliance with the License.\n", + "# You may obtain a copy of the License at\n", + "#\n", + "# https://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing, software\n", + "# distributed under the License is distributed on an \"AS IS\" BASIS,\n", + "# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", + "# See the License for the specific language governing permissions and\n", + "# limitations under the License." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JAPoU8Sm5E6e" + }, + "source": [ + "# Train a Tensorflow Keras model with Vertex AI SDK 2.0 and Bigframes \n", + "\n", + "\n", + " \n", + " \n", + "
\n", + " \n", + " \"Colab Run in Colab\n", + " \n", + " \n", + " \n", + " \"GitHub\n", + " View on GitHub\n", + " \n", + " \n", + " \n", + " \"VertexOpen in Vertex AI Workbench\n", + " \n", + "
" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tvgnzT1CKxrO" + }, + "source": [ + "## Overview\n", + "\n", + "This tutorial demonstrates how to train a tensorflow keras model using Vertex AI local-to-remote training with Vertex AI SDK 2.0 and BigQuery Bigframes as the data source.\n", + "\n", + "Learn more about [bigframes](https://cloud.google.com/bigquery/docs/)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "d975e698c9a4" + }, + "source": [ + "### Objective\n", + "\n", + "In this tutorial, you learn to use `Vertex AI SDK 2.0` with Bigframes as input data source.\n", + "\n", + "\n", + "This tutorial uses the following Google Cloud ML services:\n", + "\n", + "- `Vertex AI Training`\n", + "- `Vertex AI Remote Training`\n", + "\n", + "\n", + "The steps performed include:\n", + "\n", + "- Initialize a dataframe from a BigQuery table and split the dataset\n", + "- Perform transformations as a Vertex AI remote training.\n", + "- Train the model remotely and evaluate the model locally\n", + "\n", + "**Local-to-remote training**\n", + "\n", + "```\n", + "import vertexai\n", + "from my_module import MyModelClass\n", + "\n", + "vertexai.preview.init(remote=True, project=\"my-project\", location=\"my-location\", staging_bucket=\"gs://my-bucket\")\n", + "\n", + "# Wrap the model class with `vertex_ai.preview.remote`\n", + "MyModelClass = vertexai.preview.remote(MyModelClass)\n", + "\n", + "# Instantiate the class\n", + "model = MyModelClass(...)\n", + "\n", + "# Optional set remote config\n", + "model.fit.vertex.remote_config.display_name = \"MyModelClass-remote-training\"\n", + "model.fit.vertex.remote_config.staging_bucket = \"gs://my-bucket\"\n", + "\n", + "# This `fit` call will be executed remotely\n", + "model.fit(...)\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "08d289fa873f" + }, + "source": [ + "### Dataset\n", + "\n", + "This tutorial uses the IRIS dataset, which predicts the iris species." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "aed92deeb4a0" + }, + "source": [ + "### Costs\n", + "\n", + "This tutorial uses billable components of Google Cloud:\n", + "\n", + "* Vertex AI\n", + "* BigQuery\n", + "* Cloud Storage\n", + "\n", + "Learn about [Vertex AI pricing](https://cloud.google.com/vertex-ai/pricing),\n", + "[BigQuery pricing](https://cloud.google.com/bigquery/pricing),\n", + "and [Cloud Storage pricing](https://cloud.google.com/storage/pricing), \n", + "and use the [Pricing Calculator](https://cloud.google.com/products/calculator/)\n", + "to generate a cost estimate based on your projected usage." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "i7EUnXsZhAGF" + }, + "source": [ + "## Installation\n", + "\n", + "Install the following packages required to execute this notebook. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "2b4ef9b72d43" + }, + "outputs": [], + "source": [ + "# Install the packages\n", + "! pip3 install --upgrade --quiet google-cloud-aiplatform[preview]\n", + "! pip3 install --upgrade --quiet bigframes\n", + "! pip3 install --upgrade --quiet tensorflow==2.12.0" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "58707a750154" + }, + "source": [ + "### Colab only: Uncomment the following cell to restart the kernel." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "f200f10a1da3" + }, + "outputs": [], + "source": [ + "# Automatically restart kernel after installs so that your environment can access the new packages\n", + "# import IPython\n", + "\n", + "# app = IPython.Application.instance()\n", + "# app.kernel.do_shutdown(True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BF1j6f9HApxa" + }, + "source": [ + "## Before you begin\n", + "\n", + "### Set up your Google Cloud project\n", + "\n", + "**The following steps are required, regardless of your notebook environment.**\n", + "\n", + "1. [Select or create a Google Cloud project](https://console.cloud.google.com/cloud-resource-manager). When you first create an account, you get a $300 free credit towards your compute/storage costs.\n", + "\n", + "2. [Make sure that billing is enabled for your project](https://cloud.google.com/billing/docs/how-to/modify-project).\n", + "\n", + "3. [Enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).\n", + "\n", + "4. If you are running this notebook locally, you need to install the [Cloud SDK](https://cloud.google.com/sdk)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "WReHDGG5g0XY" + }, + "source": [ + "#### Set your project ID\n", + "\n", + "**If you don't know your project ID**, try the following:\n", + "* Run `gcloud config list`.\n", + "* Run `gcloud projects list`.\n", + "* See the support page: [Locate the project ID](https://support.google.com/googleapi/answer/7014113)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oM1iC_MfAts1" + }, + "outputs": [], + "source": [ + "PROJECT_ID = \"[your-project-id]\" # @param {type:\"string\"}\n", + "\n", + "# Set the project id\n", + "! gcloud config set project {PROJECT_ID}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "region" + }, + "source": [ + "#### Region\n", + "\n", + "You can also change the `REGION` variable used by Vertex AI. Learn more about [Vertex AI regions](https://cloud.google.com/vertex-ai/docs/general/locations)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "region" + }, + "outputs": [], + "source": [ + "REGION = \"us-central1\" # @param {type: \"string\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "sBCra4QMA2wR" + }, + "source": [ + "### Authenticate your Google Cloud account\n", + "\n", + "Depending on your Jupyter environment, you may have to manually authenticate. Follow the relevant instructions below." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "74ccc9e52986" + }, + "source": [ + "**1. Vertex AI Workbench**\n", + "* Do nothing as you are already authenticated." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "de775a3773ba" + }, + "source": [ + "**2. Local JupyterLab instance, uncomment and run:**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "254614fa0c46" + }, + "outputs": [], + "source": [ + "# ! gcloud auth login" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ef21552ccea8" + }, + "source": [ + "**3. 
Colab, uncomment and run:**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "603adbbf0532" + }, + "outputs": [], + "source": [ + "# from google.colab import auth\n", + "# auth.authenticate_user()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f6b2ccc891ed" + }, + "source": [ + "**4. Service account or other**\n", + "* See how to grant Cloud Storage permissions to your service account at https://cloud.google.com/storage/docs/gsutil/commands/iam#ch-examples." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "zgPO1eR3CYjk" + }, + "source": [ + "### Create a Cloud Storage bucket\n", + "\n", + "Create a storage bucket to store intermediate artifacts such as datasets." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MzGDU7TWdts_" + }, + "outputs": [], + "source": [ + "BUCKET_URI = f\"gs://your-bucket-name-{PROJECT_ID}-unique\" # @param {type:\"string\"}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-EcIXiGsCePi" + }, + "source": [ + "**Only if your bucket doesn't already exist**: Run the following cell to create your Cloud Storage bucket." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NIq7R4HZCfIc" + }, + "outputs": [], + "source": [ + "! gsutil mb -l {REGION} -p {PROJECT_ID} {BUCKET_URI}" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "960505627ddf" + }, + "source": [ + "### Import libraries and define constants" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "PyQmSRbKA8r-" + }, + "outputs": [], + "source": [ + "import bigframes.pandas as bf\n", + "import tensorflow as tf\n", + "import vertexai\n", + "from tensorflow import keras\n", + "\n", + "bf.options.bigquery.location = \"us\" # Dataset is in 'us' not 'us-central1'\n", + "bf.options.bigquery.project = PROJECT_ID\n", + "\n", + "from bigframes.ml.model_selection import \\\n", + " train_test_split as bf_train_test_split" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "init_aip:mbsdk,all" + }, + "source": [ + "## Initialize Vertex AI SDK for Python\n", + "\n", + "Initialize the Vertex AI SDK for Python for your project and corresponding bucket." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "init_aip:mbsdk,all" + }, + "outputs": [], + "source": [ + "vertexai.init(\n", + " project=PROJECT_ID,\n", + " location=REGION,\n", + " staging_bucket=BUCKET_URI,\n", + ")\n", + "\n", + "REMOTE_JOB_NAME = \"sdk2-bigframes-tensorflow\"\n", + "REMOTE_JOB_BUCKET = f\"{BUCKET_URI}/{REMOTE_JOB_NAME}\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "105334524e96" + }, + "source": [ + "## Prepare the dataset\n", + "\n", + "Now load the Iris dataset and split the data into train and test sets." 
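If you want to inspect the source table before building the training split, a quick preview is optional but cheap: the computation runs in BigQuery and only a few rows are pulled to the client. This sketch only uses `bf` as imported above.

```
# Optional, illustrative preview of the source table before splitting.
df_preview = bf.read_gbq("bigquery-public-data.ml_datasets.iris")

print(df_preview.shape)                 # (rows, columns), computed in BigQuery
print(df_preview.head(5).to_pandas())   # small client-side sample
```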
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "94576deccd8c" + }, + "outputs": [], + "source": [ + "df = bf.read_gbq(\"bigquery-public-data.ml_datasets.iris\")\n", + "\n", + "species_categories = {\n", + " \"versicolor\": 0,\n", + " \"virginica\": 1,\n", + " \"setosa\": 2,\n", + "}\n", + "df[\"target\"] = df[\"species\"].map(species_categories)\n", + "df = df.drop(columns=[\"species\"])\n", + "\n", + "train, test = bf_train_test_split(df, test_size=0.2)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cfcbce726efa" + }, + "source": [ + "## Remote training with GPU\n", + "\n", + "First, train a TensorFlow model as a remote training job:\n", + "\n", + "- Reinitialize Vertex AI for remote training.\n", + "- Instantiate the tensorflow keras model for the remote training job.\n", + "- Invoke the tensorflow keras model.fit() locally which will launch the remote training job." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "fd865b0c4e8b" + }, + "outputs": [], + "source": [ + "# Switch to remote mode for training\n", + "vertexai.preview.init(remote=True)\n", + "\n", + "keras.Sequential = vertexai.preview.remote(keras.Sequential)\n", + "\n", + "# Instantiate model\n", + "model = keras.Sequential(\n", + " [keras.layers.Dense(5, input_shape=(4,)), keras.layers.Softmax()]\n", + ")\n", + "\n", + "# Specify optimizer and loss function\n", + "model.compile(optimizer=\"adam\", loss=\"mean_squared_error\")\n", + "\n", + "# Set training config\n", + "model.fit.vertex.remote_config.enable_cuda = True\n", + "model.fit.vertex.remote_config.display_name = REMOTE_JOB_NAME + \"-keras-model-gpu\"\n", + "model.fit.vertex.remote_config.staging_bucket = REMOTE_JOB_BUCKET\n", + "model.fit.vertex.remote_config.custom_commands = [\"pip install tensorflow-io==0.32.0\"]\n", + "\n", + "# Manually set compute resources this time\n", + "model.fit.vertex.remote_config.machine_type = \"n1-highmem-4\"\n", + "model.fit.vertex.remote_config.accelerator_type = \"NVIDIA_TESLA_K80\"\n", + "model.fit.vertex.remote_config.accelerator_count = 4\n", + "\n", + "# Train model on Vertex\n", + "model.fit(train, epochs=10)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "f1af94ac1477" + }, + "source": [ + "## Remote prediction\n", + "\n", + "Obtain predictions from the trained model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "1d75879948b5" + }, + "outputs": [], + "source": [ + "vertexai.preview.init(remote=True)\n", + "\n", + "# Set remote config\n", + "model.predict.vertex.remote_config.enable_cuda = False\n", + "model.predict.vertex.remote_config.display_name = REMOTE_JOB_NAME + \"-keras-predict-cpu\"\n", + "model.predict.vertex.remote_config.staging_bucket = REMOTE_JOB_BUCKET\n", + "model.predict.vertex.remote_config.custom_commands = [\n", + " \"pip install tensorflow-io==0.32.0\"\n", + "]\n", + "\n", + "predictions = model.predict(train)\n", + "\n", + "print(f\"Remote predictions: {predictions}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "798b77c95067" + }, + "source": [ + "## Local evaluation\n", + "\n", + "Evaluate model results locally." 
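Because the model is compiled with a mean-squared-error loss, `model.evaluate()` in the cells below reports only that loss. As an optional add-on, you could derive class predictions from the softmax outputs and compute a plain accuracy; this sketch assumes the numpy conversions below have been run and that Vertex AI is in local mode.

```
# Optional, illustrative: accuracy derived from the softmax outputs.
# Assumes `test_X_np` and `test_y_np` from the conversion cell below.
import numpy as np

probs = model.predict(test_X_np)                   # shape (n_samples, 5)
predicted_class = np.argmax(probs, axis=1)
accuracy = np.mean(predicted_class == test_y_np.ravel())
print(f"Test accuracy (argmax of softmax outputs): {accuracy:.3f}")
```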
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "88e734e30791" + }, + "outputs": [], + "source": [ + "# User must convert bigframes to pandas dataframe for local evaluation\n", + "feature_columns = [\"sepal_length\", \"sepal_width\", \"petal_length\", \"petal_width\"]\n", + "label_columns = [\"target\"]\n", + "\n", + "train_X_np = train[feature_columns].to_pandas().values.astype(float)\n", + "train_y_np = train[label_columns].to_pandas().values.astype(float)\n", + "train_ds = tf.data.Dataset.from_tensor_slices((train_X_np, train_y_np))\n", + "\n", + "test_X_np = test[feature_columns].to_pandas().values.astype(float)\n", + "test_y_np = test[label_columns].to_pandas().values.astype(float)\n", + "test_ds = tf.data.Dataset.from_tensor_slices((test_X_np, test_y_np))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cb8637f783ad" + }, + "outputs": [], + "source": [ + "# Switch to local mode for evaluation\n", + "vertexai.preview.init(remote=False)\n", + "\n", + "# Evaluate model's mean square errors\n", + "print(f\"Train loss: {model.evaluate(train_ds.batch(32))}\")\n", + "\n", + "print(f\"Test loss: {model.evaluate(test_ds.batch(32))}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TpV-iwP9qw9c" + }, + "source": [ + "## Cleaning up\n", + "\n", + "To clean up all Google Cloud resources used in this project, you can [delete the Google Cloud\n", + "project](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects) you used for the tutorial.\n", + "\n", + "Otherwise, you can delete the individual resources you created in this tutorial:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "sx_vKniMq9ZX" + }, + "outputs": [], + "source": [ + "import os\n", + "\n", + "# Delete Cloud Storage objects that were created\n", + "delete_bucket = False\n", + "if delete_bucket or os.getenv(\"IS_TESTING\"):\n", + " ! gsutil -m rm -r $BUCKET_URI" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "sdk2_bigframes_tensorflow.ipynb", + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/noxfile.py b/noxfile.py index 033bbfefe4..a113e1fcde 100644 --- a/noxfile.py +++ b/noxfile.py @@ -362,7 +362,7 @@ def doctest(session: nox.sessions.Session): run_system( session=session, prefix_name="doctest", - extra_pytest_options=("--doctest-modules",), + extra_pytest_options=("--doctest-modules", "third_party"), test_folder="bigframes", check_cov=True, ) @@ -610,6 +610,9 @@ def notebook(session): "notebooks/getting_started/bq_dataframes_llm_code_generation.ipynb", "notebooks/getting_started/bq_dataframes_ml_linear_regression.ipynb", "notebooks/generative_ai/bq_dataframes_ml_drug_name_generation.ipynb", + "notebooks/vertex_sdk/sdk2_bigframes_pytorch.ipynb", + "notebooks/vertex_sdk/sdk2_bigframes_sklearn.ipynb", + "notebooks/vertex_sdk/sdk2_bigframes_tensorflow.ipynb", # The experimental notebooks imagine features that don't yet # exist or only exist as temporary prototypes. 
"notebooks/experimental/longer_ml_demo.ipynb", diff --git a/tests/system/large/ml/test_pipeline.py b/tests/system/large/ml/test_pipeline.py index 34a2ca0101..9294740dd6 100644 --- a/tests/system/large/ml/test_pipeline.py +++ b/tests/system/large/ml/test_pipeline.py @@ -580,6 +580,11 @@ def test_pipeline_columntransformer_fit_predict(session, penguins_df_default_ind preprocessing.MinMaxScaler(), ["culmen_length_mm", "flipper_length_mm"], ), + ( + "k_bins_discretizer", + preprocessing.KBinsDiscretizer(strategy="uniform"), + ["culmen_length_mm", "flipper_length_mm"], + ), ( "label", preprocessing.LabelEncoder(), @@ -657,6 +662,11 @@ def test_pipeline_columntransformer_to_gbq(penguins_df_default_index, dataset_id preprocessing.MinMaxScaler(), ["culmen_length_mm", "flipper_length_mm"], ), + ( + "k_bins_discretizer", + preprocessing.KBinsDiscretizer(strategy="uniform"), + ["culmen_length_mm", "flipper_length_mm"], + ), ( "label", preprocessing.LabelEncoder(), @@ -696,9 +706,19 @@ def test_pipeline_columntransformer_to_gbq(penguins_df_default_index, dataset_id ("standard_scaler", preprocessing.StandardScaler(), "culmen_length_mm"), ("max_abs_scaler", preprocessing.MaxAbsScaler(), "culmen_length_mm"), ("min_max_scaler", preprocessing.MinMaxScaler(), "culmen_length_mm"), + ( + "k_bins_discretizer", + preprocessing.KBinsDiscretizer(strategy="uniform"), + "culmen_length_mm", + ), ("standard_scaler", preprocessing.StandardScaler(), "flipper_length_mm"), ("max_abs_scaler", preprocessing.MaxAbsScaler(), "flipper_length_mm"), ("min_max_scaler", preprocessing.MinMaxScaler(), "flipper_length_mm"), + ( + "k_bins_discretizer", + preprocessing.KBinsDiscretizer(strategy="uniform"), + "flipper_length_mm", + ), ] assert transformers == expected @@ -791,6 +811,32 @@ def test_pipeline_min_max_scaler_to_gbq(penguins_df_default_index, dataset_id): assert pl_loaded._estimator.fit_intercept is False +def test_pipeline_k_bins_discretizer_to_gbq(penguins_df_default_index, dataset_id): + pl = pipeline.Pipeline( + [ + ("transform", preprocessing.KBinsDiscretizer(strategy="uniform")), + ("estimator", linear_model.LinearRegression(fit_intercept=False)), + ] + ) + + df = penguins_df_default_index.dropna() + X_train = df[ + [ + "culmen_length_mm", + ] + ] + y_train = df[["body_mass_g"]] + pl.fit(X_train, y_train) + + pl_loaded = pl.to_gbq( + f"{dataset_id}.test_penguins_pipeline_k_bins_discretizer", replace=True + ) + assert isinstance(pl_loaded._transform, preprocessing.KBinsDiscretizer) + + assert isinstance(pl_loaded._estimator, linear_model.LinearRegression) + assert pl_loaded._estimator.fit_intercept is False + + def test_pipeline_one_hot_encoder_to_gbq(penguins_df_default_index, dataset_id): pl = pipeline.Pipeline( [ diff --git a/tests/system/small/ml/test_core.py b/tests/system/small/ml/test_core.py index ace943956f..f911dd7eeb 100644 --- a/tests/system/small/ml/test_core.py +++ b/tests/system/small/ml/test_core.py @@ -23,6 +23,7 @@ import bigframes from bigframes.ml import core +import tests.system.utils def test_model_eval( @@ -224,7 +225,7 @@ def test_pca_model_principal_component_info(penguins_bqml_pca_model: core.BqmlMo "cumulative_explained_variance_ratio": [0.469357, 0.651283, 0.812383], }, ) - pd.testing.assert_frame_equal( + tests.system.utils.assert_pandas_df_equal_ignore_ordering( result, expected, check_exact=False, diff --git a/tests/system/small/ml/test_decomposition.py b/tests/system/small/ml/test_decomposition.py index c71bbbe3b0..e31681f4a0 100644 --- a/tests/system/small/ml/test_decomposition.py +++ 
b/tests/system/small/ml/test_decomposition.py @@ -15,6 +15,7 @@ import pandas as pd from bigframes.ml import decomposition +import tests.system.utils def test_pca_predict(penguins_pca_model, new_penguins_df): @@ -129,7 +130,7 @@ def test_pca_explained_variance_(penguins_pca_model: decomposition.PCA): "explained_variance": [3.278657, 1.270829, 1.125354], }, ) - pd.testing.assert_frame_equal( + tests.system.utils.assert_pandas_df_equal_ignore_ordering( result, expected, check_exact=False, @@ -148,7 +149,7 @@ def test_pca_explained_variance_ratio_(penguins_pca_model: decomposition.PCA): "explained_variance_ratio": [0.469357, 0.181926, 0.1611], }, ) - pd.testing.assert_frame_equal( + tests.system.utils.assert_pandas_df_equal_ignore_ordering( result, expected, check_exact=False, diff --git a/tests/system/small/ml/test_preprocessing.py b/tests/system/small/ml/test_preprocessing.py index fc8f3251bd..45548acca3 100644 --- a/tests/system/small/ml/test_preprocessing.py +++ b/tests/system/small/ml/test_preprocessing.py @@ -121,7 +121,7 @@ def test_standard_scaler_series_normalizes(penguins_df_default_index, new_pengui def test_max_abs_scaler_normalizes(penguins_df_default_index, new_penguins_df): - # TODO(http://b/292431644): add a second test that compares output to sklearn.preprocessing.StandardScaler, when BQML's change is in prod. + # TODO(http://b/292431644): add a second test that compares output to sklearn.preprocessing.MaxAbsScaler, when BQML's change is in prod. scaler = bigframes.ml.preprocessing.MaxAbsScaler() scaler.fit( penguins_df_default_index[ @@ -211,7 +211,7 @@ def test_max_abs_scaler_series_normalizes(penguins_df_default_index, new_penguin pd.testing.assert_frame_equal(result, expected, rtol=1e-3) -def test_min_max_scaler_normalizeds_fit_transform(new_penguins_df): +def test_min_max_scaler_normalized_fit_transform(new_penguins_df): scaler = bigframes.ml.preprocessing.MinMaxScaler() result = scaler.fit_transform( new_penguins_df[["culmen_length_mm", "culmen_depth_mm", "flipper_length_mm"]] @@ -265,7 +265,7 @@ def test_min_max_scaler_series_normalizes(penguins_df_default_index, new_penguin def test_min_max_scaler_normalizes(penguins_df_default_index, new_penguins_df): - # TODO(http://b/292431644): add a second test that compares output to sklearn.preprocessing.StandardScaler, when BQML's change is in prod. + # TODO(http://b/292431644): add a second test that compares output to sklearn.preprocessing.MinMaxScaler, when BQML's change is in prod. scaler = bigframes.ml.preprocessing.MinMaxScaler() scaler.fit( penguins_df_default_index[ @@ -304,6 +304,131 @@ def test_min_max_scaler_normalizes(penguins_df_default_index, new_penguins_df): pd.testing.assert_frame_equal(result, expected, rtol=1e-3) +def test_k_bins_discretizer_normalized_fit_transform_default_params(new_penguins_df): + discretizer = bigframes.ml.preprocessing.KBinsDiscretizer(strategy="uniform") + result = discretizer.fit_transform( + new_penguins_df[["culmen_length_mm", "culmen_depth_mm", "flipper_length_mm"]] + ).to_pandas() + + # TODO: bug? feature columns seem to be in nondeterministic random order + # workaround: sort columns by name. Can't repro it in pantheon, so could + # be a bigframes issue... 
+ result = result.reindex(sorted(result.columns), axis=1) + + expected = pd.DataFrame( + { + "kbinsdiscretizer_culmen_depth_mm": ["bin_5", "bin_2", "bin_4"], + "kbinsdiscretizer_culmen_length_mm": ["bin_5", "bin_3", "bin_2"], + "kbinsdiscretizer_flipper_length_mm": ["bin_5", "bin_2", "bin_4"], + }, + dtype="string[pyarrow]", + index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), + ) + + pd.testing.assert_frame_equal(result, expected, rtol=1e-3) + + +def test_k_bins_discretizer_series_normalizes( + penguins_df_default_index, new_penguins_df +): + discretizer = bigframes.ml.preprocessing.KBinsDiscretizer(strategy="uniform") + discretizer.fit(penguins_df_default_index["culmen_length_mm"]) + + result = discretizer.transform( + penguins_df_default_index["culmen_length_mm"] + ).to_pandas() + result = discretizer.transform(new_penguins_df).to_pandas() + + # TODO: bug? feature columns seem to be in nondeterministic random order + # workaround: sort columns by name. Can't repro it in pantheon, so could + # be a bigframes issue... + result = result.reindex(sorted(result.columns), axis=1) + + expected = pd.DataFrame( + { + "kbinsdiscretizer_culmen_length_mm": ["bin_3", "bin_3", "bin_3"], + }, + dtype="string[pyarrow]", + index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), + ) + + pd.testing.assert_frame_equal(result, expected, rtol=1e-3) + + +def test_k_bins_discretizer_normalizes(penguins_df_default_index, new_penguins_df): + # TODO(http://b/292431644): add a second test that compares output to sklearn.preprocessing.KBinsDiscretizer, when BQML's change is in prod. + discretizer = bigframes.ml.preprocessing.KBinsDiscretizer(strategy="uniform") + discretizer.fit( + penguins_df_default_index[ + ["culmen_length_mm", "culmen_depth_mm", "flipper_length_mm"] + ] + ) + + result = discretizer.transform( + penguins_df_default_index[ + ["culmen_length_mm", "culmen_depth_mm", "flipper_length_mm"] + ] + ).to_pandas() + + result = discretizer.transform(new_penguins_df).to_pandas() + + # TODO: bug? feature columns seem to be in nondeterministic random order + # workaround: sort columns by name. Can't repro it in pantheon, so could + # be a bigframes issue... + result = result.reindex(sorted(result.columns), axis=1) + + expected = pd.DataFrame( + { + "kbinsdiscretizer_culmen_depth_mm": ["bin_5", "bin_4", "bin_4"], + "kbinsdiscretizer_culmen_length_mm": ["bin_3", "bin_3", "bin_3"], + "kbinsdiscretizer_flipper_length_mm": ["bin_4", "bin_2", "bin_3"], + }, + dtype="string[pyarrow]", + index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), + ) + + pd.testing.assert_frame_equal(result, expected, rtol=1e-3) + + +def test_k_bins_discretizer_normalizes_different_params( + penguins_df_default_index, new_penguins_df +): + # TODO(http://b/292431644): add a second test that compares output to sklearn.preprocessing.KBinsDiscretizer, when BQML's change is in prod. + discretizer = bigframes.ml.preprocessing.KBinsDiscretizer( + n_bins=6, strategy="uniform" + ) + discretizer.fit( + penguins_df_default_index[ + ["culmen_length_mm", "culmen_depth_mm", "flipper_length_mm"] + ] + ) + + result = discretizer.transform( + penguins_df_default_index[ + ["culmen_length_mm", "culmen_depth_mm", "flipper_length_mm"] + ] + ).to_pandas() + + result = discretizer.transform(new_penguins_df).to_pandas() + + # TODO: bug? feature columns seem to be in nondeterministic random order + # workaround: sort columns by name. 
Can't repro it in pantheon, so could + # be a bigframes issue... + result = result.reindex(sorted(result.columns), axis=1) + + expected = pd.DataFrame( + { + "kbinsdiscretizer_culmen_depth_mm": ["bin_6", "bin_4", "bin_5"], + "kbinsdiscretizer_culmen_length_mm": ["bin_3", "bin_3", "bin_3"], + "kbinsdiscretizer_flipper_length_mm": ["bin_4", "bin_2", "bin_3"], + }, + dtype="string[pyarrow]", + index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), + ) + + pd.testing.assert_frame_equal(result, expected, rtol=1e-3) + + def test_one_hot_encoder_default_params(new_penguins_df): encoder = bigframes.ml.preprocessing.OneHotEncoder() encoder.fit(new_penguins_df[["species", "sex"]]) diff --git a/tests/system/small/test_dataframe.py b/tests/system/small/test_dataframe.py index adf17848ee..b8616a54d6 100644 --- a/tests/system/small/test_dataframe.py +++ b/tests/system/small/test_dataframe.py @@ -757,7 +757,7 @@ def test_df_isin_dict(scalars_dfs): ("right",), ], ) -def test_merge(scalars_dfs, merge_how): +def test_df_merge(scalars_dfs, merge_how): scalars_df, scalars_pandas_df = scalars_dfs on = "rowindex_2" left_columns = ["int64_col", "float64_col", "rowindex_2"] @@ -782,6 +782,39 @@ def test_merge(scalars_dfs, merge_how): assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) +@pytest.mark.parametrize( + ("left_on", "right_on"), + [ + (["int64_col", "rowindex_2"], ["int64_col", "rowindex_2"]), + (["rowindex_2", "int64_col"], ["int64_col", "rowindex_2"]), + (["rowindex_2", "float64_col"], ["int64_col", "rowindex_2"]), + ], +) +def test_df_merge_multi_key(scalars_dfs, left_on, right_on): + scalars_df, scalars_pandas_df = scalars_dfs + left_columns = ["int64_col", "float64_col", "rowindex_2"] + right_columns = ["int64_col", "bool_col", "string_col", "rowindex_2"] + + left = scalars_df[left_columns] + # Offset the rows somewhat so that outer join can have an effect. 
+ right = scalars_df[right_columns].assign(rowindex_2=scalars_df["rowindex_2"] + 2) + + df = left.merge(right, "outer", left_on=left_on, right_on=right_on, sort=True) + bf_result = df.to_pandas() + + pd_result = scalars_pandas_df[left_columns].merge( + scalars_pandas_df[right_columns].assign( + rowindex_2=scalars_pandas_df["rowindex_2"] + 2 + ), + "outer", + left_on=left_on, + right_on=right_on, + sort=True, + ) + + assert_pandas_df_equal_ignore_ordering(bf_result, pd_result) + + @pytest.mark.parametrize( ("merge_how",), [ @@ -884,7 +917,19 @@ def test_get_dtypes_array_struct(session): dtypes = df.dtypes pd.testing.assert_series_equal( dtypes, - pd.Series({"array_column": np.dtype("O"), "struct_column": np.dtype("O")}), + pd.Series( + { + "array_column": np.dtype("O"), + "struct_column": pd.ArrowDtype( + pa.struct( + [ + ("string_field", pa.string()), + ("float_field", pa.float64()), + ] + ) + ), + } + ), ) @@ -1211,6 +1256,105 @@ def test_combine( pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) +@pytest.mark.parametrize( + ("overwrite", "filter_func"), + [ + (True, None), + (False, None), + (True, lambda x: x.isna() | (x % 2 == 0)), + ], + ids=[ + "default", + "overwritefalse", + "customfilter", + ], +) +def test_df_update(overwrite, filter_func): + if pd.__version__.startswith("1."): + pytest.skip("dtype handled differently in pandas 1.x.") + index1 = pandas.Index([1, 2, 3, 4], dtype="Int64") + index2 = pandas.Index([1, 2, 4, 5], dtype="Int64") + pd_df1 = pandas.DataFrame( + {"a": [1, None, 3, 4], "b": [5, 6, None, 8]}, dtype="Int64", index=index1 + ) + pd_df2 = pandas.DataFrame( + {"a": [None, 20, 30, 40], "c": [90, None, 110, 120]}, + dtype="Int64", + index=index2, + ) + + bf_df1 = dataframe.DataFrame(pd_df1) + bf_df2 = dataframe.DataFrame(pd_df2) + + bf_df1.update(bf_df2, overwrite=overwrite, filter_func=filter_func) + pd_df1.update(pd_df2, overwrite=overwrite, filter_func=filter_func) + + pd.testing.assert_frame_equal(bf_df1.to_pandas(), pd_df1) + + +def test_df_idxmin(): + pd_df = pd.DataFrame( + {"a": [1, 2, 3], "b": [7, None, 3], "c": [4, 4, 4]}, index=["x", "y", "z"] + ) + bf_df = dataframe.DataFrame(pd_df) + + bf_result = bf_df.idxmin().to_pandas() + pd_result = pd_df.idxmin() + + pd.testing.assert_series_equal( + bf_result, pd_result, check_index_type=False, check_dtype=False + ) + + +def test_df_idxmax(): + pd_df = pd.DataFrame( + {"a": [1, 2, 3], "b": [7, None, 3], "c": [4, 4, 4]}, index=["x", "y", "z"] + ) + bf_df = dataframe.DataFrame(pd_df) + + bf_result = bf_df.idxmax().to_pandas() + pd_result = pd_df.idxmax() + + pd.testing.assert_series_equal( + bf_result, pd_result, check_index_type=False, check_dtype=False + ) + + +@pytest.mark.parametrize( + ("join", "axis"), + [ + ("outer", None), + ("outer", 0), + ("outer", 1), + ("left", 0), + ("right", 1), + ("inner", None), + ("inner", 1), + ], +) +def test_df_align(join, axis): + index1 = pandas.Index([1, 2, 3, 4], dtype="Int64") + index2 = pandas.Index([1, 2, 4, 5], dtype="Int64") + pd_df1 = pandas.DataFrame( + {"a": [1, None, 3, 4], "b": [5, 6, None, 8]}, dtype="Int64", index=index1 + ) + pd_df2 = pandas.DataFrame( + {"a": [None, 20, 30, 40], "c": [90, None, 110, 120]}, + dtype="Int64", + index=index2, + ) + + bf_df1 = dataframe.DataFrame(pd_df1) + bf_df2 = dataframe.DataFrame(pd_df2) + + bf_result1, bf_result2 = bf_df1.align(bf_df2, join=join, axis=axis) + pd_result1, pd_result2 = pd_df1.align(pd_df2, join=join, axis=axis) + + # Don't check dtype as pandas does unnecessary float conversion + 
pd.testing.assert_frame_equal(bf_result1.to_pandas(), pd_result1, check_dtype=False) + pd.testing.assert_frame_equal(bf_result2.to_pandas(), pd_result2, check_dtype=False) + + def test_combine_first( scalars_df_index, scalars_df_2_index, @@ -1232,11 +1376,6 @@ def test_combine_first( pd_df_b.columns = ["b", "a", "d"] pd_result = pd_df_a.combine_first(pd_df_b) - print("pandas") - print(pd_result.to_string()) - print("bigframes") - print(bf_result.to_string()) - # Some dtype inconsistency for all-NULL columns pd.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) @@ -1705,6 +1844,26 @@ def test_df_stack(scalars_dfs): pd.testing.assert_series_equal(bf_result, pd_result, check_dtype=False) +def test_df_unstack(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + # To match bigquery dataframes + scalars_pandas_df = scalars_pandas_df.copy() + scalars_pandas_df.columns = scalars_pandas_df.columns.astype("string[pyarrow]") + # Can only stack identically-typed columns + columns = [ + "rowindex_2", + "int64_col", + "int64_too", + ] + + # unstack on mono-index produces series + bf_result = scalars_df[columns].unstack().to_pandas() + pd_result = scalars_pandas_df[columns].unstack() + + # Pandas produces NaN, where bq dataframes produces pd.NA + pd.testing.assert_series_equal(bf_result, pd_result, check_dtype=False) + + @pytest.mark.parametrize( ("values", "index", "columns"), [ @@ -1922,7 +2081,7 @@ def test_loc_single_index_no_duplicate(scalars_df_index, scalars_pandas_df_index bf_result = scalars_df_index.loc[index] pd_result = scalars_pandas_df_index.loc[index] pd.testing.assert_series_equal( - bf_result.to_pandas().iloc[0, :], + bf_result, pd_result, ) @@ -2439,6 +2598,24 @@ def test_iloc_list(scalars_df_index, scalars_pandas_df_index): ) +def test_iloc_list_multiindex(scalars_dfs): + scalars_df, scalars_pandas_df = scalars_dfs + scalars_df = scalars_df.copy() + scalars_pandas_df = scalars_pandas_df.copy() + scalars_df = scalars_df.set_index(["bytes_col", "numeric_col"]) + scalars_pandas_df = scalars_pandas_df.set_index(["bytes_col", "numeric_col"]) + + index_list = [0, 0, 0, 5, 4, 7] + + bf_result = scalars_df.iloc[index_list] + pd_result = scalars_pandas_df.iloc[index_list] + + pd.testing.assert_frame_equal( + bf_result.to_pandas(), + pd_result, + ) + + def test_iloc_empty_list(scalars_df_index, scalars_pandas_df_index): index_list = [] diff --git a/tests/system/small/test_multiindex.py b/tests/system/small/test_multiindex.py index 1e38b47b4c..19f1c557ef 100644 --- a/tests/system/small/test_multiindex.py +++ b/tests/system/small/test_multiindex.py @@ -41,6 +41,17 @@ def test_reset_multi_index(scalars_df_index, scalars_pandas_df_index): pandas.testing.assert_frame_equal(bf_result, pd_result) +def test_series_multi_index_idxmin(scalars_df_index, scalars_pandas_df_index): + bf_result = scalars_df_index.set_index(["bool_col", "int64_too"])[ + "float64_col" + ].idxmin() + pd_result = scalars_pandas_df_index.set_index(["bool_col", "int64_too"])[ + "float64_col" + ].idxmin() + + assert bf_result == pd_result + + def test_binop_series_series_matching_multi_indices( scalars_df_index, scalars_pandas_df_index ): @@ -729,6 +740,26 @@ def test_column_multi_index_stack(scalars_df_index, scalars_pandas_df_index): ) +def test_column_multi_index_unstack(scalars_df_index, scalars_pandas_df_index): + columns = ["int64_too", "int64_col", "rowindex_2"] + level1 = pandas.Index(["b", "a", "b"], dtype="string[pyarrow]") + # Need resulting column to be pyarrow string rather than object dtype + 
level2 = pandas.Index(["a", "b", "b"], dtype="string[pyarrow]") + multi_columns = pandas.MultiIndex.from_arrays([level1, level2]) + bf_df = scalars_df_index[columns].copy() + bf_df.columns = multi_columns + pd_df = scalars_pandas_df_index[columns].copy() + pd_df.columns = multi_columns + + bf_result = bf_df.unstack().to_pandas() + # Shifting sort behavior in stack + pd_result = pd_df.unstack() + + # Pandas produces NaN, where bq dataframes produces pd.NA + # Column ordering seems to depend on pandas version + pandas.testing.assert_series_equal(bf_result, pd_result, check_dtype=False) + + @pytest.mark.skip(reason="Pandas fails in newer versions.") def test_column_multi_index_w_na_stack(scalars_df_index, scalars_pandas_df_index): columns = ["int64_too", "int64_col", "rowindex_2"] @@ -866,6 +897,17 @@ def test_column_multi_index_reorder_levels(scalars_df_index, scalars_pandas_df_i pandas.testing.assert_frame_equal(bf_result, pd_result) +def test_multi_index_unstack(hockey_df, hockey_pandas_df): + bf_result = ( + hockey_df.set_index(["team_name", "season", "position"]).unstack().to_pandas() + ) + pd_result = hockey_pandas_df.set_index( + ["team_name", "season", "position"] + ).unstack() + + pandas.testing.assert_frame_equal(bf_result, pd_result, check_dtype=False) + + def test_column_multi_index_swaplevel(scalars_df_index, scalars_pandas_df_index): columns = ["int64_too", "string_col", "bool_col"] multi_columns = pandas.MultiIndex.from_tuples( diff --git a/tests/system/small/test_pandas_options.py b/tests/system/small/test_pandas_options.py index 6510c4fa27..956b29ae12 100644 --- a/tests/system/small/test_pandas_options.py +++ b/tests/system/small/test_pandas_options.py @@ -75,7 +75,7 @@ def test_read_gbq_start_sets_session_location( # Now read_gbq* from another location should fail with pytest.raises( google.api_core.exceptions.NotFound, - match=f"404 Not found: Dataset {dataset_id_permanent} was not found in location {tokyo_location}", + match=dataset_id_permanent, ): read_method(query) @@ -100,7 +100,7 @@ def test_read_gbq_start_sets_session_location( # Now read_gbq* from another location should fail with pytest.raises( google.api_core.exceptions.NotFound, - match=f"404 Not found: Dataset {dataset_id_permanent_tokyo} was not found in location US", + match=dataset_id_permanent_tokyo, ): read_method(query_tokyo) @@ -146,7 +146,7 @@ def test_read_gbq_after_session_start_must_comply_with_default_location( # Doing read_gbq* from a table in another location should fail with pytest.raises( google.api_core.exceptions.NotFound, - match=f"404 Not found: Dataset {dataset_id_permanent_tokyo} was not found in location US", + match=dataset_id_permanent_tokyo, ): read_method(query_tokyo) @@ -194,7 +194,7 @@ def test_read_gbq_must_comply_with_set_location_US( # Starting user journey with read_gbq* from another location should fail with pytest.raises( google.api_core.exceptions.NotFound, - match=f"404 Not found: Dataset {dataset_id_permanent_tokyo} was not found in location US", + match=dataset_id_permanent_tokyo, ): read_method(query_tokyo) @@ -244,7 +244,7 @@ def test_read_gbq_must_comply_with_set_location_non_US( # Starting user journey with read_gbq* from another location should fail with pytest.raises( google.api_core.exceptions.NotFound, - match=f"404 Not found: Dataset {dataset_id_permanent} was not found in location {tokyo_location}", + match=dataset_id_permanent, ): read_method(query) diff --git a/tests/system/small/test_series.py b/tests/system/small/test_series.py index 588dcc2c83..8c1c36720b 
100644 --- a/tests/system/small/test_series.py +++ b/tests/system/small/test_series.py @@ -118,7 +118,7 @@ def test_series_get_with_default_index(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs bf_result = scalars_df[col_name].get(key) pd_result = scalars_pandas_df[col_name].get(key) - assert bf_result.to_pandas().iloc[0] == pd_result + assert bf_result == pd_result @pytest.mark.parametrize( @@ -157,7 +157,7 @@ def test_series___getitem___with_default_index(scalars_dfs): scalars_df, scalars_pandas_df = scalars_dfs bf_result = scalars_df[col_name][key] pd_result = scalars_pandas_df[col_name][key] - assert bf_result.to_pandas().iloc[0] == pd_result + assert bf_result == pd_result @pytest.mark.parametrize( @@ -2468,6 +2468,18 @@ def test_argmax(scalars_df_index, scalars_pandas_df_index): assert bf_result == pd_result +def test_series_idxmin(scalars_df_index, scalars_pandas_df_index): + bf_result = scalars_df_index.string_col.idxmin() + pd_result = scalars_pandas_df_index.string_col.idxmin() + assert bf_result == pd_result + + +def test_series_idxmax(scalars_df_index, scalars_pandas_df_index): + bf_result = scalars_df_index.int64_too.idxmax() + pd_result = scalars_pandas_df_index.int64_too.idxmax() + assert bf_result == pd_result + + def test_getattr_attribute_error_when_pandas_has(scalars_df_index): # asof is implemented in pandas but not in bigframes with pytest.raises(AttributeError): @@ -2640,7 +2652,7 @@ def test_loc_single_index_no_duplicate(scalars_df_index, scalars_pandas_df_index index = -2345 bf_result = scalars_df_index.date_col.loc[index] pd_result = scalars_pandas_df_index.date_col.loc[index] - assert bf_result.to_pandas().iloc[0] == pd_result + assert bf_result == pd_result def test_series_bool_interpretation_error(scalars_df_index): diff --git a/tests/system/small/test_session.py b/tests/system/small/test_session.py index 614c953764..53ddfa3c49 100644 --- a/tests/system/small/test_session.py +++ b/tests/system/small/test_session.py @@ -57,6 +57,7 @@ def test_read_gbq_tokyo( ), pytest.param( """SELECT + t.int64_col + 1 as my_ints, t.float64_col * 2 AS my_floats, CONCAT(t.string_col, "_2") AS my_strings, t.int64_col > 0 AS my_bools, @@ -321,11 +322,10 @@ def test_read_pandas_multi_index(session, scalars_pandas_df_multi_index): def test_read_pandas_rowid_exists_adds_suffix(session, scalars_pandas_df_default_index): - scalars_pandas_df_default_index["rowid"] = np.arange( - scalars_pandas_df_default_index.shape[0] - ) + pandas_df = scalars_pandas_df_default_index.copy() + pandas_df["rowid"] = np.arange(pandas_df.shape[0]) - df = session.read_pandas(scalars_pandas_df_default_index) + df = session.read_pandas(pandas_df) total_order_col = df._block._expr._ordering.total_order_col assert total_order_col and total_order_col.column_id == "rowid_2" diff --git a/tests/unit/core/test_io.py b/tests/unit/core/test_io.py index c5074f80c2..afb38a5f75 100644 --- a/tests/unit/core/test_io.py +++ b/tests/unit/core/test_io.py @@ -13,8 +13,10 @@ # limitations under the License. import datetime +from typing import Iterable import google.cloud.bigquery as bigquery +import pytest import bigframes.core.io @@ -47,3 +49,56 @@ def test_create_snapshot_sql_doesnt_timetravel_session_datasets(): # Don't need the project ID for _SESSION tables. 
assert "my-test-project" not in sql + + +@pytest.mark.parametrize( + ("schema", "expected"), + ( + ( + [bigquery.SchemaField("My Column", "INTEGER")], + "`My Column` INT64", + ), + ( + [ + bigquery.SchemaField("My Column", "INTEGER"), + bigquery.SchemaField("Float Column", "FLOAT"), + bigquery.SchemaField("Bool Column", "BOOLEAN"), + ], + "`My Column` INT64, `Float Column` FLOAT64, `Bool Column` BOOL", + ), + ( + [ + bigquery.SchemaField("My Column", "INTEGER", mode="REPEATED"), + bigquery.SchemaField("Float Column", "FLOAT", mode="REPEATED"), + bigquery.SchemaField("Bool Column", "BOOLEAN", mode="REPEATED"), + ], + "`My Column` ARRAY, `Float Column` ARRAY, `Bool Column` ARRAY", + ), + ( + [ + bigquery.SchemaField( + "My Column", + "RECORD", + mode="REPEATED", + fields=( + bigquery.SchemaField("Float Column", "FLOAT", mode="REPEATED"), + bigquery.SchemaField("Bool Column", "BOOLEAN", mode="REPEATED"), + bigquery.SchemaField( + "Nested Column", + "RECORD", + fields=(bigquery.SchemaField("Int Column", "INTEGER"),), + ), + ), + ), + ], + ( + "`My Column` ARRAY," + + " `Bool Column` ARRAY," + + " `Nested Column` STRUCT<`Int Column` INT64>>>" + ), + ), + ), +) +def test_bq_schema_to_sql(schema: Iterable[bigquery.SchemaField], expected: str): + pass diff --git a/tests/unit/ml/test_compose.py b/tests/unit/ml/test_compose.py index 8c8fbd6ab5..60dcc75b63 100644 --- a/tests/unit/ml/test_compose.py +++ b/tests/unit/ml/test_compose.py @@ -23,6 +23,7 @@ def test_columntransformer_init_expectedtransforms(): standard_scaler_transformer = preprocessing.StandardScaler() max_abs_scaler_transformer = preprocessing.MaxAbsScaler() min_max_scaler_transformer = preprocessing.MinMaxScaler() + k_bins_discretizer_transformer = preprocessing.KBinsDiscretizer(strategy="uniform") label_transformer = preprocessing.LabelEncoder() column_transformer = compose.ColumnTransformer( [ @@ -42,6 +43,11 @@ def test_columntransformer_init_expectedtransforms(): min_max_scaler_transformer, ["culmen_length_mm", "flipper_length_mm"], ), + ( + "k_bins_discretizer", + k_bins_discretizer_transformer, + ["culmen_length_mm", "flipper_length_mm"], + ), ("label", label_transformer, "species"), ] ) @@ -54,6 +60,8 @@ def test_columntransformer_init_expectedtransforms(): ("max_abs_scale", max_abs_scaler_transformer, "flipper_length_mm"), ("min_max_scale", min_max_scaler_transformer, "culmen_length_mm"), ("min_max_scale", min_max_scaler_transformer, "flipper_length_mm"), + ("k_bins_discretizer", k_bins_discretizer_transformer, "culmen_length_mm"), + ("k_bins_discretizer", k_bins_discretizer_transformer, "flipper_length_mm"), ("label", label_transformer, "species"), ] @@ -81,6 +89,11 @@ def test_columntransformer_repr(): preprocessing.MinMaxScaler(), ["culmen_length_mm", "flipper_length_mm"], ), + ( + "k_bins_discretizer", + preprocessing.KBinsDiscretizer(strategy="uniform"), + ["culmen_length_mm", "flipper_length_mm"], + ), ] ) @@ -92,6 +105,9 @@ def test_columntransformer_repr(): ('max_abs_scale', MaxAbsScaler(), ['culmen_length_mm', 'flipper_length_mm']), ('min_max_scale', MinMaxScaler(), + ['culmen_length_mm', 'flipper_length_mm']), + ('k_bins_discretizer', + KBinsDiscretizer(strategy='uniform'), ['culmen_length_mm', 'flipper_length_mm'])])""" ) @@ -119,6 +135,11 @@ def test_columntransformer_repr_matches_sklearn(): preprocessing.MinMaxScaler(), ["culmen_length_mm", "flipper_length_mm"], ), + ( + "k_bins_discretizer", + preprocessing.KBinsDiscretizer(strategy="uniform"), + ["culmen_length_mm", "flipper_length_mm"], + ), ] ) 
sk_column_transformer = sklearn_compose.ColumnTransformer( @@ -143,6 +164,11 @@ def test_columntransformer_repr_matches_sklearn(): sklearn_preprocessing.MinMaxScaler(), ["culmen_length_mm", "flipper_length_mm"], ), + ( + "k_bins_discretizer", + sklearn_preprocessing.KBinsDiscretizer(strategy="uniform"), + ["culmen_length_mm", "flipper_length_mm"], + ), ] ) diff --git a/tests/unit/ml/test_sql.py b/tests/unit/ml/test_sql.py index a3338e762d..34a02edd42 100644 --- a/tests/unit/ml/test_sql.py +++ b/tests/unit/ml/test_sql.py @@ -95,6 +95,13 @@ def test_min_max_scaler_produces_correct_sql( assert sql == "ML.MIN_MAX_SCALER(col_a) OVER() AS scaled_col_a" +def test_k_bins_discretizer_produces_correct_sql( + base_sql_generator: ml_sql.BaseSqlGenerator, +): + sql = base_sql_generator.ml_bucketize("col_a", [1, 2, 3, 4], "scaled_col_a") + assert sql == "ML.BUCKETIZE(col_a, [1, 2, 3, 4], FALSE) AS scaled_col_a" + + def test_one_hot_encoder_produces_correct_sql( base_sql_generator: ml_sql.BaseSqlGenerator, ): diff --git a/tests/unit/test_dtypes.py b/tests/unit/test_dtypes.py index bb8ae570dc..3baff2e1f5 100644 --- a/tests/unit/test_dtypes.py +++ b/tests/unit/test_dtypes.py @@ -85,6 +85,70 @@ def test_ibis_float32_raises_unexpected_datatype(): bigframes.dtypes.ibis_dtype_to_bigframes_dtype(ibis_dtypes.float32) +IBIS_ARROW_DTYPES = ( + (ibis_dtypes.boolean, pa.bool_()), + (ibis_dtypes.date, pa.date32()), + (ibis_dtypes.Timestamp(), pa.timestamp("us")), + (ibis_dtypes.float64, pa.float64()), + ( + ibis_dtypes.Timestamp(timezone="UTC"), + pa.timestamp("us", tz="UTC"), + ), + ( + ibis_dtypes.Struct.from_tuples( + [ + ("name", ibis_dtypes.string()), + ("version", ibis_dtypes.int64()), + ] + ), + pa.struct( + [ + ("name", pa.string()), + ("version", pa.int64()), + ] + ), + ), + ( + ibis_dtypes.Struct.from_tuples( + [ + ( + "nested", + ibis_dtypes.Struct.from_tuples( + [ + ("field", ibis_dtypes.string()), + ] + ), + ), + ] + ), + pa.struct( + [ + ( + "nested", + pa.struct( + [ + ("field", pa.string()), + ] + ), + ), + ] + ), + ), +) + + +@pytest.mark.parametrize(("ibis_dtype", "arrow_dtype"), IBIS_ARROW_DTYPES) +def test_arrow_dtype_to_ibis_dtype(ibis_dtype, arrow_dtype): + result = bigframes.dtypes.arrow_dtype_to_ibis_dtype(arrow_dtype) + assert result == ibis_dtype + + +@pytest.mark.parametrize(("ibis_dtype", "arrow_dtype"), IBIS_ARROW_DTYPES) +def test_ibis_dtype_to_arrow_dtype(ibis_dtype, arrow_dtype): + result = bigframes.dtypes.ibis_dtype_to_arrow_dtype(ibis_dtype) + assert result == arrow_dtype + + @pytest.mark.parametrize( ["bigframes_dtype", "ibis_dtype"], [ diff --git a/third_party/bigframes_vendored/pandas/core/arrays/__init__.py b/third_party/bigframes_vendored/pandas/core/arrays/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/third_party/bigframes_vendored/pandas/core/arrays/arrow/__init__.py b/third_party/bigframes_vendored/pandas/core/arrays/arrow/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py new file mode 100644 index 0000000000..8e3ea06a3d --- /dev/null +++ b/third_party/bigframes_vendored/pandas/core/arrays/arrow/accessors.py @@ -0,0 +1,94 @@ +# Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/arrays/arrow/accessors.py +"""Accessors for arrow-backed data.""" + +from __future__ import annotations + +from bigframes import constants + + +class 
StructAccessor: + """ + Accessor object for structured data properties of the Series values. + """ + + def field(self, name_or_index: str | int): + """ + Extract a child field of a struct as a Series. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import pyarrow as pa + >>> bpd.options.display.progress_bar = None + >>> s = bpd.Series( + ... [ + ... {"version": 1, "project": "pandas"}, + ... {"version": 2, "project": "pandas"}, + ... {"version": 1, "project": "numpy"}, + ... ], + ... dtype=bpd.ArrowDtype(pa.struct( + ... [("version", pa.int64()), ("project", pa.string())] + ... )) + ... ) + + Extract by field name. + + >>> s.struct.field("project") + 0 pandas + 1 pandas + 2 numpy + Name: project, dtype: string + + Extract by field index. + + >>> s.struct.field(0) + 0 1 + 1 2 + 2 1 + Name: version, dtype: Int64 + + Args: + name_or_index: + Name (str) or index (int) of the child field to extract. + + Returns: + Series: + The data corresponding to the selected child field. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def explode(self): + """ + Extract all child fields of a struct as a DataFrame. + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> import pyarrow as pa + >>> bpd.options.display.progress_bar = None + >>> s = bpd.Series( + ... [ + ... {"version": 1, "project": "pandas"}, + ... {"version": 2, "project": "pandas"}, + ... {"version": 1, "project": "numpy"}, + ... ], + ... dtype=bpd.ArrowDtype(pa.struct( + ... [("version", pa.int64()), ("project", pa.string())] + ... )) + ... ) + + Extract all child fields. + + >>> s.struct.explode() + version project + 0 1 pandas + 1 2 pandas + 2 1 numpy + + [3 rows x 2 columns] + + Returns: + DataFrame: + The data corresponding to all child fields. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) diff --git a/third_party/bigframes_vendored/pandas/core/frame.py b/third_party/bigframes_vendored/pandas/core/frame.py index 6ce11cd7e9..17d941fbdd 100644 --- a/third_party/bigframes_vendored/pandas/core/frame.py +++ b/third_party/bigframes_vendored/pandas/core/frame.py @@ -503,6 +503,35 @@ def drop( """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def align( + self, + other, + join="outer", + axis=None, + ) -> tuple: + """ + Align two objects on their axes with the specified join method. + + Join method is specified for each axis Index. + + Args: + other (DataFrame or Series): + join ({{'outer', 'inner', 'left', 'right'}}, default 'outer'): + Type of alignment to be performed. + left: use only keys from left frame, preserve key order. + right: use only keys from right frame, preserve key order. + outer: use union of keys from both frames, sort keys lexicographically. + inner: use intersection of keys from both frames, + preserve the order of the left keys. + + axis (allowed axis of the other object, default None): + Align on index (0), columns (1), or both (None). + + Returns: + tuple of (DataFrame, type of other): Aligned objects. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def rename( self, *, @@ -1265,6 +1294,39 @@ def combine_first(self, other) -> DataFrame: """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def update( + self, other, join: str = "left", overwrite: bool = True, filter_func=None + ) -> DataFrame: + """ + Modify in place using non-NA values from another DataFrame. + + Aligns on indices. There is no return value. 
+ + Args: + other (DataFrame, or object coercible into a DataFrame): + Should have at least one matching index/column label + with the original DataFrame. If a Series is passed, + its name attribute must be set, and that will be + used as the column name to align with the original DataFrame. + join ({'left'}, default 'left'): + Only left join is implemented, keeping the index and columns of the + original object. + overwrite (bool, default True): + How to handle non-NA values for overlapping keys: + True: overwrite original DataFrame's values + with values from `other`. + False: only update values that are NA in + the original DataFrame. + + filter_func (callable(1d-array) -> bool 1d-array, optional): + Can choose to replace values other than NA. Return True for values + that should be updated. + + Returns: + None: This method directly changes calling object. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + # ---------------------------------------------------------------------- # Data reshaping @@ -1406,14 +1468,14 @@ def merge( ``inner``: use intersection of keys from both frames, similar to a SQL inner join; preserve the order of the left keys. - on: - Column join on. It must be found in both DataFrames. Either on or left_on + right_on + on (label or list of labels): + Columns to join on. It must be found in both DataFrames. Either on or left_on + right_on must be passed in. - left_on: - Column join on in the left DataFrame. Either on or left_on + right_on + left_on (label or list of labels): + Columns to join on in the left DataFrame. Either on or left_on + right_on must be passed in. - right_on: - Column join on in the right DataFrame. Either on or left_on + right_on + right_on (label or list of labels): + Columns to join on in the right DataFrame. Either on or left_on + right_on must be passed in. sort: Default False. Sort the join keys lexicographically in the @@ -1743,6 +1805,28 @@ def nsmallest(self, n: int, columns, keep: str = "first"): """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def idxmin(self): + """ + Return index of first occurrence of minimum over requested axis. + + NA/null values are excluded. + + Returns: + Series: Indexes of minima along the specified axis. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def idxmax(self): + """ + Return index of first occurrence of maximum over requested axis. + + NA/null values are excluded. + + Returns: + Series: Indexes of maxima along the specified axis. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def nunique(self): """ Count number of distinct elements in specified axis. @@ -1910,6 +1994,21 @@ def stack(self): """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def unstack(self): + """ + Pivot a level of the (necessarily hierarchical) index labels. + + Returns a DataFrame having a new level of column labels whose inner-most level + consists of the pivoted index labels. + + If the index is not a MultiIndex, the output will be a Series + (the analogue of stack when the columns are not a MultiIndex). 
+ + Returns: + DataFrame or Series + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + # ---------------------------------------------------------------------- # Add index and columns diff --git a/third_party/bigframes_vendored/pandas/core/reshape/merge.py b/third_party/bigframes_vendored/pandas/core/reshape/merge.py index ee02d698da..cc81de405b 100644 --- a/third_party/bigframes_vendored/pandas/core/reshape/merge.py +++ b/third_party/bigframes_vendored/pandas/core/reshape/merge.py @@ -51,14 +51,14 @@ def merge( ``inner``: use intersection of keys from both frames, similar to a SQL inner join; preserve the order of the left keys. - on: - Column join on. It must be found in both DataFrames. Either on or left_on + right_on + on (label or list of labels): + Columns to join on. It must be found in both DataFrames. Either on or left_on + right_on must be passed in. - left_on: - Column join on in the left DataFrame. Either on or left_on + right_on + left_on (label or list of labels): + Columns to join on in the left DataFrame. Either on or left_on + right_on must be passed in. - right_on: - Column join on in the right DataFrame. Either on or left_on + right_on + right_on (label or list of labels): + Columns to join on in the right DataFrame. Either on or left_on + right_on must be passed in. sort: Default False. Sort the join keys lexicographically in the diff --git a/third_party/bigframes_vendored/pandas/core/series.py b/third_party/bigframes_vendored/pandas/core/series.py index d58c1ccc3b..a41a3454ca 100644 --- a/third_party/bigframes_vendored/pandas/core/series.py +++ b/third_party/bigframes_vendored/pandas/core/series.py @@ -8,7 +8,6 @@ import numpy as np from pandas._libs import lib from pandas._typing import Axis, FilePath, NaPosition, WriteBuffer -import pandas.io.formats.format as fmt from bigframes import constants from third_party.bigframes_vendored.pandas.core.generic import NDFrame @@ -151,21 +150,6 @@ def to_string( str or None: String representation of Series if ``buf=None``, otherwise None. """ - formatter = fmt.SeriesFormatter( - self, - name=name, - length=length, - header=header, - index=index, - dtype=dtype, - na_rep=na_rep, - float_format=float_format, - min_rows=min_rows, - max_rows=max_rows, - ) - result = formatter.to_string() - - # catch contract violations raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) def to_markdown( @@ -475,6 +459,30 @@ def duplicated(self, keep="first") -> Series: """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def idxmin(self) -> Hashable: + """ + Return the row label of the minimum value. + + If multiple values equal the minimum, the first row label with that + value is returned. + + Returns: + Index: Label of the minimum value. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def idxmax(self) -> Hashable: + """ + Return the row label of the maximum value. + + If multiple values equal the maximum, the first row label with that + value is returned. + + Returns: + Index: Label of the maximum value. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + def round(self, decimals: int = 0) -> Series: """ Round each value in a Series to the given number of decimals. 
diff --git a/third_party/bigframes_vendored/sklearn/__init__.py b/third_party/bigframes_vendored/sklearn/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/third_party/bigframes_vendored/sklearn/ensemble/__init__.py b/third_party/bigframes_vendored/sklearn/ensemble/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/third_party/bigframes_vendored/sklearn/preprocessing/_discretization.py b/third_party/bigframes_vendored/sklearn/preprocessing/_discretization.py
new file mode 100644
index 0000000000..0236558dd4
--- /dev/null
+++ b/third_party/bigframes_vendored/sklearn/preprocessing/_discretization.py
@@ -0,0 +1,47 @@
+# Author: Henry Lin
+#         Tom Dupré la Tour
+
+# License: BSD
+
+from bigframes import constants
+from third_party.bigframes_vendored.sklearn.base import BaseEstimator, TransformerMixin
+
+
+class KBinsDiscretizer(TransformerMixin, BaseEstimator):
+    """
+    Bin continuous data into intervals.
+
+    Args:
+        n_bins (int, default 5):
+            The number of bins to produce. Raises ValueError if ``n_bins < 2``.
+        strategy ({'uniform', 'quantile'}, default='quantile'):
+            Strategy used to define the widths of the bins. 'uniform': All bins
+            in each feature have identical widths. 'quantile': All bins in each
+            feature have the same number of points. Only 'uniform' is currently supported.
+    """
+
+    def fit(self, X, y=None):
+        """Fit the estimator.
+
+        Args:
+            X (bigframes.dataframe.DataFrame or bigframes.series.Series):
+                The DataFrame or Series with training data.
+
+            y (default None):
+                Ignored.
+
+        Returns:
+            KBinsDiscretizer: Fitted discretizer.
+        """
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
+
+    def transform(self, X):
+        """Discretize the data.
+
+        Args:
+            X (bigframes.dataframe.DataFrame or bigframes.series.Series):
+                The DataFrame or Series to be transformed.
+
+        Returns:
+            bigframes.dataframe.DataFrame: Transformed result."""
+        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
diff --git a/third_party/bigframes_vendored/xgboost/__init__.py b/third_party/bigframes_vendored/xgboost/__init__.py
new file mode 100644
index 0000000000..e69de29bb2