googleapis · gcf-merge-on-green · Oct 18, 2023 · Sep 28, 2023 · Sep 29, 2023 · Oct 2, 2023
@@ -374,7 +374,9 @@ def _to_dataframe(
         cls, result, schema: typing.Mapping[str, bigframes.dtypes.Dtype]
     ) -> pd.DataFrame:
         """Convert BigQuery data to pandas DataFrame with specific dtypes."""
+        dtypes = bigframes.dtypes.to_pandas_dtypes_overrides(result.schema)
         df = result.to_dataframe(
+            dtypes=dtypes,
             bool_dtype=pd.BooleanDtype(),
             int_dtype=pd.Int64Dtype(),
             float_dtype=pd.Float64Dtype(),

@@ -19,6 +19,8 @@
 from typing import Any, Dict, Iterable, Literal, Tuple, Union
 
 import geopandas as gpd  # type: ignore
+import google.cloud.bigquery as bigquery
+import google.cloud.bigquery._pandas_helpers
 import ibis
 import ibis.expr.datatypes as ibis_dtypes
 import ibis.expr.types as ibis_types
@@ -401,3 +403,18 @@ def cast_ibis_value(
     raise TypeError(
         f"Unsupported cast {value.type()} to {to_type}. {constants.FEEDBACK_LINK}"
     )
+
+
+def to_pandas_dtypes_overrides(schema: Iterable[bigquery.SchemaField]) -> Dict:
+    """Build per-column dtype overrides for non-repeated STRUCT (RECORD) fields.
+
+    Args:
+        schema: BigQuery schema fields to inspect.
+
+    Returns:
+        A dict mapping the name of each non-repeated RECORD field to a
+        ``pd.ArrowDtype`` wrapping the Arrow type computed for that field.
+        Fields for which no Arrow type was produced are left out, so no
+        override is specified for them.
+    """
+    # TODO(swast): Also override ARRAY fields.
+    dtypes = {}
+    for field in schema:
+        # Only non-repeated RECORD columns get an override.
+        if field.field_type != "RECORD" or field.mode == "REPEATED":
+            continue
+        # TODO(swast): We're using a private API here. Would likely be
+        # better if we called `to_arrow()` and converted to a pandas
+        # DataFrame ourselves from that.
+        arrow_type = google.cloud.bigquery._pandas_helpers.bq_to_arrow_data_type(
+            field
+        )
+        # pd.ArrowDtype only accepts a pyarrow.DataType, so skip the override
+        # rather than raise when the helper yields nothing usable.
+        # NOTE(review): assumes the private helper signals "unknown type" by
+        # returning None -- confirm against google-cloud-bigquery.
+        if arrow_type is None:
+            continue
+        dtypes[field.name] = pd.ArrowDtype(arrow_type)
+    return dtypes
@@ -16,6 +16,7 @@
 
 import google.api_core.exceptions
 import pandas as pd
+import pyarrow as pa
 import pytest
 
 from tests.system.utils import (
@@ -44,7 +45,7 @@ def test_to_pandas_w_correct_dtypes(scalars_df_default_index):
 
 
 def test_to_pandas_array_struct_correct_result(session):
-    """In future, we should support arrays and structs with arrow types.
+    """In future, we should support arrays with arrow types.
     For now we fall back to the current connector behavior of converting
     to Python objects"""
     df = session.read_gbq(
@@ -59,11 +60,27 @@ def test_to_pandas_array_struct_correct_result(session):
     expected = pd.DataFrame(
         {
             "array_column": [[1, 3, 2]],
-            "struct_column": [{"string_field": "a", "float_field": 1.2}],
+            "struct_column": pd.Series(
+                [{"string_field": "a", "float_field": 1.2}],
+                dtype=pd.ArrowDtype(
+                    pa.struct(
+                        [
+                            ("string_field", pa.string()),
+                            ("float_field", pa.float64()),
+                        ]
+                    )
+                ),
+            ),
         }
     )
     expected.index = expected.index.astype("Int64")
-    pd.testing.assert_frame_equal(result, expected)
+    pd.testing.assert_series_equal(result.dtypes, expected.dtypes)
+    pd.testing.assert_series_equal(result["array_column"], expected["array_column"])
+    # assert_series_equal not implemented for struct columns yet. Compare
+    # values as Python objects, instead.
+    pd.testing.assert_series_equal(
+        result["struct_column"].astype("O"), expected["struct_column"].astype("O")
+    )
 
 
 @pytest.mark.parametrize(