fix: handle multi-level columns for df aggregates properly (#305)

TrevorBergeron · web-flow · commit 5bb45ba5560f · 2024-01-10T01:20:16.000Z
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
diff --git a/bigframes/core/blocks.py b/bigframes/core/blocks.py
@@ -855,13 +855,21 @@ def aggregate_all_and_stack(
             aggregations = [
                 (col_id, operation, col_id) for col_id in self.value_columns
             ]
+            index_col_ids = [
+                guid.generate_guid() for i in range(self.column_labels.nlevels)
+            ]
             result_expr = self.expr.aggregate(aggregations, dropna=dropna).unpivot(
                 row_labels=self.column_labels.to_list(),
-                index_col_ids=["index"],
+                index_col_ids=index_col_ids,
                 unpivot_columns=tuple([(value_col_id, tuple(self.value_columns))]),
                 dtype=dtype,
             )
-            return Block(result_expr, index_columns=["index"], column_labels=[None])
+            return Block(
+                result_expr,
+                index_columns=index_col_ids,
+                column_labels=[None],
+                index_labels=self.column_labels.names,
+            )
         else:  # axis_n == 1
             # using offsets as identity to group on.
             # TODO: Allow to promote identity/total_order columns instead for better perf
diff --git a/tests/system/small/test_multiindex.py b/tests/system/small/test_multiindex.py
@@ -713,6 +713,26 @@ def test_column_multi_index_binary_op(scalars_df_index, scalars_pandas_df_index)
     pandas.testing.assert_series_equal(bf_result, pd_result)
 
 
+@skip_legacy_pandas
+def test_column_multi_index_any():
+    columns = pandas.MultiIndex.from_tuples(
+        [("col0", "col00"), ("col0", "col00"), ("col1", "col11")]
+    )
+    pd_df = pandas.DataFrame(
+        [[0, 1, 2], [0, 1, 2], [0, 1, 2], [0, 1, 2]], columns=columns
+    )
+    bf_df = bpd.DataFrame(pd_df)
+
+    pd_result = pd_df.isna().any()
+    bf_result = bf_df.isna().any().to_pandas()
+
+    pandas.testing.assert_frame_equal(
+        bf_result.reset_index(drop=False),
+        pd_result.reset_index(drop=False),
+        check_dtype=False,
+    )
+
+
 def test_column_multi_index_agg(scalars_df_index, scalars_pandas_df_index):
     columns = ["int64_too", "int64_col", "float64_col"]
     multi_columns = pandas.MultiIndex.from_tuples(zip(["a", "b", "a"], ["a", "b", "b"]))