Skip to content

Commit 5bb45ba

Browse files
fix: handle multi-level columns for df aggregates properly (#305)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
1 parent aac35a3 commit 5bb45ba

File tree

2 files changed

+30
-2
lines changed

2 files changed

+30
-2
lines changed

bigframes/core/blocks.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -855,13 +855,21 @@ def aggregate_all_and_stack(
855855
aggregations = [
856856
(col_id, operation, col_id) for col_id in self.value_columns
857857
]
858+
index_col_ids = [
859+
guid.generate_guid() for i in range(self.column_labels.nlevels)
860+
]
858861
result_expr = self.expr.aggregate(aggregations, dropna=dropna).unpivot(
859862
row_labels=self.column_labels.to_list(),
860-
index_col_ids=["index"],
863+
index_col_ids=index_col_ids,
861864
unpivot_columns=tuple([(value_col_id, tuple(self.value_columns))]),
862865
dtype=dtype,
863866
)
864-
return Block(result_expr, index_columns=["index"], column_labels=[None])
867+
return Block(
868+
result_expr,
869+
index_columns=index_col_ids,
870+
column_labels=[None],
871+
index_labels=self.column_labels.names,
872+
)
865873
else: # axis_n == 1
866874
# using offsets as identity to group on.
867875
# TODO: Allow to promote identity/total_order columns instead for better perf

tests/system/small/test_multiindex.py

+20
Original file line numberDiff line numberDiff line change
@@ -713,6 +713,26 @@ def test_column_multi_index_binary_op(scalars_df_index, scalars_pandas_df_index)
713713
pandas.testing.assert_series_equal(bf_result, pd_result)
714714

715715

716+
@skip_legacy_pandas
def test_column_multi_index_any():
    """Check that ``isna().any()`` on a frame with two-level columns matches pandas.

    The same data is loaded into a pandas DataFrame and a BigQuery DataFrames
    DataFrame; the column-wise ``any`` results are compared after moving the
    result index into ordinary columns.
    """
    # Note: the first two columns carry the same (level 0, level 1) label pair.
    column_tuples = [("col0", "col00"), ("col0", "col00"), ("col1", "col11")]
    rows = [[0, 1, 2] for _ in range(4)]

    pd_df = pandas.DataFrame(
        rows, columns=pandas.MultiIndex.from_tuples(column_tuples)
    )
    bf_df = bpd.DataFrame(pd_df)

    expected = pd_df.isna().any()
    actual = bf_df.isna().any().to_pandas()

    # Both results are compared as frames (index levels become columns);
    # dtype differences between the two libraries are ignored.
    pandas.testing.assert_frame_equal(
        actual.reset_index(drop=False),
        expected.reset_index(drop=False),
        check_dtype=False,
    )
734+
735+
716736
def test_column_multi_index_agg(scalars_df_index, scalars_pandas_df_index):
717737
columns = ["int64_too", "int64_col", "float64_col"]
718738
multi_columns = pandas.MultiIndex.from_tuples(zip(["a", "b", "a"], ["a", "b", "b"]))

0 commit comments

Comments
 (0)