Skip to content

Commit 52b7786

Browse files
fix: Fix caching from generating row numbers in partial ordering mode (#872)
1 parent 5317327 commit 52b7786

File tree

3 files changed

+18
-3
lines changed

3 files changed

+18
-3
lines changed

bigframes/series.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -641,7 +641,7 @@ def head(self, n: int = 5) -> Series:
641641
def tail(self, n: int = 5) -> Series:
642642
return typing.cast(Series, self.iloc[-n:])
643643

644-
def peek(self, n: int = 5, *, force: bool = True) -> pandas.DataFrame:
644+
def peek(self, n: int = 5, *, force: bool = True) -> pandas.Series:
645645
"""
646646
Preview n arbitrary elements from the series without guarantees about row selection or ordering.
647647

bigframes/session/__init__.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1997,8 +1997,10 @@ def _cache_with_session_awareness(self, array_value: core.ArrayValue) -> None:
19971997
)
19981998
if len(cluster_cols) > 0:
19991999
self._cache_with_cluster_cols(core.ArrayValue(target), cluster_cols)
2000-
else:
2000+
elif self._strictly_ordered:
20012001
self._cache_with_offsets(core.ArrayValue(target))
2002+
else:
2003+
self._cache_with_cluster_cols(core.ArrayValue(target), [])
20022004

20032005
def _simplify_with_caching(self, array_value: core.ArrayValue):
20042006
"""Attempts to handle the complexity by caching duplicated subtrees and breaking the query into pieces."""

tests/system/small/test_unordered.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,11 @@
1919

2020
import bigframes.exceptions
2121
import bigframes.pandas as bpd
22-
from tests.system.utils import assert_pandas_df_equal, skip_legacy_pandas
22+
from tests.system.utils import (
23+
assert_pandas_df_equal,
24+
assert_series_equal,
25+
skip_legacy_pandas,
26+
)
2327

2428

2529
def test_unordered_mode_sql_no_hash(unordered_session):
@@ -51,6 +55,15 @@ def test_unordered_mode_cache_aggregate(unordered_session):
5155
assert_pandas_df_equal(bf_result, pd_result, ignore_order=True)
5256

5357

58+
def test_unordered_mode_series_peek(unordered_session):
59+
pd_series = pd.Series([1, 2, 3, 4, 5, 6], dtype=pd.Int64Dtype())
60+
bf_series = bpd.Series(pd_series, session=unordered_session)
61+
pd_result = pd_series.groupby(pd_series % 4).sum()
62+
bf_peek = bf_series.groupby(bf_series % 4).sum().peek(2)
63+
64+
assert_series_equal(bf_peek, pd_result.reindex(bf_peek.index))
65+
66+
5467
def test_unordered_mode_single_aggregate(unordered_session):
5568
pd_df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, dtype=pd.Int64Dtype())
5669
bf_df = bpd.DataFrame(pd_df, session=unordered_session)

0 commit comments

Comments
 (0)