Skip to content

Commit ec5b068

Browse files
refactor: Decorate api methods that require total ordering (#802)
* refactor: label all apis that use total ordering
* reorganize flags
* fix config reference and add another test
* fix broken attribute reference
* add _session property to groupby objects
* restore session_tokyo fixture
* add docstring for OrderRequiredError
* add _session property to index object
* handled methods where only some parameterizations need total order
* fix inverted validation
* import validations module and not functions
* mark some analytic ops as order independent
* rename validate.py to validations.py
* docstring for WindowSpec.row_bounded
* Apply suggestions from code review

---------

Co-authored-by: Tim Sweña (Swast) <[email protected]>
1 parent 1e3feda commit ec5b068

File tree

11 files changed

+264
-7
lines changed

11 files changed

+264
-7
lines changed

bigframes/core/__init__.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -344,10 +344,12 @@ def project_window_op(
344344
never_skip_nulls: will disable null skipping for operators that would otherwise do so
345345
skip_reproject_unsafe: skips the reprojection step, can be used when performing many non-dependent window operations, user responsible for not nesting window expressions, or using outputs as join, filter or aggregation keys before a reprojection
346346
"""
347-
if not self.session._strictly_ordered:
348-
# TODO: Support unbounded windows with aggregate ops and some row-order-independent analytic ops
349-
# TODO: Support non-deterministic windowing
350-
raise ValueError("Windowed ops not supported in unordered mode")
347+
# TODO: Support non-deterministic windowing
348+
if window_spec.row_bounded or not op.order_independent:
349+
if not self.session._strictly_ordered:
350+
raise ValueError(
351+
"Order-dependent windowed ops not supported in unordered mode"
352+
)
351353
return ArrayValue(
352354
nodes.WindowOpNode(
353355
child=self.node,

bigframes/core/groupby/__init__.py

+26
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import bigframes.core.blocks as blocks
2828
import bigframes.core.ordering as order
2929
import bigframes.core.utils as utils
30+
import bigframes.core.validations as validations
3031
import bigframes.core.window as windows
3132
import bigframes.core.window_spec as window_specs
3233
import bigframes.dataframe as df
@@ -72,6 +73,10 @@ def __init__(
7273
if col_id not in self._by_col_ids
7374
]
7475

76+
@property
77+
def _session(self) -> core.Session:
78+
return self._block.session
79+
7580
def __getitem__(
7681
self,
7782
key: typing.Union[
@@ -229,20 +234,25 @@ def count(self) -> df.DataFrame:
229234
def nunique(self) -> df.DataFrame:
230235
return self._aggregate_all(agg_ops.nunique_op)
231236

237+
@validations.requires_strict_ordering()
232238
def cumsum(self, *args, numeric_only: bool = False, **kwargs) -> df.DataFrame:
233239
if not numeric_only:
234240
self._raise_on_non_numeric("cumsum")
235241
return self._apply_window_op(agg_ops.sum_op, numeric_only=True)
236242

243+
@validations.requires_strict_ordering()
237244
def cummin(self, *args, numeric_only: bool = False, **kwargs) -> df.DataFrame:
238245
return self._apply_window_op(agg_ops.min_op, numeric_only=numeric_only)
239246

247+
@validations.requires_strict_ordering()
240248
def cummax(self, *args, numeric_only: bool = False, **kwargs) -> df.DataFrame:
241249
return self._apply_window_op(agg_ops.max_op, numeric_only=numeric_only)
242250

251+
@validations.requires_strict_ordering()
243252
def cumprod(self, *args, **kwargs) -> df.DataFrame:
244253
return self._apply_window_op(agg_ops.product_op, numeric_only=True)
245254

255+
@validations.requires_strict_ordering()
246256
def shift(self, periods=1) -> series.Series:
247257
window = window_specs.rows(
248258
grouping_keys=tuple(self._by_col_ids),
@@ -251,6 +261,7 @@ def shift(self, periods=1) -> series.Series:
251261
)
252262
return self._apply_window_op(agg_ops.ShiftOp(periods), window=window)
253263

264+
@validations.requires_strict_ordering()
254265
def diff(self, periods=1) -> series.Series:
255266
window = window_specs.rows(
256267
grouping_keys=tuple(self._by_col_ids),
@@ -259,6 +270,7 @@ def diff(self, periods=1) -> series.Series:
259270
)
260271
return self._apply_window_op(agg_ops.DiffOp(periods), window=window)
261272

273+
@validations.requires_strict_ordering()
262274
def rolling(self, window: int, min_periods=None) -> windows.Window:
263275
# To get n size window, need current row and n-1 preceding rows.
264276
window_spec = window_specs.rows(
@@ -274,6 +286,7 @@ def rolling(self, window: int, min_periods=None) -> windows.Window:
274286
block, window_spec, self._selected_cols, drop_null_groups=self._dropna
275287
)
276288

289+
@validations.requires_strict_ordering()
277290
def expanding(self, min_periods: int = 1) -> windows.Window:
278291
window_spec = window_specs.cumulative_rows(
279292
grouping_keys=tuple(self._by_col_ids),
@@ -514,6 +527,10 @@ def __init__(
514527
self._value_name = value_name
515528
self._dropna = dropna # Applies to aggregations but not windowing
516529

530+
@property
531+
def _session(self) -> core.Session:
532+
return self._block.session
533+
517534
def head(self, n: int = 5) -> series.Series:
518535
block = self._block
519536
if self._dropna:
@@ -631,26 +648,31 @@ def agg(self, func=None) -> typing.Union[df.DataFrame, series.Series]:
631648

632649
aggregate = agg
633650

651+
@validations.requires_strict_ordering()
634652
def cumsum(self, *args, **kwargs) -> series.Series:
635653
return self._apply_window_op(
636654
agg_ops.sum_op,
637655
)
638656

657+
@validations.requires_strict_ordering()
639658
def cumprod(self, *args, **kwargs) -> series.Series:
640659
return self._apply_window_op(
641660
agg_ops.product_op,
642661
)
643662

663+
@validations.requires_strict_ordering()
644664
def cummax(self, *args, **kwargs) -> series.Series:
645665
return self._apply_window_op(
646666
agg_ops.max_op,
647667
)
648668

669+
@validations.requires_strict_ordering()
649670
def cummin(self, *args, **kwargs) -> series.Series:
650671
return self._apply_window_op(
651672
agg_ops.min_op,
652673
)
653674

675+
@validations.requires_strict_ordering()
654676
def cumcount(self, *args, **kwargs) -> series.Series:
655677
return (
656678
self._apply_window_op(
@@ -660,6 +682,7 @@ def cumcount(self, *args, **kwargs) -> series.Series:
660682
- 1
661683
)
662684

685+
@validations.requires_strict_ordering()
663686
def shift(self, periods=1) -> series.Series:
664687
"""Shift index by desired number of periods."""
665688
window = window_specs.rows(
@@ -669,6 +692,7 @@ def shift(self, periods=1) -> series.Series:
669692
)
670693
return self._apply_window_op(agg_ops.ShiftOp(periods), window=window)
671694

695+
@validations.requires_strict_ordering()
672696
def diff(self, periods=1) -> series.Series:
673697
window = window_specs.rows(
674698
grouping_keys=tuple(self._by_col_ids),
@@ -677,6 +701,7 @@ def diff(self, periods=1) -> series.Series:
677701
)
678702
return self._apply_window_op(agg_ops.DiffOp(periods), window=window)
679703

704+
@validations.requires_strict_ordering()
680705
def rolling(self, window: int, min_periods=None) -> windows.Window:
681706
# To get n size window, need current row and n-1 preceding rows.
682707
window_spec = window_specs.rows(
@@ -696,6 +721,7 @@ def rolling(self, window: int, min_periods=None) -> windows.Window:
696721
is_series=True,
697722
)
698723

724+
@validations.requires_strict_ordering()
699725
def expanding(self, min_periods: int = 1) -> windows.Window:
700726
window_spec = window_specs.cumulative_rows(
701727
grouping_keys=tuple(self._by_col_ids),

bigframes/core/indexes/base.py

+11
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import bigframes.core.expression as ex
3131
import bigframes.core.ordering as order
3232
import bigframes.core.utils as utils
33+
import bigframes.core.validations as validations
3334
import bigframes.dtypes
3435
import bigframes.formatting_helpers as formatter
3536
import bigframes.operations as ops
@@ -114,6 +115,10 @@ def from_frame(
114115
index._linked_frame = frame
115116
return index
116117

118+
@property
119+
def _session(self):
120+
return self._block.session
121+
117122
@property
118123
def name(self) -> blocks.Label:
119124
names = self.names
@@ -179,6 +184,7 @@ def empty(self) -> bool:
179184
return self.shape[0] == 0
180185

181186
@property
187+
@validations.requires_strict_ordering()
182188
def is_monotonic_increasing(self) -> bool:
183189
"""
184190
Return a boolean if the values are equal or increasing.
@@ -192,6 +198,7 @@ def is_monotonic_increasing(self) -> bool:
192198
)
193199

194200
@property
201+
@validations.requires_strict_ordering()
195202
def is_monotonic_decreasing(self) -> bool:
196203
"""
197204
Return a boolean if the values are equal or decreasing.
@@ -341,6 +348,7 @@ def max(self) -> typing.Any:
341348
def min(self) -> typing.Any:
342349
return self._apply_aggregation(agg_ops.min_op)
343350

351+
@validations.requires_strict_ordering()
344352
def argmax(self) -> int:
345353
block, row_nums = self._block.promote_offsets()
346354
block = block.order_by(
@@ -353,6 +361,7 @@ def argmax(self) -> int:
353361

354362
return typing.cast(int, series.Series(block.select_column(row_nums)).iloc[0])
355363

364+
@validations.requires_strict_ordering()
356365
def argmin(self) -> int:
357366
block, row_nums = self._block.promote_offsets()
358367
block = block.order_by(
@@ -424,6 +433,8 @@ def dropna(self, how: typing.Literal["all", "any"] = "any") -> Index:
424433
return Index(result)
425434

426435
def drop_duplicates(self, *, keep: str = "first") -> Index:
436+
if keep is not False:
437+
validations.enforce_ordered(self, "drop_duplicates")
427438
block = block_ops.drop_duplicates(self._block, self._block.index_columns, keep)
428439
return Index(block)
429440

bigframes/core/validations.py

+51
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Validation helpers for operations that require a strictly ordered session."""
16+
17+
from __future__ import annotations
18+
19+
import functools
20+
from typing import Protocol, TYPE_CHECKING
21+
22+
import bigframes.constants
23+
import bigframes.exceptions
24+
25+
if TYPE_CHECKING:
26+
from bigframes import Session
27+
28+
29+
class HasSession(Protocol):
30+
@property
31+
def _session(self) -> Session:
32+
...
33+
34+
35+
def requires_strict_ordering():
36+
def decorator(meth):
37+
@functools.wraps(meth)
38+
def guarded_meth(object: HasSession, *args, **kwargs):
39+
enforce_ordered(object, meth.__name__)
40+
return meth(object, *args, **kwargs)
41+
42+
return guarded_meth
43+
44+
return decorator
45+
46+
47+
def enforce_ordered(object: HasSession, opname: str) -> None:
48+
if not object._session._strictly_ordered:
49+
raise bigframes.exceptions.OrderRequiredError(
50+
f"Op {opname} not supported when strict ordering is disabled. {bigframes.constants.FEEDBACK_LINK}"
51+
)

bigframes/core/window_spec.py

+10
Original file line numberDiff line numberDiff line change
@@ -152,3 +152,13 @@ class WindowSpec:
152152
ordering: Tuple[orderings.OrderingExpression, ...] = tuple()
153153
bounds: Union[RowsWindowBounds, RangeWindowBounds, None] = None
154154
min_periods: int = 0
155+
156+
@property
157+
def row_bounded(self):
158+
"""
159+
Whether the window is bounded by row offsets.
160+
161+
This is relevant for determining whether the window requires a total order
162+
to calculate deterministically.
163+
"""
164+
return isinstance(self.bounds, RowsWindowBounds)

0 commit comments

Comments
 (0)