Skip to content

refactor: Decorate api methods that require total ordering #802

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
0220ef5
refactor: label all apis that use total ordering
TrevorBergeron May 20, 2024
22cef3c
Merge remote-tracking branch 'github/main' into unordered_mode
TrevorBergeron Jun 18, 2024
db91e4b
reorganize flags
TrevorBergeron Jun 19, 2024
c3a364a
Merge remote-tracking branch 'github/main' into unordered_mode
TrevorBergeron Jun 20, 2024
01888e5
fix config reference and add another test
TrevorBergeron Jun 20, 2024
e54cec0
fix broken attribute reference
TrevorBergeron Jun 20, 2024
32bceac
add _session property to groupby objects
TrevorBergeron Jun 20, 2024
d1a123e
restore session_tokyo fixture
TrevorBergeron Jun 20, 2024
d745ca2
add docstring for OrderRequiredError
TrevorBergeron Jun 20, 2024
cac44aa
add _session property to index object
TrevorBergeron Jun 20, 2024
bfcdeb9
handled methods where only some parameterizations need total order
TrevorBergeron Jun 25, 2024
6ef878d
Merge remote-tracking branch 'github/main' into unordered_mode
TrevorBergeron Jun 25, 2024
c37c683
fix inverted validation
TrevorBergeron Jun 25, 2024
809833a
Merge remote-tracking branch 'github/main' into unordered_mode
TrevorBergeron Jun 26, 2024
d933bda
import validations module and not functions
TrevorBergeron Jun 26, 2024
c09f5f6
mark some analytic ops as order independent
TrevorBergeron Jun 26, 2024
c999a14
rename validate.py to validations.py
TrevorBergeron Jun 26, 2024
50895de
docstring for WindowSpec.row_bounded
TrevorBergeron Jun 26, 2024
0b50a86
Merge branch 'main' into unordered_mode
TrevorBergeron Jun 26, 2024
5726019
Apply suggestions from code review
tswast Jun 28, 2024
3899eea
Merge branch 'main' into unordered_mode
tswast Jun 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions bigframes/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,10 +344,12 @@ def project_window_op(
never_skip_nulls: will disable null skipping for operators that would otherwise do so
skip_reproject_unsafe: skips the reprojection step, can be used when performing many non-dependent window operations, user responsible for not nesting window expressions, or using outputs as join, filter or aggregation keys before a reprojection
"""
if not self.session._strictly_ordered:
# TODO: Support unbounded windows with aggregate ops and some row-order-independent analytic ops
# TODO: Support non-deterministic windowing
raise ValueError("Windowed ops not supported in unordered mode")
# TODO: Support non-deterministic windowing
if window_spec.row_bounded or not op.order_independent:
if not self.session._strictly_ordered:
raise ValueError(
"Order-dependent windowed ops not supported in unordered mode"
)
return ArrayValue(
nodes.WindowOpNode(
child=self.node,
Expand Down
26 changes: 26 additions & 0 deletions bigframes/core/groupby/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import bigframes.core.blocks as blocks
import bigframes.core.ordering as order
import bigframes.core.utils as utils
import bigframes.core.validations as validations
import bigframes.core.window as windows
import bigframes.core.window_spec as window_specs
import bigframes.dataframe as df
Expand Down Expand Up @@ -72,6 +73,10 @@ def __init__(
if col_id not in self._by_col_ids
]

@property
def _session(self) -> core.Session:
    """Return the session attached to this object's underlying block."""
    block = self._block
    return block.session

def __getitem__(
self,
key: typing.Union[
Expand Down Expand Up @@ -229,20 +234,25 @@ def count(self) -> df.DataFrame:
def nunique(self) -> df.DataFrame:
    """Aggregate every selected column with ``agg_ops.nunique_op``."""
    op = agg_ops.nunique_op
    return self._aggregate_all(op)

@validations.requires_strict_ordering()
def cumsum(self, *args, numeric_only: bool = False, **kwargs) -> df.DataFrame:
    """Group-wise ``cumsum``: run ``agg_ops.sum_op`` through ``_apply_window_op``.

    ``*args`` and ``**kwargs`` are accepted but not used by this method.
    When ``numeric_only`` is falsy, ``_raise_on_non_numeric("cumsum")`` is
    invoked before the window op is applied. The window op itself is always
    applied with ``numeric_only=True``.
    """
    must_check_dtypes = not numeric_only
    if must_check_dtypes:
        self._raise_on_non_numeric("cumsum")
    sum_op = agg_ops.sum_op
    return self._apply_window_op(sum_op, numeric_only=True)

@validations.requires_strict_ordering()
def cummin(self, *args, numeric_only: bool = False, **kwargs) -> df.DataFrame:
    """Group-wise ``cummin``: run ``agg_ops.min_op`` through ``_apply_window_op``.

    ``numeric_only`` is forwarded unchanged; ``*args`` and ``**kwargs`` are
    accepted but not used by this method.
    """
    min_op = agg_ops.min_op
    return self._apply_window_op(min_op, numeric_only=numeric_only)

@validations.requires_strict_ordering()
def cummax(self, *args, numeric_only: bool = False, **kwargs) -> df.DataFrame:
    """Group-wise ``cummax``: run ``agg_ops.max_op`` through ``_apply_window_op``.

    ``numeric_only`` is forwarded unchanged; ``*args`` and ``**kwargs`` are
    accepted but not used by this method.
    """
    max_op = agg_ops.max_op
    return self._apply_window_op(max_op, numeric_only=numeric_only)

@validations.requires_strict_ordering()
def cumprod(self, *args, **kwargs) -> df.DataFrame:
    """Group-wise ``cumprod``: run ``agg_ops.product_op`` through ``_apply_window_op``.

    The window op is always applied with ``numeric_only=True``; ``*args`` and
    ``**kwargs`` are accepted but not used by this method.
    """
    product_op = agg_ops.product_op
    return self._apply_window_op(product_op, numeric_only=True)

@validations.requires_strict_ordering()
def shift(self, periods=1) -> series.Series:
window = window_specs.rows(
grouping_keys=tuple(self._by_col_ids),
Expand All @@ -251,6 +261,7 @@ def shift(self, periods=1) -> series.Series:
)
return self._apply_window_op(agg_ops.ShiftOp(periods), window=window)

@validations.requires_strict_ordering()
def diff(self, periods=1) -> series.Series:
window = window_specs.rows(
grouping_keys=tuple(self._by_col_ids),
Expand All @@ -259,6 +270,7 @@ def diff(self, periods=1) -> series.Series:
)
return self._apply_window_op(agg_ops.DiffOp(periods), window=window)

@validations.requires_strict_ordering()
def rolling(self, window: int, min_periods=None) -> windows.Window:
# To get n size window, need current row and n-1 preceding rows.
window_spec = window_specs.rows(
Expand All @@ -274,6 +286,7 @@ def rolling(self, window: int, min_periods=None) -> windows.Window:
block, window_spec, self._selected_cols, drop_null_groups=self._dropna
)

@validations.requires_strict_ordering()
def expanding(self, min_periods: int = 1) -> windows.Window:
window_spec = window_specs.cumulative_rows(
grouping_keys=tuple(self._by_col_ids),
Expand Down Expand Up @@ -514,6 +527,10 @@ def __init__(
self._value_name = value_name
self._dropna = dropna # Applies to aggregations but not windowing

@property
def _session(self) -> core.Session:
    """Return the session attached to this object's underlying block."""
    block = self._block
    return block.session

def head(self, n: int = 5) -> series.Series:
block = self._block
if self._dropna:
Expand Down Expand Up @@ -631,26 +648,31 @@ def agg(self, func=None) -> typing.Union[df.DataFrame, series.Series]:

aggregate = agg

@validations.requires_strict_ordering()
def cumsum(self, *args, **kwargs) -> series.Series:
    """Group-wise ``cumsum``: run ``agg_ops.sum_op`` through ``_apply_window_op``.

    ``*args`` and ``**kwargs`` are accepted but not used by this method.
    """
    sum_op = agg_ops.sum_op
    return self._apply_window_op(sum_op)

@validations.requires_strict_ordering()
def cumprod(self, *args, **kwargs) -> series.Series:
    """Group-wise ``cumprod``: run ``agg_ops.product_op`` through ``_apply_window_op``.

    ``*args`` and ``**kwargs`` are accepted but not used by this method.
    """
    product_op = agg_ops.product_op
    return self._apply_window_op(product_op)

@validations.requires_strict_ordering()
def cummax(self, *args, **kwargs) -> series.Series:
    """Group-wise ``cummax``: run ``agg_ops.max_op`` through ``_apply_window_op``.

    ``*args`` and ``**kwargs`` are accepted but not used by this method.
    """
    max_op = agg_ops.max_op
    return self._apply_window_op(max_op)

@validations.requires_strict_ordering()
def cummin(self, *args, **kwargs) -> series.Series:
    """Group-wise ``cummin``: run ``agg_ops.min_op`` through ``_apply_window_op``.

    ``*args`` and ``**kwargs`` are accepted but not used by this method.
    """
    min_op = agg_ops.min_op
    return self._apply_window_op(min_op)

@validations.requires_strict_ordering()
def cumcount(self, *args, **kwargs) -> series.Series:
return (
self._apply_window_op(
Expand All @@ -660,6 +682,7 @@ def cumcount(self, *args, **kwargs) -> series.Series:
- 1
)

@validations.requires_strict_ordering()
def shift(self, periods=1) -> series.Series:
"""Shift index by desired number of periods."""
window = window_specs.rows(
Expand All @@ -669,6 +692,7 @@ def shift(self, periods=1) -> series.Series:
)
return self._apply_window_op(agg_ops.ShiftOp(periods), window=window)

@validations.requires_strict_ordering()
def diff(self, periods=1) -> series.Series:
window = window_specs.rows(
grouping_keys=tuple(self._by_col_ids),
Expand All @@ -677,6 +701,7 @@ def diff(self, periods=1) -> series.Series:
)
return self._apply_window_op(agg_ops.DiffOp(periods), window=window)

@validations.requires_strict_ordering()
def rolling(self, window: int, min_periods=None) -> windows.Window:
# To get n size window, need current row and n-1 preceding rows.
window_spec = window_specs.rows(
Expand All @@ -696,6 +721,7 @@ def rolling(self, window: int, min_periods=None) -> windows.Window:
is_series=True,
)

@validations.requires_strict_ordering()
def expanding(self, min_periods: int = 1) -> windows.Window:
window_spec = window_specs.cumulative_rows(
grouping_keys=tuple(self._by_col_ids),
Expand Down
11 changes: 11 additions & 0 deletions bigframes/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import bigframes.core.expression as ex
import bigframes.core.ordering as order
import bigframes.core.utils as utils
import bigframes.core.validations as validations
import bigframes.dtypes
import bigframes.formatting_helpers as formatter
import bigframes.operations as ops
Expand Down Expand Up @@ -114,6 +115,10 @@ def from_frame(
index._linked_frame = frame
return index

@property
def _session(self):
    """Return the session attached to this index's underlying block."""
    block = self._block
    return block.session

@property
def name(self) -> blocks.Label:
names = self.names
Expand Down Expand Up @@ -179,6 +184,7 @@ def empty(self) -> bool:
return self.shape[0] == 0

@property
@validations.requires_strict_ordering()
def is_monotonic_increasing(self) -> bool:
"""
Return a boolean if the values are equal or increasing.
Expand All @@ -192,6 +198,7 @@ def is_monotonic_increasing(self) -> bool:
)

@property
@validations.requires_strict_ordering()
def is_monotonic_decreasing(self) -> bool:
"""
Return a boolean if the values are equal or decreasing.
Expand Down Expand Up @@ -341,6 +348,7 @@ def max(self) -> typing.Any:
def min(self) -> typing.Any:
    """Return the result of aggregating this index with ``agg_ops.min_op``."""
    min_op = agg_ops.min_op
    return self._apply_aggregation(min_op)

@validations.requires_strict_ordering()
def argmax(self) -> int:
block, row_nums = self._block.promote_offsets()
block = block.order_by(
Expand All @@ -353,6 +361,7 @@ def argmax(self) -> int:

return typing.cast(int, series.Series(block.select_column(row_nums)).iloc[0])

@validations.requires_strict_ordering()
def argmin(self) -> int:
block, row_nums = self._block.promote_offsets()
block = block.order_by(
Expand Down Expand Up @@ -424,6 +433,8 @@ def dropna(self, how: typing.Literal["all", "any"] = "any") -> Index:
return Index(result)

def drop_duplicates(self, *, keep: str = "first") -> Index:
    """Return a new ``Index`` built from the de-duplicated block.

    Args:
        keep: Forwarded to ``block_ops.drop_duplicates``. Every value other
            than the literal ``False`` is first checked with
            ``validations.enforce_ordered``, which raises when the session
            is not strictly ordered.
    """
    order_sensitive = keep is not False
    if order_sensitive:
        validations.enforce_ordered(self, "drop_duplicates")
    source_block = self._block
    deduplicated = block_ops.drop_duplicates(
        source_block, source_block.index_columns, keep
    )
    return Index(deduplicated)

Expand Down
51 changes: 51 additions & 0 deletions bigframes/core/validations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Validation helpers that gate API methods on the session's strict-ordering mode."""

from __future__ import annotations

import functools
from typing import Protocol, TYPE_CHECKING

import bigframes.constants
import bigframes.exceptions

if TYPE_CHECKING:
from bigframes import Session


class HasSession(Protocol):
    """Structural type for any object that exposes a ``_session`` property."""

    @property
    def _session(self) -> Session:
        """Session associated with the object."""


def requires_strict_ordering():
    """Build a decorator that guards a method with ``enforce_ordered``.

    The returned decorator wraps a method so that every call first runs
    ``enforce_ordered(self, <method name>)`` and only then delegates to the
    original method with the same positional and keyword arguments.

    Returns:
        A decorator taking the method to guard and returning the guarded
        wrapper. ``functools.wraps`` copies the original method's metadata
        onto the wrapper.
    """

    def decorator(meth):
        # The receiver is named ``self`` rather than ``object``: it no longer
        # shadows the builtin, and a wrapped method that takes a keyword
        # argument called ``object`` can now be invoked through the wrapper
        # without a "multiple values for argument" TypeError.
        @functools.wraps(meth)
        def guarded_meth(self: HasSession, *args, **kwargs):
            # Validate first so the wrapped method never runs when rejected.
            enforce_ordered(self, meth.__name__)
            return meth(self, *args, **kwargs)

        return guarded_meth

    return decorator


def enforce_ordered(object: HasSession, opname: str) -> None:
    """Raise unless the object's session is strictly ordered.

    Args:
        object: Anything exposing ``_session``; its ``_strictly_ordered``
            flag decides the outcome.
        opname: Operation name interpolated into the error message.

    Raises:
        bigframes.exceptions.OrderRequiredError: If
            ``object._session._strictly_ordered`` is falsy.
    """
    session = object._session
    if session._strictly_ordered:
        return
    raise bigframes.exceptions.OrderRequiredError(
        f"Op {opname} not supported when strict ordering is disabled. {bigframes.constants.FEEDBACK_LINK}"
    )
10 changes: 10 additions & 0 deletions bigframes/core/window_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,3 +152,13 @@ class WindowSpec:
ordering: Tuple[orderings.OrderingExpression, ...] = tuple()
bounds: Union[RowsWindowBounds, RangeWindowBounds, None] = None
min_periods: int = 0

@property
def row_bounded(self):
    """
    Report whether this window's bounds are expressed as row offsets.

    The answer matters when deciding if the window needs a total order
    to be computed deterministically.
    """
    window_bounds = self.bounds
    return isinstance(window_bounds, RowsWindowBounds)
Loading