Skip to content

feat: support subtraction in DATETIME/TIMESTAMP columns with timedelta columns #1390

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Feb 14, 2025
5 changes: 5 additions & 0 deletions bigframes/core/compile/scalar_op_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,11 @@ def timestamp_add_op_impl(x: ibis_types.TimestampValue, y: ibis_types.IntegerVal
return x + y.to_interval("us")


@scalar_op_compiler.register_binary_op(ops.timestamp_sub_op)
def timestamp_sub_op_impl(x: ibis_types.TimestampValue, y: ibis_types.IntegerValue):
    """Compile ``ops.timestamp_sub_op`` by subtracting ``y`` from the timestamp ``x``.

    ``y`` is an integer expression; it is converted to an interval with the
    ``"us"`` (microsecond) unit before the subtraction is applied.
    """
    microsecond_interval = y.to_interval("us")
    return x - microsecond_interval


@scalar_op_compiler.register_unary_op(ops.FloorDtOp, pass_op=True)
def floor_dt_op_impl(x: ibis_types.Value, op: ops.FloorDtOp):
supported_freqs = ["Y", "Q", "M", "W", "D", "h", "min", "s", "ms", "us", "ns"]
Expand Down
3 changes: 3 additions & 0 deletions bigframes/core/rewrite/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,9 @@ def _rewrite_sub_op(left: _TypedExpr, right: _TypedExpr) -> _TypedExpr:
if dtypes.is_datetime_like(left.dtype) and dtypes.is_datetime_like(right.dtype):
return _TypedExpr.create_op_expr(ops.timestamp_diff_op, left, right)

if dtypes.is_datetime_like(left.dtype) and right.dtype is dtypes.TIMEDELTA_DTYPE:
return _TypedExpr.create_op_expr(ops.timestamp_sub_op, left, right)

return _TypedExpr.create_op_expr(ops.sub_op, left, right)


Expand Down
7 changes: 6 additions & 1 deletion bigframes/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,11 @@
)
from bigframes.operations.struct_ops import StructFieldOp, StructOp
from bigframes.operations.time_ops import hour_op, minute_op, normalize_op, second_op
from bigframes.operations.timedelta_ops import timestamp_add_op, ToTimedeltaOp
from bigframes.operations.timedelta_ops import (
timestamp_add_op,
timestamp_sub_op,
ToTimedeltaOp,
)

__all__ = [
# Base ops
Expand Down Expand Up @@ -251,6 +255,7 @@
"normalize_op",
# Timedelta ops
"timestamp_add_op",
"timestamp_sub_op",
"ToTimedeltaOp",
# Datetime ops
"date_op",
Expand Down
3 changes: 3 additions & 0 deletions bigframes/operations/numeric_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,9 @@ def output_type(self, *input_types):
if dtypes.is_datetime_like(left_type) and dtypes.is_datetime_like(right_type):
return dtypes.TIMEDELTA_DTYPE

if dtypes.is_datetime_like(left_type) and right_type is dtypes.TIMEDELTA_DTYPE:
return left_type

raise TypeError(f"Cannot subtract dtypes {left_type} and {right_type}")


Expand Down
20 changes: 20 additions & 0 deletions bigframes/operations/timedelta_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,23 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT


timestamp_add_op = TimestampAdd()


@dataclasses.dataclass(frozen=True)
class TimestampSub(base_ops.BinaryOp):
    """Binary op for ``datetime-like - timedelta``; the result keeps the left type."""

    name: typing.ClassVar[str] = "timestamp_sub"

    def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
        """Return the type produced by subtracting a timedelta from a timestamp.

        Args:
            *input_types: The operand types; index 0 is the left operand and
                index 1 is the right operand.

        Returns:
            The left operand's type, unchanged.

        Raises:
            TypeError: If the left type is not datetime-like or the right type
                is not ``dtypes.TIMEDELTA_DTYPE``.
        """
        # Reject anything that is not (timestamp, timedelta) up front.
        if (
            not dtypes.is_datetime_like(input_types[0])
            or input_types[1] is not dtypes.TIMEDELTA_DTYPE
        ):
            raise TypeError(
                f"unsupported types for timestamp_sub. left: {input_types[0]} right: {input_types[1]}"
            )

        # timestamp - timedelta => timestamp
        return input_types[0]


timestamp_sub_op = TimestampSub()
90 changes: 90 additions & 0 deletions tests/system/small/operations/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,96 @@ def test_timestamp_add_dataframes(temporal_dfs):
)


@pytest.mark.parametrize(
    ("column", "pd_dtype"),
    [
        ("datetime_col", "<M8[ns]"),
        ("timestamp_col", "datetime64[ns, UTC]"),
    ],
)
def test_timestamp_sub__ts_series_minus_td_series(temporal_dfs, column, pd_dtype):
    """Subtracting a timedelta series from a datetime-like series matches pandas."""
    bf_df, pd_df = temporal_dfs

    # Evaluate the BigQuery DataFrames expression, then cast the downloaded
    # series to ``pd_dtype`` before comparing.
    bf_difference = bf_df[column] - bf_df["timedelta_col_1"]
    actual_result = bf_difference.to_pandas().astype(pd_dtype)

    expected_result = pd_df[column] - pd_df["timedelta_col_1"]

    pandas.testing.assert_series_equal(
        actual_result,
        expected_result,
        check_index_type=False,
    )


@pytest.mark.parametrize(
    ("column", "pd_dtype"),
    [
        ("datetime_col", "<M8[ns]"),
        ("timestamp_col", "datetime64[ns, UTC]"),
    ],
)
def test_timestamp_sub__ts_series_minus_td_literal(temporal_dfs, column, pd_dtype):
    """Subtracting a ``pd.Timedelta`` literal from a datetime-like series matches pandas."""
    bf_df, pd_df = temporal_dfs
    one_hour = pd.Timedelta(1, "h")

    bf_difference = bf_df[column] - one_hour
    actual_result = bf_difference.to_pandas().astype(pd_dtype)

    expected_result = pd_df[column] - one_hour

    pandas.testing.assert_series_equal(
        actual_result,
        expected_result,
        check_index_type=False,
    )


def test_timestamp_sub__ts_literal_minus_td_series(temporal_dfs):
    """Subtracting a timedelta series from a ``pd.Timestamp`` literal matches pandas."""
    bf_df, pd_df = temporal_dfs
    timestamp_literal = pd.Timestamp("2025-01-01 01:00:00")

    # The literal is on the left-hand side, so this exercises the reflected
    # subtraction on the series.
    bf_difference = timestamp_literal - bf_df["timedelta_col_1"]
    actual_result = bf_difference.to_pandas().astype("<M8[ns]")

    expected_result = timestamp_literal - pd_df["timedelta_col_1"]

    pandas.testing.assert_series_equal(
        actual_result,
        expected_result,
        check_index_type=False,
    )


@pytest.mark.parametrize(
    ("column", "pd_dtype"),
    [
        ("datetime_col", "<M8[ns]"),
        ("timestamp_col", "datetime64[ns, UTC]"),
    ],
)
def test_timestamp_sub_with_numpy_op(temporal_dfs, column, pd_dtype):
    """``np.subtract`` on a datetime-like series and a timedelta series matches pandas."""
    bf_df, pd_df = temporal_dfs

    bf_difference = np.subtract(bf_df[column], bf_df["timedelta_col_1"])
    actual_result = bf_difference.to_pandas().astype(pd_dtype)

    expected_result = np.subtract(pd_df[column], pd_df["timedelta_col_1"])

    pandas.testing.assert_series_equal(
        actual_result,
        expected_result,
        check_index_type=False,
    )


def test_timestamp_sub_dataframes(temporal_dfs):
    """Subtracting a ``pd.Timedelta`` from a two-column DataFrame matches pandas."""
    columns = ["datetime_col", "timestamp_col"]
    timedelta = pd.Timedelta(1, unit="s")
    bf_df, pd_df = temporal_dfs

    actual_result = (bf_df[columns] - timedelta).to_pandas()
    # Cast each downloaded column to the pandas dtype used for the comparison.
    comparison_dtypes = {
        "datetime_col": "<M8[ns]",
        "timestamp_col": "datetime64[ns, UTC]",
    }
    for col_name, col_dtype in comparison_dtypes.items():
        actual_result[col_name] = actual_result[col_name].astype(col_dtype)

    expected_result = pd_df[columns] - timedelta

    pandas.testing.assert_frame_equal(
        actual_result,
        expected_result,
        check_index_type=False,
    )


@pytest.mark.parametrize(
"compare_func",
[
Expand Down