feat: Add support for numpy expm1, log1p, floor, ceil, arctan2 ops (#505)

TrevorBergeron · tswast · web-flow · commit e8e66cf25887 · 2024-03-25T20:40:27.000-07:00
Co-authored-by: Tim Sweña (Swast) <swast@google.com>
diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py
@@ -257,6 +257,13 @@ def arctan_op_impl(x: ibis_types.Value):
     return typing.cast(ibis_types.NumericValue, x).atan()
 
 
+@scalar_op_compiler.register_binary_op(ops.arctan2_op)
+def arctan2_op_impl(x: ibis_types.Value, y: ibis_types.Value):
+    return typing.cast(ibis_types.NumericValue, x).atan2(  # x is the receiver
+        typing.cast(ibis_types.NumericValue, y)  # casts only narrow the static type
+    )
+
+
 # Hyperbolic trig functions
 # BQ has these functions, but Ibis doesn't
 @scalar_op_compiler.register_unary_op(ops.sinh_op)
@@ -319,6 +326,30 @@ def arctanh_op_impl(x: ibis_types.Value):
 
 
 # Numeric Ops
+@scalar_op_compiler.register_unary_op(ops.floor_op)
+def floor_op_impl(x: ibis_types.Value):
+    x_numeric = typing.cast(ibis_types.NumericValue, x)
+    if x_numeric.type().is_integer():
+        return x_numeric.cast(ibis_dtypes.Float64())  # integers only change dtype
+    if x_numeric.type().is_floating():
+        # Ibis's default floor casts to integer: can overflow and doesn't match pandas.
+        return float_floor(x_numeric)
+    else:  # numeric (presumably decimal): falls back to ibis's own floor()
+        return x_numeric.floor()
+
+
+@scalar_op_compiler.register_unary_op(ops.ceil_op)
+def ceil_op_impl(x: ibis_types.Value):
+    x_numeric = typing.cast(ibis_types.NumericValue, x)
+    if x_numeric.type().is_integer():
+        return x_numeric.cast(ibis_dtypes.Float64())  # integers only change dtype
+    if x_numeric.type().is_floating():
+        # Ibis's default ceil casts to integer: can overflow and doesn't match pandas.
+        return float_ceil(x_numeric)
+    else:  # numeric (presumably decimal): falls back to ibis's own ceil()
+        return x_numeric.ceil()
+
+
 @scalar_op_compiler.register_unary_op(ops.abs_op)
 def abs_op_impl(x: ibis_types.Value):
     return typing.cast(ibis_types.NumericValue, x).abs()
@@ -347,13 +378,23 @@ def ln_op_impl(x: ibis_types.Value):
     return (~domain).ifelse(out_of_domain, numeric_value.ln())
 
 
+@scalar_op_compiler.register_unary_op(ops.log1p_op)
+def log1p_op_impl(x: ibis_types.Value):
+    return ln_op_impl(_ibis_num(1) + x)  # ln(1 + x): may lose precision for tiny |x|
+
+
 @scalar_op_compiler.register_unary_op(ops.exp_op)
 def exp_op_impl(x: ibis_types.Value):
     numeric_value = typing.cast(ibis_types.NumericValue, x)
     domain = numeric_value < _FLOAT64_EXP_BOUND
     return (~domain).ifelse(_INF, numeric_value.exp())
 
 
+@scalar_op_compiler.register_unary_op(ops.expm1_op)
+def expm1_op_impl(x: ibis_types.Value):
+    return exp_op_impl(x) - _ibis_num(1)  # exp(x) - 1: may lose precision near 0
+
+
 @scalar_op_compiler.register_unary_op(ops.invert_op)
 def invert_op_impl(x: ibis_types.Value):
     return typing.cast(ibis_types.NumericValue, x).negate()
@@ -1318,3 +1359,16 @@ def _ibis_num(number: float):
 @ibis.udf.scalar.builtin
 def timestamp(a: str) -> ibis_dtypes.timestamp:
     """Convert string to timestamp."""
+
+
+# Need these because ibis otherwise tries to do casts to int that can fail
+@ibis.udf.scalar.builtin(name="floor")
+def float_floor(a: float) -> float:
+    """Builtin ``floor`` that takes a float and returns a float."""
+    return 0  # pragma: NO COVER
+
+
+@ibis.udf.scalar.builtin(name="ceil")
+def float_ceil(a: float) -> float:
+    """Builtin ``ceil`` that takes a float and returns a float."""
+    return 0  # pragma: NO COVER
diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py
@@ -246,11 +246,16 @@ def create_ternary_op(
 arcsinh_op = create_unary_op(name="arcsinh", type_rule=op_typing.REAL_NUMERIC)
 arccosh_op = create_unary_op(name="arccosh", type_rule=op_typing.REAL_NUMERIC)
 arctanh_op = create_unary_op(name="arctanh", type_rule=op_typing.REAL_NUMERIC)
+arctan2_op = create_binary_op(name="arctan2", type_rule=op_typing.REAL_NUMERIC)
 ## Numeric Ops
+floor_op = create_unary_op(name="floor", type_rule=op_typing.REAL_NUMERIC)
+ceil_op = create_unary_op(name="ceil", type_rule=op_typing.REAL_NUMERIC)
 abs_op = create_unary_op(name="abs", type_rule=op_typing.INPUT_TYPE)
 exp_op = create_unary_op(name="exp", type_rule=op_typing.REAL_NUMERIC)
+expm1_op = create_unary_op(name="expm1", type_rule=op_typing.REAL_NUMERIC)
 ln_op = create_unary_op(name="log", type_rule=op_typing.REAL_NUMERIC)
 log10_op = create_unary_op(name="log10", type_rule=op_typing.REAL_NUMERIC)
+log1p_op = create_unary_op(name="log1p", type_rule=op_typing.REAL_NUMERIC)
 sqrt_op = create_unary_op(name="sqrt", type_rule=op_typing.REAL_NUMERIC)
 
 
@@ -540,6 +545,10 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
     np.log10: log10_op,
     np.sqrt: sqrt_op,
     np.abs: abs_op,
+    np.floor: floor_op,
+    np.ceil: ceil_op,
+    np.log1p: log1p_op,
+    np.expm1: expm1_op,
 }
 
 
@@ -549,4 +558,5 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
     np.multiply: mul_op,
     np.divide: div_op,
     np.power: pow_op,
+    np.arctan2: arctan2_op,
 }
diff --git a/tests/system/small/test_numpy.py b/tests/system/small/test_numpy.py
@@ -56,6 +56,10 @@ def test_series_ufuncs(floats_pd, floats_bf, opname):
         ("log10",),
         ("sqrt",),
         ("abs",),
+        ("floor",),
+        ("ceil",),
+        ("expm1",),
+        ("log1p",),
     ],
 )
 def test_df_ufuncs(scalars_dfs, opname):
@@ -77,6 +81,7 @@ def test_df_ufuncs(scalars_dfs, opname):
         ("multiply",),
         ("divide",),
         ("power",),
+        ("arctan2",),
     ],
 )
 def test_series_binary_ufuncs(floats_product_pd, floats_product_bf, opname):
@@ -112,6 +117,23 @@ def test_df_binary_ufuncs(scalars_dfs, opname):
     pd.testing.assert_frame_equal(bf_result, pd_result)
 
 
+@pytest.mark.parametrize(
+    ("x", "y"),
+    [
+        ("int64_col", "int64_col"),
+        ("float64_col", "int64_col"),
+    ],
+)
+def test_series_atan2(scalars_dfs, x, y):
+    # pandas errors on np.arctan2 with whole DataFrames, so this test passes Series only.
+    scalars_df, scalars_pandas_df = scalars_dfs
+
+    bf_result = np.arctan2(scalars_df[x], scalars_df[y]).to_pandas()
+    pd_result = np.arctan2(scalars_pandas_df[x], scalars_pandas_df[y])
+
+    pd.testing.assert_series_equal(bf_result, pd_result)
+
+
 def test_series_binary_ufuncs_reverse(scalars_dfs):
     scalars_df, scalars_pandas_df = scalars_dfs