Skip to content

Commit 0070e77

Browse files
authored
fix: window operations over JSON columns (#1451)
1 parent 2bbf53f commit 0070e77

File tree

3 files changed

+28
-7
lines changed

3 files changed

+28
-7
lines changed

bigframes/core/compile/compiled.py

+5 -1
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@
3030
import bigframes.core.compile.googlesql
3131
import bigframes.core.compile.ibis_types
3232
import bigframes.core.compile.scalar_op_compiler as op_compilers
33+
import bigframes.core.compile.scalar_op_compiler as scalar_op_compiler
3334
import bigframes.core.expression as ex
3435
import bigframes.core.guid
3536
from bigframes.core.ordering import OrderingExpression
@@ -676,4 +677,7 @@ def _as_groupable(value: ibis_types.Value):
676677
# Some types need to be converted to string to enable groupby
677678
if value.type().is_float64() or value.type().is_geospatial():
678679
return value.cast(ibis_dtypes.str)
679-
return value
680+
elif value.type().is_json():
681+
return scalar_op_compiler.to_json_string(value)
682+
else:
683+
return value

tests/data/json.jsonl

+2 -2
Original file line number | Diff line number | Diff line change
@@ -6,10 +6,10 @@
66
{"rowindex": 5, "json_col": []}
77
{"rowindex": 6, "json_col": [1, 2, 3]}
88
{"rowindex": 7, "json_col": [{"a": 1}, {"a": 2}, {"a": null}, {}]}
9-
{"rowindex": 8, "json_col": {"bool_value": true}}
9+
{"rowindex": 8, "json_col": "100"}
1010
{"rowindex": 9, "json_col": {"folat_num": 3.14159}}
1111
{"rowindex": 10, "json_col": {"date": "2024-07-16"}}
12-
{"rowindex": 11, "json_col": {"null_filed": null}}
12+
{"rowindex": 11, "json_col": 100}
1313
{"rowindex": 12, "json_col": {"int_value": 2, "null_filed": null}}
1414
{"rowindex": 13, "json_col": {"list_data": [10, 20, 30]}}
1515
{"rowindex": 14, "json_col": {"person": {"name": "Alice", "age": 35}}}

tests/system/small/test_dataframe.py

+21 -4
Original file line number | Diff line number | Diff line change
@@ -4534,11 +4534,28 @@ def test_loc_bf_index_integer_index_renamed_col(
45344534
)
45354535
def test_df_drop_duplicates(scalars_df_index, scalars_pandas_df_index, keep, subset):
45364536
columns = ["bool_col", "int64_too", "int64_col"]
4537-
bf_series = scalars_df_index[columns].drop_duplicates(subset, keep=keep).to_pandas()
4538-
pd_series = scalars_pandas_df_index[columns].drop_duplicates(subset, keep=keep)
4537+
bf_df = scalars_df_index[columns].drop_duplicates(subset, keep=keep).to_pandas()
4538+
pd_df = scalars_pandas_df_index[columns].drop_duplicates(subset, keep=keep)
45394539
pd.testing.assert_frame_equal(
4540-
pd_series,
4541-
bf_series,
4540+
pd_df,
4541+
bf_df,
4542+
)
4543+
4544+
4545+
@pytest.mark.parametrize(
4546+
("keep",),
4547+
[
4548+
("first",),
4549+
("last",),
4550+
(False,),
4551+
],
4552+
)
4553+
def test_df_drop_duplicates_w_json(json_df, keep):
4554+
bf_df = json_df.drop_duplicates(keep=keep).to_pandas()
4555+
pd_df = json_df.to_pandas().drop_duplicates(keep=keep)
4556+
pd.testing.assert_frame_equal(
4557+
pd_df,
4558+
bf_df,
45424559
)
45434560

45444561

0 commit comments

Comments
 (0)