From 12da4a2b54a1f383244626f37e2b77467f7c834c Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Tue, 2 Apr 2024 18:44:13 +0000 Subject: [PATCH 1/3] fix: plot.scatter s parameter cannot accept float-like column --- bigframes/operations/_matplotlib/core.py | 18 ++++++++----- .../system/small/operations/test_plotting.py | 27 ++++++++++++++++++- 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/bigframes/operations/_matplotlib/core.py b/bigframes/operations/_matplotlib/core.py index 2c1c2bc4ac..65754f7412 100644 --- a/bigframes/operations/_matplotlib/core.py +++ b/bigframes/operations/_matplotlib/core.py @@ -115,6 +115,18 @@ def _compute_plot_data(self): if self._is_column_name(c, sample) and sample[c].dtype == dtypes.STRING_DTYPE: sample[c] = sample[c].astype("object") + # To avoid Matplotlib's automatic conversion of `Float64` or `Int64` columns + # to `object` types (which breaks float-like behavior), this code proactively + # converts the column to a compatible format." + s = self.kwargs.get("s", None) + if pd.core.dtypes.common.is_integer(s): + s = self.data.columns[s] + if self._is_column_name(s, sample): + if sample[s].dtype == dtypes.INT_DTYPE: + sample[s] = sample[s].astype("int64") + elif sample[s].dtype == dtypes.FLOAT_DTYPE: + sample[s] = sample[s].astype("float64") + return sample def _is_sequence_arg(self, arg): @@ -130,9 +142,3 @@ def _is_column_name(self, arg, data): and pd.core.dtypes.common.is_hashable(arg) and arg in data.columns ) - - def _generate_new_column_name(self, data): - col_name = None - while col_name is None or col_name in data.columns: - col_name = f"plot_temp_{str(uuid.uuid4())[:8]}" - return col_name diff --git a/tests/system/small/operations/test_plotting.py b/tests/system/small/operations/test_plotting.py index 824125adf2..0e9d71ed2b 100644 --- a/tests/system/small/operations/test_plotting.py +++ b/tests/system/small/operations/test_plotting.py @@ -240,6 +240,32 @@ def test_scatter_args_c(c): ) +@pytest.mark.parametrize( + ("s"), + [ + pytest.param([10, 34, 50], id="int"), + pytest.param([1.0, 3.4, 5.0], id="float"), + pytest.param( + [True, True, False], id="bool", marks=pytest.mark.xfail(raises=ValueError) + ), + ], +) +def test_scatter_args_s(s): + data = { + "a": [1, 2, 3], + "b": [1, 2, 3], + } + data["s"] = s + df = bpd.DataFrame(data) + pd_df = pd.DataFrame(data) + + ax = df.plot.scatter(x="a", y="b", s="s") + pd_ax = pd_df.plot.scatter(x="a", y="b", s="s") + tm.assert_numpy_array_equal( + ax.collections[0].get_sizes(), pd_ax.collections[0].get_sizes() + ) + + @pytest.mark.parametrize( ("arg_name"), [ @@ -255,7 +281,6 @@ def test_scatter_sequence_arg(arg_name): arg_value = [3, 3, 1] bpd.DataFrame(data).plot.scatter(x="a", y="b", **{arg_name: arg_value}) - def test_sampling_plot_args_n(): df = bpd.DataFrame(np.arange(bf_mpl.DEFAULT_SAMPLING_N * 10), columns=["one"]) ax = df.plot.line() From 4e78ca1abb5aac71e0d0f57bd80ebcf7829ffa61 Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Tue, 2 Apr 2024 18:53:52 +0000 Subject: [PATCH 2/3] fixing lint --- bigframes/operations/_matplotlib/core.py | 1 - tests/system/small/operations/test_plotting.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/operations/_matplotlib/core.py b/bigframes/operations/_matplotlib/core.py index 65754f7412..3220280b7c 100644 --- a/bigframes/operations/_matplotlib/core.py +++ b/bigframes/operations/_matplotlib/core.py @@ -14,7 +14,6 @@ import abc import typing -import uuid import pandas as pd diff --git a/tests/system/small/operations/test_plotting.py b/tests/system/small/operations/test_plotting.py index 0e9d71ed2b..6542ce6de3 100644 --- a/tests/system/small/operations/test_plotting.py +++ b/tests/system/small/operations/test_plotting.py @@ -281,6 +281,7 @@ def test_scatter_sequence_arg(arg_name): arg_value = [3, 3, 1] bpd.DataFrame(data).plot.scatter(x="a", y="b", **{arg_name: arg_value}) + def test_sampling_plot_args_n(): df = bpd.DataFrame(np.arange(bf_mpl.DEFAULT_SAMPLING_N * 10), columns=["one"]) ax = df.plot.line() From a0091c60e9caf0dc74df09d0c53eaae84e20184c Mon Sep 17 00:00:00 2001 From: Chelsea Lin Date: Wed, 3 Apr 2024 17:59:13 +0000 Subject: [PATCH 3/3] addressing comments --- bigframes/operations/_matplotlib/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/operations/_matplotlib/core.py b/bigframes/operations/_matplotlib/core.py index 3220280b7c..04534e20a9 100644 --- a/bigframes/operations/_matplotlib/core.py +++ b/bigframes/operations/_matplotlib/core.py @@ -116,7 +116,7 @@ def _compute_plot_data(self): # To avoid Matplotlib's automatic conversion of `Float64` or `Int64` columns # to `object` types (which breaks float-like behavior), this code proactively - # converts the column to a compatible format." + # converts the column to a compatible format. s = self.kwargs.get("s", None) if pd.core.dtypes.common.is_integer(s): s = self.data.columns[s]