Skip to content

Commit ca81cb0

Browse files
committed
BUG: Fix replacing in string series with NA (pandas-dev#32621)
1 parent 23da71c commit ca81cb0

File tree

1 file changed

+17
-11
lines changed

1 file changed

+17
-11
lines changed

pandas/core/internals/managers.py

+17 -11
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@
2828
from pandas.core.dtypes.concat import concat_compat
2929
from pandas.core.dtypes.dtypes import ExtensionDtype
3030
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
31-
from pandas.core.dtypes.missing import isna, na_value_for_dtype
31+
from pandas.core.dtypes.missing import isna
3232

3333
import pandas.core.algorithms as algos
3434
from pandas.core.arrays.sparse import SparseDtype
@@ -1948,22 +1948,28 @@ def _compare_or_regex_search(a, b, regex=False):
19481948
is_a_array = isinstance(a, np.ndarray)
19491949
is_b_array = isinstance(b, np.ndarray)
19501950

1951-
# GH#32621 replace all pd.NAs to avoid failure of element-wise comparison
1952-
mask = isna(a) | isna(b)
1953-
if is_a_array:
1954-
a = np.where(mask, na_value_for_dtype(a.dtype, compat=False), a)
1955-
if is_b_array:
1956-
b = np.where(mask, na_value_for_dtype(b.dtype, compat=False), b)
1957-
19581951
if is_datetimelike_v_numeric(a, b) or is_numeric_v_string_like(a, b):
19591952
# GH#29553 avoid deprecation warnings from numpy
19601953
result = False
19611954
else:
1955+
# GH#32621 use mask to avoid comparing to NAs
1956+
if is_a_array and not is_b_array:
1957+
mask = np.reshape(~(isna(a)), a.shape)
1958+
elif is_b_array and not is_a_array:
1959+
mask = np.reshape(~(isna(b)), b.shape)
1960+
elif is_a_array and is_b_array:
1961+
mask = ~(isna(a) | isna(b))
1962+
1963+
if is_a_array:
1964+
a = a[mask]
1965+
if is_b_array:
1966+
b = b[mask]
19621967
result = op(a)
1968+
19631969
if isinstance(result, np.ndarray):
1964-
result[mask] = na_value_for_dtype(result.dtype, compat=False)
1965-
elif isna(result):
1966-
result = na_value_for_dtype(np.bool, compat=False)
1970+
tmp = np.zeros(mask.shape, dtype=np.bool)
1971+
tmp[mask] = result
1972+
result = tmp
19671973

19681974
if is_scalar(result) and (is_a_array or is_b_array):
19691975
type_names = [type(a).__name__, type(b).__name__]

0 commit comments

Comments
 (0)