From eaf486002c8f8c7ac5184835993ec78d18231b8f Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Fri, 29 Nov 2019 18:41:14 +0200 Subject: [PATCH 01/24] STY: x.__class__ to type(x) #batch-2 (#29893) --- pandas/core/dtypes/dtypes.py | 2 +- pandas/core/generic.py | 10 +++++----- pandas/core/groupby/base.py | 2 +- pandas/core/groupby/generic.py | 2 +- pandas/core/groupby/grouper.py | 2 +- pandas/core/indexes/base.py | 12 ++++++------ pandas/core/indexes/datetimes.py | 2 +- pandas/core/indexes/frozen.py | 14 +++++++------- pandas/core/indexes/interval.py | 6 +++--- pandas/core/indexes/multi.py | 8 +++----- pandas/core/indexes/range.py | 12 ++++++------ 11 files changed, 35 insertions(+), 37 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 2c601b01dbae52..fb3097684f0c35 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -420,7 +420,7 @@ def __repr__(self) -> str_type: if self.categories is None: data = "None, " else: - data = self.categories._format_data(name=self.__class__.__name__) + data = self.categories._format_data(name=type(self).__name__) return tpl.format(data=data, ordered=self._ordered) @staticmethod diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 29eeb5999b88f6..b13aee238efb36 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -251,7 +251,7 @@ def _validate_dtype(self, dtype): if dtype.kind == "V": raise NotImplementedError( "compound dtypes are not implemented" - " in the {0} constructor".format(self.__class__.__name__) + " in the {0} constructor".format(type(self).__name__) ) return dtype @@ -1534,7 +1534,7 @@ def __nonzero__(self): raise ValueError( "The truth value of a {0} is ambiguous. 
" "Use a.empty, a.bool(), a.item(), a.any() or a.all().".format( - self.__class__.__name__ + type(self).__name__ ) ) @@ -1559,7 +1559,7 @@ def bool(self): elif is_scalar(v): raise ValueError( "bool cannot act on a non-boolean single element " - "{0}".format(self.__class__.__name__) + "{0}".format(type(self).__name__) ) self.__nonzero__() @@ -1865,7 +1865,7 @@ def _drop_labels_or_levels(self, keys, axis=0): def __hash__(self): raise TypeError( "{0!r} objects are mutable, thus they cannot be" - " hashed".format(self.__class__.__name__) + " hashed".format(type(self).__name__) ) def __iter__(self): @@ -2059,7 +2059,7 @@ def __repr__(self) -> str: # string representation based upon iterating over self # (since, by definition, `PandasContainers` are iterable) prepr = "[%s]" % ",".join(map(pprint_thing, self)) - return f"{self.__class__.__name__}({prepr})" + return f"{type(self).__name__}({prepr})" def _repr_latex_(self): """ diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py index 407cd8342d486b..e088400b25f0f7 100644 --- a/pandas/core/groupby/base.py +++ b/pandas/core/groupby/base.py @@ -41,7 +41,7 @@ def _gotitem(self, key, ndim, subset=None): except IndexError: groupby = self._groupby - self = self.__class__(subset, groupby=groupby, parent=self, **kwargs) + self = type(self)(subset, groupby=groupby, parent=self, **kwargs) self._reset_cache() if subset.ndim == 2: if is_scalar(key) and key in subset or is_list_like(key): diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 99ef281e842b16..4726cdfb05a702 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -473,7 +473,7 @@ def _transform_general(self, func, *args, **kwargs): """ Transform with a non-str `func`. 
""" - klass = self._selected_obj.__class__ + klass = type(self._selected_obj) results = [] for name, group in self: diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index dc924455b141d0..9b2f43d8dd4845 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -211,7 +211,7 @@ def __repr__(self) -> str: if getattr(self, attr_name) is not None ) attrs = ", ".join(attrs_list) - cls_name = self.__class__.__name__ + cls_name = type(self).__name__ return f"{cls_name}({attrs})" diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 486cc0cd9032d6..4a3fa26c3460ef 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -815,7 +815,7 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): else: if allow_fill and fill_value is not None: msg = "Unable to fill values because {0} cannot contain NA" - raise ValueError(msg.format(self.__class__.__name__)) + raise ValueError(msg.format(type(self).__name__)) taken = self.values.take(indices) return self._shallow_copy(taken) @@ -948,7 +948,7 @@ def __repr__(self): """ Return a string representation for this object. 
""" - klass = self.__class__.__name__ + klass_name = type(self).__name__ data = self._format_data() attrs = self._format_attrs() space = self._format_space() @@ -959,7 +959,7 @@ def __repr__(self): if data is None: data = "" - res = f"{klass}({data}{prepr})" + res = f"{klass_name}({data}{prepr})" return res @@ -1287,7 +1287,7 @@ def _set_names(self, values, level=None): for name in values: if not is_hashable(name): raise TypeError( - "{}.name must be a hashable type".format(self.__class__.__name__) + "{}.name must be a hashable type".format(type(self).__name__) ) self.name = values[0] @@ -1794,7 +1794,7 @@ def is_all_dates(self) -> bool: def __reduce__(self): d = dict(data=self._data) d.update(self._get_attributes_dict()) - return _new_Index, (self.__class__, d), None + return _new_Index, (type(self), d), None def __setstate__(self, state): """ @@ -2290,7 +2290,7 @@ def __nonzero__(self): raise ValueError( "The truth value of a {0} is ambiguous. " "Use a.empty, a.bool(), a.item(), a.any() or a.all().".format( - self.__class__.__name__ + type(self).__name__ ) ) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index ab9f57ff9ac69f..0d368845ea4f2d 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -423,7 +423,7 @@ def __reduce__(self): d = dict(data=self._data) d.update(self._get_attributes_dict()) - return _new_DatetimeIndex, (self.__class__, d), None + return _new_DatetimeIndex, (type(self), d), None def __setstate__(self, state): """Necessary for making this object picklable""" diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py index 13c386187a9e5e..ab9852157b9ef1 100644 --- a/pandas/core/indexes/frozen.py +++ b/pandas/core/indexes/frozen.py @@ -69,13 +69,13 @@ def difference(self, other) -> "FrozenList": def __getitem__(self, n): if isinstance(n, slice): - return self.__class__(super().__getitem__(n)) + return type(self)(super().__getitem__(n)) return 
super().__getitem__(n) def __radd__(self, other): if isinstance(other, tuple): other = list(other) - return self.__class__(other + list(self)) + return type(self)(other + list(self)) def __eq__(self, other) -> bool: if isinstance(other, (tuple, FrozenList)): @@ -85,12 +85,12 @@ def __eq__(self, other) -> bool: __req__ = __eq__ def __mul__(self, other): - return self.__class__(super().__mul__(other)) + return type(self)(super().__mul__(other)) __imul__ = __mul__ def __reduce__(self): - return self.__class__, (list(self),) + return type(self), (list(self),) def __hash__(self): return hash(tuple(self)) @@ -99,7 +99,7 @@ def _disabled(self, *args, **kwargs): """This method will not function because object is immutable.""" raise TypeError( "'{cls}' does not support mutable operations.".format( - cls=self.__class__.__name__ + cls=type(self).__name__ ) ) @@ -107,7 +107,7 @@ def __str__(self) -> str: return pprint_thing(self, quote_strings=True, escape_chars=("\t", "\r", "\n")) def __repr__(self) -> str: - return f"{self.__class__.__name__}({str(self)})" + return f"{type(self).__name__}({str(self)})" __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled pop = append = extend = remove = sort = insert = _disabled @@ -132,7 +132,7 @@ def __new__(cls, data, dtype=None, copy=False): def _disabled(self, *args, **kwargs): """This method will not function because object is immutable.""" raise TypeError( - "'{cls}' does not support mutable operations.".format(cls=self.__class__) + "'{cls}' does not support mutable operations.".format(cls=type(self)) ) __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 35e8405e0f1aa9..a9e119f3c5f877 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -497,7 +497,7 @@ def __array_wrap__(self, result, context=None): def __reduce__(self): d = dict(left=self.left, right=self.right) 
d.update(self._get_attributes_dict()) - return _new_IntervalIndex, (self.__class__, d), None + return _new_IntervalIndex, (type(self), d), None @Appender(_index_shared_docs["copy"]) def copy(self, deep=False, name=None): @@ -512,7 +512,7 @@ def copy(self, deep=False, name=None): @Appender(_index_shared_docs["astype"]) def astype(self, dtype, copy=True): - with rewrite_exception("IntervalArray", self.__class__.__name__): + with rewrite_exception("IntervalArray", type(self).__name__): new_values = self.values.astype(dtype, copy=copy) if is_interval_dtype(new_values): return self._shallow_copy(new_values.left, new_values.right) @@ -1205,7 +1205,7 @@ def _format_attrs(self): return attrs def _format_space(self): - space = " " * (len(self.__class__.__name__) + 1) + space = " " * (len(type(self).__name__) + 1) return "\n{space}".format(space=space) # -------------------------------------------------------------------- diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 048112cbf0836b..d151fb7260a58a 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1245,9 +1245,7 @@ def _set_names(self, names, level=None, validate=True): # All items in 'names' need to be hashable: if not is_hashable(name): raise TypeError( - "{}.name must be a hashable type".format( - self.__class__.__name__ - ) + "{}.name must be a hashable type".format(type(self).__name__) ) self._names[lev] = name @@ -1911,7 +1909,7 @@ def __reduce__(self): sortorder=self.sortorder, names=list(self.names), ) - return ibase._new_Index, (self.__class__, d), None + return ibase._new_Index, (type(self), d), None def __setstate__(self, state): """Necessary for making this object picklable""" @@ -3264,7 +3262,7 @@ def astype(self, dtype, copy=True): elif not is_object_dtype(dtype): msg = ( "Setting {cls} dtype to anything other than object is not supported" - ).format(cls=self.__class__) + ).format(cls=type(self)) raise TypeError(msg) elif copy is True: return 
self._shallow_copy() diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index f7bbbee461e8d6..f300cde3b5bcc4 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -179,7 +179,7 @@ def _get_data_as_items(self): def __reduce__(self): d = self._get_attributes_dict() d.update(dict(self._get_data_as_items())) - return ibase._new_Index, (self.__class__, d), None + return ibase._new_Index, (type(self), d), None # -------------------------------------------------------------------- # Rendering Methods @@ -592,27 +592,27 @@ def _union(self, other, sort): and (start_s - end_o) <= step_s and (start_o - end_s) <= step_s ): - return self.__class__(start_r, end_r + step_s, step_s) + return type(self)(start_r, end_r + step_s, step_s) if ( (step_s % 2 == 0) and (abs(start_s - start_o) <= step_s / 2) and (abs(end_s - end_o) <= step_s / 2) ): - return self.__class__(start_r, end_r + step_s / 2, step_s / 2) + return type(self)(start_r, end_r + step_s / 2, step_s / 2) elif step_o % step_s == 0: if ( (start_o - start_s) % step_s == 0 and (start_o + step_s >= start_s) and (end_o - step_s <= end_s) ): - return self.__class__(start_r, end_r + step_s, step_s) + return type(self)(start_r, end_r + step_s, step_s) elif step_s % step_o == 0: if ( (start_s - start_o) % step_o == 0 and (start_s + step_o >= start_o) and (end_s - step_o <= end_o) ): - return self.__class__(start_r, end_r + step_o, step_o) + return type(self)(start_r, end_r + step_o, step_o) return self._int64index._union(other, sort=sort) @Appender(_index_shared_docs["join"]) @@ -781,7 +781,7 @@ def _evaluate_numeric_binop(self, other): rstart = op(left.start, right) rstop = op(left.stop, right) - result = self.__class__(rstart, rstop, rstep, **attrs) + result = type(self)(rstart, rstop, rstep, **attrs) # for compat with numpy / Int64Index # even if we can represent as a RangeIndex, return From ed98ff0ea7a920356dbb1c3af9e29eab1c041823 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend 
<50263213+MomIsBestFriend@users.noreply.github.com> Date: Fri, 29 Nov 2019 18:43:43 +0200 Subject: [PATCH 02/24] STY: x.__class__ to type(x) #batch-3 (#29894) --- pandas/core/indexing.py | 6 ++++-- pandas/core/internals/blocks.py | 4 ++-- pandas/core/internals/concat.py | 2 +- pandas/core/internals/managers.py | 22 ++++++++++------------ pandas/core/resample.py | 4 ++-- pandas/core/series.py | 9 +++------ pandas/core/window/rolling.py | 2 +- pandas/errors/__init__.py | 2 +- pandas/io/clipboard/__init__.py | 4 ++-- 9 files changed, 26 insertions(+), 29 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b52015b738c6e8..67412ed5e5b262 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -105,7 +105,7 @@ class _NDFrameIndexer(_NDFrameIndexerBase): def __call__(self, axis=None): # we need to return a copy of ourselves - new_self = self.__class__(self.name, self.obj) + new_self = type(self)(self.name, self.obj) if axis is not None: axis = self.obj._get_axis_number(axis) @@ -228,7 +228,9 @@ def _validate_key(self, key, axis: int): raise AbstractMethodError(self) def _has_valid_tuple(self, key: Tuple): - """ check the key for valid keys across my indexer """ + """ + Check the key for valid keys across my indexer. 
+ """ for i, k in enumerate(key): if i >= self.ndim: raise IndexingError("Too many indexers") diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 2d6ffb7277742e..e4de1c94da4509 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -257,11 +257,11 @@ def make_block_same_class(self, values, placement=None, ndim=None): placement = self.mgr_locs if ndim is None: ndim = self.ndim - return make_block(values, placement=placement, ndim=ndim, klass=self.__class__) + return make_block(values, placement=placement, ndim=ndim, klass=type(self)) def __repr__(self) -> str: # don't want to print out all of the items here - name = pprint_thing(self.__class__.__name__) + name = type(self).__name__ if self._is_single_block: result = "{name}: {len} dtype: {dtype}".format( diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index f981c00fdad369..6c4ab2882d67f0 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -122,7 +122,7 @@ def __init__(self, block, shape, indexers=None): def __repr__(self) -> str: return "{name}({block!r}, {indexers})".format( - name=self.__class__.__name__, block=self.block, indexers=self.indexers + name=type(self).__name__, block=self.block, indexers=self.indexers ) @cache_readonly diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 19901dc510199b..0fe95a4b7f3707 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -153,7 +153,7 @@ def make_empty(self, axes=None): blocks = np.array([], dtype=self.array_dtype) else: blocks = [] - return self.__class__(blocks, axes) + return type(self)(blocks, axes) def __nonzero__(self): return True @@ -316,7 +316,7 @@ def __len__(self) -> int: return len(self.items) def __repr__(self) -> str: - output = pprint_thing(self.__class__.__name__) + output = type(self).__name__ for i, ax in enumerate(self.axes): if i == 0: output += 
"\nItems: {ax}".format(ax=ax) @@ -430,7 +430,7 @@ def apply( if len(result_blocks) == 0: return self.make_empty(axes or self.axes) - bm = self.__class__( + bm = type(self)( result_blocks, axes or self.axes, do_integrity_check=do_integrity_check ) bm._consolidate_inplace() @@ -519,7 +519,7 @@ def get_axe(block, qs, axes): for b in blocks ] - return self.__class__(blocks, new_axes) + return type(self)(blocks, new_axes) # single block, i.e. ndim == {1} values = concat_compat([b.values for b in blocks]) @@ -629,7 +629,7 @@ def comp(s, regex=False): rb = new_rb result_blocks.extend(rb) - bm = self.__class__(result_blocks, self.axes) + bm = type(self)(result_blocks, self.axes) bm._consolidate_inplace() return bm @@ -724,7 +724,7 @@ def combine(self, blocks, copy=True): axes = list(self.axes) axes[0] = self.items.take(indexer) - return self.__class__(new_blocks, axes, do_integrity_check=False) + return type(self)(new_blocks, axes, do_integrity_check=False) def get_slice(self, slobj, axis=0): if axis >= self.ndim: @@ -741,7 +741,7 @@ def get_slice(self, slobj, axis=0): new_axes = list(self.axes) new_axes[axis] = new_axes[axis][slobj] - bm = self.__class__(new_blocks, new_axes, do_integrity_check=False) + bm = type(self)(new_blocks, new_axes, do_integrity_check=False) bm._consolidate_inplace() return bm @@ -917,7 +917,7 @@ def consolidate(self): if self.is_consolidated(): return self - bm = self.__class__(self.blocks, self.axes) + bm = type(self)(self.blocks, self.axes) bm._is_consolidated = False bm._consolidate_inplace() return bm @@ -1251,7 +1251,7 @@ def reindex_indexer( new_axes = list(self.axes) new_axes[axis] = new_axis - return self.__class__(new_blocks, new_axes) + return type(self)(new_blocks, new_axes) def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None): """ @@ -1521,9 +1521,7 @@ def get_slice(self, slobj, axis=0): if axis >= self.ndim: raise IndexError("Requested axis not found in manager") - return self.__class__( - self._block._slice(slobj), 
self.index[slobj], fastpath=True - ) + return type(self)(self._block._slice(slobj), self.index[slobj], fastpath=True) @property def index(self): diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 2433e3f52b4a9d..58c4a97d651d84 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -96,7 +96,7 @@ def __str__(self) -> str: if getattr(self.groupby, k, None) is not None ) return "{klass} [{attrs}]".format( - klass=self.__class__.__name__, attrs=", ".join(attrs) + klass=type(self).__name__, attrs=", ".join(attrs) ) def __getattr__(self, attr): @@ -885,7 +885,7 @@ def count(self): result = self._downsample("count") if not len(self.ax): if self._selected_obj.ndim == 1: - result = self._selected_obj.__class__( + result = type(self._selected_obj)( [], index=result.index, dtype="int64", name=self._selected_obj.name ) else: diff --git a/pandas/core/series.py b/pandas/core/series.py index 7f63b2575382a5..56039605651aca 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -256,9 +256,7 @@ def __init__( elif is_extension_array_dtype(data): pass elif isinstance(data, (set, frozenset)): - raise TypeError( - "{0!r} type is unordered".format(data.__class__.__name__) - ) + raise TypeError(f"{repr(type(data).__name__)} type is unordered") elif isinstance(data, ABCSparseArray): # handle sparse passed here (and force conversion) data = data.to_dense() @@ -1535,9 +1533,8 @@ def to_string( # catch contract violations if not isinstance(result, str): raise AssertionError( - "result must be of type unicode, type" - " of result is {0!r}" - "".format(result.__class__.__name__) + "result must be of type str, type" + f" of result is {repr(type(result).__name__)}" ) if buf is None: diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 7f3404100f71cc..d8aa362080093c 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -204,7 +204,7 @@ def _get_window(self, other=None, win_type: Optional[str] = 
None) -> int: @property def _window_type(self) -> str: - return self.__class__.__name__ + return type(self).__name__ def __repr__(self) -> str: """ diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 73cc40ae0e0d34..34838af5fd6e45 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -178,6 +178,6 @@ def __str__(self) -> str: if self.methodtype == "classmethod": name = self.class_instance.__name__ else: - name = self.class_instance.__class__.__name__ + name = type(self.class_instance).__name__ msg = "This {methodtype} must be defined in the concrete class {name}" return msg.format(methodtype=self.methodtype, name=name) diff --git a/pandas/io/clipboard/__init__.py b/pandas/io/clipboard/__init__.py index 7d3dbaf6ee0218..f808b7e706afbe 100644 --- a/pandas/io/clipboard/__init__.py +++ b/pandas/io/clipboard/__init__.py @@ -95,8 +95,8 @@ def _stringifyText(text) -> str: acceptedTypes = (str, int, float, bool) if not isinstance(text, acceptedTypes): raise PyperclipException( - f"only str, int, float, and bool values" - f"can be copied to the clipboard, not {text.__class__.__name__}" + f"only str, int, float, and bool values " + f"can be copied to the clipboard, not {type(text).__name__}" ) return str(text) From 6f03e76f9d47ecfcfdd44641de6df1fc7dd57a01 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Fri, 29 Nov 2019 18:45:11 +0200 Subject: [PATCH 03/24] x.__class__ to type(x) (#29902) --- pandas/io/formats/format.py | 2 +- pandas/io/formats/printing.py | 2 +- pandas/io/packers.py | 24 ++++++++++++------------ pandas/io/pytables.py | 2 +- pandas/io/stata.py | 5 ++--- pandas/tests/extension/base/dtype.py | 6 +++--- pandas/tests/extension/base/ops.py | 8 ++------ pandas/tests/extension/base/printing.py | 2 +- pandas/tests/frame/test_apply.py | 2 +- pandas/tests/indexes/common.py | 18 +++++++++--------- 10 files changed, 33 insertions(+), 38 deletions(-) diff --git 
a/pandas/io/formats/format.py b/pandas/io/formats/format.py index b18f0db622b3ee..f8f5d337185c46 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -352,7 +352,7 @@ def to_string(self) -> str: if len(series) == 0: return "{name}([], {footer})".format( - name=self.series.__class__.__name__, footer=footer + name=type(self.series).__name__, footer=footer ) fmt_index, have_header = self._get_formatted_index() diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index a4f1488fb6b693..82187991299526 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -321,7 +321,7 @@ def format_object_summary( if display_width is None: display_width = get_option("display.width") or 80 if name is None: - name = obj.__class__.__name__ + name = type(obj).__name__ if indent_for_name: name_len = len(name) diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 253441ab25813e..bb7b00571b0df1 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -404,7 +404,7 @@ def encode(obj): if isinstance(obj, RangeIndex): return { "typ": "range_index", - "klass": obj.__class__.__name__, + "klass": type(obj).__name__, "name": getattr(obj, "name", None), "start": obj._range.start, "stop": obj._range.stop, @@ -413,7 +413,7 @@ def encode(obj): elif isinstance(obj, PeriodIndex): return { "typ": "period_index", - "klass": obj.__class__.__name__, + "klass": type(obj).__name__, "name": getattr(obj, "name", None), "freq": getattr(obj, "freqstr", None), "dtype": obj.dtype.name, @@ -429,7 +429,7 @@ def encode(obj): obj = obj.tz_convert("UTC") return { "typ": "datetime_index", - "klass": obj.__class__.__name__, + "klass": type(obj).__name__, "name": getattr(obj, "name", None), "dtype": obj.dtype.name, "data": convert(obj.asi8), @@ -444,7 +444,7 @@ def encode(obj): typ = "interval_array" return { "typ": typ, - "klass": obj.__class__.__name__, + "klass": type(obj).__name__, "name": getattr(obj, "name", None), "left": 
getattr(obj, "left", None), "right": getattr(obj, "right", None), @@ -453,7 +453,7 @@ def encode(obj): elif isinstance(obj, MultiIndex): return { "typ": "multi_index", - "klass": obj.__class__.__name__, + "klass": type(obj).__name__, "names": getattr(obj, "names", None), "dtype": obj.dtype.name, "data": convert(obj.values), @@ -462,7 +462,7 @@ def encode(obj): else: return { "typ": "index", - "klass": obj.__class__.__name__, + "klass": type(obj).__name__, "name": getattr(obj, "name", None), "dtype": obj.dtype.name, "data": convert(obj.values), @@ -472,7 +472,7 @@ def encode(obj): elif isinstance(obj, Categorical): return { "typ": "category", - "klass": obj.__class__.__name__, + "klass": type(obj).__name__, "name": getattr(obj, "name", None), "codes": obj.codes, "categories": obj.categories, @@ -483,7 +483,7 @@ def encode(obj): elif isinstance(obj, Series): return { "typ": "series", - "klass": obj.__class__.__name__, + "klass": type(obj).__name__, "name": getattr(obj, "name", None), "index": obj.index, "dtype": obj.dtype.name, @@ -498,7 +498,7 @@ def encode(obj): # the block manager return { "typ": "block_manager", - "klass": obj.__class__.__name__, + "klass": type(obj).__name__, "axes": data.axes, "blocks": [ { @@ -506,7 +506,7 @@ def encode(obj): "values": convert(b.values), "shape": b.values.shape, "dtype": b.dtype.name, - "klass": b.__class__.__name__, + "klass": type(b).__name__, "compress": compressor, } for b in data.blocks @@ -553,7 +553,7 @@ def encode(obj): elif isinstance(obj, BlockIndex): return { "typ": "block_index", - "klass": obj.__class__.__name__, + "klass": type(obj).__name__, "blocs": obj.blocs, "blengths": obj.blengths, "length": obj.length, @@ -561,7 +561,7 @@ def encode(obj): elif isinstance(obj, IntIndex): return { "typ": "int_index", - "klass": obj.__class__.__name__, + "klass": type(obj).__name__, "indices": obj.indices, "length": obj.length, } diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index bf7aa5970519fd..fb63bdcaaa8760 
100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -3692,7 +3692,7 @@ def create_axes( # the non_index_axes info info = _get_info(self.info, i) info["names"] = list(a.names) - info["type"] = a.__class__.__name__ + info["type"] = type(a).__name__ self.non_index_axes.append((i, append_axis)) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index bd5e215730397c..59bb4e3bf236a8 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -856,12 +856,11 @@ def __str__(self) -> str: return self.string def __repr__(self) -> str: - # not perfect :-/ - return "{cls}({obj})".format(cls=self.__class__, obj=self) + return f"{type(self)}({self})" def __eq__(self, other) -> bool: return ( - isinstance(other, self.__class__) + isinstance(other, type(self)) and self.string == other.string and self.value == other.value ) diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py index a5040c8cfc2fc8..d1e1717225e155 100644 --- a/pandas/tests/extension/base/dtype.py +++ b/pandas/tests/extension/base/dtype.py @@ -96,7 +96,7 @@ def test_eq(self, dtype): assert dtype != "anonther_type" def test_construct_from_string(self, dtype): - dtype_instance = dtype.__class__.construct_from_string(dtype.name) - assert isinstance(dtype_instance, dtype.__class__) + dtype_instance = type(dtype).construct_from_string(dtype.name) + assert isinstance(dtype_instance, type(dtype)) with pytest.raises(TypeError): - dtype.__class__.construct_from_string("another_type") + type(dtype).construct_from_string("another_type") diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 5e4fb6d69e52cb..20d06ef2e5647b 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -123,9 +123,7 @@ def test_direct_arith_with_series_returns_not_implemented(self, data): result = data.__add__(other) assert result is NotImplemented else: - raise pytest.skip( - "{} does not implement 
add".format(data.__class__.__name__) - ) + raise pytest.skip(f"{type(data).__name__} does not implement add") class BaseComparisonOpsTests(BaseOpsUtil): @@ -169,6 +167,4 @@ def test_direct_arith_with_series_returns_not_implemented(self, data): result = data.__eq__(other) assert result is NotImplemented else: - raise pytest.skip( - "{} does not implement __eq__".format(data.__class__.__name__) - ) + raise pytest.skip(f"{type(data).__name__} does not implement __eq__") diff --git a/pandas/tests/extension/base/printing.py b/pandas/tests/extension/base/printing.py index 0f10efbf32a494..5d17a4b0cbee27 100644 --- a/pandas/tests/extension/base/printing.py +++ b/pandas/tests/extension/base/printing.py @@ -18,7 +18,7 @@ def test_array_repr(self, data, size): data = type(data)._concat_same_type([data] * 5) result = repr(data) - assert data.__class__.__name__ in result + assert type(data).__name__ in result assert "Length: {}".format(len(data)) in result assert str(data.dtype) in result if size == "big": diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index 3c97a87c95bd2f..26a3c738750caa 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -642,7 +642,7 @@ def test_applymap_box(self): } ) - result = df.applymap(lambda x: "{0}".format(x.__class__.__name__)) + result = df.applymap(lambda x: type(x).__name__) expected = pd.DataFrame( { "a": ["Timestamp", "Timestamp"], diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index c35c4c3568f74f..102949fe3f05eb 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -244,7 +244,7 @@ def test_str(self): idx = self.create_index() idx.name = "foo" assert "'foo'" in str(idx) - assert idx.__class__.__name__ in str(idx) + assert type(idx).__name__ in str(idx) def test_repr_max_seq_item_setting(self): # GH10182 @@ -260,8 +260,8 @@ def test_copy_name(self, indices): if isinstance(indices, MultiIndex): return - first = 
indices.__class__(indices, copy=True, name="mario") - second = first.__class__(first, copy=False) + first = type(indices)(indices, copy=True, name="mario") + second = type(first)(first, copy=False) # Even though "copy=False", we want a new object. assert first is not second @@ -292,7 +292,7 @@ def test_ensure_copied_data(self, indices): # MultiIndex and CategoricalIndex are tested separately return - index_type = indices.__class__ + index_type = type(indices) result = index_type(indices.values, copy=True, **init_kwargs) tm.assert_index_equal(indices, result) tm.assert_numpy_array_equal( @@ -502,7 +502,7 @@ def test_difference_base(self, sort, indices): cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: if isinstance(indices, (DatetimeIndex, TimedeltaIndex)): - assert result.__class__ == answer.__class__ + assert type(result) == type(answer) tm.assert_numpy_array_equal( result.sort_values().asi8, answer.sort_values().asi8 ) @@ -677,9 +677,9 @@ def test_hasnans_isnans(self, indices): values[1] = np.nan if isinstance(indices, PeriodIndex): - idx = indices.__class__(values, freq=indices.freq) + idx = type(indices)(values, freq=indices.freq) else: - idx = indices.__class__(values) + idx = type(indices)(values) expected = np.array([False] * len(idx), dtype=bool) expected[1] = True @@ -716,9 +716,9 @@ def test_fillna(self, indices): values[1] = np.nan if isinstance(indices, PeriodIndex): - idx = indices.__class__(values, freq=indices.freq) + idx = type(indices)(values, freq=indices.freq) else: - idx = indices.__class__(values) + idx = type(indices)(values) expected = np.array([False] * len(idx), dtype=bool) expected[1] = True From dbae24036cf16d1e74eb7648752acf07a2ce9817 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Fri, 29 Nov 2019 18:46:36 +0200 Subject: [PATCH 04/24] x.__class__ to type(x) (#29904) --- pandas/tests/indexes/datetimelike.py | 2 +- 
pandas/tests/indexes/multi/test_missing.py | 6 +++--- .../tests/indexes/period/test_partial_slicing.py | 2 +- pandas/tests/indexes/test_base.py | 8 ++++---- pandas/tests/reshape/test_concat.py | 8 ++++---- pandas/tests/series/test_apply.py | 16 ++++++++-------- pandas/tests/test_base.py | 2 +- pandas/tests/tseries/holiday/test_holiday.py | 2 +- pandas/tests/tseries/offsets/test_offsets.py | 4 ++-- pandas/tseries/holiday.py | 2 +- 10 files changed, 26 insertions(+), 26 deletions(-) diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index e6e38ce9921f5e..42244626749b9f 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -38,7 +38,7 @@ def test_str(self): idx.name = "foo" assert not "length={}".format(len(idx)) in str(idx) assert "'foo'" in str(idx) - assert idx.__class__.__name__ in str(idx) + assert type(idx).__name__ in str(idx) if hasattr(idx, "tz"): if idx.tz is not None: diff --git a/pandas/tests/indexes/multi/test_missing.py b/pandas/tests/indexes/multi/test_missing.py index 15bbd2ce97c3c0..31de40512c4740 100644 --- a/pandas/tests/indexes/multi/test_missing.py +++ b/pandas/tests/indexes/multi/test_missing.py @@ -42,9 +42,9 @@ def test_fillna(idx): values[1] = np.nan if isinstance(index, PeriodIndex): - idx = index.__class__(values, freq=index.freq) + idx = type(index)(values, freq=index.freq) else: - idx = index.__class__(values) + idx = type(index)(values) expected = np.array([False] * len(idx), dtype=bool) expected[1] = True @@ -115,7 +115,7 @@ def test_hasnans_isnans(idx): values = index.values values[1] = np.nan - index = idx.__class__(values) + index = type(idx)(values) expected = np.array([False] * len(index), dtype=bool) expected[1] = True diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index 50a12baf352d9a..501c2a4d8edccb 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ 
b/pandas/tests/indexes/period/test_partial_slicing.py @@ -123,7 +123,7 @@ def test_range_slice_outofbounds(self): for idx in [didx, pidx]: df = DataFrame(dict(units=[100 + i for i in range(10)]), index=idx) - empty = DataFrame(index=idx.__class__([], freq="D"), columns=["units"]) + empty = DataFrame(index=type(idx)([], freq="D"), columns=["units"]) empty["units"] = empty["units"].astype("int64") tm.assert_frame_equal(df["2013/09/01":"2013/09/30"], empty) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 1f99ba7ad01db9..77d81a4a9566ec 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -752,7 +752,7 @@ def test_fancy(self): @pytest.mark.parametrize("dtype", [np.int_, np.bool_]) def test_empty_fancy(self, index, dtype): empty_arr = np.array([], dtype=dtype) - empty_index = index.__class__([]) + empty_index = type(index)([]) assert index[[]].identical(empty_index) assert index[empty_arr].identical(empty_index) @@ -762,7 +762,7 @@ def test_empty_fancy_raises(self, index): # pd.DatetimeIndex is excluded, because it overrides getitem and should # be tested separately. empty_farr = np.array([], dtype=np.float_) - empty_index = index.__class__([]) + empty_index = type(index)([]) assert index[[]].identical(empty_index) # np.ndarray only accepts ndarray of int & bool dtypes, so should Index @@ -2446,8 +2446,8 @@ def test_copy_name(self): # GH12309 index = self.create_index() - first = index.__class__(index, copy=True, name="mario") - second = first.__class__(first, copy=False) + first = type(index)(index, copy=True, name="mario") + second = type(first)(first, copy=False) # Even though "copy=False", we want a new object. 
assert first is not second diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index bb8339439d3394..63f1ef7595f31d 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -949,7 +949,7 @@ def test_append_preserve_index_name(self): all_indexes = indexes_can_append + indexes_cannot_append_with_other - @pytest.mark.parametrize("index", all_indexes, ids=lambda x: x.__class__.__name__) + @pytest.mark.parametrize("index", all_indexes, ids=lambda x: type(x).__name__) def test_append_same_columns_type(self, index): # GH18359 @@ -979,7 +979,7 @@ def test_append_same_columns_type(self, index): @pytest.mark.parametrize( "df_columns, series_index", combinations(indexes_can_append, r=2), - ids=lambda x: x.__class__.__name__, + ids=lambda x: type(x).__name__, ) def test_append_different_columns_types(self, df_columns, series_index): # GH18359 @@ -1004,12 +1004,12 @@ def test_append_different_columns_types(self, df_columns, series_index): tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( - "index_can_append", indexes_can_append, ids=lambda x: x.__class__.__name__ + "index_can_append", indexes_can_append, ids=lambda x: type(x).__name__ ) @pytest.mark.parametrize( "index_cannot_append_with_other", indexes_cannot_append_with_other, - ids=lambda x: x.__class__.__name__, + ids=lambda x: type(x).__name__, ) def test_append_different_columns_types_raises( self, index_can_append, index_cannot_append_with_other diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index bdbfa333ef33af..eb4f3273f87133 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -92,7 +92,7 @@ def test_apply_box(self): s = pd.Series(vals) assert s.dtype == "datetime64[ns]" # boxed value must be Timestamp instance - res = s.apply(lambda x: "{0}_{1}_{2}".format(x.__class__.__name__, x.day, x.tz)) + res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") exp 
= pd.Series(["Timestamp_1_None", "Timestamp_2_None"]) tm.assert_series_equal(res, exp) @@ -102,7 +102,7 @@ def test_apply_box(self): ] s = pd.Series(vals) assert s.dtype == "datetime64[ns, US/Eastern]" - res = s.apply(lambda x: "{0}_{1}_{2}".format(x.__class__.__name__, x.day, x.tz)) + res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") exp = pd.Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"]) tm.assert_series_equal(res, exp) @@ -110,7 +110,7 @@ def test_apply_box(self): vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")] s = pd.Series(vals) assert s.dtype == "timedelta64[ns]" - res = s.apply(lambda x: "{0}_{1}".format(x.__class__.__name__, x.days)) + res = s.apply(lambda x: f"{type(x).__name__}_{x.days}") exp = pd.Series(["Timedelta_1", "Timedelta_2"]) tm.assert_series_equal(res, exp) @@ -118,7 +118,7 @@ def test_apply_box(self): vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")] s = pd.Series(vals) assert s.dtype == "Period[M]" - res = s.apply(lambda x: "{0}_{1}".format(x.__class__.__name__, x.freqstr)) + res = s.apply(lambda x: f"{type(x).__name__}_{x.freqstr}") exp = pd.Series(["Period_M", "Period_M"]) tm.assert_series_equal(res, exp) @@ -614,7 +614,7 @@ def test_map_box(self): s = pd.Series(vals) assert s.dtype == "datetime64[ns]" # boxed value must be Timestamp instance - res = s.map(lambda x: "{0}_{1}_{2}".format(x.__class__.__name__, x.day, x.tz)) + res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") exp = pd.Series(["Timestamp_1_None", "Timestamp_2_None"]) tm.assert_series_equal(res, exp) @@ -624,7 +624,7 @@ def test_map_box(self): ] s = pd.Series(vals) assert s.dtype == "datetime64[ns, US/Eastern]" - res = s.map(lambda x: "{0}_{1}_{2}".format(x.__class__.__name__, x.day, x.tz)) + res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") exp = pd.Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"]) tm.assert_series_equal(res, exp) @@ -632,7 +632,7 @@ def test_map_box(self): vals = 
[pd.Timedelta("1 days"), pd.Timedelta("2 days")] s = pd.Series(vals) assert s.dtype == "timedelta64[ns]" - res = s.map(lambda x: "{0}_{1}".format(x.__class__.__name__, x.days)) + res = s.apply(lambda x: f"{type(x).__name__}_{x.days}") exp = pd.Series(["Timedelta_1", "Timedelta_2"]) tm.assert_series_equal(res, exp) @@ -640,7 +640,7 @@ def test_map_box(self): vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")] s = pd.Series(vals) assert s.dtype == "Period[M]" - res = s.map(lambda x: "{0}_{1}".format(x.__class__.__name__, x.freqstr)) + res = s.apply(lambda x: f"{type(x).__name__}_{x.freqstr}") exp = pd.Series(["Period_M", "Period_M"]) tm.assert_series_equal(res, exp) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index f24bb9e72aef52..e65388be2ba7d3 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -400,7 +400,7 @@ def test_value_counts_unique_nunique(self): result = o.unique() if isinstance(o, Index): - assert isinstance(result, o.__class__) + assert isinstance(result, type(o)) tm.assert_index_equal(result, orig) assert result.dtype == orig.dtype elif is_datetime64tz_dtype(o): diff --git a/pandas/tests/tseries/holiday/test_holiday.py b/pandas/tests/tseries/holiday/test_holiday.py index 06869fcd7a4f87..7748b965f8962d 100644 --- a/pandas/tests/tseries/holiday/test_holiday.py +++ b/pandas/tests/tseries/holiday/test_holiday.py @@ -238,7 +238,7 @@ class TestCalendar(AbstractHolidayCalendar): rules = [] calendar = get_calendar("TestCalendar") - assert TestCalendar == calendar.__class__ + assert TestCalendar == type(calendar) def test_factory(): diff --git a/pandas/tests/tseries/offsets/test_offsets.py b/pandas/tests/tseries/offsets/test_offsets.py index d70780741aa88b..ae78d5a55bb5e5 100644 --- a/pandas/tests/tseries/offsets/test_offsets.py +++ b/pandas/tests/tseries/offsets/test_offsets.py @@ -358,7 +358,7 @@ def _check_offsetfunc_works(self, offset, funcname, dt, expected, normalize=Fals ts = 
Timestamp(dt) + Nano(5) if ( - offset_s.__class__.__name__ == "DateOffset" + type(offset_s).__name__ == "DateOffset" and (funcname == "apply" or normalize) and ts.nanosecond > 0 ): @@ -395,7 +395,7 @@ def _check_offsetfunc_works(self, offset, funcname, dt, expected, normalize=Fals ts = Timestamp(dt, tz=tz) + Nano(5) if ( - offset_s.__class__.__name__ == "DateOffset" + type(offset_s).__name__ == "DateOffset" and (funcname == "apply" or normalize) and ts.nanosecond > 0 ): diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 9417dc4b484997..2e5477ea00e39f 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -363,7 +363,7 @@ def __init__(self, name=None, rules=None): """ super().__init__() if name is None: - name = self.__class__.__name__ + name = type(self).__name__ self.name = name if rules is not None: From 878d31914197e81f9dabe427effd285f59b8db5d Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <50263213+MomIsBestFriend@users.noreply.github.com> Date: Fri, 29 Nov 2019 18:48:03 +0200 Subject: [PATCH 05/24] STY: x.__class__ to type(x) #batch-6 (#29905) --- pandas/tseries/offsets.py | 9 ++++----- pandas/util/_depr_module.py | 2 +- pandas/util/testing.py | 18 +++++++++--------- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 0620f2b9aae49c..9c0bceb1d5110c 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -309,9 +309,8 @@ def apply_index(self, i): if type(self) is not DateOffset: raise NotImplementedError( - "DateOffset subclass {name} " - "does not have a vectorized " - "implementation".format(name=self.__class__.__name__) + f"DateOffset subclass {type(self).__name__} " + "does not have a vectorized implementation" ) kwds = self.kwds relativedelta_fast = { @@ -402,7 +401,7 @@ def rollback(self, dt): """ dt = as_timestamp(dt) if not self.onOffset(dt): - dt = dt - self.__class__(1, normalize=self.normalize, **self.kwds) + dt = dt - 
type(self)(1, normalize=self.normalize, **self.kwds) return dt def rollforward(self, dt): @@ -416,7 +415,7 @@ def rollforward(self, dt): """ dt = as_timestamp(dt) if not self.onOffset(dt): - dt = dt + self.__class__(1, normalize=self.normalize, **self.kwds) + dt = dt + type(self)(1, normalize=self.normalize, **self.kwds) return dt def onOffset(self, dt): diff --git a/pandas/util/_depr_module.py b/pandas/util/_depr_module.py index 45e7db92818372..ae3c6359d20e08 100644 --- a/pandas/util/_depr_module.py +++ b/pandas/util/_depr_module.py @@ -32,7 +32,7 @@ def __init__(self, deprmod, deprmodto=None, removals=None, moved=None): self.moved = moved # For introspection purposes. - self.self_dir = frozenset(dir(self.__class__)) + self.self_dir = frozenset(dir(type(self))) def __dir__(self): deprmodule = self._import_deprmod() diff --git a/pandas/util/testing.py b/pandas/util/testing.py index bcd12eba1651ae..9adbf4cee5d740 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -713,7 +713,7 @@ def repr_class(x): return x try: - return x.__class__.__name__ + return type(x).__name__ except AttributeError: return repr(type(x)) @@ -780,17 +780,17 @@ def assert_is_valid_plot_return_object(objs): if isinstance(objs, (pd.Series, np.ndarray)): for el in objs.ravel(): msg = ( - "one of 'objs' is not a matplotlib Axes instance, type " - "encountered {name!r}" - ).format(name=el.__class__.__name__) + "one of 'objs' is not a matplotlib Axes instance, " + f"type encountered {repr(type(el).__name__)}" + ) assert isinstance(el, (plt.Axes, dict)), msg else: - assert isinstance(objs, (plt.Artist, tuple, dict)), ( - "objs is neither an ndarray of Artist instances nor a " - 'single Artist instance, tuple, or dict, "objs" is a {name!r}'.format( - name=objs.__class__.__name__ - ) + msg = ( + "objs is neither an ndarray of Artist instances nor a single " + "ArtistArtist instance, tuple, or dict, 'objs' is a " + f"{repr(type(objs).__name__)}" ) + assert isinstance(objs, (plt.Artist, 
tuple, dict)), msg def isiterable(obj): From 69ef5433a00616c24499fd7f06fc5c44b138ba32 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 29 Nov 2019 16:59:58 +0000 Subject: [PATCH 06/24] TYP: some types for pandas/core/arrays/sparse/array.py (#29898) --- pandas/core/arrays/sparse/array.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 943dea42524997..593ba7a643193d 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -260,6 +260,7 @@ class SparseArray(PandasObject, ExtensionArray, ExtensionOpsMixin): _pandas_ftype = "sparse" _subtyp = "sparse_array" # register ABCSparseArray _deprecations = PandasObject._deprecations | frozenset(["get_values"]) + _sparse_index: SparseIndex def __init__( self, @@ -372,8 +373,8 @@ def __init__( @classmethod def _simple_new( - cls, sparse_array: np.ndarray, sparse_index: SparseIndex, dtype: SparseDtype - ) -> ABCSparseArray: + cls, sparse_array: np.ndarray, sparse_index: SparseIndex, dtype: SparseDtype, + ) -> "SparseArray": new = cls([]) new._sparse_index = sparse_index new._sparse_values = sparse_array @@ -1392,8 +1393,8 @@ def __abs__(self): # ------------------------------------------------------------------------ @classmethod - def _create_unary_method(cls, op): - def sparse_unary_method(self): + def _create_unary_method(cls, op) -> Callable[["SparseArray"], "SparseArray"]: + def sparse_unary_method(self) -> "SparseArray": fill_value = op(np.array(self.fill_value)).item() values = op(self.sp_values) dtype = SparseDtype(values.dtype, fill_value) From 29fdc6946be8a60373d089c0aded86eea5d3a567 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 29 Nov 2019 17:01:13 +0000 Subject: [PATCH 07/24] TYP: some types for pandas/core/arrays/sparse/dtype.py (#29899) --- pandas/core/arrays/sparse/dtype.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/pandas/core/arrays/sparse/dtype.py b/pandas/core/arrays/sparse/dtype.py index 3b656705f55681..0124304727ab33 100644 --- a/pandas/core/arrays/sparse/dtype.py +++ b/pandas/core/arrays/sparse/dtype.py @@ -1,7 +1,7 @@ """Sparse Dtype""" import re -from typing import Any +from typing import Any, Tuple import numpy as np @@ -223,7 +223,7 @@ def construct_from_string(cls, string): raise TypeError(msg) @staticmethod - def _parse_subtype(dtype): + def _parse_subtype(dtype: str) -> Tuple[str, bool]: """ Parse a string to get the subtype @@ -249,7 +249,7 @@ def _parse_subtype(dtype): has_fill_value = False if m: subtype = m.groupdict()["subtype"] - has_fill_value = m.groupdict()["fill_value"] or has_fill_value + has_fill_value = bool(m.groupdict()["fill_value"]) elif dtype == "Sparse": subtype = "float64" else: From c63ae14859cd74cd4a7e64e33570e49cbc12663d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 29 Nov 2019 18:31:41 +0100 Subject: [PATCH 08/24] DOC: remove reference to get_value (removed) in DataFrame.lookup docstring (#29925) --- doc/source/user_guide/indexing.rst | 2 +- pandas/core/frame.py | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst index cf55ce0c9a6d4e..0229331127441b 100644 --- a/doc/source/user_guide/indexing.rst +++ b/doc/source/user_guide/indexing.rst @@ -374,7 +374,7 @@ For getting values with a boolean array: df1.loc['a'] > 0 df1.loc[:, df1.loc['a'] > 0] -For getting a value explicitly (equivalent to deprecated ``df.get_value('a','A')``): +For getting a value explicitly: .. 
ipython:: python diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d436385ba61ce4..d2e396284c5a73 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3544,13 +3544,6 @@ def lookup(self, row_labels, col_labels): ------- numpy.ndarray - Notes - ----- - Akin to:: - - result = [df.get_value(row, col) - for row, col in zip(row_labels, col_labels)] - Examples -------- values : ndarray From fc100fea90bb8ee95aaf33e4218e98b3655535d4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 29 Nov 2019 09:39:39 -0800 Subject: [PATCH 09/24] DEPR: Remove errors argument in tz_localize (#29911) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/_libs/tslibs/nattype.pyx | 12 -------- pandas/_libs/tslibs/timestamps.pyx | 28 +---------------- pandas/core/arrays/datetimes.py | 30 +------------------ .../tests/indexes/datetimes/test_timezones.py | 22 ++------------ .../tests/scalar/timestamp/test_timezones.py | 29 ++++-------------- pandas/tests/series/test_timezones.py | 15 ---------- 7 files changed, 11 insertions(+), 126 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index e54397e635c77f..1468718b161706 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -458,6 +458,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - Changed the default value for the `raw` argument in :func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `, - :func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` to ``False`` (:issue:`20584`) - Removed previously deprecated :attr:`Timestamp.weekday_name`, :attr:`DatetimeIndex.weekday_name`, and :attr:`Series.dt.weekday_name` (:issue:`18164`) +- Removed previously deprecated ``errors`` argument in :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` (:issue:`22644`) - .. 
_whatsnew_1000.performance: diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index 966f72dcd78893..76a694c64e1fb5 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -720,18 +720,6 @@ default 'raise' nonexistent times. .. versionadded:: 0.24.0 - errors : 'raise', 'coerce', default None - Determine how errors should be handled. - - The behavior is as follows: - - * 'raise' will raise a NonExistentTimeError if a timestamp is not - valid in the specified timezone (e.g. due to a transition from - or to DST time). Use ``nonexistent='raise'`` instead. - * 'coerce' will return NaT if the timestamp can not be converted - into the specified timezone. Use ``nonexistent='NaT'`` instead. - - .. deprecated:: 0.24.0 Returns ------- diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 08e504ada789e7..e7dc911ff0baee 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -753,8 +753,7 @@ timedelta}, default 'raise' # GH#21336, GH#21365 return Timedelta(nanoseconds=1) - def tz_localize(self, tz, ambiguous='raise', nonexistent='raise', - errors=None): + def tz_localize(self, tz, ambiguous='raise', nonexistent='raise'): """ Convert naive Timestamp to local time zone, or remove timezone from tz-aware Timestamp. @@ -797,18 +796,6 @@ default 'raise' nonexistent times. .. versionadded:: 0.24.0 - errors : 'raise', 'coerce', default None - Determine how errors should be handled. - - The behavior is as follows: - - * 'raise' will raise a NonExistentTimeError if a timestamp is not - valid in the specified timezone (e.g. due to a transition from - or to DST time). Use ``nonexistent='raise'`` instead. - * 'coerce' will return NaT if the timestamp can not be converted - into the specified timezone. Use ``nonexistent='NaT'`` instead. - - .. 
deprecated:: 0.24.0 Returns ------- @@ -822,19 +809,6 @@ default 'raise' if ambiguous == 'infer': raise ValueError('Cannot infer offset with only one time.') - if errors is not None: - warnings.warn("The errors argument is deprecated and will be " - "removed in a future release. Use " - "nonexistent='NaT' or nonexistent='raise' " - "instead.", FutureWarning) - if errors == 'coerce': - nonexistent = 'NaT' - elif errors == 'raise': - nonexistent = 'raise' - else: - raise ValueError("The errors argument must be either 'coerce' " - "or 'raise'.") - nonexistent_options = ('raise', 'NaT', 'shift_forward', 'shift_backward') if nonexistent not in nonexistent_options and not isinstance( diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 07cbaa8cd1eb6e..47f236c19ffe7e 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -955,7 +955,7 @@ def tz_convert(self, tz): dtype = tz_to_dtype(tz) return self._simple_new(self.asi8, dtype=dtype, freq=self.freq) - def tz_localize(self, tz, ambiguous="raise", nonexistent="raise", errors=None): + def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): """ Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index. @@ -1004,17 +1004,6 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise", errors=None): .. versionadded:: 0.24.0 - errors : {'raise', 'coerce'}, default None - The method to handle errors: - - - 'raise' will raise a NonExistentTimeError if a timestamp is not - valid in the specified time zone (e.g. due to a transition from - or to DST time). Use ``nonexistent='raise'`` instead. - - 'coerce' will return NaT if the timestamp can not be converted - to the specified time zone. Use ``nonexistent='NaT'`` instead. - - .. 
deprecated:: 0.24.0 - Returns ------- Same type as self @@ -1105,23 +1094,6 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise", errors=None): 1 2015-03-29 03:30:00+02:00 dtype: datetime64[ns, 'Europe/Warsaw'] """ - if errors is not None: - warnings.warn( - "The errors argument is deprecated and will be " - "removed in a future release. Use " - "nonexistent='NaT' or nonexistent='raise' " - "instead.", - FutureWarning, - ) - if errors == "coerce": - nonexistent = "NaT" - elif errors == "raise": - nonexistent = "raise" - else: - raise ValueError( - "The errors argument must be either 'coerce' or 'raise'." - ) - nonexistent_options = ("raise", "NaT", "shift_forward", "shift_backward") if nonexistent not in nonexistent_options and not isinstance( nonexistent, timedelta diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 059dbb00019d8e..3f942f9b794287 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -323,13 +323,9 @@ def test_dti_tz_localize_nonexistent_raise_coerce(self): index.tz_localize(tz=tz) with pytest.raises(pytz.NonExistentTimeError): - with tm.assert_produces_warning(FutureWarning): - index.tz_localize(tz=tz, errors="raise") + index.tz_localize(tz=tz, nonexistent="raise") - with tm.assert_produces_warning( - FutureWarning, clear=FutureWarning, check_stacklevel=False - ): - result = index.tz_localize(tz=tz, errors="coerce") + result = index.tz_localize(tz=tz, nonexistent="NaT") test_times = ["2015-03-08 01:00-05:00", "NaT", "2015-03-08 03:00-04:00"] dti = to_datetime(test_times, utc=True) expected = dti.tz_convert("US/Eastern") @@ -704,20 +700,6 @@ def test_dti_tz_localize_nonexistent_shift_invalid(self, offset, tz_type): with pytest.raises(ValueError, match=msg): dti.tz_localize(tz, nonexistent=timedelta(seconds=offset)) - @pytest.mark.filterwarnings("ignore::FutureWarning") - def 
test_dti_tz_localize_errors_deprecation(self): - # GH 22644 - tz = "Europe/Warsaw" - n = 60 - dti = date_range(start="2015-03-29 02:00:00", periods=n, freq="min") - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - with pytest.raises(ValueError): - dti.tz_localize(tz, errors="foo") - # make sure errors='coerce' gets mapped correctly to nonexistent - result = dti.tz_localize(tz, errors="coerce") - expected = dti.tz_localize(tz, nonexistent="NaT") - tm.assert_index_equal(result, expected) - # ------------------------------------------------------------- # DatetimeIndex.normalize diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py index 250f48b7e711b7..6537f6ccd8432c 100644 --- a/pandas/tests/scalar/timestamp/test_timezones.py +++ b/pandas/tests/scalar/timestamp/test_timezones.py @@ -14,7 +14,6 @@ import pandas.util._test_decorators as td from pandas import NaT, Timestamp -import pandas.util.testing as tm class TestTimestampTZOperations: @@ -80,7 +79,6 @@ def test_tz_localize_ambiguous(self): ("2015-03-29 02:30", "Europe/Belgrade"), ], ) - @pytest.mark.filterwarnings("ignore::FutureWarning") def test_tz_localize_nonexistent(self, stamp, tz): # GH#13057 ts = Timestamp(stamp) @@ -88,36 +86,21 @@ def test_tz_localize_nonexistent(self, stamp, tz): ts.tz_localize(tz) # GH 22644 with pytest.raises(NonExistentTimeError): - with tm.assert_produces_warning(FutureWarning): - ts.tz_localize(tz, errors="raise") - with tm.assert_produces_warning(FutureWarning): - assert ts.tz_localize(tz, errors="coerce") is NaT + ts.tz_localize(tz, nonexistent="raise") + assert ts.tz_localize(tz, nonexistent="NaT") is NaT - def test_tz_localize_errors_ambiguous(self): + def test_tz_localize_ambiguous_raise(self): # GH#13057 ts = Timestamp("2015-11-1 01:00") with pytest.raises(AmbiguousTimeError): - with tm.assert_produces_warning(FutureWarning): - ts.tz_localize("US/Pacific", errors="coerce") + 
ts.tz_localize("US/Pacific", ambiguous="raise") - @pytest.mark.filterwarnings("ignore::FutureWarning") - def test_tz_localize_errors_invalid_arg(self): + def test_tz_localize_nonexistent_invalid_arg(self): # GH 22644 tz = "Europe/Warsaw" ts = Timestamp("2015-03-29 02:00:00") with pytest.raises(ValueError): - with tm.assert_produces_warning(FutureWarning): - ts.tz_localize(tz, errors="foo") - - def test_tz_localize_errors_coerce(self): - # GH 22644 - # make sure errors='coerce' gets mapped correctly to nonexistent - tz = "Europe/Warsaw" - ts = Timestamp("2015-03-29 02:00:00") - with tm.assert_produces_warning(FutureWarning): - result = ts.tz_localize(tz, errors="coerce") - expected = ts.tz_localize(tz, nonexistent="NaT") - assert result is expected + ts.tz_localize(tz, nonexistent="foo") @pytest.mark.parametrize( "stamp", diff --git a/pandas/tests/series/test_timezones.py b/pandas/tests/series/test_timezones.py index c16e2864b131f0..c03101265f7e7a 100644 --- a/pandas/tests/series/test_timezones.py +++ b/pandas/tests/series/test_timezones.py @@ -33,21 +33,6 @@ def test_series_tz_localize(self): with pytest.raises(TypeError, match="Already tz-aware"): ts.tz_localize("US/Eastern") - @pytest.mark.filterwarnings("ignore::FutureWarning") - def test_tz_localize_errors_deprecation(self): - # GH 22644 - tz = "Europe/Warsaw" - n = 60 - rng = date_range(start="2015-03-29 02:00:00", periods=n, freq="min") - ts = Series(rng) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - with pytest.raises(ValueError): - ts.dt.tz_localize(tz, errors="foo") - # make sure errors='coerce' gets mapped correctly to nonexistent - result = ts.dt.tz_localize(tz, errors="coerce") - expected = ts.dt.tz_localize(tz, nonexistent="NaT") - tm.assert_series_equal(result, expected) - def test_series_tz_localize_ambiguous_bool(self): # make sure that we are correctly accepting bool values as ambiguous From f98d2b6587b74c9a640b062d94911b199d962119 Mon Sep 17 00:00:00 2001 From: Tang 
Heyi Date: Sat, 30 Nov 2019 01:51:04 +0800 Subject: [PATCH 10/24] BUG: .count() raises if use_inf_as_na is enabled (#29888) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/dtypes/missing.py | 2 +- pandas/tests/series/test_analytics.py | 4 ++++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 1468718b161706..14f36a808c468b 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -682,6 +682,7 @@ Other - Bug in :meth:`DataFrame.append` that raised ``IndexError`` when appending with empty list (:issue:`28769`) - Fix :class:`AbstractHolidayCalendar` to return correct results for years after 2030 (now goes up to 2200) (:issue:`27790`) +- Bug in :meth:`Series.count` raising if the ``use_inf_as_na`` option is enabled (:issue:`29478`) .. _whatsnew_1000.contributors: diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index cb4199272f5740..205ca193636c65 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -176,7 +176,7 @@ def _isna_old(obj): raise NotImplementedError("isna is not defined for MultiIndex") elif isinstance(obj, type): return False - elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass)): + elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass, ABCExtensionArray)): return _isna_ndarraylike_old(obj) elif isinstance(obj, ABCGeneric): return obj._constructor(obj._data.isna(func=_isna_old)) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index e25c4456147f77..fe9306a06efc72 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -554,6 +554,10 @@ def test_count(self, datetime_series): ts.iloc[[0, 3, 5]] = np.nan tm.assert_series_equal(ts.count(level=1), right - 1) + # GH29478 + with pd.option_context("use_inf_as_na", True): + assert pd.Series([pd.Timestamp("1990/1/1")]).count() == 1 + def test_dot(self): a = 
Series(np.random.randn(4), index=["p", "q", "r", "s"]) b = DataFrame( From 774fe7b30572883c5a49d9b36e3dacaa0fa20994 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 29 Nov 2019 13:07:55 -0800 Subject: [PATCH 11/24] BUG/DEPR: Timestamp/Timedelta resolution (#29910) * BUG/DEPR: Timestamp/Timedelta resolution * GH ref --- doc/source/whatsnew/v1.0.0.rst | 2 + pandas/_libs/tslibs/timedeltas.pyx | 51 +------------------ pandas/_libs/tslibs/timestamps.pyx | 10 +--- .../tests/scalar/timedelta/test_timedelta.py | 10 ++-- .../tests/scalar/timestamp/test_timestamp.py | 4 ++ 5 files changed, 15 insertions(+), 62 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 14f36a808c468b..bb4a24e1b4eb55 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -457,6 +457,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - Removed previously deprecated keyword "n" from :meth:`DatetimeIndex.shift`, :meth:`TimedeltaIndex.shift`, :meth:`PeriodIndex.shift`, use "periods" instead (:issue:`22458`) - Changed the default value for the `raw` argument in :func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `, - :func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` to ``False`` (:issue:`20584`) +- Changed :meth:`Timedelta.resolution` to match the behavior of the standard library ``datetime.timedelta.resolution``. For the old behavior, use :meth:`Timedelta.resolution_string` (:issue:`26839`) - Removed previously deprecated :attr:`Timestamp.weekday_name`, :attr:`DatetimeIndex.weekday_name`, and :attr:`Series.dt.weekday_name` (:issue:`18164`) - Removed previously deprecated ``errors`` argument in :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` (:issue:`22644`) - @@ -516,6 +517,7 @@ Datetimelike - Bug in :func:`pandas._config.localization.get_locales` where the ``locales -a`` encodes the locales list as 
windows-1252 (:issue:`23638`, :issue:`24760`, :issue:`27368`) - Bug in :meth:`Series.var` failing to raise ``TypeError`` when called with ``timedelta64[ns]`` dtype (:issue:`28289`) - Bug in :meth:`DatetimeIndex.strftime` and :meth:`Series.dt.strftime` where ``NaT`` was converted to the string ``'NaT'`` instead of ``np.nan`` (:issue:`29578`) +- Bug in :attr:`Timestamp.resolution` being a property instead of a class attribute (:issue:`29910`) Timedelta ^^^^^^^^^ diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 48a2a05011ab55..726d664c1ebea3 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1005,56 +1005,6 @@ cdef class _Timedelta(timedelta): else: return "D" - @property - def resolution(self): - """ - Return a string representing the lowest timedelta resolution. - - Each timedelta has a defined resolution that represents the lowest OR - most granular level of precision. Each level of resolution is - represented by a short string as defined below: - - Resolution: Return value - - * Days: 'D' - * Hours: 'H' - * Minutes: 'T' - * Seconds: 'S' - * Milliseconds: 'L' - * Microseconds: 'U' - * Nanoseconds: 'N' - - Returns - ------- - str - Timedelta resolution. - - Examples - -------- - >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns') - >>> td.resolution - 'N' - - >>> td = pd.Timedelta('1 days 2 min 3 us') - >>> td.resolution - 'U' - - >>> td = pd.Timedelta('2 min 3 s') - >>> td.resolution - 'S' - - >>> td = pd.Timedelta(36, unit='us') - >>> td.resolution - 'U' - """ - # See GH#21344 - warnings.warn("Timedelta.resolution is deprecated, in a future " - "version will behave like the standard library " - "datetime.timedelta.resolution attribute. 
" - "Use Timedelta.resolution_string instead.", - FutureWarning) - return self.resolution_string - @property def nanoseconds(self): """ @@ -1602,3 +1552,4 @@ cdef _broadcast_floordiv_td64(int64_t value, object other, # resolution in ns Timedelta.min = Timedelta(np.iinfo(np.int64).min + 1) Timedelta.max = Timedelta(np.iinfo(np.int64).max) +Timedelta.resolution = Timedelta(nanoseconds=1) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index e7dc911ff0baee..fbe71a0a6d1985 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -744,15 +744,6 @@ timedelta}, default 'raise' """ return bool(ccalendar.is_leapyear(self.year)) - @property - def resolution(self): - """ - Return resolution describing the smallest difference between two - times that can be represented by Timestamp object_state. - """ - # GH#21336, GH#21365 - return Timedelta(nanoseconds=1) - def tz_localize(self, tz, ambiguous='raise', nonexistent='raise'): """ Convert naive Timestamp to local time zone, or remove @@ -1036,3 +1027,4 @@ cdef int64_t _NS_LOWER_BOUND = -9223372036854775000 # Resolution is in nanoseconds Timestamp.min = Timestamp(_NS_LOWER_BOUND) Timestamp.max = Timestamp(_NS_UPPER_BOUND) +Timestamp.resolution = Timedelta(nanoseconds=1) # GH#21336, GH#21365 diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index d4881ff0e1747a..5a5724401029c7 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -804,9 +804,13 @@ def test_resolution_string(self): def test_resolution_deprecated(self): # GH#21344 td = Timedelta(days=4, hours=3) - with tm.assert_produces_warning(FutureWarning) as w: - td.resolution - assert "Use Timedelta.resolution_string instead" in str(w[0].message) + result = td.resolution + assert result == Timedelta(nanoseconds=1) + + # Check that the attribute is available on the class, mirroring + 
# the stdlib timedelta behavior + result = Timedelta.resolution + assert result == Timedelta(nanoseconds=1) @pytest.mark.parametrize( diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index a33afc8b3ccca9..d6251ffc7940d1 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -192,6 +192,10 @@ def test_resolution(self): dt = Timestamp("2100-01-01 00:00:00") assert dt.resolution == Timedelta(nanoseconds=1) + # Check that the attribute is available on the class, mirroring + # the stdlib datetime behavior + assert Timestamp.resolution == Timedelta(nanoseconds=1) + class TestTimestampConstructors: def test_constructor(self): From 7e9da33f346a3328632fba5441a9b4524f3aa17c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 29 Nov 2019 14:27:06 -0800 Subject: [PATCH 12/24] DEPR: passing a dtype alias to DatetimeTZDtype(unit) (#29927) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/dtypes/dtypes.py | 2 +- pandas/tests/dtypes/test_dtypes.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index bb4a24e1b4eb55..b5f8e8ca4a1264 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -427,6 +427,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - Removed the previously deprecated :meth:`MultiIndex.set_labels`, use :meth:`MultiIndex.set_codes` instead (:issue:`23752`) - Removed the previously deprecated "labels" keyword from :meth:`MultiIndex.set_codes`, :meth:`MultiIndex.copy`, :meth:`MultiIndex.drop`, use "codes" instead (:issue:`23752`) - Removed support for legacy HDF5 formats (:issue:`29787`) +- Passing a dtype alias (e.g. 
'datetime64[ns, UTC]') to :class:`DatetimeTZDtype` is no longer allowed, use :meth:`DatetimeTZDtype.construct_from_string` instead (:issue:`23990`) - :func:`read_excel` removed support for "skip_footer" argument, use "skipfooter" instead (:issue:`18836`) - :func:`read_excel` no longer allows an integer value for the parameter ``usecols``, instead pass a list of integers from 0 to ``usecols`` inclusive (:issue:`23635`) - :meth:`DataFrame.to_records` no longer supports the argument "convert_datetime64" (:issue:`18902`) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index fb3097684f0c35..94147864242452 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -676,7 +676,7 @@ def __init__(self, unit="ns", tz=None): "to DatetimeTZDtype is deprecated. Use " "'DatetimeTZDtype.construct_from_string()' instead." ) - warnings.warn(msg.format(tz=tz), FutureWarning, stacklevel=2) + raise ValueError(msg) else: raise ValueError("DatetimeTZDtype only supports ns units") diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index fc896e6a9d348e..bbf44006611fba 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -187,7 +187,7 @@ def create(self): def test_alias_to_unit_raises(self): # 23990 - with tm.assert_produces_warning(FutureWarning): + with pytest.raises(ValueError, match="Passing a dtype alias"): DatetimeTZDtype("datetime64[ns, US/Central]") def test_alias_to_unit_bad_alias_raises(self): From 8ffecdc729d6a5e1ef636034200d5ebc6cc32b06 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Fri, 29 Nov 2019 22:36:09 +0000 Subject: [PATCH 13/24] CI: Highlighting flake8 and grep errors in GitHub Actions (#29915) --- ci/code_checks.sh | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 7c6c98d910492e..4e25fd0ddb5ea9 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -34,17 +34,13 
@@ function invgrep { # # This is useful for the CI, as we want to fail if one of the patterns # that we want to avoid is found by grep. - if [[ "$AZURE" == "true" ]]; then - set -o pipefail - grep -n "$@" | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Found unwanted pattern: " $3}' - else - grep "$@" - fi - return $((! $?)) + grep -n "$@" | sed "s/^/$INVGREP_PREPEND/" | sed "s/$/$INVGREP_APPEND/" ; EXIT_STATUS=${PIPESTATUS[0]} + return $((! $EXIT_STATUS)) } -if [[ "$AZURE" == "true" ]]; then - FLAKE8_FORMAT="##vso[task.logissue type=error;sourcepath=%(path)s;linenumber=%(row)s;columnnumber=%(col)s;code=%(code)s;]%(text)s" +if [[ "$GITHUB_ACTIONS" == "true" ]]; then + FLAKE8_FORMAT="##[error]%(path)s:%(row)s:%(col)s:%(code):%(text)s" + INVGREP_PREPEND="##[error]" else FLAKE8_FORMAT="default" fi @@ -199,14 +195,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG - set -o pipefail - if [[ "$AZURE" == "true" ]]; then - # we exclude all c/cpp files as the c/cpp files of pandas code base are tested when Linting .c and .h files - ! grep -n '--exclude=*.'{svg,c,cpp,html,js} --exclude-dir=env -RI "\s$" * | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Tailing whitespaces found: " $3}' - else - ! 
grep -n '--exclude=*.'{svg,c,cpp,html,js} --exclude-dir=env -RI "\s$" * | awk -F ":" '{print $1 ":" $2 ":Tailing whitespaces found: " $3}' - fi + INVGREP_APPEND=" <- trailing whitespaces found" + invgrep -RI --exclude=\*.{svg,c,cpp,html,js} --exclude-dir=env "\s$" * RET=$(($RET + $?)) ; echo $MSG "DONE" + unset INVGREP_APPEND fi ### CODE ### From 7cf189d13a4681f6ebe30224dbf45e3278e1d60e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 29 Nov 2019 14:36:49 -0800 Subject: [PATCH 14/24] DEPR: CategoricalBlock.where casting to object (#29913) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/internals/blocks.py | 31 ------------------- .../tests/arrays/categorical/test_indexing.py | 17 ++++------ 3 files changed, 7 insertions(+), 42 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index b5f8e8ca4a1264..052d58f1487fe7 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -418,6 +418,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - Removed :meth:`DataFrame.as_blocks`, :meth:`Series.as_blocks`, `DataFrame.blocks`, :meth:`Series.blocks` (:issue:`17656`) - :meth:`pandas.Series.str.cat` now defaults to aligning ``others``, using ``join='left'`` (:issue:`27611`) - :meth:`pandas.Series.str.cat` does not accept list-likes *within* list-likes anymore (:issue:`27611`) +- :meth:`Series.where` with ``Categorical`` dtype (or :meth:`DataFrame.where` with ``Categorical`` column) no longer allows setting new categories (:issue:`24114`) - :func:`core.internals.blocks.make_block` no longer accepts the "fastpath" keyword(:issue:`19265`) - :meth:`Block.make_block_same_class` no longer accepts the "dtype" keyword(:issue:`19434`) - Removed the previously deprecated :meth:`ExtensionArray._formatting_values`. Use :attr:`ExtensionArray._formatter` instead. 
(:issue:`23601`) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index e4de1c94da4509..b0382755f2edbc 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2887,37 +2887,6 @@ def concat_same_type(self, to_concat, placement=None): values, placement=placement or slice(0, len(values), 1), ndim=self.ndim ) - def where( - self, - other, - cond, - align=True, - errors="raise", - try_cast: bool = False, - axis: int = 0, - ) -> List["Block"]: - # TODO(CategoricalBlock.where): - # This can all be deleted in favor of ExtensionBlock.where once - # we enforce the deprecation. - object_msg = ( - "Implicitly converting categorical to object-dtype ndarray. " - "One or more of the values in 'other' are not present in this " - "categorical's categories. A future version of pandas will raise " - "a ValueError when 'other' contains different categories.\n\n" - "To preserve the current behavior, add the new categories to " - "the categorical before calling 'where', or convert the " - "categorical to a different dtype." - ) - try: - # Attempt to do preserve categorical dtype. 
- result = super().where(other, cond, align, errors, try_cast, axis) - except (TypeError, ValueError): - warnings.warn(object_msg, FutureWarning, stacklevel=6) - result = self.astype(object).where( - other, cond, align=align, errors=errors, try_cast=try_cast, axis=axis - ) - return result - def replace( self, to_replace, diff --git a/pandas/tests/arrays/categorical/test_indexing.py b/pandas/tests/arrays/categorical/test_indexing.py index 6edd7fd00b7073..f929eb24c9f194 100644 --- a/pandas/tests/arrays/categorical/test_indexing.py +++ b/pandas/tests/arrays/categorical/test_indexing.py @@ -206,13 +206,11 @@ def test_where_other_categorical(self): expected = pd.Series(Categorical(["a", "c", "c"], dtype=ser.dtype)) tm.assert_series_equal(result, expected) - def test_where_warns(self): + def test_where_new_category_raises(self): ser = pd.Series(Categorical(["a", "b", "c"])) - with tm.assert_produces_warning(FutureWarning): - result = ser.where([True, False, True], "d") - - expected = pd.Series(np.array(["a", "d", "c"], dtype="object")) - tm.assert_series_equal(result, expected) + msg = "Cannot setitem on a Categorical with a new category" + with pytest.raises(ValueError, match=msg): + ser.where([True, False, True], "d") def test_where_ordered_differs_rasies(self): ser = pd.Series( @@ -221,11 +219,8 @@ def test_where_ordered_differs_rasies(self): other = Categorical( ["b", "c", "a"], categories=["a", "c", "b", "d"], ordered=True ) - with tm.assert_produces_warning(FutureWarning): - result = ser.where([True, False, True], other) - - expected = pd.Series(np.array(["a", "c", "c"], dtype=object)) - tm.assert_series_equal(result, expected) + with pytest.raises(ValueError, match="without identical categories"): + ser.where([True, False, True], other) @pytest.mark.parametrize("index", [True, False]) From fe2117cf5c4354426baddbf29e082fc232714253 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 29 Nov 2019 14:39:27 -0800 Subject: [PATCH 15/24] DEPR: categorical.take 
allow_fill default (#29912) --- doc/source/whatsnew/v1.0.0.rst | 2 ++ pandas/core/arrays/categorical.py | 36 +++++++------------ pandas/tests/arrays/categorical/test_algos.py | 13 +++++-- 3 files changed, 24 insertions(+), 27 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 052d58f1487fe7..d66d165bf71256 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -365,6 +365,7 @@ Deprecations is equivalent to ``arr[idx.get_loc(idx_val)] = val``, which should be used instead (:issue:`28621`). - :func:`is_extension_type` is deprecated, :func:`is_extension_array_dtype` should be used instead (:issue:`29457`) - :func:`eval` keyword argument "truediv" is deprecated and will be removed in a future version (:issue:`29812`) +- :meth:`Categorical.take_nd` is deprecated, use :meth:`Categorical.take` instead (:issue:`27745`) .. _whatsnew_1000.prior_deprecations: @@ -457,6 +458,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. 
- In :func:`concat` the default value for ``sort`` has been changed from ``None`` to ``False`` (:issue:`20613`) - Removed previously deprecated "raise_conflict" argument from :meth:`DataFrame.update`, use "errors" instead (:issue:`23585`) - Removed previously deprecated keyword "n" from :meth:`DatetimeIndex.shift`, :meth:`TimedeltaIndex.shift`, :meth:`PeriodIndex.shift`, use "periods" instead (:issue:`22458`) +- Changed the default ``fill_value`` in :meth:`Categorical.take` from ``True`` to ``False`` (:issue:`20841`) - Changed the default value for the `raw` argument in :func:`Series.rolling().apply() `, :func:`DataFrame.rolling().apply() `, - :func:`Series.expanding().apply() `, and :func:`DataFrame.expanding().apply() ` to ``False`` (:issue:`20584`) - Changed :meth:`Timedelta.resolution` to match the behavior of the standard library ``datetime.timedelta.resolution``, for the old behavior, use :meth:`Timedelta.resolution_string` (:issue:`26839`) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index f20308be1ee095..bb4ceea420d8de 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1,6 +1,5 @@ import operator from shutil import get_terminal_size -import textwrap from typing import Type, Union, cast from warnings import warn @@ -59,18 +58,6 @@ from .base import ExtensionArray, _extension_array_shared_docs, try_cast_to_ea -_take_msg = textwrap.dedent( - """\ - Interpreting negative values in 'indexer' as missing values. - In the future, this will change to meaning positional indices - from the right. - - Use 'allow_fill=True' to retain the previous behavior and silence this - warning. 
- - Use 'allow_fill=False' to accept the new behavior.""" -) - def _cat_compare_op(op): opname = f"__{op.__name__}__" @@ -1829,7 +1816,7 @@ def fillna(self, value=None, method=None, limit=None): return self._constructor(codes, dtype=self.dtype, fastpath=True) - def take_nd(self, indexer, allow_fill=None, fill_value=None): + def take(self, indexer, allow_fill: bool = False, fill_value=None): """ Take elements from the Categorical. @@ -1838,7 +1825,7 @@ def take_nd(self, indexer, allow_fill=None, fill_value=None): indexer : sequence of int The indices in `self` to take. The meaning of negative values in `indexer` depends on the value of `allow_fill`. - allow_fill : bool, default None + allow_fill : bool, default False How to handle negative values in `indexer`. * False: negative values in `indices` indicate positional indices @@ -1849,11 +1836,9 @@ def take_nd(self, indexer, allow_fill=None, fill_value=None): (the default). These values are set to `fill_value`. Any other other negative values raise a ``ValueError``. - .. versionchanged:: 0.23.0 + .. versionchanged:: 1.0.0 - Deprecated the default value of `allow_fill`. The deprecated - default is ``True``. In the future, this will change to - ``False``. + Default value changed from ``True`` to ``False``. fill_value : object The value to use for `indices` that are missing (-1), when @@ -1903,10 +1888,6 @@ def take_nd(self, indexer, allow_fill=None, fill_value=None): will raise a ``TypeError``. 
""" indexer = np.asarray(indexer, dtype=np.intp) - if allow_fill is None: - if (indexer < 0).any(): - warn(_take_msg, FutureWarning, stacklevel=2) - allow_fill = True dtype = self.dtype @@ -1927,7 +1908,14 @@ def take_nd(self, indexer, allow_fill=None, fill_value=None): result = type(self).from_codes(codes, dtype=dtype) return result - take = take_nd + def take_nd(self, indexer, allow_fill: bool = False, fill_value=None): + # GH#27745 deprecate alias that other EAs dont have + warn( + "Categorical.take_nd is deprecated, use Categorical.take instead", + FutureWarning, + stacklevel=2, + ) + return self.take(indexer, allow_fill=allow_fill, fill_value=fill_value) def __len__(self) -> int: """ diff --git a/pandas/tests/arrays/categorical/test_algos.py b/pandas/tests/arrays/categorical/test_algos.py index e076015c5f61dd..dce3c4e4d5e982 100644 --- a/pandas/tests/arrays/categorical/test_algos.py +++ b/pandas/tests/arrays/categorical/test_algos.py @@ -89,10 +89,12 @@ def test_isin_empty(empty): class TestTake: # https://github.com/pandas-dev/pandas/issues/20664 - def test_take_warns(self): + def test_take_default_allow_fill(self): cat = pd.Categorical(["a", "b"]) - with tm.assert_produces_warning(FutureWarning): - cat.take([0, -1]) + with tm.assert_produces_warning(None): + result = cat.take([0, -1]) + + assert result.equals(cat) def test_take_positive_no_warning(self): cat = pd.Categorical(["a", "b"]) @@ -158,3 +160,8 @@ def test_take_fill_value_new_raises(self): xpr = r"'fill_value' \('d'\) is not in this Categorical's categories." 
with pytest.raises(TypeError, match=xpr): cat.take([0, 1, -1], fill_value="d", allow_fill=True) + + def test_take_nd_deprecated(self): + cat = pd.Categorical(["a", "b", "c"]) + with tm.assert_produces_warning(FutureWarning): + cat.take_nd([0, 1]) From 00561438b9b7deda2df99d8ae591cf72f3eb7955 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Fri, 29 Nov 2019 22:50:19 +0000 Subject: [PATCH 16/24] CI: Making benchmark errors easier to find (#29907) --- .github/workflows/ci.yml | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b689da8e39ff08..f68080d05bea67 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -80,15 +80,18 @@ jobs: git fetch upstream if git diff upstream/master --name-only | grep -q "^asv_bench/"; then asv machine --yes - ASV_OUTPUT="$(asv dev)" - if [[ $(echo "$ASV_OUTPUT" | grep "failed") ]]; then - echo "##vso[task.logissue type=error]Benchmarks run with errors" - echo "$ASV_OUTPUT" + asv dev | sed "/failed$/ s/^/##[error]/" | tee benchmarks.log + if grep "failed" benchmarks.log > /dev/null ; then exit 1 - else - echo "Benchmarks run without errors" fi else echo "Benchmarks did not run, no changes detected" fi if: true + + - name: Publish benchmarks artifact + uses: actions/upload-artifact@master + with: + name: Benchmarks log + path: asv_bench/benchmarks.log + if: failure() From 3a374643e127159c065f0f83d9aca1c4f2c471f8 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 29 Nov 2019 22:59:10 +0000 Subject: [PATCH 17/24] TYP: some types for pandas/_config/config.py (#29897) --- pandas/_config/config.py | 59 ++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 32 deletions(-) diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 814f855cceeac5..8f75d0381c1a64 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -51,7 +51,7 @@ from collections import namedtuple from contextlib import 
contextmanager import re -from typing import Dict, List +from typing import Any, Dict, Iterable, List import warnings DeprecatedOption = namedtuple("DeprecatedOption", "key msg rkey removal_ver") @@ -64,7 +64,7 @@ _registered_options: Dict[str, RegisteredOption] = {} # holds the current values for registered options -_global_config: Dict[str, str] = {} +_global_config: Dict[str, Any] = {} # keys which have a special meaning _reserved_keys: List[str] = ["all"] @@ -85,7 +85,7 @@ def _get_single_key(pat, silent): if len(keys) == 0: if not silent: _warn_if_deprecated(pat) - raise OptionError("No such keys(s): {pat!r}".format(pat=pat)) + raise OptionError(f"No such keys(s): {repr(pat)}") if len(keys) > 1: raise OptionError("Pattern matched multiple keys") key = keys[0] @@ -116,8 +116,8 @@ def _set_option(*args, **kwargs): silent = kwargs.pop("silent", False) if kwargs: - msg = '_set_option() got an unexpected keyword argument "{kwarg}"' - raise TypeError(msg.format(list(kwargs.keys())[0])) + kwarg = list(kwargs.keys())[0] + raise TypeError(f'_set_option() got an unexpected keyword argument "{kwarg}"') for k, v in zip(args[::2], args[1::2]): key = _get_single_key(k, silent) @@ -412,7 +412,7 @@ def __exit__(self, *args): _set_option(pat, val, silent=True) -def register_option(key, defval, doc="", validator=None, cb=None): +def register_option(key: str, defval: object, doc="", validator=None, cb=None): """Register an option in the package-wide pandas config object Parameters @@ -441,11 +441,9 @@ def register_option(key, defval, doc="", validator=None, cb=None): key = key.lower() if key in _registered_options: - msg = "Option '{key}' has already been registered" - raise OptionError(msg.format(key=key)) + raise OptionError(f"Option '{key}' has already been registered") if key in _reserved_keys: - msg = "Option '{key}' is a reserved key" - raise OptionError(msg.format(key=key)) + raise OptionError(f"Option '{key}' is a reserved key") # the default value should be legal if 
validator: @@ -455,10 +453,12 @@ def register_option(key, defval, doc="", validator=None, cb=None): path = key.split(".") for k in path: - if not bool(re.match("^" + tokenize.Name + "$", k)): - raise ValueError("{k} is not a valid identifier".format(k=k)) + # NOTE: tokenize.Name is not a public constant + # error: Module has no attribute "Name" [attr-defined] + if not re.match("^" + tokenize.Name + "$", k): # type: ignore + raise ValueError(f"{k} is not a valid identifier") if keyword.iskeyword(k): - raise ValueError("{k} is a python keyword".format(k=k)) + raise ValueError(f"{k} is a python keyword") cursor = _global_config msg = "Path prefix to option '{option}' is already an option" @@ -522,8 +522,7 @@ def deprecate_option(key, msg=None, rkey=None, removal_ver=None): key = key.lower() if key in _deprecated_options: - msg = "Option '{key}' has already been defined as deprecated." - raise OptionError(msg.format(key=key)) + raise OptionError(f"Option '{key}' has already been defined as deprecated.") _deprecated_options[key] = DeprecatedOption(key, msg, rkey, removal_ver) @@ -621,11 +620,11 @@ def _warn_if_deprecated(key): print(d.msg) warnings.warn(d.msg, FutureWarning) else: - msg = "'{key}' is deprecated".format(key=key) + msg = f"'{key}' is deprecated" if d.removal_ver: - msg += " and will be removed in {version}".format(version=d.removal_ver) + msg += f" and will be removed in {d.removal_ver}" if d.rkey: - msg += ", please use '{rkey}' instead.".format(rkey=d.rkey) + msg += f", please use '{d.rkey}' instead." else: msg += ", please refrain from using it." @@ -640,7 +639,7 @@ def _build_option_description(k): o = _get_registered_option(k) d = _get_deprecated_option(k) - s = "{k} ".format(k=k) + s = f"{k} " if o.doc: s += "\n".join(o.doc.strip().split("\n")) @@ -648,9 +647,7 @@ def _build_option_description(k): s += "No description available." 
if o: - s += "\n [default: {default}] [currently: {current}]".format( - default=o.defval, current=_get_option(k, True) - ) + s += f"\n [default: {o.defval}] [currently: {_get_option(k, True)}]" if d: s += "\n (Deprecated" @@ -666,7 +663,7 @@ def pp_options_list(keys, width=80, _print=False): from textwrap import wrap from itertools import groupby - def pp(name, ks): + def pp(name: str, ks: Iterable[str]) -> List[str]: pfx = "- " + name + ".[" if name else "" ls = wrap( ", ".join(ks), @@ -679,7 +676,7 @@ def pp(name, ks): ls[-1] = ls[-1] + "]" return ls - ls = [] + ls: List[str] = [] singles = [x for x in sorted(keys) if x.find(".") < 0] if singles: ls += pp("", singles) @@ -731,7 +728,7 @@ def config_prefix(prefix): def wrap(func): def inner(key, *args, **kwds): - pkey = "{prefix}.{key}".format(prefix=prefix, key=key) + pkey = f"{prefix}.{key}" return func(pkey, *args, **kwds) return inner @@ -768,8 +765,7 @@ def is_type_factory(_type): def inner(x): if type(x) != _type: - msg = "Value must have type '{typ!s}'" - raise ValueError(msg.format(typ=_type)) + raise ValueError(f"Value must have type '{_type}'") return inner @@ -792,12 +788,11 @@ def is_instance_factory(_type): _type = tuple(_type) type_repr = "|".join(map(str, _type)) else: - type_repr = "'{typ}'".format(typ=_type) + type_repr = f"'{_type}'" def inner(x): if not isinstance(x, _type): - msg = "Value must be an instance of {type_repr}" - raise ValueError(msg.format(type_repr=type_repr)) + raise ValueError(f"Value must be an instance of {type_repr}") return inner @@ -813,10 +808,10 @@ def inner(x): if not any(c(x) for c in callables): uvals = [str(lval) for lval in legal_values] pp_values = "|".join(uvals) - msg = "Value must be one of {pp_values}" + msg = f"Value must be one of {pp_values}" if len(callables): msg += " or a callable" - raise ValueError(msg.format(pp_values=pp_values)) + raise ValueError(msg) return inner From 273dd40f4f8b75d12df6aa53da5de48df44e6bfd Mon Sep 17 00:00:00 2001 From: 
jbrockmendel Date: Fri, 29 Nov 2019 15:06:22 -0800 Subject: [PATCH 18/24] TYP: io.pytables annotations (#29861) --- pandas/io/pytables.py | 66 +++++++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 25 deletions(-) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index fb63bdcaaa8760..dae16dfdef01fc 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -53,7 +53,7 @@ from pandas.io.formats.printing import adjoin, pprint_thing if TYPE_CHECKING: - from tables import File # noqa:F401 + from tables import File, Node # noqa:F401 # versioning attribute @@ -244,7 +244,7 @@ def to_hdf( key, value, mode=None, - complevel=None, + complevel: Optional[int] = None, complib=None, append=None, **kwargs, @@ -459,12 +459,14 @@ class HDFStore: """ _handle: Optional["File"] + _complevel: int + _fletcher32: bool def __init__( self, path, mode=None, - complevel=None, + complevel: Optional[int] = None, complib=None, fletcher32: bool = False, **kwargs, @@ -526,7 +528,7 @@ def __getattr__(self, name: str): f"'{type(self).__name__}' object has no attribute '{name}'" ) - def __contains__(self, key: str): + def __contains__(self, key: str) -> bool: """ check for existence of this key can match the exact pathname or the pathnm w/o the leading '/' """ @@ -1267,18 +1269,22 @@ def walk(self, where="/"): yield (g._v_pathname.rstrip("/"), groups, leaves) - def get_node(self, key: str): + def get_node(self, key: str) -> Optional["Node"]: """ return the node with the key or None if it does not exist """ self._check_if_open() if not key.startswith("/"): key = "/" + key assert self._handle is not None + assert _table_mod is not None # for mypy try: - return self._handle.get_node(self.root, key) - except _table_mod.exceptions.NoSuchNodeError: # type: ignore + node = self._handle.get_node(self.root, key) + except _table_mod.exceptions.NoSuchNodeError: return None + assert isinstance(node, _table_mod.Node), type(node) + return node + def get_storer(self, key: 
str) -> Union["GenericFixed", "Table"]: """ return the storer object for a key, raise if not in the file """ group = self.get_node(key) @@ -1296,7 +1302,7 @@ def copy( propindexes: bool = True, keys=None, complib=None, - complevel=None, + complevel: Optional[int] = None, fletcher32: bool = False, overwrite=True, ): @@ -1387,7 +1393,9 @@ def info(self) -> str: return output - # private methods ###### + # ------------------------------------------------------------------------ + # private methods + def _check_if_open(self): if not self.is_open: raise ClosedFileError(f"{self._path} file is not open!") @@ -1559,7 +1567,7 @@ def _write_to_group( if isinstance(s, Table) and index: s.create_index(columns=index) - def _read_group(self, group, **kwargs): + def _read_group(self, group: "Node", **kwargs): s = self._create_storer(group) s.infer_axes() return s.read(**kwargs) @@ -1786,7 +1794,7 @@ def copy(self): new_self = copy.copy(self) return new_self - def infer(self, handler): + def infer(self, handler: "Table"): """infer this column from the table: create and return a new object""" table = handler.table new_self = self.copy() @@ -2499,9 +2507,16 @@ class Fixed: pandas_kind: str obj_type: Type[Union[DataFrame, Series]] ndim: int + parent: HDFStore + group: "Node" is_table = False - def __init__(self, parent, group, encoding=None, errors="strict", **kwargs): + def __init__( + self, parent: HDFStore, group: "Node", encoding=None, errors="strict", **kwargs + ): + assert isinstance(parent, HDFStore), type(parent) + assert _table_mod is not None # needed for mypy + assert isinstance(group, _table_mod.Node), type(group) self.parent = parent self.group = group self.encoding = _ensure_encoding(encoding) @@ -2568,11 +2583,11 @@ def _filters(self): return self.parent._filters @property - def _complevel(self): + def _complevel(self) -> int: return self.parent._complevel @property - def _fletcher32(self): + def _fletcher32(self) -> bool: return self.parent._fletcher32 @property @@ 
-2637,7 +2652,7 @@ def read( def write(self, **kwargs): raise NotImplementedError( - "cannot write on an abstract storer: sublcasses should implement" + "cannot write on an abstract storer: subclasses should implement" ) def delete( @@ -2803,7 +2818,7 @@ def write_index(self, key: str, index: Index): if isinstance(index, DatetimeIndex) and index.tz is not None: node._v_attrs.tz = _get_tz(index.tz) - def write_multi_index(self, key, index): + def write_multi_index(self, key: str, index: MultiIndex): setattr(self.attrs, f"{key}_nlevels", index.nlevels) for i, (lev, level_codes, name) in enumerate( @@ -2828,7 +2843,7 @@ def write_multi_index(self, key, index): label_key = f"{key}_label{i}" self.write_array(label_key, level_codes) - def read_multi_index(self, key, **kwargs) -> MultiIndex: + def read_multi_index(self, key: str, **kwargs) -> MultiIndex: nlevels = getattr(self.attrs, f"{key}_nlevels") levels = [] @@ -2849,7 +2864,7 @@ def read_multi_index(self, key, **kwargs) -> MultiIndex: ) def read_index_node( - self, node, start: Optional[int] = None, stop: Optional[int] = None + self, node: "Node", start: Optional[int] = None, stop: Optional[int] = None ): data = node[start:stop] # If the index was an empty array write_array_empty() will @@ -3310,7 +3325,7 @@ def values_cols(self) -> List[str]: """ return a list of my values cols """ return [i.cname for i in self.values_axes] - def _get_metadata_path(self, key) -> str: + def _get_metadata_path(self, key: str) -> str: """ return the metadata pathname for this key """ group = self.group._v_pathname return f"{group}/meta/{key}/meta" @@ -3877,10 +3892,10 @@ def process_filter(field, filt): def create_description( self, complib=None, - complevel=None, + complevel: Optional[int] = None, fletcher32: bool = False, expectedrows: Optional[int] = None, - ): + ) -> Dict[str, Any]: """ create the description of the table from the axes & values """ # provided expected rows if its passed @@ -4537,10 +4552,10 @@ def _set_tz(values, 
tz, preserve_UTC: bool = False, coerce: bool = False): return values -def _convert_index(name: str, index, encoding=None, errors="strict"): +def _convert_index(name: str, index: Index, encoding=None, errors="strict"): assert isinstance(name, str) - index_name = getattr(index, "name", None) + index_name = index.name if isinstance(index, DatetimeIndex): converted = index.asi8 @@ -4630,8 +4645,9 @@ def _convert_index(name: str, index, encoding=None, errors="strict"): ) -def _unconvert_index(data, kind, encoding=None, errors="strict"): - kind = _ensure_decoded(kind) +def _unconvert_index(data, kind: str, encoding=None, errors="strict"): + index: Union[Index, np.ndarray] + if kind == "datetime64": index = DatetimeIndex(data) elif kind == "timedelta64": From a91194e4eef4dfc78efbdb7dfa4dd6d5d90d92de Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 29 Nov 2019 15:07:40 -0800 Subject: [PATCH 19/24] DEPR: DataFrame.sort_index by keyword (#29931) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/frame.py | 12 ------ pandas/tests/frame/test_sorting.py | 68 ------------------------------ 3 files changed, 1 insertion(+), 80 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index d66d165bf71256..4279f949db1aad 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -409,6 +409,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - Removed the previously deprecated :meth:`Series.get_value`, :meth:`Series.set_value`, :meth:`DataFrame.get_value`, :meth:`DataFrame.set_value` (:issue:`17739`) - Changed the the default value of `inplace` in :meth:`DataFrame.set_index` and :meth:`Series.set_axis`. 
It now defaults to False (:issue:`27600`) - Removed the previously deprecated :attr:`Series.cat.categorical`, :attr:`Series.cat.index`, :attr:`Series.cat.name` (:issue:`24751`) +- Removed the previously deprecated "by" keyword from :meth:`DataFrame.sort_index`, use :meth:`DataFrame.sort_values` instead (:issue:`10726`) - Removed support for nested renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`18529`) - Passing ``datetime64`` data to :class:`TimedeltaIndex` or ``timedelta64`` data to ``DatetimeIndex`` now raises ``TypeError`` (:issue:`23539`, :issue:`23937`) - A tuple passed to :meth:`DataFrame.groupby` is now exclusively treated as a single key (:issue:`18314`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d2e396284c5a73..5dfa7002abfca4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4764,24 +4764,12 @@ def sort_index( kind="quicksort", na_position="last", sort_remaining=True, - by=None, ): # TODO: this can be combined with Series.sort_index impl as # almost identical inplace = validate_bool_kwarg(inplace, "inplace") - # 10726 - if by is not None: - warnings.warn( - "by argument to sort_index is deprecated, " - "please use .sort_values(by=...)", - FutureWarning, - stacklevel=2, - ) - if level is not None: - raise ValueError("unable to simultaneously sort by and level") - return self.sort_values(by, axis=axis, ascending=ascending, inplace=inplace) axis = self._get_axis_number(axis) labels = self._get_axis(axis) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index 6ed245b6ebb98e..64294d5cdcb81e 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -385,17 +385,11 @@ def test_sort_index_multicolumn(self): random.shuffle(B) frame = DataFrame({"A": A, "B": B, "C": np.random.randn(100)}) - # use .sort_values #9816 - with 
tm.assert_produces_warning(FutureWarning): - frame.sort_index(by=["A", "B"]) result = frame.sort_values(by=["A", "B"]) indexer = np.lexsort((frame["B"], frame["A"])) expected = frame.take(indexer) tm.assert_frame_equal(result, expected) - # use .sort_values #9816 - with tm.assert_produces_warning(FutureWarning): - frame.sort_index(by=["A", "B"], ascending=False) result = frame.sort_values(by=["A", "B"], ascending=False) indexer = np.lexsort( (frame["B"].rank(ascending=False), frame["A"].rank(ascending=False)) @@ -403,9 +397,6 @@ def test_sort_index_multicolumn(self): expected = frame.take(indexer) tm.assert_frame_equal(result, expected) - # use .sort_values #9816 - with tm.assert_produces_warning(FutureWarning): - frame.sort_index(by=["B", "A"]) result = frame.sort_values(by=["B", "A"]) indexer = np.lexsort((frame["A"], frame["B"])) expected = frame.take(indexer) @@ -452,14 +443,8 @@ def test_sort_index_different_sortorder(self): df = DataFrame({"A": A, "B": B, "C": np.random.randn(100)}) - # use .sort_values #9816 - with tm.assert_produces_warning(FutureWarning): - df.sort_index(by=["A", "B"], ascending=[1, 0]) - result = df.sort_values(by=["A", "B"], ascending=[1, 0]) - ex_indexer = np.lexsort((df.B.max() - df.B, df.A)) expected = df.take(ex_indexer) - tm.assert_frame_equal(result, expected) # test with multiindex, too idf = df.set_index(["A", "B"]) @@ -472,59 +457,6 @@ def test_sort_index_different_sortorder(self): result = idf["C"].sort_index(ascending=[1, 0]) tm.assert_series_equal(result, expected["C"]) - def test_sort_index_duplicates(self): - - # with 9816, these are all translated to .sort_values - - df = DataFrame([range(5, 9), range(4)], columns=["a", "a", "b", "b"]) - - with pytest.raises(ValueError, match="not unique"): - # use .sort_values #9816 - with tm.assert_produces_warning(FutureWarning): - df.sort_index(by="a") - with pytest.raises(ValueError, match="not unique"): - df.sort_values(by="a") - - with pytest.raises(ValueError, match="not unique"): 
- # use .sort_values #9816 - with tm.assert_produces_warning(FutureWarning): - df.sort_index(by=["a"]) - with pytest.raises(ValueError, match="not unique"): - df.sort_values(by=["a"]) - - with pytest.raises(ValueError, match="not unique"): - # use .sort_values #9816 - with tm.assert_produces_warning(FutureWarning): - # multi-column 'by' is separate codepath - df.sort_index(by=["a", "b"]) - with pytest.raises(ValueError, match="not unique"): - # multi-column 'by' is separate codepath - df.sort_values(by=["a", "b"]) - - # with multi-index - # GH4370 - df = DataFrame( - np.random.randn(4, 2), columns=MultiIndex.from_tuples([("a", 0), ("a", 1)]) - ) - with pytest.raises(ValueError, match="level"): - # use .sort_values #9816 - with tm.assert_produces_warning(FutureWarning): - df.sort_index(by="a") - with pytest.raises(ValueError, match="level"): - df.sort_values(by="a") - - # convert tuples to a list of tuples - # use .sort_values #9816 - with tm.assert_produces_warning(FutureWarning): - df.sort_index(by=[("a", 1)]) - expected = df.sort_values(by=[("a", 1)]) - - # use .sort_values #9816 - with tm.assert_produces_warning(FutureWarning): - df.sort_index(by=("a", 1)) - result = df.sort_values(by=("a", 1)) - tm.assert_frame_equal(result, expected) - def test_sort_index_level(self): mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC")) df = DataFrame([[1, 2], [3, 4]], mi) From 9dd1b50f2cf70c0ec7de07137b08ae5866afae69 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 29 Nov 2019 15:10:02 -0800 Subject: [PATCH 20/24] DEPR: remove FrozenNDarray (#29840) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/compat/pickle_compat.py | 17 +---- pandas/core/indexes/frozen.py | 80 -------------------- pandas/core/indexes/multi.py | 37 +++++++-- pandas/tests/indexes/multi/test_integrity.py | 2 +- pandas/tests/indexes/test_frozen.py | 64 +--------------- 6 files changed, 35 insertions(+), 166 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst 
b/doc/source/whatsnew/v1.0.0.rst index 4279f949db1aad..05aba65888c557 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -441,6 +441,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - Removed previously deprecated :func:`pandas.tseries.plotting.tsplot` (:issue:`18627`) - Removed the previously deprecated ``reduce`` and ``broadcast`` arguments from :meth:`DataFrame.apply` (:issue:`18577`) - Removed the previously deprecated ``assert_raises_regex`` function in ``pandas.util.testing`` (:issue:`29174`) +- Removed the previously deprecated ``FrozenNDArray`` class in ``pandas.core.indexes.frozen`` (:issue:`29335`) - Removed previously deprecated "nthreads" argument from :func:`read_feather`, use "use_threads" instead (:issue:`23053`) - Removed :meth:`Index.is_lexsorted_for_tuple` (:issue:`29305`) - Removed support for nexted renaming in :meth:`DataFrame.aggregate`, :meth:`Series.aggregate`, :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`Rolling.aggregate` (:issue:`29608`) diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 458c0c07c76029..aeec5e8a0400a3 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -89,21 +89,8 @@ def __new__(cls) -> "DataFrame": # type: ignore _class_locations_map = { ("pandas.core.sparse.array", "SparseArray"): ("pandas.core.arrays", "SparseArray"), # 15477 - # - # TODO: When FrozenNDArray is removed, add - # the following lines for compat: - # - # ('pandas.core.base', 'FrozenNDArray'): - # ('numpy', 'ndarray'), - # ('pandas.core.indexes.frozen', 'FrozenNDArray'): - # ('numpy', 'ndarray'), - # - # Afterwards, remove the current entry - # for `pandas.core.base.FrozenNDArray`. 
- ("pandas.core.base", "FrozenNDArray"): ( - "pandas.core.indexes.frozen", - "FrozenNDArray", - ), + ("pandas.core.base", "FrozenNDArray"): ("numpy", "ndarray"), + ("pandas.core.indexes.frozen", "FrozenNDArray"): ("numpy", "ndarray"), ("pandas.core.base", "FrozenList"): ("pandas.core.indexes.frozen", "FrozenList"), # 10890 ("pandas.core.series", "TimeSeries"): ("pandas.core.series", "Series"), diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py index ab9852157b9ef1..2ea83ba889fd21 100644 --- a/pandas/core/indexes/frozen.py +++ b/pandas/core/indexes/frozen.py @@ -4,14 +4,8 @@ These are used for: - .names (FrozenList) -- .levels & .codes (FrozenNDArray) """ -import warnings - -import numpy as np - -from pandas.core.dtypes.cast import coerce_indexer_dtype from pandas.core.base import PandasObject @@ -111,77 +105,3 @@ def __repr__(self) -> str: __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled pop = append = extend = remove = sort = insert = _disabled - - -class FrozenNDArray(PandasObject, np.ndarray): - - # no __array_finalize__ for now because no metadata - def __new__(cls, data, dtype=None, copy=False): - warnings.warn( - "\nFrozenNDArray is deprecated and will be removed in a " - "future version.\nPlease use `numpy.ndarray` instead.\n", - FutureWarning, - stacklevel=2, - ) - - if copy is None: - copy = not isinstance(data, FrozenNDArray) - res = np.array(data, dtype=dtype, copy=copy).view(cls) - return res - - def _disabled(self, *args, **kwargs): - """This method will not function because object is immutable.""" - raise TypeError( - "'{cls}' does not support mutable operations.".format(cls=type(self)) - ) - - __setitem__ = __setslice__ = __delitem__ = __delslice__ = _disabled - put = itemset = fill = _disabled - - def _shallow_copy(self): - return self.view() - - def values(self): - """returns *copy* of underlying array""" - arr = self.view(np.ndarray).copy() - return arr - - def __repr__(self) -> str: - """ - Return a 
string representation for this object. - """ - prepr = pprint_thing(self, escape_chars=("\t", "\r", "\n"), quote_strings=True) - return f"{type(self).__name__}({prepr}, dtype='{self.dtype}')" - - def searchsorted(self, value, side="left", sorter=None): - """ - Find indices to insert `value` so as to maintain order. - - For full documentation, see `numpy.searchsorted` - - See Also - -------- - numpy.searchsorted : Equivalent function. - """ - - # We are much more performant if the searched - # indexer is the same type as the array. - # - # This doesn't matter for int64, but DOES - # matter for smaller int dtypes. - # - # xref: https://github.com/numpy/numpy/issues/5370 - try: - value = self.dtype.type(value) - except ValueError: - pass - - return super().searchsorted(value, side=side, sorter=sorter) - - -def _ensure_frozen(array_like, categories, copy=False): - array_like = coerce_indexer_dtype(array_like, categories) - array_like = array_like.view(FrozenNDArray) - if copy: - array_like = array_like.copy() - return array_like diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d151fb7260a58a..f319c1e74452cb 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -13,6 +13,7 @@ from pandas.errors import PerformanceWarning, UnsortedIndexError from pandas.util._decorators import Appender, cache_readonly +from pandas.core.dtypes.cast import coerce_indexer_dtype from pandas.core.dtypes.common import ( ensure_int64, ensure_platform_int, @@ -40,7 +41,7 @@ _index_shared_docs, ensure_index, ) -from pandas.core.indexes.frozen import FrozenList, _ensure_frozen +from pandas.core.indexes.frozen import FrozenList import pandas.core.missing as missing from pandas.core.sorting import ( get_group_index, @@ -821,7 +822,7 @@ def _set_codes( if level is None: new_codes = FrozenList( - _ensure_frozen(level_codes, lev, copy=copy)._shallow_copy() + _coerce_indexer_frozen(level_codes, lev, copy=copy).view() for
lev, level_codes in zip(self._levels, codes) ) else: @@ -829,9 +830,7 @@ def _set_codes( new_codes = list(self._codes) for lev_num, level_codes in zip(level_numbers, codes): lev = self.levels[lev_num] - new_codes[lev_num] = _ensure_frozen( - level_codes, lev, copy=copy - )._shallow_copy() + new_codes[lev_num] = _coerce_indexer_frozen(level_codes, lev, copy=copy) new_codes = FrozenList(new_codes) if verify_integrity: @@ -1095,7 +1094,8 @@ def _format_native_types(self, na_rep="nan", **kwargs): if mask.any(): nan_index = len(level) level = np.append(level, na_rep) - level_codes = level_codes.values() + assert not level_codes.flags.writeable # i.e. copy is needed + level_codes = level_codes.copy() # make writeable level_codes[mask] = nan_index new_levels.append(level) new_codes.append(level_codes) @@ -1998,7 +1998,7 @@ def _assert_take_fillable( if mask.any(): masked = [] for new_label in taken: - label_values = new_label.values() + label_values = new_label label_values[mask] = na_value masked.append(np.asarray(label_values)) taken = masked @@ -3431,3 +3431,26 @@ def maybe_droplevels(index, key): pass return index + + +def _coerce_indexer_frozen(array_like, categories, copy: bool = False) -> np.ndarray: + """ + Coerce the array_like indexer to the smallest integer dtype that can encode all + of the given categories. + + Parameters + ---------- + array_like : array-like + categories : array-like + copy : bool + + Returns + ------- + np.ndarray + Non-writeable. 
+ """ + array_like = coerce_indexer_dtype(array_like, categories) + if copy: + array_like = array_like.copy() + array_like.flags.writeable = False + return array_like diff --git a/pandas/tests/indexes/multi/test_integrity.py b/pandas/tests/indexes/multi/test_integrity.py index 472a404c2a8eff..7cdb5cf31338a9 100644 --- a/pandas/tests/indexes/multi/test_integrity.py +++ b/pandas/tests/indexes/multi/test_integrity.py @@ -210,7 +210,7 @@ def test_metadata_immutable(idx): # ditto for labels with pytest.raises(TypeError, match=mutable_regex): codes[0] = codes[0] - with pytest.raises(TypeError, match=mutable_regex): + with pytest.raises(ValueError, match="assignment destination is read-only"): codes[0][0] = codes[0][0] # and for names names = idx.names diff --git a/pandas/tests/indexes/test_frozen.py b/pandas/tests/indexes/test_frozen.py index c7b219b5ee890f..9f6b0325b7b33d 100644 --- a/pandas/tests/indexes/test_frozen.py +++ b/pandas/tests/indexes/test_frozen.py @@ -1,11 +1,7 @@ -import warnings - -import numpy as np import pytest -from pandas.core.indexes.frozen import FrozenList, FrozenNDArray +from pandas.core.indexes.frozen import FrozenList from pandas.tests.test_base import CheckImmutable, CheckStringMixin -import pandas.util.testing as tm class TestFrozenList(CheckImmutable, CheckStringMixin): @@ -55,61 +51,3 @@ def test_tricky_container_to_bytes_raises(self): msg = "^'str' object cannot be interpreted as an integer$" with pytest.raises(TypeError, match=msg): bytes(self.unicode_container) - - -class TestFrozenNDArray(CheckImmutable, CheckStringMixin): - mutable_methods = ("put", "itemset", "fill") - - def setup_method(self, _): - self.lst = [3, 5, 7, -2] - self.klass = FrozenNDArray - - with warnings.catch_warnings(record=True): - warnings.simplefilter("ignore", FutureWarning) - - self.container = FrozenNDArray(self.lst) - self.unicode_container = FrozenNDArray(["\u05d0", "\u05d1", "c"]) - - def test_constructor_warns(self): - # see gh-9031 - with 
tm.assert_produces_warning(FutureWarning): - FrozenNDArray([1, 2, 3]) - - def test_tricky_container_to_bytes(self): - bytes(self.unicode_container) - - def test_shallow_copying(self): - original = self.container.copy() - assert isinstance(self.container.view(), FrozenNDArray) - assert not isinstance(self.container.view(np.ndarray), FrozenNDArray) - assert self.container.view() is not self.container - tm.assert_numpy_array_equal(self.container, original) - - # Shallow copy should be the same too - assert isinstance(self.container._shallow_copy(), FrozenNDArray) - - # setting should not be allowed - def testit(container): - container[0] = 16 - - self.check_mutable_error(testit, self.container) - - def test_values(self): - original = self.container.view(np.ndarray).copy() - n = original[0] + 15 - - vals = self.container.values() - tm.assert_numpy_array_equal(original, vals) - - assert original is not vals - vals[0] = n - - assert isinstance(self.container, FrozenNDArray) - tm.assert_numpy_array_equal(self.container.values(), original) - assert vals[0] == n - - def test_searchsorted(self): - expected = 2 - assert self.container.searchsorted(7) == expected - - assert self.container.searchsorted(value=7) == expected From cb05112fa1f8859279125246d4f41425867db929 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 29 Nov 2019 15:31:45 -0800 Subject: [PATCH 21/24] DEPR: loc with listlikes with missing elements (#29802) --- pandas/core/indexing.py | 16 +-- pandas/io/formats/excel.py | 12 +-- pandas/tests/indexing/test_datetime.py | 16 +-- pandas/tests/indexing/test_floats.py | 28 ++--- pandas/tests/indexing/test_iloc.py | 16 +-- pandas/tests/indexing/test_indexing.py | 56 +++------- pandas/tests/indexing/test_loc.py | 78 ++++++-------- pandas/tests/indexing/test_partial.py | 102 ++++++------------ pandas/tests/io/excel/test_writers.py | 6 +- pandas/tests/series/indexing/test_indexing.py | 21 ++-- pandas/tests/series/indexing/test_numeric.py | 6 +- 11 files changed, 116 
insertions(+), 241 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 67412ed5e5b262..5b875ef3fdc4f9 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1176,18 +1176,12 @@ def _validate_read_indexer( # non-missing values), but a bit later in the # code, so we want to avoid warning & then # just raising - - _missing_key_warning = textwrap.dedent( - """ - Passing list-likes to .loc or [] with any missing label will raise - KeyError in the future, you can use .reindex() as an alternative. - - See the documentation here: - https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike""" # noqa: E501 - ) - if not (ax.is_categorical() or ax.is_interval()): - warnings.warn(_missing_key_warning, FutureWarning, stacklevel=6) + raise KeyError( + "Passing list-likes to .loc or [] with any missing labels " + "is no longer supported, see " + "https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike" # noqa:E501 + ) def _convert_to_indexer(self, obj, axis: int, raise_missing: bool = False): """ diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index cd0889044094f1..0413dcf18d04a6 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -393,16 +393,12 @@ def __init__( if not len(Index(cols) & df.columns): raise KeyError("passes columns are not ALL present dataframe") - # deprecatedin gh-17295 - # 1 missing is ok (for now) if len(Index(cols) & df.columns) != len(cols): - warnings.warn( - "Not all names specified in 'columns' are found; " - "this will raise a KeyError in the future", - FutureWarning, - ) + # Deprecated in GH#17295, enforced in 1.0.0 + raise KeyError("Not all names specified in 'columns' are found") + + self.df = df - self.df = df.reindex(columns=cols) self.columns = self.df.columns self.float_format = float_format self.index = index diff
--git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index ab4a8fe89c6e3a..f2e3f7f6b37239 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -2,6 +2,7 @@ from dateutil import tz import numpy as np +import pytest import pandas as pd from pandas import DataFrame, Index, Series, Timestamp, date_range @@ -242,11 +243,8 @@ def test_series_partial_set_datetime(self): Timestamp("2011-01-02"), Timestamp("2011-01-03"), ] - exp = Series( - [np.nan, 0.2, np.nan], index=pd.DatetimeIndex(keys, name="idx"), name="s" - ) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - tm.assert_series_equal(ser.loc[keys], exp, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[keys] def test_series_partial_set_period(self): # GH 11497 @@ -273,12 +271,8 @@ def test_series_partial_set_period(self): pd.Period("2011-01-02", freq="D"), pd.Period("2011-01-03", freq="D"), ] - exp = Series( - [np.nan, 0.2, np.nan], index=pd.PeriodIndex(keys, name="idx"), name="s" - ) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ser.loc[keys] - tm.assert_series_equal(result, exp) + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[keys] def test_nanosecond_getitem_setitem_with_tz(self): # GH 11679 diff --git a/pandas/tests/indexing/test_floats.py b/pandas/tests/indexing/test_floats.py index eadaeaba63a26e..0a3b513ff01670 100644 --- a/pandas/tests/indexing/test_floats.py +++ b/pandas/tests/indexing/test_floats.py @@ -726,25 +726,15 @@ def test_floating_misc(self): tm.assert_series_equal(result1, result3) tm.assert_series_equal(result1, result4) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result1 = s[[1.6, 5, 10]] - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result2 = s.loc[[1.6, 5, 10]] - with tm.assert_produces_warning(FutureWarning, 
check_stacklevel=False): - result3 = s.loc[[1.6, 5, 10]] - tm.assert_series_equal(result1, result2) - tm.assert_series_equal(result1, result3) - tm.assert_series_equal(result1, Series([np.nan, 2, 4], index=[1.6, 5, 10])) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result1 = s[[0, 1, 2]] - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result2 = s.loc[[0, 1, 2]] - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result3 = s.loc[[0, 1, 2]] - tm.assert_series_equal(result1, result2) - tm.assert_series_equal(result1, result3) - tm.assert_series_equal(result1, Series([0.0, np.nan, np.nan], index=[0, 1, 2])) + with pytest.raises(KeyError, match="with any missing labels"): + s[[1.6, 5, 10]] + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[1.6, 5, 10]] + + with pytest.raises(KeyError, match="with any missing labels"): + s[[0, 1, 2]] + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[0, 1, 2]] result1 = s.loc[[2.5, 5]] result2 = s.loc[[2.5, 5]] diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index d826d89f85ef5f..e4d387fd3ac380 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -728,20 +728,8 @@ def test_iloc_non_unique_indexing(self): df2 = DataFrame({"A": [0.1] * 1000, "B": [1] * 1000}) df2 = concat([df2, 2 * df2, 3 * df2]) - sidx = df2.index.to_series() - expected = df2.iloc[idx[idx <= sidx.max()]] - - new_list = [] - for r, s in expected.iterrows(): - new_list.append(s) - new_list.append(s * 2) - new_list.append(s * 3) - - expected = DataFrame(new_list) - expected = concat([expected, DataFrame(index=idx[idx > sidx.max()])], sort=True) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df2.loc[idx] - tm.assert_frame_equal(result, expected, check_index_type=False) + with pytest.raises(KeyError, match="with any missing labels"): + 
df2.loc[idx] def test_iloc_empty_list_indexer_is_ok(self): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 09a66efb6a3127..e53e02ed750cbf 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -299,32 +299,13 @@ def test_dups_fancy_indexing(self): tm.assert_frame_equal(result, expected) rows = ["C", "B", "E"] - expected = DataFrame( - { - "test": [11, 9, np.nan], - "test1": [7.0, 6, np.nan], - "other": ["d", "c", np.nan], - }, - index=rows, - ) - - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df.loc[rows] - tm.assert_frame_equal(result, expected) + with pytest.raises(KeyError, match="with any missing labels"): + df.loc[rows] # see GH5553, make sure we use the right indexer rows = ["F", "G", "H", "C", "B", "E"] - expected = DataFrame( - { - "test": [np.nan, np.nan, np.nan, 11, 9, np.nan], - "test1": [np.nan, np.nan, np.nan, 7.0, 6, np.nan], - "other": [np.nan, np.nan, np.nan, "d", "c", np.nan], - }, - index=rows, - ) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df.loc[rows] - tm.assert_frame_equal(result, expected) + with pytest.raises(KeyError, match="with any missing labels"): + df.loc[rows] # List containing only missing label dfnu = DataFrame(np.random.randn(5, 3), index=list("AABCD")) @@ -340,38 +321,25 @@ def test_dups_fancy_indexing(self): # GH 4619; duplicate indexer with missing label df = DataFrame({"A": [0, 1, 2]}) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df.loc[[0, 8, 0]] - expected = DataFrame({"A": [0, np.nan, 0]}, index=[0, 8, 0]) - tm.assert_frame_equal(result, expected, check_index_type=False) + with pytest.raises(KeyError, match="with any missing labels"): + df.loc[[0, 8, 0]] df = DataFrame({"A": list("abc")}) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df.loc[[0, 8, 0]] - expected = 
DataFrame({"A": ["a", np.nan, "a"]}, index=[0, 8, 0]) - tm.assert_frame_equal(result, expected, check_index_type=False) + with pytest.raises(KeyError, match="with any missing labels"): + df.loc[[0, 8, 0]] # non unique with non unique selector df = DataFrame({"test": [5, 7, 9, 11]}, index=["A", "A", "B", "C"]) - expected = DataFrame( - {"test": [5, 7, 5, 7, np.nan]}, index=["A", "A", "A", "A", "E"] - ) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df.loc[["A", "A", "E"]] - tm.assert_frame_equal(result, expected) + with pytest.raises(KeyError, match="with any missing labels"): + df.loc[["A", "A", "E"]] def test_dups_fancy_indexing2(self): # GH 5835 # dups on index and missing values df = DataFrame(np.random.randn(5, 5), columns=["A", "B", "B", "B", "A"]) - expected = pd.concat( - [df.loc[:, ["A", "B"]], DataFrame(np.nan, columns=["C"], index=df.index)], - axis=1, - ) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = df.loc[:, ["A", "B", "C"]] - tm.assert_frame_equal(result, expected) + with pytest.raises(KeyError, match="with any missing labels"): + df.loc[:, ["A", "B", "C"]] # GH 6504, multi-axis indexing df = DataFrame( diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index d3af3f6322ef25..cb523efb78cf44 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -159,48 +159,46 @@ def test_loc_getitem_label_list_with_missing(self): self.check_result( "loc", [0, 1, 2], "indexer", [0, 1, 2], typs=["empty"], fails=KeyError, ) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.check_result( - "loc", - [0, 2, 10], - "ix", - [0, 2, 10], - typs=["ints", "uints", "floats"], - axes=0, - fails=KeyError, - ) + self.check_result( + "loc", + [0, 2, 10], + "ix", + [0, 2, 10], + typs=["ints", "uints", "floats"], + axes=0, + fails=KeyError, + ) - with tm.assert_produces_warning(FutureWarning, 
check_stacklevel=False): - self.check_result( - "loc", - [3, 6, 7], - "ix", - [3, 6, 7], - typs=["ints", "uints", "floats"], - axes=1, - fails=KeyError, - ) + self.check_result( + "loc", + [3, 6, 7], + "ix", + [3, 6, 7], + typs=["ints", "uints", "floats"], + axes=1, + fails=KeyError, + ) # GH 17758 - MultiIndex and missing keys - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - self.check_result( - "loc", - [(1, 3), (1, 4), (2, 5)], - "ix", - [(1, 3), (1, 4), (2, 5)], - typs=["multi"], - axes=0, - ) + self.check_result( + "loc", + [(1, 3), (1, 4), (2, 5)], + "ix", + [(1, 3), (1, 4), (2, 5)], + typs=["multi"], + axes=0, + fails=KeyError, + ) def test_getitem_label_list_with_missing(self): s = Series(range(3), index=["a", "b", "c"]) # consistency - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with pytest.raises(KeyError, match="with any missing labels"): s[["a", "d"]] s = Series(range(3)) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with pytest.raises(KeyError, match="with any missing labels"): s[[0, 3]] def test_loc_getitem_label_list_fails(self): @@ -305,10 +303,8 @@ def test_loc_to_fail(self): s.loc[["4"]] s.loc[-1] = 3 - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = s.loc[[-1, -2]] - expected = Series([3, np.nan], index=[-1, -2]) - tm.assert_series_equal(result, expected) + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[-1, -2]] s["a"] = 2 msg = ( @@ -354,10 +350,8 @@ def test_loc_getitem_list_with_fail(self): s.loc[[3]] # a non-match and a match - with tm.assert_produces_warning(FutureWarning): - expected = s.loc[[2, 3]] - result = s.reindex([2, 3]) - tm.assert_series_equal(result, expected) + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[2, 3]] def test_loc_getitem_label_slice(self): @@ -1034,10 +1028,8 @@ def test_series_loc_getitem_label_list_missing_values(): ["2001-01-04", "2001-01-02", 
"2001-01-04", "2001-01-14"], dtype="datetime64" ) s = Series([2, 5, 8, 11], date_range("2001-01-01", freq="D", periods=4)) - expected = Series([11.0, 5.0, 11.0, np.nan], index=key) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = s.loc[key] - tm.assert_series_equal(result, expected) + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[key] @pytest.mark.parametrize( diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 0fb71bfea76c04..aa49edd51aa399 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -186,17 +186,15 @@ def test_series_partial_set(self): # loc equiv to .reindex expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with pytest.raises(KeyError, match="with any missing labels"): result = ser.loc[[3, 2, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) result = ser.reindex([3, 2, 3]) tm.assert_series_equal(result, expected, check_index_type=True) expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, "x"]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with pytest.raises(KeyError, match="with any missing labels"): result = ser.loc[[3, 2, 3, "x"]] - tm.assert_series_equal(result, expected, check_index_type=True) result = ser.reindex([3, 2, 3, "x"]) tm.assert_series_equal(result, expected, check_index_type=True) @@ -206,9 +204,8 @@ def test_series_partial_set(self): tm.assert_series_equal(result, expected, check_index_type=True) expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, "x", 1]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + with pytest.raises(KeyError, match="with any missing labels"): result = ser.loc[[2, 2, "x", 1]] - tm.assert_series_equal(result, expected, check_index_type=True) result = ser.reindex([2, 2, "x", 1]) 
tm.assert_series_equal(result, expected, check_index_type=True) @@ -222,54 +219,48 @@ def test_series_partial_set(self): ser.loc[[3, 3, 3]] expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ser.loc[[2, 2, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[[2, 2, 3]] result = ser.reindex([2, 2, 3]) tm.assert_series_equal(result, expected, check_index_type=True) s = Series([0.1, 0.2, 0.3], index=[1, 2, 3]) expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = s.loc[[3, 4, 4]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[3, 4, 4]] result = s.reindex([3, 4, 4]) tm.assert_series_equal(result, expected, check_index_type=True) s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]) expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = s.loc[[5, 3, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[5, 3, 3]] result = s.reindex([5, 3, 3]) tm.assert_series_equal(result, expected, check_index_type=True) s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]) expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = s.loc[[5, 4, 4]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[5, 4, 4]] result = s.reindex([5, 4, 4]) tm.assert_series_equal(result, expected, check_index_type=True) s = Series([0.1, 0.2, 0.3, 0.4], index=[4, 5, 6, 7]) expected = Series([0.4, np.nan, np.nan], 
index=[7, 2, 2]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = s.loc[[7, 2, 2]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[7, 2, 2]] result = s.reindex([7, 2, 2]) tm.assert_series_equal(result, expected, check_index_type=True) s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]) expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = s.loc[[4, 5, 5]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[[4, 5, 5]] result = s.reindex([4, 5, 5]) tm.assert_series_equal(result, expected, check_index_type=True) @@ -286,28 +277,19 @@ def test_series_partial_set_with_name(self): ser = Series([0.1, 0.2], index=idx, name="s") # loc - exp_idx = Index([3, 2, 3], dtype="int64", name="idx") - expected = Series([np.nan, 0.2, np.nan], index=exp_idx, name="s") - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ser.loc[[3, 2, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[[3, 2, 3]] - exp_idx = Index([3, 2, 3, "x"], dtype="object", name="idx") - expected = Series([np.nan, 0.2, np.nan, np.nan], index=exp_idx, name="s") - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ser.loc[[3, 2, 3, "x"]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[[3, 2, 3, "x"]] exp_idx = Index([2, 2, 1], dtype="int64", name="idx") expected = Series([0.2, 0.2, 0.1], index=exp_idx, name="s") result = ser.loc[[2, 2, 1]] tm.assert_series_equal(result, expected, check_index_type=True) - exp_idx = Index([2, 2, "x", 1], dtype="object", name="idx") - 
expected = Series([0.2, 0.2, np.nan, 0.1], index=exp_idx, name="s") - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ser.loc[[2, 2, "x", 1]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[[2, 2, "x", 1]] # raises as nothing in in the index msg = ( @@ -317,46 +299,28 @@ def test_series_partial_set_with_name(self): with pytest.raises(KeyError, match=msg): ser.loc[[3, 3, 3]] - exp_idx = Index([2, 2, 3], dtype="int64", name="idx") - expected = Series([0.2, 0.2, np.nan], index=exp_idx, name="s") - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = ser.loc[[2, 2, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + ser.loc[[2, 2, 3]] - exp_idx = Index([3, 4, 4], dtype="int64", name="idx") - expected = Series([0.3, np.nan, np.nan], index=exp_idx, name="s") idx = Index([1, 2, 3], dtype="int64", name="idx") - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = Series([0.1, 0.2, 0.3], index=idx, name="s").loc[[3, 4, 4]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + Series([0.1, 0.2, 0.3], index=idx, name="s").loc[[3, 4, 4]] - exp_idx = Index([5, 3, 3], dtype="int64", name="idx") - expected = Series([np.nan, 0.3, 0.3], index=exp_idx, name="s") idx = Index([1, 2, 3, 4], dtype="int64", name="idx") - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 3, 3]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 3, 3]] - exp_idx = Index([5, 4, 4], dtype="int64", name="idx") - expected = 
Series([np.nan, 0.4, 0.4], index=exp_idx, name="s") idx = Index([1, 2, 3, 4], dtype="int64", name="idx") - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 4, 4]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 4, 4]] - exp_idx = Index([7, 2, 2], dtype="int64", name="idx") - expected = Series([0.4, np.nan, np.nan], index=exp_idx, name="s") idx = Index([4, 5, 6, 7], dtype="int64", name="idx") - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[7, 2, 2]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[7, 2, 2]] - exp_idx = Index([4, 5, 5], dtype="int64", name="idx") - expected = Series([0.4, np.nan, np.nan], index=exp_idx, name="s") idx = Index([1, 2, 3, 4], dtype="int64", name="idx") - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[4, 5, 5]] - tm.assert_series_equal(result, expected, check_index_type=True) + with pytest.raises(KeyError, match="with any missing labels"): + Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[4, 5, 5]] # iloc exp_idx = Index([2, 2, 1, 1], dtype="int64", name="idx") diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index a7730e079a1bba..b1be0a1a2feceb 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -1009,13 +1009,9 @@ def test_invalid_columns(self, path): # see gh-10982 write_frame = DataFrame({"A": [1, 1, 1], "B": [2, 2, 2]}) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + 
with pytest.raises(KeyError, match="Not all names specified"): write_frame.to_excel(path, "test1", columns=["B", "C"]) - expected = write_frame.reindex(columns=["B", "C"]) - read_frame = pd.read_excel(path, "test1", index_col=0) - tm.assert_frame_equal(expected, read_frame) - with pytest.raises( KeyError, match="'passes columns are not ALL present dataframe'" ): diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 5aba2920999d5e..173bc9d9d6409b 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -52,15 +52,11 @@ def test_basic_getitem_with_labels(datetime_series): s = Series(np.random.randn(10), index=list(range(0, 20, 2))) inds = [0, 2, 5, 7, 8] arr_inds = np.array([0, 2, 5, 7, 8]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = s[inds] - expected = s.reindex(inds) - tm.assert_series_equal(result, expected) + with pytest.raises(KeyError, match="with any missing labels"): + s[inds] - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = s[arr_inds] - expected = s.reindex(arr_inds) - tm.assert_series_equal(result, expected) + with pytest.raises(KeyError, match="with any missing labels"): + s[arr_inds] # GH12089 # with tz for values @@ -262,12 +258,11 @@ def test_getitem_dups_with_missing(): # breaks reindex, so need to use .loc internally # GH 4246 s = Series([1, 2, 3, 4], ["foo", "bar", "foo", "bah"]) - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - expected = s.loc[["foo", "bar", "bah", "bam"]] + with pytest.raises(KeyError, match="with any missing labels"): + s.loc[["foo", "bar", "bah", "bam"]] - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - result = s[["foo", "bar", "bah", "bam"]] - tm.assert_series_equal(result, expected) + with pytest.raises(KeyError, match="with any missing labels"): + s[["foo", "bar", "bah", "bam"]] def 
test_getitem_dups(): diff --git a/pandas/tests/series/indexing/test_numeric.py b/pandas/tests/series/indexing/test_numeric.py index 60b89c01cc22d1..426a98b00827e6 100644 --- a/pandas/tests/series/indexing/test_numeric.py +++ b/pandas/tests/series/indexing/test_numeric.py @@ -123,12 +123,10 @@ def test_get_nan_multiple(): s = pd.Float64Index(range(10)).to_series() idx = [2, 30] - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - tm.assert_series_equal(s.get(idx), Series([2, np.nan], index=idx)) + assert s.get(idx) is None idx = [2, np.nan] - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - tm.assert_series_equal(s.get(idx), Series([2, np.nan], index=idx)) + assert s.get(idx) is None # GH 17295 - all missing keys idx = [20, 30] From 877869a1c1d75e5da3d7c3cdf2caf2887cd82e18 Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 27 Nov 2019 21:57:06 +0000 Subject: [PATCH 22/24] add f-strings to indexes.base.py --- pandas/core/indexes/base.py | 66 ++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 37 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 4a3fa26c3460ef..89a5ceb5253256 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -120,7 +120,7 @@ def cmp_method(self, other): return result return ops.invalid_comparison(self, other, op) - name = "__{name}__".format(name=op.__name__) + name = f"__{op.__name__}__" return set_function_name(cmp_method, name, cls) @@ -136,7 +136,7 @@ def index_arithmetic_method(self, other): return (Index(result[0]), Index(result[1])) return Index(result) - name = "__{name}__".format(name=op.__name__) + name = f"__{op.__name__}__" # TODO: docstring? 
return set_function_name(index_arithmetic_method, name, cls) @@ -768,8 +768,7 @@ def astype(self, dtype, copy=True): self.values.astype(dtype, copy=copy), name=self.name, dtype=dtype ) except (TypeError, ValueError): - msg = "Cannot cast {name} to dtype {dtype}" - raise TypeError(msg.format(name=type(self).__name__, dtype=dtype)) + raise TypeError(f"Cannot cast {type(self).__name__} to dtype {dtype}") _index_shared_docs[ "take" @@ -814,8 +813,10 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): ) else: if allow_fill and fill_value is not None: - msg = "Unable to fill values because {0} cannot contain NA" - raise ValueError(msg.format(type(self).__name__)) + cls_name = self.__class__.__name__ + raise ValueError( + f"Unable to fill values because {cls_name} cannot contain NA" + ) taken = self.values.take(indices) return self._shallow_copy(taken) @@ -1287,7 +1288,7 @@ def _set_names(self, values, level=None): for name in values: if not is_hashable(name): raise TypeError( - "{}.name must be a hashable type".format(type(self).__name__) + f"{self.__class__.__name__}.name must be a hashable type" ) self.name = values[0] @@ -1456,13 +1457,11 @@ def _validate_index_level(self, level): ) elif level > 0: raise IndexError( - "Too many levels: Index has only 1 level, not %d" % (level + 1) + f"Too many levels: Index has only 1 level, not {level + 1}" ) elif level != self.name: raise KeyError( - "Requested level ({}) does not match index name ({})".format( - level, self.name - ) + f"Requested level ({level}) does not match index name ({self.name})" ) def _get_level_number(self, level): @@ -1558,9 +1557,8 @@ def droplevel(self, level=0): return self if len(level) >= self.nlevels: raise ValueError( - "Cannot remove {} levels from an index with {} " - "levels: at least one level must be " - "left.".format(len(level), self.nlevels) + f"Cannot remove {len(level)} levels from an index with {self.nlevels} " + "levels: at least one level must be left." 
) # The two checks above guarantee that here self is a MultiIndex @@ -2014,7 +2012,7 @@ def fillna(self, value=None, downcast=None): @Appender(_index_shared_docs["dropna"]) def dropna(self, how="any"): if how not in ("any", "all"): - raise ValueError("invalid how option: {0}".format(how)) + raise ValueError(f"invalid how option: {how}") if self.hasnans: return self._shallow_copy(self.values[~self._isnan]) @@ -2288,10 +2286,8 @@ def __xor__(self, other): def __nonzero__(self): raise ValueError( - "The truth value of a {0} is ambiguous. " - "Use a.empty, a.bool(), a.item(), a.any() or a.all().".format( - type(self).__name__ - ) + f"The truth value of a {self.__class__.__name__} is ambiguous. " + "Use a.empty, a.bool(), a.item(), a.any() or a.all()." ) __bool__ = __nonzero__ @@ -2354,7 +2350,7 @@ def _validate_sort_keyword(self, sort): if sort not in [None, False]: raise ValueError( "The 'sort' keyword only takes the values of " - "None or False; {0} was passed.".format(sort) + f"None or False; {sort} was passed." ) def union(self, other, sort=None): @@ -2481,10 +2477,9 @@ def _union(self, other, sort): if sort is None: try: result = algos.safe_sort(result) - except TypeError as e: + except TypeError as err: warnings.warn( - "{}, sort order is undefined for " - "incomparable objects".format(e), + f"{err}, sort order is undefined for incomparable objects", RuntimeWarning, stacklevel=3, ) @@ -2939,8 +2934,8 @@ def _get_fill_indexer_searchsorted(self, target, method, limit=None): """ if limit is not None: raise ValueError( - "limit argument for %r method only well-defined " - "if index and target are monotonic" % method + f"limit argument for {method!r} method only well-defined " + "if index and target are monotonic" ) side = "left" if method == "pad" else "right" @@ -3227,10 +3222,8 @@ def _invalid_indexer(self, form, key): Consistent invalid indexer message. 
""" raise TypeError( - "cannot do {form} indexing on {klass} with these " - "indexers [{key}] of {kind}".format( - form=form, klass=type(self), key=key, kind=type(key) - ) + f"cannot do {form} indexing on {type(self)} with these " + f"indexers [{key}] of {type(key)}" ) # -------------------------------------------------------------------- @@ -3992,8 +3985,8 @@ def _scalar_data_error(cls, data): # We return the TypeError so that we can raise it from the constructor # in order to keep mypy happy return TypeError( - "{0}(...) must be called with a collection of some " - "kind, {1} was passed".format(cls.__name__, repr(data)) + f"{cls.__name__}(...) must be called with a collection of some " + f"kind, {data!r} was passed" ) @classmethod @@ -4037,8 +4030,7 @@ def _assert_can_do_op(self, value): Check value is valid for scalar op. """ if not is_scalar(value): - msg = "'value' must be a scalar, passed: {0}" - raise TypeError(msg.format(type(value).__name__)) + raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}") def _is_memory_usage_qualified(self) -> bool: """ @@ -4113,7 +4105,7 @@ def contains(self, key) -> bool: return key in self def __hash__(self): - raise TypeError("unhashable type: %r" % type(self).__name__) + raise TypeError(f"unhashable type: {type(self).__name__!r}") def __setitem__(self, key, value): raise TypeError("Index does not support mutable operations") @@ -5052,8 +5044,8 @@ def get_slice_bound(self, label, side, kind): slc = lib.maybe_indices_to_slice(slc.astype("i8"), len(self)) if isinstance(slc, np.ndarray): raise KeyError( - "Cannot get %s slice bound for non-unique " - "label: %r" % (side, original_label) + f"Cannot get {side} slice bound for non-unique " + f"label: {original_label!r}" ) if isinstance(slc, slice): @@ -5211,7 +5203,7 @@ def drop(self, labels, errors="raise"): mask = indexer == -1 if mask.any(): if errors != "ignore": - raise KeyError("{} not found in axis".format(labels[mask])) + raise 
KeyError(f"{labels[mask]} not found in axis") indexer = indexer[~mask] return self.delete(indexer) From e0ea38ddd09910e8579fe6f2509e2ef3dfe5107d Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 27 Nov 2019 22:48:08 +0000 Subject: [PATCH 23/24] renamed self.__class__ to type(self) --- pandas/core/indexes/base.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 89a5ceb5253256..c57a5a03f10588 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -813,7 +813,7 @@ def take(self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs): ) else: if allow_fill and fill_value is not None: - cls_name = self.__class__.__name__ + cls_name = type(self).__name__ raise ValueError( f"Unable to fill values because {cls_name} cannot contain NA" ) @@ -1287,9 +1287,7 @@ def _set_names(self, values, level=None): # All items in 'name' need to be hashable: for name in values: if not is_hashable(name): - raise TypeError( - f"{self.__class__.__name__}.name must be a hashable type" - ) + raise TypeError(f"{type(self).__name__}.name must be a hashable type") self.name = values[0] names = property(fset=_set_names, fget=_get_names) @@ -2286,7 +2284,7 @@ def __xor__(self, other): def __nonzero__(self): raise ValueError( - f"The truth value of a {self.__class__.__name__} is ambiguous. " + f"The truth value of a {type(self).__name__} is ambiguous. " "Use a.empty, a.bool(), a.item(), a.any() or a.all()." 
) From 38d9d691832a0fe10b388c8f000d3adf19433287 Mon Sep 17 00:00:00 2001 From: tp Date: Sat, 30 Nov 2019 09:36:59 +0000 Subject: [PATCH 24/24] use repr --- pandas/core/indexes/base.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c57a5a03f10588..aa2326eeab8fa4 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -441,7 +441,7 @@ def __new__( except IncompatibleFrequency: pass if kwargs: - raise TypeError(f"Unexpected keyword arguments {set(kwargs)!r}") + raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}") return cls._simple_new(subarr, name, **kwargs) elif hasattr(data, "__array__"): @@ -2932,7 +2932,7 @@ def _get_fill_indexer_searchsorted(self, target, method, limit=None): """ if limit is not None: raise ValueError( - f"limit argument for {method!r} method only well-defined " + f"limit argument for {repr(method)} method only well-defined " "if index and target are monotonic" ) @@ -3984,7 +3984,7 @@ def _scalar_data_error(cls, data): # in order to keep mypy happy return TypeError( f"{cls.__name__}(...) must be called with a collection of some " - f"kind, {data!r} was passed" + f"kind, {repr(data)} was passed" ) @classmethod @@ -4103,7 +4103,7 @@ def contains(self, key) -> bool: return key in self def __hash__(self): - raise TypeError(f"unhashable type: {type(self).__name__!r}") + raise TypeError(f"unhashable type: {repr(type(self).__name__)}") def __setitem__(self, key, value): raise TypeError("Index does not support mutable operations") @@ -5043,7 +5043,7 @@ def get_slice_bound(self, label, side, kind): if isinstance(slc, np.ndarray): raise KeyError( f"Cannot get {side} slice bound for non-unique " - f"label: {original_label!r}" + f"label: {repr(original_label)}" ) if isinstance(slc, slice):