
ENH: SparseDataFrame/SparseSeries value assignment #17785

Closed · wants to merge 6 commits · changes from 1 commit
fixup! ENH: Allow SparseDataFrame/SparseSeries values assignment
kernc committed Aug 14, 2018
commit 83c1f2bc2783d6d2e47b634c2150ccba2672e885
20 changes: 10 additions & 10 deletions pandas/core/sparse/array.py
@@ -382,26 +382,25 @@ def set_values(self, indexer, value):
         # If indexer is not a single int position, easiest to handle via dense
         if not is_scalar(indexer):
             warnings.warn(
-                'Setting SparseSeries/Array values is particularly '
-                'inefficient when indexing with multiple keys because the '
-                'whole series is made dense interim.',
+                'Setting SparseSeries/Array values is inefficient when '
+                'indexing with multiple keys because the whole series '
+                'is made dense interim.',
                 PerformanceWarning, stacklevel=2)
 
             values = self.to_dense()
             values[indexer] = value
             return SparseArray(values, kind=self.kind,
                                fill_value=self.fill_value)
 
-        warnings.warn(
-            'Setting SparseSeries/Array values is inefficient '
-            '(a copy of data is made).', PerformanceWarning, stacklevel=2)
-
         # If label already in sparse index, just switch the value on a copy
         idx = self.sp_index.lookup(indexer)
         if idx != -1:
-            obj = self.copy()
-            obj.sp_values[idx] = value
-            return obj
+            self.sp_values[idx] = value
+            return self
+
+        warnings.warn(
+            'Setting new SparseSeries values is inefficient '
+            '(a copy of data is made).', PerformanceWarning, stacklevel=2)
 
         # Otherwise, construct a new array, and insert the new value in the
         # correct position
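
For orientation, a short usage sketch of the control flow this hunk leaves in place. It assumes this PR branch is installed (set_values() is added by this PR and is not part of released pandas); the data and values are illustrative only.

    import numpy as np
    import pandas as pd

    arr = pd.SparseArray([1.0, np.nan, 3.0])   # nan fill; points stored at 0 and 2

    # Scalar indexer already present in sp_index: the value is switched on the
    # array itself and the same object is returned, with no warning raised.
    res = arr.set_values(0, 5.0)

    # Non-scalar indexer: emits PerformanceWarning, densifies interim, and
    # returns a brand-new SparseArray.
    res2 = arr.set_values([0, 2], 7.0)

A scalar indexer that is not yet in sp_index emits the "copy of data is made" PerformanceWarning and falls through to the np.insert path shown in the next hunk.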
@@ -410,6 +409,7 @@ def set_values(self, indexer, value):
 
         indices = np.insert(indices, pos, indexer)
         sp_values = np.insert(self.sp_values, pos, value)
+
         # Length can be increased when adding a new value into index

Contributor (review comment): add a line before comment

         length = max(self.sp_index.length, indexer + 1)
         sp_index = _make_index(length, indices, self.kind)

Contributor (review comment): no copy here AFAICT?

kernc (author): It is above, with

    sp_values = np.insert(self.sp_values, pos, value)
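
To make the exchange above concrete: np.insert never modifies its input; it allocates and returns a new array, which is where the copy happens. A minimal standalone sketch (values are illustrative):

    import numpy as np

    sp_values = np.array([1.0, 2.0, 4.0])
    new_values = np.insert(sp_values, 2, 3.0)   # insert 3.0 before position 2

    print(new_values)               # [1. 2. 3. 4.]
    print(new_values is sp_values)  # False: np.insert returned a fresh array
    print(sp_values)                # [1. 2. 4.]; the original is untouched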

6 changes: 4 additions & 2 deletions pandas/core/sparse/series.py
@@ -277,11 +277,13 @@ def __array_wrap__(self, result, context=None):
         else:
             fill_value = self.fill_value
 
-        # Assume: If result size matches, old sparse index is valid (ok???)
+        # Only reuse old sparse index if result size matches
+        # (fails e.g. for ~sparseseries)
         if np.size(result) == self.sp_index.npoints:
             sp_index = self.sp_index
         else:
             sp_index = None
+
         return self._constructor(result, index=self.index,
                                  sparse_index=sp_index,
                                  fill_value=fill_value,
@@ -490,10 +492,10 @@ def set_value(self, label, value, takeable=False):
                       "in a future release. Please use "
                       ".at[] or .iat[] accessors instead", FutureWarning,
                       stacklevel=2)
-        self._data = self._data.copy()
         return self._set_value(label, value, takeable=takeable)
 
     def _set_value(self, label, value, takeable=False):
+        self._data = self._data.copy()
         try:
             idx = self.index.get_loc(label)
         except KeyError:
21 changes: 7 additions & 14 deletions pandas/tests/sparse/frame/test_frame.py
@@ -10,7 +10,6 @@
 
 from pandas import Series, DataFrame, bdate_range, Panel
 from pandas.core.indexes.datetimes import DatetimeIndex
-from pandas.errors import PerformanceWarning
 from pandas.tseries.offsets import BDay
 from pandas.util import testing as tm
 from pandas.compat import lrange
@@ -461,33 +460,28 @@ def test_set_value(self):
         # ok, as the index gets converted to object
         frame = self.frame.copy()
         with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False,
-                                        ignore_extra=True):
+                                        check_stacklevel=False):
             res = frame.set_value('foobar', 'B', 1.5)
         assert res.index.dtype == 'object'
 
         res = self.frame
         res.index = res.index.astype(object)
 
         with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False,
-                                        ignore_extra=True):
+                                        check_stacklevel=False):
             res = self.frame.set_value('foobar', 'B', 1.5)
         assert res.index[-1] == 'foobar'
         with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False,
-                                        ignore_extra=True):
+                                        check_stacklevel=False):
             assert res.get_value('foobar', 'B') == 1.5
 
         with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False,
-                                        ignore_extra=True):
+                                        check_stacklevel=False):
             res2 = res.set_value('foobar', 'qux', 1.5)
         tm.assert_index_equal(res2.columns,
                               pd.Index(list(self.frame.columns)))
kernc (author, review comment): To justify, this test changed because (deprecated) SparseDataFrame.set_value() was removed in favor of superclass frame's (deprecated) set_value() which edits and returns the same object.
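
For readers unfamiliar with the inherited behaviour the author refers to, here is a minimal sketch of what "edits and returns the same object" means, demonstrated on a plain DataFrame (the superclass path; pandas of this era still ships the deprecated set_value()). Labels and values are illustrative.

    import pandas as pd

    df = pd.DataFrame({'B': [1.0, 2.0]})
    res = df.set_value(0, 'B', 1.5)   # emits FutureWarning: use .at[] instead

    assert res is df              # the very same frame comes back ...
    assert df.at[0, 'B'] == 1.5   # ... and it was edited in place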

         with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False,
-                                        ignore_extra=True):
+                                        check_stacklevel=False):
             assert res2.get_value('foobar', 'qux') == 1.5
 
     def test_fancy_index_misc(self):
@@ -594,9 +588,8 @@ def test_setitem_chained_no_consolidate(self):
         # issuecomment-361696418
         # chained setitem used to cause consolidation
         sdf = pd.SparseDataFrame([[np.nan, 1], [2, np.nan]])
-        with tm.assert_produces_warning(PerformanceWarning):
-            with pd.option_context('mode.chained_assignment', None):
-                sdf[0][1] = 2
+        with pd.option_context('mode.chained_assignment', None):
+            sdf[0][1] = 2
         assert len(sdf._data.blocks) == 2
 
     def test_delitem(self):
6 changes: 2 additions & 4 deletions pandas/tests/sparse/series/test_series.py
@@ -486,14 +486,12 @@ def test_get_get_value(self):
     def test_set_value(self):
         idx = self.btseries.index[7]
         with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False,
-                                        ignore_extra=True):
+                                        check_stacklevel=False):
             self.btseries.set_value(idx, 0)
         assert self.btseries[idx] == 0
 
         with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False,
-                                        ignore_extra=True):
+                                        check_stacklevel=False):
             self.iseries.set_value('foobar', 0)
         assert self.iseries.index[-1] == 'foobar'
         assert self.iseries['foobar'] == 0
8 changes: 3 additions & 5 deletions pandas/tests/sparse/test_format.py
@@ -8,7 +8,6 @@
 from pandas.compat import (is_platform_windows,
                            is_platform_32bit)
 from pandas.core.config import option_context
-from pandas.errors import PerformanceWarning
 
 use_32bit_repr = is_platform_windows() or is_platform_32bit()
 
@@ -124,10 +123,9 @@ def test_sparse_repr_after_set(self):
         sdf = pd.SparseDataFrame([[np.nan, 1], [2, np.nan]])
         res = sdf.copy()
 
-        with tm.assert_produces_warning(PerformanceWarning):
-            # Ignore the warning
-            with pd.option_context('mode.chained_assignment', None):
-                sdf[0][1] = 2  # This line triggers the bug
+        # Ignore the warning
+        with pd.option_context('mode.chained_assignment', None):
+            sdf[0][1] = 2  # This line triggers the bug
 
         repr(sdf)
         tm.assert_sp_frame_equal(sdf, res)
6 changes: 1 addition & 5 deletions pandas/util/testing.py
@@ -2493,8 +2493,7 @@ def exception_matches(self, exc_type, exc_value, trace_back):
 
 @contextmanager
 def assert_produces_warning(expected_warning=Warning, filter_level="always",
-                            clear=None, check_stacklevel=True,
-                            ignore_extra=False):
+                            clear=None, check_stacklevel=True):
     """
     Context manager for running code expected to either raise a specific
     warning, or not raise any warnings. Verifies that the code raises the
@@ -2531,8 +2530,6 @@ class for all warnings. To check that no warning is returned,
         If True, displays the line that called the function containing
         the warning to show were the function is called. Otherwise, the
         line that implements the function is displayed.
-    ignore_extra : bool, default False
-        If False, any extra, unexpected warnings are raised as errors.
 
     Examples
     --------
@@ -2599,7 +2596,6 @@ class for all warnings. To check that no warning is returned,
             msg = "Did not see expected warning of class {name!r}.".format(
                 name=expected_warning.__name__)
             assert saw_warning, msg
-        if not ignore_extra:
         assert not extra_warnings, ("Caused unexpected warning(s): {extra!r}."
                                     ).format(extra=extra_warnings)
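
The net effect of the testing.py change, as a small usage sketch (tm.assert_produces_warning is pandas' own helper; the warning messages are illustrative): with ignore_extra gone, the context manager is strict again, so any unexpected extra warning fails the check when the block exits.

    import warnings
    import pandas.util.testing as tm

    # Only the expected warning class is raised: passes.
    with tm.assert_produces_warning(FutureWarning):
        warnings.warn("x is deprecated", FutureWarning)

    # An additional, unexpected warning now raises
    # "Caused unexpected warning(s): ..." at exit.
    try:
        with tm.assert_produces_warning(FutureWarning):
            warnings.warn("x is deprecated", FutureWarning)
            warnings.warn("something unrelated", UserWarning)
    except AssertionError as err:
        print(err)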
