
ENH: SparseDataFrame/SparseSeries value assignment #17785

Closed · wants to merge 6 commits · changes from 1 commit
fixup! ENH: Allow SparseDataFrame/SparseSeries values assignment
kernc committed Aug 14, 2018
commit 83c1f2bc2783d6d2e47b634c2150ccba2672e885
20 changes: 10 additions & 10 deletions pandas/core/sparse/array.py
@@ -382,26 +382,25 @@ def set_values(self, indexer, value):
         # If indexer is not a single int position, easiest to handle via dense
         if not is_scalar(indexer):
             warnings.warn(
-                'Setting SparseSeries/Array values is particularly '
-                'inefficient when indexing with multiple keys because the '
-                'whole series is made dense interim.',
+                'Setting SparseSeries/Array values is inefficient when '
+                'indexing with multiple keys because the whole series '
+                'is made dense interim.',
                 PerformanceWarning, stacklevel=2)
 
             values = self.to_dense()
             values[indexer] = value
             return SparseArray(values, kind=self.kind,
                                fill_value=self.fill_value)
 
-        warnings.warn(
-            'Setting SparseSeries/Array values is inefficient '
-            '(a copy of data is made).', PerformanceWarning, stacklevel=2)
-
         # If label already in sparse index, just switch the value on a copy
         idx = self.sp_index.lookup(indexer)
         if idx != -1:
-            obj = self.copy()
-            obj.sp_values[idx] = value
-            return obj
+            self.sp_values[idx] = value
+            return self
+
+        warnings.warn(
+            'Setting new SparseSeries values is inefficient '
+            '(a copy of data is made).', PerformanceWarning, stacklevel=2)
 
         # Otherwise, construct a new array, and insert the new value in the
         # correct position
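
For orientation, a short usage sketch of the control flow this hunk leaves in place. It assumes this PR branch is installed (set_values() is added by this PR and is not part of released pandas); the data and values are illustrative only.

    import numpy as np
    import pandas as pd

    arr = pd.SparseArray([1.0, np.nan, 3.0])   # nan fill; points stored at 0 and 2

    # Scalar indexer already present in sp_index: the value is switched on the
    # array itself and the same object is returned, with no warning raised.
    res = arr.set_values(0, 5.0)

    # Non-scalar indexer: emits PerformanceWarning, densifies interim, and
    # returns a brand-new SparseArray.
    res2 = arr.set_values([0, 2], 7.0)

A scalar indexer that is not yet in sp_index emits the "copy of data is made" PerformanceWarning and falls through to the np.insert path shown in the next hunk.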
@@ -410,6 +409,7 @@ def set_values(self, indexer, value):
 
         indices = np.insert(indices, pos, indexer)
         sp_values = np.insert(self.sp_values, pos, value)
+
         # Length can be increased when adding a new value into index

Contributor (review comment): add a line before comment

         length = max(self.sp_index.length, indexer + 1)
         sp_index = _make_index(length, indices, self.kind)

Contributor (review comment): no copy here AFAICT?

kernc (author): It is above, with

    sp_values = np.insert(self.sp_values, pos, value)
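
To make the exchange above concrete: np.insert never modifies its input; it allocates and returns a new array, which is where the copy happens. A minimal standalone sketch (values are illustrative):

    import numpy as np

    sp_values = np.array([1.0, 2.0, 4.0])
    new_values = np.insert(sp_values, 2, 3.0)   # insert 3.0 before position 2

    print(new_values)               # [1. 2. 3. 4.]
    print(new_values is sp_values)  # False: np.insert returned a fresh array
    print(sp_values)                # [1. 2. 4.]; the original is untouched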

6 changes: 4 additions & 2 deletions pandas/core/sparse/series.py
@@ -277,11 +277,13 @@ def __array_wrap__(self, result, context=None):
         else:
             fill_value = self.fill_value
 
-        # Assume: If result size matches, old sparse index is valid (ok???)
+        # Only reuse old sparse index if result size matches
+        # (fails e.g. for ~sparseseries)
         if np.size(result) == self.sp_index.npoints:
             sp_index = self.sp_index
         else:
             sp_index = None
+
         return self._constructor(result, index=self.index,
                                  sparse_index=sp_index,
                                  fill_value=fill_value,
@@ -490,10 +492,10 @@ def set_value(self, label, value, takeable=False):
                       "in a future release. Please use "
                       ".at[] or .iat[] accessors instead", FutureWarning,
                       stacklevel=2)
-        self._data = self._data.copy()
         return self._set_value(label, value, takeable=takeable)
 
     def _set_value(self, label, value, takeable=False):
+        self._data = self._data.copy()
         try:
             idx = self.index.get_loc(label)
         except KeyError:
21 changes: 7 additions & 14 deletions pandas/tests/sparse/frame/test_frame.py
@@ -10,7 +10,6 @@
 
 from pandas import Series, DataFrame, bdate_range, Panel
 from pandas.core.indexes.datetimes import DatetimeIndex
-from pandas.errors import PerformanceWarning
 from pandas.tseries.offsets import BDay
 from pandas.util import testing as tm
 from pandas.compat import lrange
@@ -461,33 +460,28 @@ def test_set_value(self):
         # ok, as the index gets converted to object
         frame = self.frame.copy()
         with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False,
-                                        ignore_extra=True):
+                                        check_stacklevel=False):
             res = frame.set_value('foobar', 'B', 1.5)
         assert res.index.dtype == 'object'
 
         res = self.frame
         res.index = res.index.astype(object)
 
         with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False,
-                                        ignore_extra=True):
+                                        check_stacklevel=False):
             res = self.frame.set_value('foobar', 'B', 1.5)
         assert res.index[-1] == 'foobar'
         with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False,
-                                        ignore_extra=True):
+                                        check_stacklevel=False):
             assert res.get_value('foobar', 'B') == 1.5
 
         with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False,
-                                        ignore_extra=True):
+                                        check_stacklevel=False):
             res2 = res.set_value('foobar', 'qux', 1.5)
         tm.assert_index_equal(res2.columns,
                               pd.Index(list(self.frame.columns)))
kernc (author, review comment): To justify, this test changed because (deprecated) SparseDataFrame.set_value() was removed in favor of superclass frame's (deprecated) set_value() which edits and returns the same object.
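
For readers unfamiliar with the inherited behaviour the author refers to, here is a minimal sketch of what "edits and returns the same object" means, demonstrated on a plain DataFrame (the superclass path; pandas of this era still ships the deprecated set_value()). Labels and values are illustrative.

    import pandas as pd

    df = pd.DataFrame({'B': [1.0, 2.0]})
    res = df.set_value(0, 'B', 1.5)   # emits FutureWarning: use .at[] instead

    assert res is df              # the very same frame comes back ...
    assert df.at[0, 'B'] == 1.5   # ... and it was edited in place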

         with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False,
-                                        ignore_extra=True):
+                                        check_stacklevel=False):
             assert res2.get_value('foobar', 'qux') == 1.5
 
     def test_fancy_index_misc(self):
@@ -594,9 +588,8 @@ def test_setitem_chained_no_consolidate(self):
         # issuecomment-361696418
         # chained setitem used to cause consolidation
         sdf = pd.SparseDataFrame([[np.nan, 1], [2, np.nan]])
-        with tm.assert_produces_warning(PerformanceWarning):
-            with pd.option_context('mode.chained_assignment', None):
-                sdf[0][1] = 2
+        with pd.option_context('mode.chained_assignment', None):
+            sdf[0][1] = 2
         assert len(sdf._data.blocks) == 2
 
     def test_delitem(self):
6 changes: 2 additions & 4 deletions pandas/tests/sparse/series/test_series.py
@@ -486,14 +486,12 @@ def test_get_get_value(self):
     def test_set_value(self):
         idx = self.btseries.index[7]
         with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False,
-                                        ignore_extra=True):
+                                        check_stacklevel=False):
             self.btseries.set_value(idx, 0)
         assert self.btseries[idx] == 0
 
         with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False,
-                                        ignore_extra=True):
+                                        check_stacklevel=False):
             self.iseries.set_value('foobar', 0)
         assert self.iseries.index[-1] == 'foobar'
         assert self.iseries['foobar'] == 0
8 changes: 3 additions & 5 deletions pandas/tests/sparse/test_format.py
@@ -8,7 +8,6 @@
 from pandas.compat import (is_platform_windows,
                            is_platform_32bit)
 from pandas.core.config import option_context
-from pandas.errors import PerformanceWarning
 
 use_32bit_repr = is_platform_windows() or is_platform_32bit()
 
@@ -124,10 +123,9 @@ def test_sparse_repr_after_set(self):
         sdf = pd.SparseDataFrame([[np.nan, 1], [2, np.nan]])
         res = sdf.copy()
 
-        with tm.assert_produces_warning(PerformanceWarning):
-            # Ignore the warning
-            with pd.option_context('mode.chained_assignment', None):
-                sdf[0][1] = 2  # This line triggers the bug
+        # Ignore the warning
+        with pd.option_context('mode.chained_assignment', None):
+            sdf[0][1] = 2  # This line triggers the bug
 
         repr(sdf)
         tm.assert_sp_frame_equal(sdf, res)
6 changes: 1 addition & 5 deletions pandas/util/testing.py
@@ -2493,8 +2493,7 @@ def exception_matches(self, exc_type, exc_value, trace_back):
 
 @contextmanager
 def assert_produces_warning(expected_warning=Warning, filter_level="always",
-                            clear=None, check_stacklevel=True,
-                            ignore_extra=False):
+                            clear=None, check_stacklevel=True):
     """
     Context manager for running code expected to either raise a specific
     warning, or not raise any warnings. Verifies that the code raises the
@@ -2531,8 +2530,6 @@ class for all warnings. To check that no warning is returned,
         If True, displays the line that called the function containing
         the warning to show were the function is called. Otherwise, the
         line that implements the function is displayed.
-    ignore_extra : bool, default False
-        If False, any extra, unexpected warnings are raised as errors.
 
     Examples
     --------
@@ -2599,7 +2596,6 @@ class for all warnings. To check that no warning is returned,
             msg = "Did not see expected warning of class {name!r}.".format(
                 name=expected_warning.__name__)
             assert saw_warning, msg
-        if not ignore_extra:
         assert not extra_warnings, ("Caused unexpected warning(s): {extra!r}."
                                     ).format(extra=extra_warnings)
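
The net effect of the testing.py change, as a small usage sketch (tm.assert_produces_warning is pandas' own helper; the warning messages are illustrative): with ignore_extra gone, the context manager is strict again, so any unexpected extra warning fails the check when the block exits.

    import warnings
    import pandas.util.testing as tm

    # Only the expected warning class is raised: passes.
    with tm.assert_produces_warning(FutureWarning):
        warnings.warn("x is deprecated", FutureWarning)

    # An additional, unexpected warning now raises
    # "Caused unexpected warning(s): ..." at exit.
    try:
        with tm.assert_produces_warning(FutureWarning):
            warnings.warn("x is deprecated", FutureWarning)
            warnings.warn("something unrelated", UserWarning)
    except AssertionError as err:
        print(err)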
