-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
ENH: SparseDataFrame/SparseSeries value assignment #17785
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
b4889c8
22b0346
8748339
35e8c61
83c1f2b
4779c36
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -382,26 +382,25 @@ def set_values(self, indexer, value): | |
# If indexer is not a single int position, easiest to handle via dense | ||
if not is_scalar(indexer): | ||
warnings.warn( | ||
'Setting SparseSeries/Array values is particularly ' | ||
'inefficient when indexing with multiple keys because the ' | ||
'whole series is made dense interim.', | ||
'Setting SparseSeries/Array values is inefficient when ' | ||
'indexing with multiple keys because the whole series ' | ||
'is made dense interim.', | ||
PerformanceWarning, stacklevel=2) | ||
|
||
values = self.to_dense() | ||
values[indexer] = value | ||
return SparseArray(values, kind=self.kind, | ||
fill_value=self.fill_value) | ||
|
||
warnings.warn( | ||
'Setting SparseSeries/Array values is inefficient ' | ||
'(a copy of data is made).', PerformanceWarning, stacklevel=2) | ||
|
||
# If label already in sparse index, just switch the value on a copy | ||
idx = self.sp_index.lookup(indexer) | ||
if idx != -1: | ||
obj = self.copy() | ||
obj.sp_values[idx] = value | ||
return obj | ||
self.sp_values[idx] = value | ||
return self | ||
|
||
warnings.warn( | ||
'Setting new SparseSeries values is inefficient ' | ||
'(a copy of data is made).', PerformanceWarning, stacklevel=2) | ||
|
||
# Otherwise, construct a new array, and insert the new value in the | ||
# correct position | ||
|
@@ -410,6 +409,7 @@ def set_values(self, indexer, value): | |
|
||
indices = np.insert(indices, pos, indexer) | ||
sp_values = np.insert(self.sp_values, pos, value) | ||
|
||
# Length can be increased when adding a new value into index | ||
length = max(self.sp_index.length, indexer + 1) | ||
sp_index = _make_index(length, indices, self.kind) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No copy here, AFAICT? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The copy is made above, by sp_values = np.insert(self.sp_values, pos, value) |
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,7 +10,6 @@ | |
|
||
from pandas import Series, DataFrame, bdate_range, Panel | ||
from pandas.core.indexes.datetimes import DatetimeIndex | ||
from pandas.errors import PerformanceWarning | ||
from pandas.tseries.offsets import BDay | ||
from pandas.util import testing as tm | ||
from pandas.compat import lrange | ||
|
@@ -461,33 +460,28 @@ def test_set_value(self): | |
# ok, as the index gets converted to object | ||
frame = self.frame.copy() | ||
with tm.assert_produces_warning(FutureWarning, | ||
check_stacklevel=False, | ||
ignore_extra=True): | ||
check_stacklevel=False): | ||
res = frame.set_value('foobar', 'B', 1.5) | ||
assert res.index.dtype == 'object' | ||
|
||
res = self.frame | ||
res.index = res.index.astype(object) | ||
|
||
with tm.assert_produces_warning(FutureWarning, | ||
check_stacklevel=False, | ||
ignore_extra=True): | ||
check_stacklevel=False): | ||
res = self.frame.set_value('foobar', 'B', 1.5) | ||
assert res.index[-1] == 'foobar' | ||
with tm.assert_produces_warning(FutureWarning, | ||
check_stacklevel=False, | ||
ignore_extra=True): | ||
check_stacklevel=False): | ||
assert res.get_value('foobar', 'B') == 1.5 | ||
|
||
with tm.assert_produces_warning(FutureWarning, | ||
check_stacklevel=False, | ||
ignore_extra=True): | ||
check_stacklevel=False): | ||
res2 = res.set_value('foobar', 'qux', 1.5) | ||
tm.assert_index_equal(res2.columns, | ||
pd.Index(list(self.frame.columns))) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. To justify, this test changed because (deprecated) |
||
with tm.assert_produces_warning(FutureWarning, | ||
check_stacklevel=False, | ||
ignore_extra=True): | ||
check_stacklevel=False): | ||
assert res2.get_value('foobar', 'qux') == 1.5 | ||
|
||
def test_fancy_index_misc(self): | ||
|
@@ -594,9 +588,8 @@ def test_setitem_chained_no_consolidate(self): | |
# issuecomment-361696418 | ||
# chained setitem used to cause consolidation | ||
sdf = pd.SparseDataFrame([[np.nan, 1], [2, np.nan]]) | ||
with tm.assert_produces_warning(PerformanceWarning): | ||
with pd.option_context('mode.chained_assignment', None): | ||
sdf[0][1] = 2 | ||
with pd.option_context('mode.chained_assignment', None): | ||
sdf[0][1] = 2 | ||
assert len(sdf._data.blocks) == 2 | ||
|
||
def test_delitem(self): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Add a blank line before the comment.