Skip to content

BUG: unstack with nulls & Timedelta/DateTime index #9491

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 16, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.16.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ Bug Fixes
- Bug in left ``join`` on multi-index with ``sort=True`` or null values (:issue:`9210`).
- Bug in ``MultiIndex`` where inserting new keys would fail (:issue:`9250`).
- Bug in ``groupby`` when key space exceeds ``int64`` bounds (:issue:`9096`).
- Bug in ``unstack`` when a ``TimedeltaIndex`` or ``DatetimeIndex`` level contains null values (:issue:`9491`).


- Fixed character encoding bug in ``read_stata`` and ``StataReader`` when loading data from a URL (:issue:`9231`).
Expand Down
30 changes: 9 additions & 21 deletions pandas/core/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import pandas.core.common as com
import pandas.algos as algos

from pandas.core.index import MultiIndex
from pandas.core.index import MultiIndex, _get_na_value


class _Unstacker(object):
Expand Down Expand Up @@ -194,8 +194,11 @@ def get_new_values(self):

def get_new_columns(self):
if self.value_columns is None:
return _make_new_index(self.removed_level, None) \
if self.lift != 0 else self.removed_level
if self.lift == 0:
return self.removed_level

lev = self.removed_level
return lev.insert(0, _get_na_value(lev.dtype.type))

stride = len(self.removed_level) + self.lift
width = len(self.value_columns)
Expand All @@ -222,31 +225,16 @@ def get_new_index(self):
# construct the new index
if len(self.new_index_levels) == 1:
lev, lab = self.new_index_levels[0], result_labels[0]
return _make_new_index(lev, lab) \
if (lab == -1).any() else lev.take(lab)
if (lab == -1).any():
lev = lev.insert(len(lev), _get_na_value(lev.dtype.type))
return lev.take(lab)

return MultiIndex(levels=self.new_index_levels,
labels=result_labels,
names=self.new_index_names,
verify_integrity=False)


def _make_new_index(lev, lab):
    """Build an index from ``lev`` with one missing-value entry added.

    Parameters
    ----------
    lev : index-like
        Source level; its ``values``, ``dtype``, ``_get_attributes_dict``
        and ``_shallow_copy`` are used.
    lab : array of integer positions, or None
        When ``None`` the missing value is inserted at position 0 and all
        entries are kept.  Otherwise the missing value is appended at the
        end and the result is reordered with ``take(lab)``.

    Returns
    -------
    A shallow copy of ``lev`` holding the new values when they can be cast
    back to ``lev.dtype``; otherwise a plain ``Index`` built from the
    object-dtype values with ``lev``'s attributes.
    """
    from pandas.core.index import Index, _get_na_value

    na_value = _get_na_value(lev.dtype.type)

    # Work on an object array so the NA marker can sit next to any dtype.
    as_objects = lev.values.astype('object')

    if lab is None:
        extended = np.insert(as_objects, 0, na_value)
    else:
        # NA goes last, so a label of -1 selects it through ``take``.
        extended = np.insert(as_objects, len(as_objects), na_value)
        extended = extended.take(lab)

    try:
        extended = extended.astype(lev.dtype, subok=False, copy=False)
    except ValueError:
        # The original dtype cannot represent the NA marker.
        return Index(extended, **lev._get_attributes_dict())

    return lev._shallow_copy(extended)


def _unstack_multiple(data, clocs):
from pandas.core.groupby import decons_obs_group_ids

Expand Down
15 changes: 15 additions & 0 deletions pandas/tools/tests/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,21 @@ def test_pivot_index_with_nan(self):
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(df.pivot('b', 'a', 'c'), expected.T)

# GH9491
df = DataFrame({'a':pd.date_range('2014-02-01', periods=6, freq='D'),
'c':100 + np.arange(6)})
df['b'] = df['a'] - pd.Timestamp('2014-02-02')
df.loc[1, 'a'] = df.loc[3, 'a'] = nan
df.loc[1, 'b'] = df.loc[4, 'b'] = nan

pv = df.pivot('a', 'b', 'c')
self.assertEqual(pv.notnull().values.sum(), len(df))

for _, row in df.iterrows():
self.assertEqual(pv.loc[row['a'], row['b']], row['c'])

tm.assert_frame_equal(df.pivot('b', 'a', 'c'), pv.T)

def test_pivot_with_tz(self):
# GH 5878
df = DataFrame({'dt1': [datetime.datetime(2013, 1, 1, 9, 0),
Expand Down
2 changes: 1 addition & 1 deletion pandas/tseries/tdi.py
Original file line number Diff line number Diff line change
Expand Up @@ -828,7 +828,7 @@ def insert(self, loc, item):
pass

freq = None
if isinstance(item, Timedelta):
if isinstance(item, (Timedelta, tslib.NaTType)):

# check freq can be preserved on edge cases
if self.freq is not None:
Expand Down