Skip to content

Commit 4f5994e

Browse files
committed
BUG: Row-wise concat of differing dtypes failing in certain cases (GH5754)
1 parent c34d634 commit 4f5994e

File tree

3 files changed

+72
-26
lines changed

3 files changed

+72
-26
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -828,6 +828,7 @@ Bug Fixes
828828
- Bug in fillna with Series and a passed series/dict (:issue:`5703`)
829829
- Bug in groupby transform with a datetime-like grouper (:issue:`5712`)
830830
- Bug in multi-index selection in PY3 when using certain keys (:issue:`5725`)
831+
- Row-wise concat of differing dtypes failing in certain cases (:issue:`5754`)
831832

832833
pandas 0.12.0
833834
-------------

pandas/tests/test_frame.py

+42
Original file line numberDiff line numberDiff line change
@@ -6154,6 +6154,48 @@ def test_append_empty_dataframe(self):
61546154
expected = df1.copy()
61556155
assert_frame_equal(result, expected)
61566156

6157+
def test_append_dtypes(self):
6158+
6159+
# GH 5754
6160+
# row appends of different dtypes (so need to do by-item)
6161+
# can sometimes infer the correct type
6162+
6163+
df1 = DataFrame({ 'bar' : Timestamp('20130101') }, index=lrange(5))
6164+
df2 = DataFrame()
6165+
result = df1.append(df2)
6166+
expected = df1.copy()
6167+
assert_frame_equal(result, expected)
6168+
6169+
df1 = DataFrame({ 'bar' : Timestamp('20130101') }, index=lrange(1))
6170+
df2 = DataFrame({ 'bar' : 'foo' }, index=lrange(1,2))
6171+
result = df1.append(df2)
6172+
expected = DataFrame({ 'bar' : [ Timestamp('20130101'), 'foo' ]})
6173+
assert_frame_equal(result, expected)
6174+
6175+
df1 = DataFrame({ 'bar' : Timestamp('20130101') }, index=lrange(1))
6176+
df2 = DataFrame({ 'bar' : np.nan }, index=lrange(1,2))
6177+
result = df1.append(df2)
6178+
expected = DataFrame({ 'bar' : Series([ Timestamp('20130101'), np.nan ],dtype='M8[ns]') })
6179+
assert_frame_equal(result, expected)
6180+
6181+
df1 = DataFrame({ 'bar' : Timestamp('20130101') }, index=lrange(1))
6182+
df2 = DataFrame({ 'bar' : np.nan }, index=lrange(1,2), dtype=object)
6183+
result = df1.append(df2)
6184+
expected = DataFrame({ 'bar' : Series([ Timestamp('20130101'), np.nan ],dtype='M8[ns]') })
6185+
assert_frame_equal(result, expected)
6186+
6187+
df1 = DataFrame({ 'bar' : np.nan }, index=lrange(1))
6188+
df2 = DataFrame({ 'bar' : Timestamp('20130101') }, index=lrange(1,2))
6189+
result = df1.append(df2)
6190+
expected = DataFrame({ 'bar' : Series([ np.nan, Timestamp('20130101')] ,dtype='M8[ns]') })
6191+
assert_frame_equal(result, expected)
6192+
6193+
df1 = DataFrame({ 'bar' : Timestamp('20130101') }, index=lrange(1))
6194+
df2 = DataFrame({ 'bar' : 1 }, index=lrange(1,2), dtype=object)
6195+
result = df1.append(df2)
6196+
expected = DataFrame({ 'bar' : Series([ Timestamp('20130101'), 1 ]) })
6197+
assert_frame_equal(result, expected)
6198+
61576199
def test_asfreq(self):
61586200
offset_monthly = self.tsframe.asfreq(datetools.bmonthEnd)
61596201
rule_monthly = self.tsframe.asfreq('BM')

pandas/tools/merge.py

+29-26
Original file line numberDiff line numberDiff line change
@@ -1139,52 +1139,55 @@ def _concat_blocks(self, blocks):
11391139

11401140
def _concat_single_item(self, objs, item):
11411141
# this is called if we don't have consistent dtypes in a row-wise append
1142-
11431142
all_values = []
1144-
dtypes = set()
1143+
dtypes = []
1144+
alls = set()
11451145

1146+
# figure out the resulting dtype of the combination
11461147
for data, orig in zip(objs, self.objs):
1148+
d = dict([ (t,False) for t in ['object','datetime','timedelta','other'] ])
11471149
if item in orig:
11481150
values = data.get(item)
11491151
if hasattr(values,'to_dense'):
11501152
values = values.to_dense()
1151-
dtypes.add(values.dtype)
11521153
all_values.append(values)
1153-
else:
1154-
all_values.append(None)
11551154

1156-
# figure out the resulting dtype of the combination
1157-
alls = set()
1158-
seen = []
1159-
for dtype in dtypes:
1160-
d = dict([ (t,False) for t in ['object','datetime','timedelta','other'] ])
1161-
if issubclass(dtype.type, (np.object_, np.bool_)):
1162-
d['object'] = True
1163-
alls.add('object')
1164-
elif is_datetime64_dtype(dtype):
1165-
d['datetime'] = True
1166-
alls.add('datetime')
1167-
elif is_timedelta64_dtype(dtype):
1168-
d['timedelta'] = True
1169-
alls.add('timedelta')
1155+
dtype = values.dtype
1156+
1157+
if issubclass(dtype.type, (np.object_, np.bool_)):
1158+
d['object'] = True
1159+
alls.add('object')
1160+
elif is_datetime64_dtype(dtype):
1161+
d['datetime'] = True
1162+
alls.add('datetime')
1163+
elif is_timedelta64_dtype(dtype):
1164+
d['timedelta'] = True
1165+
alls.add('timedelta')
1166+
else:
1167+
d['other'] = True
1168+
alls.add('other')
1169+
11701170
else:
1171+
all_values.append(None)
11711172
d['other'] = True
11721173
alls.add('other')
1173-
seen.append(d)
1174+
1175+
dtypes.append(d)
11741176

11751177
if 'datetime' in alls or 'timedelta' in alls:
11761178

11771179
if 'object' in alls or 'other' in alls:
1178-
for v, s in zip(all_values,seen):
1179-
if s.get('datetime') or s.get('timedelta'):
1180+
1181+
for v, d in zip(all_values,dtypes):
1182+
if d.get('datetime') or d.get('timedelta'):
11801183
pass
11811184

11821185
# if we have all null, then leave a date/time like type
11831186
# if we have only that type left
1184-
elif isnull(v).all():
1187+
elif v is None or isnull(v).all():
11851188

1186-
alls.remove('other')
1187-
alls.remove('object')
1189+
alls.discard('other')
1190+
alls.discard('object')
11881191

11891192
# create the result
11901193
if 'object' in alls:
@@ -1200,7 +1203,7 @@ def _concat_single_item(self, objs, item):
12001203

12011204
to_concat = []
12021205
for obj, item_values in zip(objs, all_values):
1203-
if item_values is None:
1206+
if item_values is None or isnull(item_values).all():
12041207
shape = obj.shape[1:]
12051208
missing_arr = np.empty(shape, dtype=empty_dtype)
12061209
missing_arr.fill(fill_value)

0 commit comments

Comments
 (0)