Skip to content

Commit 8129afc

Browse files
committed
Merge pull request #5757 from jreback/fixes
BUG: empty Series construction (GH5756), concat issues (GH5754)
2 parents b319d24 + 4f5994e commit 8129afc

File tree

5 files changed

+86
-30
lines changed

5 files changed

+86
-30
lines changed

doc/source/release.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,8 @@ API Changes
247247
(:issue:`4390`)
248248
- allow ``ix/loc`` for Series/DataFrame/Panel to set on any axis even when
249249
the single-key is not currently contained in the index for that axis
250-
(:issue:`2578`, :issue:`5226`, :issue:`5632`, :issue:`5720`, :issue:`5744`)
250+
(:issue:`2578`, :issue:`5226`, :issue:`5632`, :issue:`5720`,
251+
:issue:`5744`, :issue:`5756`)
251252
- Default export for ``to_clipboard`` is now csv with a sep of ``\t`` for
252253
compat (:issue:`3368`)
253254
- ``at`` now will enlarge the object inplace (and return the same)
@@ -827,6 +828,7 @@ Bug Fixes
827828
- Bug in fillna with Series and a passed series/dict (:issue:`5703`)
828829
- Bug in groupby transform with a datetime-like grouper (:issue:`5712`)
829830
- Bug in multi-index selection in PY3 when using certain keys (:issue:`5725`)
831+
- Row-wise concat of differing dtypes failing in certain cases (:issue:`5754`)
830832

831833
pandas 0.12.0
832834
-------------

pandas/core/frame.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -325,15 +325,16 @@ def _init_dict(self, data, index, columns, dtype=None):
325325
def _init_ndarray(self, values, index, columns, dtype=None,
326326
copy=False):
327327
if isinstance(values, Series):
328-
if columns is None and values.name is not None:
329-
columns = [values.name]
328+
if columns is None:
329+
if values.name is not None:
330+
columns = [values.name]
330331
if index is None:
331332
index = values.index
332333
else:
333334
values = values.reindex(index)
334335

335336
# zero len case (GH #2234)
336-
if not len(values) and len(columns):
337+
if not len(values) and columns is not None and len(columns):
337338
values = np.empty((0, 1), dtype=object)
338339

339340
values = _prep_ndarray(values, copy=copy)

pandas/tests/test_frame.py

+42
Original file line numberDiff line numberDiff line change
@@ -6154,6 +6154,48 @@ def test_append_empty_dataframe(self):
61546154
expected = df1.copy()
61556155
assert_frame_equal(result, expected)
61566156

6157+
def test_append_dtypes(self):
6158+
6159+
# GH 5754
6160+
# row appends of different dtypes (so need to do by-item)
6161+
# can sometimes infer the correct type
6162+
6163+
df1 = DataFrame({ 'bar' : Timestamp('20130101') }, index=lrange(5))
6164+
df2 = DataFrame()
6165+
result = df1.append(df2)
6166+
expected = df1.copy()
6167+
assert_frame_equal(result, expected)
6168+
6169+
df1 = DataFrame({ 'bar' : Timestamp('20130101') }, index=lrange(1))
6170+
df2 = DataFrame({ 'bar' : 'foo' }, index=lrange(1,2))
6171+
result = df1.append(df2)
6172+
expected = DataFrame({ 'bar' : [ Timestamp('20130101'), 'foo' ]})
6173+
assert_frame_equal(result, expected)
6174+
6175+
df1 = DataFrame({ 'bar' : Timestamp('20130101') }, index=lrange(1))
6176+
df2 = DataFrame({ 'bar' : np.nan }, index=lrange(1,2))
6177+
result = df1.append(df2)
6178+
expected = DataFrame({ 'bar' : Series([ Timestamp('20130101'), np.nan ],dtype='M8[ns]') })
6179+
assert_frame_equal(result, expected)
6180+
6181+
df1 = DataFrame({ 'bar' : Timestamp('20130101') }, index=lrange(1))
6182+
df2 = DataFrame({ 'bar' : np.nan }, index=lrange(1,2), dtype=object)
6183+
result = df1.append(df2)
6184+
expected = DataFrame({ 'bar' : Series([ Timestamp('20130101'), np.nan ],dtype='M8[ns]') })
6185+
assert_frame_equal(result, expected)
6186+
6187+
df1 = DataFrame({ 'bar' : np.nan }, index=lrange(1))
6188+
df2 = DataFrame({ 'bar' : Timestamp('20130101') }, index=lrange(1,2))
6189+
result = df1.append(df2)
6190+
expected = DataFrame({ 'bar' : Series([ np.nan, Timestamp('20130101')] ,dtype='M8[ns]') })
6191+
assert_frame_equal(result, expected)
6192+
6193+
df1 = DataFrame({ 'bar' : Timestamp('20130101') }, index=lrange(1))
6194+
df2 = DataFrame({ 'bar' : 1 }, index=lrange(1,2), dtype=object)
6195+
result = df1.append(df2)
6196+
expected = DataFrame({ 'bar' : Series([ Timestamp('20130101'), 1 ]) })
6197+
assert_frame_equal(result, expected)
6198+
61576199
def test_asfreq(self):
61586200
offset_monthly = self.tsframe.asfreq(datetools.bmonthEnd)
61596201
rule_monthly = self.tsframe.asfreq('BM')

pandas/tests/test_indexing.py

+8
Original file line numberDiff line numberDiff line change
@@ -1793,6 +1793,14 @@ def f():
17931793
expected = DataFrame(columns=['A','B','C'])
17941794
assert_frame_equal(result,expected)
17951795

1796+
# GH 5756
1797+
# setting with empty Series
1798+
df = DataFrame(Series())
1799+
assert_frame_equal(df, DataFrame({ 0 : Series() }))
1800+
1801+
df = DataFrame(Series(name='foo'))
1802+
assert_frame_equal(df, DataFrame({ 'foo' : Series() }))
1803+
17961804
def test_cache_updating(self):
17971805
# GH 4939, make sure to update the cache on setitem
17981806

pandas/tools/merge.py

+29-26
Original file line numberDiff line numberDiff line change
@@ -1139,52 +1139,55 @@ def _concat_blocks(self, blocks):
11391139

11401140
def _concat_single_item(self, objs, item):
11411141
# this is called if we don't have consistent dtypes in a row-wise append
1142-
11431142
all_values = []
1144-
dtypes = set()
1143+
dtypes = []
1144+
alls = set()
11451145

1146+
# figure out the resulting dtype of the combination
11461147
for data, orig in zip(objs, self.objs):
1148+
d = dict([ (t,False) for t in ['object','datetime','timedelta','other'] ])
11471149
if item in orig:
11481150
values = data.get(item)
11491151
if hasattr(values,'to_dense'):
11501152
values = values.to_dense()
1151-
dtypes.add(values.dtype)
11521153
all_values.append(values)
1153-
else:
1154-
all_values.append(None)
11551154

1156-
# figure out the resulting dtype of the combination
1157-
alls = set()
1158-
seen = []
1159-
for dtype in dtypes:
1160-
d = dict([ (t,False) for t in ['object','datetime','timedelta','other'] ])
1161-
if issubclass(dtype.type, (np.object_, np.bool_)):
1162-
d['object'] = True
1163-
alls.add('object')
1164-
elif is_datetime64_dtype(dtype):
1165-
d['datetime'] = True
1166-
alls.add('datetime')
1167-
elif is_timedelta64_dtype(dtype):
1168-
d['timedelta'] = True
1169-
alls.add('timedelta')
1155+
dtype = values.dtype
1156+
1157+
if issubclass(dtype.type, (np.object_, np.bool_)):
1158+
d['object'] = True
1159+
alls.add('object')
1160+
elif is_datetime64_dtype(dtype):
1161+
d['datetime'] = True
1162+
alls.add('datetime')
1163+
elif is_timedelta64_dtype(dtype):
1164+
d['timedelta'] = True
1165+
alls.add('timedelta')
1166+
else:
1167+
d['other'] = True
1168+
alls.add('other')
1169+
11701170
else:
1171+
all_values.append(None)
11711172
d['other'] = True
11721173
alls.add('other')
1173-
seen.append(d)
1174+
1175+
dtypes.append(d)
11741176

11751177
if 'datetime' in alls or 'timedelta' in alls:
11761178

11771179
if 'object' in alls or 'other' in alls:
1178-
for v, s in zip(all_values,seen):
1179-
if s.get('datetime') or s.get('timedelta'):
1180+
1181+
for v, d in zip(all_values,dtypes):
1182+
if d.get('datetime') or d.get('timedelta'):
11801183
pass
11811184

11821185
# if we have all null, then leave a date/time like type
11831186
# if we have only that type left
1184-
elif isnull(v).all():
1187+
elif v is None or isnull(v).all():
11851188

1186-
alls.remove('other')
1187-
alls.remove('object')
1189+
alls.discard('other')
1190+
alls.discard('object')
11881191

11891192
# create the result
11901193
if 'object' in alls:
@@ -1200,7 +1203,7 @@ def _concat_single_item(self, objs, item):
12001203

12011204
to_concat = []
12021205
for obj, item_values in zip(objs, all_values):
1203-
if item_values is None:
1206+
if item_values is None or isnull(item_values).all():
12041207
shape = obj.shape[1:]
12051208
missing_arr = np.empty(shape, dtype=empty_dtype)
12061209
missing_arr.fill(fill_value)

0 commit comments

Comments
 (0)