Skip to content

Commit 350febf

Browse files
committed
Merge pull request #4995 from jreback/nat_append
BUG: Fix appending when dtypes are not the same (error showing mixing float/np.datetime64) (GH4993)
2 parents 7828922 + cac7981 commit 350febf

File tree

3 files changed

+75
-10
lines changed

3 files changed

+75
-10
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,7 @@ Bug Fixes
474474
explicitly passing labels (:issue:`3415`)
475475
- Fixed wrong check for overlapping in ``DatetimeIndex.union`` (:issue:`4564`)
476476
- Fixed conflict between thousands separator and date parser in csv_parser (:issue:`4678`)
477+
- Fix appending when dtypes are not the same (error showing mixing float/np.datetime64) (:issue:`4993`)
477478

478479
pandas 0.12.0
479480
-------------

pandas/tools/merge.py

+50-10
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,16 @@
1717
from pandas.core.internals import (IntBlock, BoolBlock, BlockManager,
1818
make_block, _consolidate)
1919
from pandas.util.decorators import cache_readonly, Appender, Substitution
20-
from pandas.core.common import PandasError, ABCSeries
20+
from pandas.core.common import (PandasError, ABCSeries,
21+
is_timedelta64_dtype, is_datetime64_dtype,
22+
is_integer_dtype)
23+
2124
import pandas.core.common as com
2225

2326
import pandas.lib as lib
2427
import pandas.algos as algos
2528
import pandas.hashtable as _hash
26-
29+
import pandas.tslib as tslib
2730

2831
@Substitution('\nleft : DataFrame')
2932
@Appender(_merge_doc, indents=0)
@@ -1128,6 +1131,8 @@ def _concat_blocks(self, blocks):
11281131
return block
11291132

11301133
def _concat_single_item(self, objs, item):
1134+
# this is called if we don't have consistent dtypes in a row-wise append
1135+
11311136
all_values = []
11321137
dtypes = set()
11331138

@@ -1141,22 +1146,57 @@ def _concat_single_item(self, objs, item):
11411146
else:
11421147
all_values.append(None)
11431148

1144-
# this stinks
1145-
have_object = False
1149+
# figure out the resulting dtype of the combination
1150+
alls = set()
1151+
seen = []
11461152
for dtype in dtypes:
1153+
d = dict([ (t,False) for t in ['object','datetime','timedelta','other'] ])
11471154
if issubclass(dtype.type, (np.object_, np.bool_)):
1148-
have_object = True
1149-
if have_object:
1150-
empty_dtype = np.object_
1151-
else:
1152-
empty_dtype = np.float64
1155+
d['object'] = True
1156+
alls.add('object')
1157+
elif is_datetime64_dtype(dtype):
1158+
d['datetime'] = True
1159+
alls.add('datetime')
1160+
elif is_timedelta64_dtype(dtype):
1161+
d['timedelta'] = True
1162+
alls.add('timedelta')
1163+
else:
1164+
d['other'] = True
1165+
alls.add('other')
1166+
seen.append(d)
1167+
1168+
if 'datetime' in alls or 'timedelta' in alls:
1169+
1170+
if 'object' in alls or 'other' in alls:
1171+
for v, s in zip(all_values,seen):
1172+
if s.get('datetime') or s.get('timedelta'):
1173+
pass
1174+
1175+
# if we have all null, then leave a date/time like type
1176+
# if we have only that type left
1177+
elif isnull(v).all():
1178+
1179+
alls.remove('other')
1180+
alls.remove('object')
1181+
1182+
# create the result
1183+
if 'object' in alls:
1184+
empty_dtype, fill_value = np.object_, np.nan
1185+
elif 'other' in alls:
1186+
empty_dtype, fill_value = np.float64, np.nan
1187+
elif 'datetime' in alls:
1188+
empty_dtype, fill_value = 'M8[ns]', tslib.iNaT
1189+
elif 'timedelta' in alls:
1190+
empty_dtype, fill_value = 'm8[ns]', tslib.iNaT
1191+
else: # pragma
1192+
raise AssertionError("invalid dtype determination in concat_single_item")
11531193

11541194
to_concat = []
11551195
for obj, item_values in zip(objs, all_values):
11561196
if item_values is None:
11571197
shape = obj.shape[1:]
11581198
missing_arr = np.empty(shape, dtype=empty_dtype)
1159-
missing_arr.fill(np.nan)
1199+
missing_arr.fill(fill_value)
11601200
to_concat.append(missing_arr)
11611201
else:
11621202
to_concat.append(item_values)

pandas/tools/tests/test_merge.py

+24
Original file line numberDiff line numberDiff line change
@@ -742,6 +742,30 @@ def test_merge_nan_right(self):
742742
assert_frame_equal(result, expected)
743743

744744

745+
def test_append_dtype_coerce(self):
746+
747+
# GH 4993
748+
# appending with datetime will incorrectly convert datetime64
749+
import datetime as dt
750+
from pandas import NaT
751+
752+
df1 = DataFrame(index=[1,2], data=[dt.datetime(2013,1,1,0,0),
753+
dt.datetime(2013,1,2,0,0)],
754+
columns=['start_time'])
755+
df2 = DataFrame(index=[4,5], data=[[dt.datetime(2013,1,3,0,0),
756+
dt.datetime(2013,1,3,6,10)],
757+
[dt.datetime(2013,1,4,0,0),
758+
dt.datetime(2013,1,4,7,10)]],
759+
columns=['start_time','end_time'])
760+
761+
expected = concat([
762+
Series([NaT,NaT,dt.datetime(2013,1,3,6,10),dt.datetime(2013,1,4,7,10)],name='end_time'),
763+
Series([dt.datetime(2013,1,1,0,0),dt.datetime(2013,1,2,0,0),dt.datetime(2013,1,3,0,0),dt.datetime(2013,1,4,0,0)],name='start_time'),
764+
],axis=1)
765+
result = df1.append(df2,ignore_index=True)
766+
assert_frame_equal(result, expected)
767+
768+
745769
def test_overlapping_columns_error_message(self):
746770
# #2649
747771
df = DataFrame({'key': [1, 2, 3],

0 commit comments

Comments
 (0)