Skip to content

EA: revert treatment of i8values #24623

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 11 commits into from
2 changes: 1 addition & 1 deletion pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,7 @@ def _concat_same_type(cls, to_concat):

def copy(self, deep=False):
values = self.asi8.copy()
return type(self)(values, dtype=self.dtype, freq=self.freq)
return type(self)._simple_new(values, dtype=self.dtype, freq=self.freq)

def _values_for_factorize(self):
return self.asi8, iNaT
Expand Down
107 changes: 42 additions & 65 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def tz_to_dtype(tz):
if tz is None:
return _NS_DTYPE
else:
tz = timezones.tz_standardize(tz)
return DatetimeTZDtype(tz=tz)


Expand Down Expand Up @@ -254,77 +255,53 @@ class DatetimeArray(dtl.DatetimeLikeArrayMixin,
_dtype = None # type: Union[np.dtype, DatetimeTZDtype]
_freq = None

def __init__(self, values, dtype=_NS_DTYPE, freq=None, copy=False):
if isinstance(values, (ABCSeries, ABCIndexClass)):
values = values._values

if isinstance(values, type(self)):
# validation
dtz = getattr(dtype, 'tz', None)
if dtz and values.tz is None:
dtype = DatetimeTZDtype(tz=dtype.tz)
elif dtz and values.tz:
if not timezones.tz_compare(dtz, values.tz):
msg = (
"Timezone of the array and 'dtype' do not match. "
"'{}' != '{}'"
)
raise TypeError(msg.format(dtz, values.tz))
elif values.tz:
dtype = values.dtype
# freq = validate_values_freq(values, freq)
if freq is None:
freq = values.freq
values = values._data
def __init__(self, values, dtype=None, freq=None, copy=False):
if freq == "infer":
raise ValueError(
"Frequency inference not allowed in DatetimeArray.__init__. "
"Use 'pd.array()' instead.")

if not isinstance(values, np.ndarray):
msg = (
"Unexpected type '{}'. 'values' must be a DatetimeArray "
if not hasattr(values, "dtype"):
# e.g. list
raise ValueError(
"Unexpected type '{vals}'. 'values' must be a DatetimeArray "
"ndarray, or Series or Index containing one of those."
)
raise ValueError(msg.format(type(values).__name__))

if values.dtype == 'i8':
# for compat with datetime/timedelta/period shared methods,
# we can sometimes get here with int64 values. These represent
# nanosecond UTC (or tz-naive) unix timestamps
values = values.view(_NS_DTYPE)

if values.dtype != _NS_DTYPE:
msg = (
.format(vals=type(values).__name__))

if is_datetime64_dtype(values.dtype) and hasattr(dtype, "tz"):
# cast to make _from_sequence treat as unix instead of wall-times;
# see GH#24559
values = type(self)._simple_new(
np.asarray(values),
freq=getattr(values, "freq", None),
dtype=tz_to_dtype(utc)).tz_convert(dtype.tz)

elif not (is_datetime64tz_dtype(values.dtype) or
is_datetime64_dtype(values.dtype) or
values.dtype == 'i8'):
raise ValueError(
"The dtype of 'values' is incorrect. Must be 'datetime64[ns]'."
" Got {} instead."
)
raise ValueError(msg.format(values.dtype))
" Got {dtype} instead." .format(dtype=values.dtype))

dtype = _validate_dt64_dtype(dtype)

if freq == "infer":
msg = (
"Frequency inference not allowed in DatetimeArray.__init__. "
"Use 'pd.array()' instead."
)
raise ValueError(msg)

if copy:
values = values.copy()
if freq:
freq = to_offset(freq)
if getattr(dtype, 'tz', None):
# https://github.com/pandas-dev/pandas/issues/18595
# Ensure that we have a standard timezone for pytz objects.
# Without this, things like adding an array of timedeltas and
# a tz-aware Timestamp (with a tz specific to its datetime) will
# be incorrect(ish?) for the array as a whole
dtype = DatetimeTZDtype(tz=timezones.tz_standardize(dtype.tz))

self._data = values
self._dtype = dtype
self._freq = freq
arr = type(self)._from_sequence(values, dtype=dtype,
freq=freq, copy=copy)
self._data = arr._data
self._freq = arr._freq
self._dtype = arr._dtype

@classmethod
def _simple_new(cls, values, freq=None, dtype=None):
return cls(values, freq=freq, dtype=dtype)
def _simple_new(cls, values, freq=None, dtype=_NS_DTYPE):
"""
we require the we have a dtype compat for the values
if we are passed a non-dtype compat, then coerce using the constructor
"""
assert isinstance(values, np.ndarray), type(values)

result = object.__new__(cls)
result._data = values.view('datetime64[ns]')
result._freq = freq
result._dtype = dtype
return result

@classmethod
def _from_sequence(cls, data, dtype=None, copy=False,
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4377,7 +4377,8 @@ def _maybe_casted_values(index, labels=None):
values, mask, np.nan)

if issubclass(values_type, DatetimeLikeArray):
values = values_type(values, dtype=values_dtype)
values = values_type._simple_new(values,
dtype=values_dtype)

return values

Expand Down
8 changes: 5 additions & 3 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,18 +314,20 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None):
if isinstance(values, DatetimeArray):
if tz:
tz = validate_tz_from_dtype(dtype, tz)
tz = timezones.tz_standardize(tz)
dtype = DatetimeTZDtype(tz=tz)
elif dtype is None:
dtype = _NS_DTYPE
dtype = values.dtype

values = DatetimeArray(values, freq=freq, dtype=dtype)
tz = values.tz
freq = values.freq
values = values._data
else:
tz = tz or getattr(dtype, 'tz', None)

# DatetimeArray._simple_new will accept either i8 or M8[ns] dtypes
if isinstance(values, DatetimeIndex):
values = values._data
assert isinstance(values, np.ndarray)

dtype = tz_to_dtype(tz)
dtarr = DatetimeArray._simple_new(values, freq=freq, dtype=dtype)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3078,7 +3078,7 @@ def make_block(values, placement, klass=None, ndim=None, dtype=None,
elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values):
# TODO: This is no longer hit internally; does it need to be retained
# for e.g. pyarrow?
values = DatetimeArray(values, dtype)
values = DatetimeArray(values.view('i8'), dtype)

return klass(values, ndim=ndim, placement=placement)

Expand Down
2 changes: 1 addition & 1 deletion pandas/io/packers.py
Original file line number Diff line number Diff line change
Expand Up @@ -656,7 +656,7 @@ def create_block(b):
if is_datetime64tz_dtype(b[u'dtype']):
assert isinstance(values, np.ndarray), type(values)
assert values.dtype == 'M8[ns]', values.dtype
values = DatetimeArray(values, dtype=b[u'dtype'])
values = DatetimeArray(values.view('i8'), dtype=b[u'dtype'])

return make_block(values=values,
klass=getattr(internals, b[u'klass']),
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ def test_from_array_keeps_base(self):
arr = np.array(['2000-01-01', '2000-01-02'], dtype='M8[ns]')
dta = DatetimeArray(arr)

assert dta._data is arr
assert dta._data.base is arr
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why this change?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

because a view of arr is taken, so the assertion is untrue as-is.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

then why was it passing on master? Because you changed it to take a view? Is it important to do that?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because I changed it back to use _from_sequence, which uses sequence_to_dt64ns, which yes takes a view. And it is important in as much as in the PR implementing sequence_to_dt64ns when the original version only took a view if it was necessary, there was a request to do it unconditionally to save a line

dta = DatetimeArray(arr[:0])
assert dta._data.base is arr

Expand Down
70 changes: 68 additions & 2 deletions pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,69 @@


class TestDatetimeArrayConstructor(object):

@pytest.mark.parametrize('tz', [None, 'Asia/Singapore'])
def test_constructor_equivalence(self, tz):
# GH#24623 check that DatetimeArray.__init__ behavior matches:
# Timestamp.__new__ for int64
# DatetimeArray._from_sequence for int64, datetime64[ns]
# DatetimeArray._simple_new for int64
# DatetimeIndex.__new__ for int64, datetime64[ns]
# DatetimeIndex._simple_new for int64, datetime64[ns]
#
# and that DatetimeArray._simple_new behaves like
# DatetimeIndex._simple_new for both int64 and datetime64[ns] inputs
arr = np.random.randint(-10**9, 10**9, size=5, dtype=np.int64)
dt64arr = arr.view('datetime64[ns]')
dti = pd.date_range('1960-01-01', periods=1, tz=tz)

v1 = DatetimeArray._simple_new(arr.view('i8'), dtype=dti.dtype)
v2 = DatetimeArray(arr.view('i8'), dtype=dti.dtype)
v3 = DatetimeArray._from_sequence(arr.view('i8'), dtype=dti.dtype)
v4 = pd.DatetimeIndex._simple_new(arr.view('i8'), tz=dti.tz)
v5 = pd.DatetimeIndex(arr.view('i8'), tz=dti.tz)
v6 = pd.to_datetime(arr, utc=True).tz_convert(dti.tz)

# when dealing with _simple_new, i8 and M8[ns] are interchangeable
v7 = DatetimeArray._simple_new(arr.view('M8[ns]'), dtype=dti.dtype)
v8 = pd.DatetimeIndex._simple_new(arr.view('M8[ns]'), dtype=dti.dtype)

# GH#24623 DatetimeArray.__init__ treats M8[ns] as unix timestamps,
# unlike DatetimeIndex.__new__.
v9 = DatetimeArray(dt64arr, dtype=dti.dtype)

tm.assert_datetime_array_equal(v1, v2)
tm.assert_datetime_array_equal(v1, v3)
tm.assert_datetime_array_equal(v1, v4._data)
tm.assert_datetime_array_equal(v1, v5._data)
tm.assert_datetime_array_equal(v1, v6._data)
tm.assert_datetime_array_equal(v1, v7)
tm.assert_datetime_array_equal(v1, v8._data)
tm.assert_datetime_array_equal(v1, v9)

expected = [pd.Timestamp(i8, tz=dti.tz) for i8 in arr]
assert list(v1) == expected

# The guarantees for datetime64 data are fewer
v1 = DatetimeArray._from_sequence(dt64arr, dtype=dti.dtype)
v2 = DatetimeArray._from_sequence(dt64arr, tz=dti.tz)
v3 = pd.DatetimeIndex(dt64arr, dtype=dti.dtype)
v4 = pd.DatetimeIndex(dt64arr, tz=dti.tz)

tm.assert_datetime_array_equal(v1, v2)
tm.assert_datetime_array_equal(v1, v3._data)
tm.assert_datetime_array_equal(v1, v4._data)

def test_freq_validation(self):
# GH#24623 check that invalid instances cannot be created with the
# public constructor
arr = pd.array(np.arange(5, dtype=np.int64)) * 3600 * 10**9

msg = ("Inferred frequency H from passed values does not "
"conform to passed frequency W-SUN")
with pytest.raises(ValueError, match=msg):
DatetimeArray(arr, freq="W")

@pytest.mark.parametrize('meth', [DatetimeArray._from_sequence,
sequence_to_dt64ns,
pd.to_datetime,
Expand All @@ -35,6 +98,7 @@ def test_mixing_naive_tzaware_raises(self, meth):
meth(obj)

def test_from_pandas_array(self):
# GH#24623, GH#24615
arr = pd.array(np.arange(5, dtype=np.int64)) * 3600 * 10**9

result = DatetimeArray._from_sequence(arr, freq='infer')
Expand All @@ -46,7 +110,8 @@ def test_mismatched_timezone_raises(self):
arr = DatetimeArray(np.array(['2000-01-01T06:00:00'], dtype='M8[ns]'),
dtype=DatetimeTZDtype(tz='US/Central'))
dtype = DatetimeTZDtype(tz='US/Eastern')
with pytest.raises(TypeError, match='Timezone of the array'):
with pytest.raises(TypeError,
match='data is already tz-aware US/Central'):
DatetimeArray(arr, dtype=dtype)

def test_non_array_raises(self):
Expand All @@ -69,10 +134,11 @@ def test_freq_infer_raises(self):
def test_copy(self):
data = np.array([1, 2, 3], dtype='M8[ns]')
arr = DatetimeArray(data, copy=False)
assert arr._data is data
assert arr._data.base is data

arr = DatetimeArray(data, copy=True)
assert arr._data is not data
assert arr._data.base is not data


class TestDatetimeArrayComparisons(object):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1255,7 +1255,7 @@ def test_array(array, attr, box):
array = getattr(array, attr)
result = getattr(result, attr)

assert result is array
assert result is array or result.base is array.base


def test_array_multiindex_raises():
Expand Down