Skip to content

Commit c187ac9

Browse files
committed
PERF: use NaT comparisons in int64/datetimelikes pandas-dev#11010
1 parent 33530b3 commit c187ac9

File tree

4 files changed

+29
-28
lines changed

4 files changed

+29
-28
lines changed

doc/source/whatsnew/v0.17.0.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1009,11 +1009,10 @@ Bug Fixes
10091009
- Bug in ``to_json`` which was causing segmentation fault when serializing 0-rank ndarray (:issue:`9576`)
10101010
- Bug in plotting functions may raise ``IndexError`` when plotted on ``GridSpec`` (:issue:`10819`)
10111011
- Bug in plot result may show unnecessary minor ticklabels (:issue:`10657`)
1012-
- Bug in ``groupby`` incorrect computation for aggregation on ``DataFrame`` with ``NaT`` (E.g ``first``, ``last``, ``min``). (:issue:`10590`)
1012+
- Bug in ``groupby`` causing incorrect computation for aggregations on a ``DataFrame`` containing ``NaT`` (e.g. ``first``, ``last``, ``min``). (:issue:`10590`, :issue:`11010`)
10131013
- Bug when constructing ``DataFrame`` where passing a dictionary with only scalar values and specifying columns did not raise an error (:issue:`10856`)
10141014
- Bug in ``.var()`` causing roundoff errors for highly similar values (:issue:`10242`)
10151015
- Bug in ``DataFrame.plot(subplots=True)`` with duplicated columns outputs incorrect result (:issue:`10962`)
10161016
- Bug in ``Index`` arithmetic may result in incorrect class (:issue:`10638`)
10171017
- Bug in ``date_range`` which returns an empty result if ``freq`` is a negative annual, quarterly or monthly frequency (:issue:`11018`)
10181018
- Bug in ``DatetimeIndex`` where a negative ``freq`` could not be inferred (:issue:`11018`)
1019-

pandas/core/groupby.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1523,8 +1523,6 @@ def aggregate(self, values, how, axis=0):
15231523

15241524
if is_datetime_or_timedelta_dtype(values.dtype):
15251525
values = values.view('int64')
1526-
values[values == tslib.iNaT] = np.nan
1527-
# GH 7754
15281526
is_numeric = True
15291527
elif is_bool_dtype(values.dtype):
15301528
values = _algos.ensure_float64(values)

pandas/src/generate_code.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -739,7 +739,7 @@ def group_last_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
739739
val = values[i, j]
740740
741741
# not nan
742-
if val == val:
742+
if val == val and val != %(nan_val)s:
743743
nobs[lab, j] += 1
744744
resx[lab, j] = val
745745
@@ -785,7 +785,7 @@ def group_nth_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
785785
val = values[i, j]
786786
787787
# not nan
788-
if val == val:
788+
if val == val and val != %(nan_val)s:
789789
nobs[lab, j] += 1
790790
if nobs[lab, j] == rank:
791791
resx[lab, j] = val
@@ -1013,7 +1013,7 @@ def group_max_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
10131013
val = values[i, j]
10141014
10151015
# not nan
1016-
if val == val:
1016+
if val == val and val != %(nan_val)s:
10171017
nobs[lab, j] += 1
10181018
if val > maxx[lab, j]:
10191019
maxx[lab, j] = val
@@ -1027,7 +1027,7 @@ def group_max_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
10271027
val = values[i, 0]
10281028
10291029
# not nan
1030-
if val == val:
1030+
if val == val and val != %(nan_val)s:
10311031
nobs[lab, 0] += 1
10321032
if val > maxx[lab, 0]:
10331033
maxx[lab, 0] = val
@@ -1076,7 +1076,8 @@ def group_min_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
10761076
val = values[i, j]
10771077
10781078
# not nan
1079-
if val == val:
1079+
if val == val and val != %(nan_val)s:
1080+
10801081
nobs[lab, j] += 1
10811082
if val < minx[lab, j]:
10821083
minx[lab, j] = val
@@ -1090,7 +1091,7 @@ def group_min_%(name)s(ndarray[%(dest_type2)s, ndim=2] out,
10901091
val = values[i, 0]
10911092
10921093
# not nan
1093-
if val == val:
1094+
if val == val and val != %(nan_val)s:
10941095
nobs[lab, 0] += 1
10951096
if val < minx[lab, 0]:
10961097
minx[lab, 0] = val

pandas/src/generated.pyx

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7315,7 +7315,7 @@ def group_last_float64(ndarray[float64_t, ndim=2] out,
73157315
val = values[i, j]
73167316

73177317
# not nan
7318-
if val == val:
7318+
if val == val and val != NAN:
73197319
nobs[lab, j] += 1
73207320
resx[lab, j] = val
73217321

@@ -7360,7 +7360,7 @@ def group_last_float32(ndarray[float32_t, ndim=2] out,
73607360
val = values[i, j]
73617361

73627362
# not nan
7363-
if val == val:
7363+
if val == val and val != NAN:
73647364
nobs[lab, j] += 1
73657365
resx[lab, j] = val
73667366

@@ -7405,7 +7405,7 @@ def group_last_int64(ndarray[int64_t, ndim=2] out,
74057405
val = values[i, j]
74067406

74077407
# not nan
7408-
if val == val:
7408+
if val == val and val != iNaT:
74097409
nobs[lab, j] += 1
74107410
resx[lab, j] = val
74117411

@@ -7451,7 +7451,7 @@ def group_nth_float64(ndarray[float64_t, ndim=2] out,
74517451
val = values[i, j]
74527452

74537453
# not nan
7454-
if val == val:
7454+
if val == val and val != NAN:
74557455
nobs[lab, j] += 1
74567456
if nobs[lab, j] == rank:
74577457
resx[lab, j] = val
@@ -7497,7 +7497,7 @@ def group_nth_float32(ndarray[float32_t, ndim=2] out,
74977497
val = values[i, j]
74987498

74997499
# not nan
7500-
if val == val:
7500+
if val == val and val != NAN:
75017501
nobs[lab, j] += 1
75027502
if nobs[lab, j] == rank:
75037503
resx[lab, j] = val
@@ -7543,7 +7543,7 @@ def group_nth_int64(ndarray[int64_t, ndim=2] out,
75437543
val = values[i, j]
75447544

75457545
# not nan
7546-
if val == val:
7546+
if val == val and val != iNaT:
75477547
nobs[lab, j] += 1
75487548
if nobs[lab, j] == rank:
75497549
resx[lab, j] = val
@@ -7592,7 +7592,8 @@ def group_min_float64(ndarray[float64_t, ndim=2] out,
75927592
val = values[i, j]
75937593

75947594
# not nan
7595-
if val == val:
7595+
if val == val and val != NAN:
7596+
75967597
nobs[lab, j] += 1
75977598
if val < minx[lab, j]:
75987599
minx[lab, j] = val
@@ -7606,7 +7607,7 @@ def group_min_float64(ndarray[float64_t, ndim=2] out,
76067607
val = values[i, 0]
76077608

76087609
# not nan
7609-
if val == val:
7610+
if val == val and val != NAN:
76107611
nobs[lab, 0] += 1
76117612
if val < minx[lab, 0]:
76127613
minx[lab, 0] = val
@@ -7654,7 +7655,8 @@ def group_min_float32(ndarray[float32_t, ndim=2] out,
76547655
val = values[i, j]
76557656

76567657
# not nan
7657-
if val == val:
7658+
if val == val and val != NAN:
7659+
76587660
nobs[lab, j] += 1
76597661
if val < minx[lab, j]:
76607662
minx[lab, j] = val
@@ -7668,7 +7670,7 @@ def group_min_float32(ndarray[float32_t, ndim=2] out,
76687670
val = values[i, 0]
76697671

76707672
# not nan
7671-
if val == val:
7673+
if val == val and val != NAN:
76727674
nobs[lab, 0] += 1
76737675
if val < minx[lab, 0]:
76747676
minx[lab, 0] = val
@@ -7716,7 +7718,8 @@ def group_min_int64(ndarray[int64_t, ndim=2] out,
77167718
val = values[i, j]
77177719

77187720
# not nan
7719-
if val == val:
7721+
if val == val and val != iNaT:
7722+
77207723
nobs[lab, j] += 1
77217724
if val < minx[lab, j]:
77227725
minx[lab, j] = val
@@ -7730,7 +7733,7 @@ def group_min_int64(ndarray[int64_t, ndim=2] out,
77307733
val = values[i, 0]
77317734

77327735
# not nan
7733-
if val == val:
7736+
if val == val and val != iNaT:
77347737
nobs[lab, 0] += 1
77357738
if val < minx[lab, 0]:
77367739
minx[lab, 0] = val
@@ -7779,7 +7782,7 @@ def group_max_float64(ndarray[float64_t, ndim=2] out,
77797782
val = values[i, j]
77807783

77817784
# not nan
7782-
if val == val:
7785+
if val == val and val != NAN:
77837786
nobs[lab, j] += 1
77847787
if val > maxx[lab, j]:
77857788
maxx[lab, j] = val
@@ -7793,7 +7796,7 @@ def group_max_float64(ndarray[float64_t, ndim=2] out,
77937796
val = values[i, 0]
77947797

77957798
# not nan
7796-
if val == val:
7799+
if val == val and val != NAN:
77977800
nobs[lab, 0] += 1
77987801
if val > maxx[lab, 0]:
77997802
maxx[lab, 0] = val
@@ -7841,7 +7844,7 @@ def group_max_float32(ndarray[float32_t, ndim=2] out,
78417844
val = values[i, j]
78427845

78437846
# not nan
7844-
if val == val:
7847+
if val == val and val != NAN:
78457848
nobs[lab, j] += 1
78467849
if val > maxx[lab, j]:
78477850
maxx[lab, j] = val
@@ -7855,7 +7858,7 @@ def group_max_float32(ndarray[float32_t, ndim=2] out,
78557858
val = values[i, 0]
78567859

78577860
# not nan
7858-
if val == val:
7861+
if val == val and val != NAN:
78597862
nobs[lab, 0] += 1
78607863
if val > maxx[lab, 0]:
78617864
maxx[lab, 0] = val
@@ -7903,7 +7906,7 @@ def group_max_int64(ndarray[int64_t, ndim=2] out,
79037906
val = values[i, j]
79047907

79057908
# not nan
7906-
if val == val:
7909+
if val == val and val != iNaT:
79077910
nobs[lab, j] += 1
79087911
if val > maxx[lab, j]:
79097912
maxx[lab, j] = val
@@ -7917,7 +7920,7 @@ def group_max_int64(ndarray[int64_t, ndim=2] out,
79177920
val = values[i, 0]
79187921

79197922
# not nan
7920-
if val == val:
7923+
if val == val and val != iNaT:
79217924
nobs[lab, 0] += 1
79227925
if val > maxx[lab, 0]:
79237926
maxx[lab, 0] = val

0 commit comments

Comments
 (0)