Skip to content

Commit a9cf215

Browse files
authored
feat: Allow iloc to support lists of negative indices (#1497)
* feat: support iloc with negative indices
* update partial ordering test
* update naming
* update logic
* update comment
* update logic and tests
* update filter
1 parent eb496d9 commit a9cf215

File tree

2 files changed

+53
-7
lines changed

2 files changed

+53
-7
lines changed

bigframes/core/indexers.py

+35-1
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import bigframes.core.guid as guid
2828
import bigframes.core.indexes as indexes
2929
import bigframes.core.scalar
30+
import bigframes.core.window_spec as windows
3031
import bigframes.dataframe
3132
import bigframes.dtypes
3233
import bigframes.exceptions as bfe
@@ -477,6 +478,19 @@ def _iloc_getitem_series_or_dataframe(
477478
Union[bigframes.dataframe.DataFrame, bigframes.series.Series],
478479
series_or_dataframe.iloc[0:0],
479480
)
481+
482+
# Check whether the key mixes negative and non-negative positions; if it does, both the positive and the negative offset index are needed
483+
if isinstance(key, (bigframes.series.Series, indexes.Index)):
484+
# Avoid downloading the key's data just to inspect its signs; treat it as mixed-sign instead
485+
is_key_unisigned = False
486+
else:
487+
first_sign = key[0] >= 0
488+
is_key_unisigned = True
489+
for k in key:
490+
if (k >= 0) != first_sign:
491+
is_key_unisigned = False
492+
break
493+
480494
if isinstance(series_or_dataframe, bigframes.series.Series):
481495
original_series_name = series_or_dataframe.name
482496
series_name = (
@@ -497,7 +511,27 @@ def _iloc_getitem_series_or_dataframe(
497511
block = df._block
498512
# explicitly set index to offsets, reset_index may not generate offsets in some modes
499513
block, offsets_id = block.promote_offsets("temp_iloc_offsets_")
500-
block = block.set_index([offsets_id])
514+
pos_block = block.set_index([offsets_id])
515+
516+
if not is_key_unisigned or key[0] < 0:
517+
neg_block, size_col_id = block.apply_window_op(
518+
offsets_id,
519+
ops.aggregations.SizeUnaryOp(),
520+
window_spec=windows.rows(),
521+
)
522+
neg_block, neg_index_id = neg_block.apply_binary_op(
523+
offsets_id, size_col_id, ops.SubOp()
524+
)
525+
526+
neg_block = neg_block.set_index([neg_index_id]).drop_columns(
527+
[size_col_id, offsets_id]
528+
)
529+
530+
if is_key_unisigned:
531+
block = pos_block if key[0] >= 0 else neg_block
532+
else:
533+
block = pos_block.concat([neg_block], how="inner")
534+
501535
df = bigframes.dataframe.DataFrame(block)
502536

503537
result = df.loc[key]

tests/system/small/test_dataframe.py

+18-6
Original file line numberDiff line numberDiff line change
@@ -4400,9 +4400,15 @@ def test_loc_list_multiindex(scalars_dfs_maybe_ordered):
44004400
)
44014401

44024402

4403-
def test_iloc_list(scalars_df_index, scalars_pandas_df_index):
4404-
index_list = [0, 0, 0, 5, 4, 7]
4405-
4403+
@pytest.mark.parametrize(
4404+
"index_list",
4405+
[
4406+
[0, 1, 2, 3, 4, 4],
4407+
[0, 0, 0, 5, 4, 7, -2, -5, 3],
4408+
[-1, -2, -3, -4, -5, -5],
4409+
],
4410+
)
4411+
def test_iloc_list(scalars_df_index, scalars_pandas_df_index, index_list):
44064412
bf_result = scalars_df_index.iloc[index_list]
44074413
pd_result = scalars_pandas_df_index.iloc[index_list]
44084414

@@ -4412,11 +4418,17 @@ def test_iloc_list(scalars_df_index, scalars_pandas_df_index):
44124418
)
44134419

44144420

4421+
@pytest.mark.parametrize(
4422+
"index_list",
4423+
[
4424+
[0, 1, 2, 3, 4, 4],
4425+
[0, 0, 0, 5, 4, 7, -2, -5, 3],
4426+
[-1, -2, -3, -4, -5, -5],
4427+
],
4428+
)
44154429
def test_iloc_list_partial_ordering(
4416-
scalars_df_partial_ordering, scalars_pandas_df_index
4430+
scalars_df_partial_ordering, scalars_pandas_df_index, index_list
44174431
):
4418-
index_list = [0, 0, 0, 5, 4, 7]
4419-
44204432
bf_result = scalars_df_partial_ordering.iloc[index_list]
44214433
pd_result = scalars_pandas_df_index.iloc[index_list]
44224434

0 commit comments

Comments
 (0)