Skip to content

Commit 5d0f149

Browse files
feat: Add MultiIndex subclass. (#596)
1 parent 8702dcf commit 5d0f149

File tree

6 files changed

+193
-29
lines changed

6 files changed

+193
-29
lines changed

bigframes/core/indexes/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
# limitations under the License.
1414

1515
from bigframes.core.indexes.base import Index
16+
from bigframes.core.indexes.multi import MultiIndex
1617

1718
__all__ = [
1819
"Index",
20+
"MultiIndex",
1921
]

bigframes/core/indexes/base.py

+28-29
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,15 @@
4242

4343
class Index(vendored_pandas_index.Index):
4444
__doc__ = vendored_pandas_index.Index.__doc__
45-
46-
def __init__(
47-
self,
45+
_query_job = None
46+
_block: blocks.Block
47+
_linked_frame: Union[
48+
bigframes.dataframe.DataFrame, bigframes.series.Series, None
49+
] = None
50+
51+
# Overridden on __new__ to create subclasses like pandas does
52+
def __new__(
53+
cls,
4854
data=None,
4955
dtype=None,
5056
*,
@@ -73,18 +79,30 @@ def __init__(
7379
if dtype is not None:
7480
index = index.astype(dtype)
7581
block = index._block
82+
elif isinstance(data, pandas.Index):
83+
pd_df = pandas.DataFrame(index=data)
84+
block = df.DataFrame(pd_df, session=session)._block
7685
else:
7786
pd_index = pandas.Index(data=data, dtype=dtype, name=name)
7887
pd_df = pandas.DataFrame(index=pd_index)
7988
block = df.DataFrame(pd_df, session=session)._block
80-
self._query_job = None
81-
self._block: blocks.Block = block
89+
90+
# TODO: Support more index subtypes
91+
from bigframes.core.indexes.multi import MultiIndex
92+
93+
klass = MultiIndex if len(block._index_columns) > 1 else cls
94+
result = typing.cast(Index, object.__new__(klass))
95+
result._query_job = None
96+
result._block = block
97+
return result
8298

8399
@classmethod
84100
def from_frame(
85101
cls, frame: Union[bigframes.series.Series, bigframes.dataframe.DataFrame]
86102
) -> Index:
87-
return FrameIndex(frame)
103+
index = Index(frame._block)
104+
index._linked_frame = frame
105+
return index
88106

89107
@property
90108
def name(self) -> blocks.Label:
@@ -107,6 +125,10 @@ def names(self) -> typing.Sequence[blocks.Label]:
107125
@names.setter
108126
def names(self, values: typing.Sequence[blocks.Label]):
109127
new_block = self._block.with_index_labels(values)
128+
if self._linked_frame is not None:
129+
self._linked_frame._set_block(
130+
self._linked_frame._block.with_index_labels(values)
131+
)
110132
self._block = new_block
111133

112134
@property
@@ -452,26 +474,3 @@ def to_numpy(self, dtype=None, **kwargs) -> np.ndarray:
452474

453475
def __len__(self):
454476
return self.shape[0]
455-
456-
457-
# Index that mutates the originating dataframe/series
458-
class FrameIndex(Index):
459-
def __init__(
460-
self,
461-
series_or_dataframe: typing.Union[
462-
bigframes.series.Series, bigframes.dataframe.DataFrame
463-
],
464-
):
465-
super().__init__(series_or_dataframe._block)
466-
self._whole_frame = series_or_dataframe
467-
468-
@property
469-
def names(self) -> typing.Sequence[blocks.Label]:
470-
"""Returns the names of the Index."""
471-
return self._block._index_labels
472-
473-
@names.setter
474-
def names(self, values: typing.Sequence[blocks.Label]):
475-
new_block = self._whole_frame._get_block().with_index_labels(values)
476-
self._whole_frame._set_block(new_block)
477-
self._block = new_block

bigframes/core/indexes/multi.py

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
#     http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
from typing import cast, Hashable, Iterable, Sequence
18+
19+
import bigframes_vendored.pandas.core.indexes.multi as vendored_pandas_multindex
20+
import pandas
21+
22+
from bigframes.core.indexes.base import Index
23+
24+
25+
class MultiIndex(Index, vendored_pandas_multindex.MultiIndex):
    # User-facing documentation is taken from the vendored pandas definition.
    # The methods below deliberately carry no docstrings of their own.
    __doc__ = vendored_pandas_multindex.MultiIndex.__doc__

    @classmethod
    def from_tuples(
        cls,
        tuples: Iterable[tuple[Hashable, ...]],
        sortorder: int | None = None,
        names: Sequence[Hashable] | Hashable | None = None,
    ) -> MultiIndex:
        # Let pandas validate and assemble the levels locally first.
        local_index = pandas.MultiIndex.from_tuples(
            tuples, sortorder=sortorder, names=names
        )
        # Construction goes through Index(...) rather than cls(...):
        # Index.__new__ should detect multiple levels and properly create a
        # multiindex, so the cast only informs the type checker.
        built = Index(local_index)
        return cast(MultiIndex, built)

    @classmethod
    def from_arrays(
        cls,
        arrays,
        sortorder: int | None = None,
        names=None,
    ) -> MultiIndex:
        # Let pandas validate and assemble the levels locally first.
        local_index = pandas.MultiIndex.from_arrays(
            arrays, sortorder=sortorder, names=names
        )
        # Construction goes through Index(...) rather than cls(...):
        # Index.__new__ should detect multiple levels and properly create a
        # multiindex, so the cast only informs the type checker.
        built = Index(local_index)
        return cast(MultiIndex, built)

bigframes/pandas/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -707,6 +707,7 @@ def to_datetime(
707707
# checking and docstrings.
708708
DataFrame = bigframes.dataframe.DataFrame
709709
Index = bigframes.core.indexes.Index
710+
MultiIndex = bigframes.core.indexes.MultiIndex
710711
Series = bigframes.series.Series
711712

712713
# Other public pandas attributes
@@ -760,6 +761,7 @@ def to_datetime(
760761
# Class aliases
761762
"DataFrame",
762763
"Index",
764+
"MultiIndex",
763765
"Series",
764766
# Other public pandas attributes
765767
"NamedAgg",

tests/system/small/test_multiindex.py

+25
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,31 @@
2020
from tests.system.utils import assert_pandas_df_equal, skip_legacy_pandas
2121

2222

23+
def test_multi_index_from_arrays():
    """Check bpd.MultiIndex.from_arrays against pandas for identical inputs."""
    level_names = (" 1index 1", "_1index 2")

    def build_levels():
        # Fresh pandas.Index objects on every call, so the two constructors
        # under comparison never share input objects.
        int_level = pandas.Index([4, 99], dtype=pandas.Int64Dtype())
        str_level = pandas.Index(
            [" Hello, World!", "_some_new_string"],
            dtype=pandas.StringDtype(storage="pyarrow"),
        )
        return [int_level, str_level]

    bf_idx = bpd.MultiIndex.from_arrays(build_levels(), names=list(level_names))
    pd_idx = pandas.MultiIndex.from_arrays(build_levels(), names=list(level_names))

    assert bf_idx.names == pd_idx.names
    pandas.testing.assert_index_equal(bf_idx.to_pandas(), pd_idx)
46+
47+
2348
@skip_legacy_pandas
2449
def test_read_pandas_multi_index_axes():
2550
index = pandas.MultiIndex.from_arrays(
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/indexes/multi.py
2+
from __future__ import annotations
3+
4+
from typing import Hashable, Iterable, Sequence
5+
6+
import bigframes_vendored.pandas.core.indexes.base
7+
8+
from bigframes import constants
9+
10+
11+
class MultiIndex(bigframes_vendored.pandas.core.indexes.base.Index):
    """
    An index object with multiple levels (a hierarchical index) for pandas
    objects.
    """

    @classmethod
    def from_tuples(
        cls,
        tuples: Iterable[tuple[Hashable, ...]],
        sortorder: int | None = None,
        names: Sequence[Hashable] | Hashable | None = None,
    ) -> MultiIndex:
        """
        Build a MultiIndex from a list of tuples.

        **Examples:**

            >>> import bigframes.pandas as bpd
            >>> bpd.options.display.progress_bar = None
            >>> tuples = [(1, 'red'), (1, 'blue'),
            ...           (2, 'red'), (2, 'blue')]
            >>> bpd.MultiIndex.from_tuples(tuples, names=('number', 'color'))
            MultiIndex([(1, 'red'),
                        (1, 'blue'),
                        (2, 'red'),
                        (2, 'blue')],
                       names=['number', 'color'])

        Args:
            tuples (list / sequence of tuple-likes):
                One tuple per row/column; each tuple is the index of that
                row/column.
            sortorder (int or None):
                Level of sortedness (the data must be lexicographically
                sorted by that level).
            names (list / sequence of str, optional):
                Names to assign to the levels of the index.

        Returns:
            MultiIndex
        """
        # Documentation-only stub: this vendored definition always raises.
        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

    @classmethod
    def from_arrays(
        cls,
        arrays,
        sortorder: int | None = None,
        names=None,
    ) -> MultiIndex:
        """
        Build a MultiIndex from a list of arrays.

        **Examples:**

            >>> import bigframes.pandas as bpd
            >>> bpd.options.display.progress_bar = None
            >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
            >>> bpd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
            MultiIndex([(1, 'red'),
                        (1, 'blue'),
                        (2, 'red'),
                        (2, 'blue')],
                       names=['number', 'color'])

        Args:
            arrays (list / sequence of array-likes):
                One array-like per level, holding that level's value for
                each data point. len(arrays) is the number of levels.
            sortorder (int or None):
                Level of sortedness (the data must be lexicographically
                sorted by that level).
            names (list / sequence of str, optional):
                Names to assign to the levels of the index.

        Returns:
            MultiIndex
        """
        # Documentation-only stub: this vendored definition always raises.
        raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

0 commit comments

Comments
 (0)