Skip to content

Commit c065071

Browse files
feat: add __iter__, iterrows, itertuples, keys methods (#164)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #<issue_number_goes_here> 🦕
1 parent 031f253 commit c065071

File tree

5 files changed

+182
-1
lines changed

5 files changed

+182
-1
lines changed

bigframes/dataframe.py

+18
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,9 @@ def __len__(self):
304304
rows, _ = self.shape
305305
return rows
306306

307+
def __iter__(self) -> typing.Iterator:
    """Iterate over the column labels of the DataFrame.

    Returns:
        Iterator: an iterator over ``self.columns``.
    """
    return iter(self.columns)
309+
307310
def astype(
308311
self,
309312
dtype: Union[bigframes.dtypes.DtypeString, bigframes.dtypes.Dtype],
@@ -1477,12 +1480,27 @@ def isin(self, values) -> DataFrame:
14771480
f"isin(), you passed a [{type(values).__name__}]"
14781481
)
14791482

1483+
def keys(self) -> pandas.Index:
1484+
return self.columns
1485+
14801486
def items(self):
14811487
column_ids = self._block.value_columns
14821488
column_labels = self._block.column_labels
14831489
for col_id, col_label in zip(column_ids, column_labels):
14841490
yield col_label, bigframes.series.Series(self._block.select_column(col_id))
14851491

1492+
def iterrows(self) -> Iterable[tuple[typing.Any, pandas.Series]]:
    """Iterate over DataFrame rows as (index, Series) pairs.

    Rows are produced lazily: each pandas DataFrame yielded by
    ``self.to_pandas_batches()`` is iterated in turn with
    ``pandas.DataFrame.iterrows``.

    Yields:
        tuple: ``(index, row)`` where ``row`` is a ``pandas.Series`` holding
        the values of that row.
    """
    for batch in self.to_pandas_batches():
        # Delegate to the pandas iterator of each batch instead of
        # re-yielding item by item.
        yield from batch.iterrows()
1496+
1497+
def itertuples(
    self, index: bool = True, name: typing.Optional[str] = "Pandas"
) -> Iterable[tuple[typing.Any, ...]]:
    """Iterate over DataFrame rows as namedtuples.

    Rows are produced lazily: each pandas DataFrame yielded by
    ``self.to_pandas_batches()`` is iterated in turn with
    ``pandas.DataFrame.itertuples``.

    Args:
        index (bool, default True):
            Forwarded to ``pandas.DataFrame.itertuples``; if True the index
            is the first element of each tuple.
        name (str or None, default "Pandas"):
            Forwarded to ``pandas.DataFrame.itertuples``; the namedtuple
            type name, or None to get regular tuples.

    Yields:
        tuple: one (named)tuple per row.
    """
    for batch in self.to_pandas_batches():
        # Delegate to the pandas iterator of each batch instead of
        # re-yielding item by item.
        yield from batch.itertuples(index=index, name=name)
1503+
14861504
def dropna(
14871505
self,
14881506
*,

bigframes/series.py

+6
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
from __future__ import annotations
1818

19+
import itertools
1920
import numbers
2021
import textwrap
2122
import typing
@@ -148,6 +149,11 @@ def _set_internal_query_job(self, query_job: bigquery.QueryJob):
148149
def __len__(self):
149150
return self.shape[0]
150151

152+
def __iter__(self) -> typing.Iterator:
    """Iterate over the values of the Series.

    pandas iterates a Series by value (``list(pd.Series(["a"], index=[10]))``
    is ``["a"]``), so this yields values rather than index labels. Values
    are produced lazily, one batch from ``self._block.to_pandas_batches()``
    at a time.

    Returns:
        Iterator: an iterator over the Series values.
    """
    # NOTE(review): assumes every batch is a single-column pandas DataFrame
    # (the Series' value column), so squeezing along axis=1 gives a
    # pandas.Series whose iteration yields the values -- confirm against
    # Block.to_pandas_batches.
    return itertools.chain.from_iterable(
        batch.squeeze(axis=1) for batch in self._block.to_pandas_batches()
    )
156+
151157
def copy(self) -> Series:
152158
return Series(self._block)
153159

tests/system/small/test_dataframe.py

+49
Original file line numberDiff line numberDiff line change
@@ -803,6 +803,55 @@ def test_apply_series_scalar_callable(
803803
pandas.testing.assert_series_equal(bf_result, pd_result)
804804

805805

806+
def test_df_keys(
807+
scalars_df_index,
808+
scalars_pandas_df_index,
809+
):
810+
pandas.testing.assert_index_equal(
811+
scalars_df_index.keys(), scalars_pandas_df_index.keys()
812+
)
813+
814+
815+
def test_df_iter(
    scalars_df_index,
    scalars_pandas_df_index,
):
    """Iterating a BigFrames DataFrame yields the same labels as pandas."""
    # Compare the full sequences: a bare zip() stops at the shorter iterable,
    # so an iterator that yields too few (or no) labels would pass unnoticed.
    bf_labels = list(scalars_df_index)
    pd_labels = list(scalars_pandas_df_index)
    assert bf_labels == pd_labels
821+
822+
823+
def test_iterrows(
    scalars_df_index,
    scalars_pandas_df_index,
):
    """iterrows() yields the same (index, Series) pairs as pandas."""
    bf_rows = list(scalars_df_index.iterrows())
    pd_rows = list(scalars_pandas_df_index.iterrows())

    # zip() stops at the shorter input, so check the row counts explicitly;
    # otherwise an empty or truncated BigFrames iterator would pass.
    assert len(bf_rows) == len(pd_rows)

    for (bf_index, bf_series), (pd_index, pd_series) in zip(bf_rows, pd_rows):
        assert bf_index == pd_index
        pandas.testing.assert_series_equal(bf_series, pd_series)
832+
833+
834+
@pytest.mark.parametrize(
    ("index", "name"),
    [
        (True, "my_df"),
        (False, None),
    ],
)
def test_itertuples(scalars_df_index, index, name):
    """itertuples() yields the same tuples as the downloaded pandas frame."""
    # Numeric has slightly different representation as a result of conversions.
    bf_tuples = list(scalars_df_index.itertuples(index, name))
    pd_tuples = list(scalars_df_index.to_pandas().itertuples(index, name))

    # zip() stops at the shorter input, so check the row counts explicitly;
    # otherwise an empty or truncated BigFrames iterator would pass.
    assert len(bf_tuples) == len(pd_tuples)

    for bf_tuple, pd_tuple in zip(bf_tuples, pd_tuples):
        assert bf_tuple == pd_tuple
853+
854+
806855
def test_df_isin_list(scalars_dfs):
807856
scalars_df, scalars_pandas_df = scalars_dfs
808857
values = ["Hello, World!", 55555, 2.51, pd.NA, True]

third_party/bigframes_vendored/pandas/core/frame.py

+79
Original file line numberDiff line numberDiff line change
@@ -975,6 +975,85 @@ def isin(self, values):
975975
"""
976976
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
977977

978+
def keys(self):
979+
"""
980+
Get the 'info axis'.
981+
982+
This is index for Series, columns for DataFrame.
983+
984+
Returns:
985+
Index: Info axis.
986+
987+
**Examples:**
988+
989+
>>> import bigframes.pandas as bpd
990+
>>> bpd.options.display.progress_bar = None
991+
992+
>>> df = bpd.DataFrame({
993+
... 'A': [1, 2, 3],
994+
... 'B': [4, 5, 6],
995+
... })
996+
>>> df.keys()
997+
Index(['A', 'B'], dtype='object')
998+
"""
999+
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
1000+
1001+
def iterrows(self):
1002+
"""
1003+
Iterate over DataFrame rows as (index, Series) pairs.
1004+
1005+
Yields:
1006+
a tuple (index, data) where data contains row values as a Series
1007+
1008+
**Examples:**
1009+
1010+
>>> import bigframes.pandas as bpd
1011+
>>> bpd.options.display.progress_bar = None
1012+
>>> df = bpd.DataFrame({
1013+
... 'A': [1, 2, 3],
1014+
... 'B': [4, 5, 6],
1015+
... })
1016+
>>> index, row = next(df.iterrows())
1017+
>>> index
1018+
0
1019+
>>> row
1020+
A 1
1021+
B 4
1022+
Name: 0, dtype: object
1023+
"""
1024+
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
1025+
1026+
def itertuples(self, index: bool = True, name: str | None = "Pandas"):
1027+
"""
1028+
Iterate over DataFrame rows as namedtuples.
1029+
1030+
Args:
1031+
index (bool, default True):
1032+
If True, return the index as the first element of the tuple.
1033+
name (str or None, default "Pandas"):
1034+
The name of the returned namedtuples or None to return regular
1035+
tuples.
1036+
1037+
Returns:
1038+
iterator:
1039+
An object to iterate over namedtuples for each row in the
1040+
DataFrame with the first field possibly being the index and
1041+
following fields being the column values.
1042+
1043+
1044+
**Examples:**
1045+
1046+
>>> import bigframes.pandas as bpd
1047+
>>> bpd.options.display.progress_bar = None
1048+
>>> df = bpd.DataFrame({
1049+
... 'A': [1, 2, 3],
1050+
... 'B': [4, 5, 6],
1051+
... })
1052+
>>> next(df.itertuples(name="Pair"))
1053+
Pair(Index=0, A=1, B=4)
1054+
"""
1055+
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
1056+
9781057
def items(self):
9791058
"""
9801059
Iterate over (column name, Series) pairs.

third_party/bigframes_vendored/pandas/core/generic.py

+30-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# Contains code from https://github.com/pandas-dev/pandas/blob/main/pandas/core/generic.py
22
from __future__ import annotations
33

4-
from typing import Literal, Optional
4+
from typing import Iterator, Literal, Optional
55

66
from bigframes import constants
77
from third_party.bigframes_vendored.pandas.core import indexing
@@ -35,6 +35,35 @@ def size(self) -> int:
3535
"""
3636
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
3737

38+
def __iter__(self) -> Iterator:
39+
"""
40+
Iterate over info axis.
41+
42+
Returns:
43+
iterator: Info axis as iterator.
44+
45+
**Examples:**
46+
>>> import bigframes.pandas as bpd
47+
>>> bpd.options.display.progress_bar = None
48+
49+
>>> df = bpd.DataFrame({
50+
... 'A': [1, 2, 3],
51+
... 'B': [4, 5, 6],
52+
... })
53+
>>> for x in df:
54+
... print(x)
55+
A
56+
B
57+
58+
>>> series = bpd.Series(["a", "b", "c"], index=[10, 20, 30])
59+
>>> for x in series:
60+
... print(x)
61+
10
62+
20
63+
30
64+
"""
65+
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)
66+
3867
# -------------------------------------------------------------------------
3968
# Unary Methods
4069

0 commit comments

Comments
 (0)