Skip to content

Commit ae0e3ea

Browse files
authored
fix: read_pandas inline respects location (#412)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes b/327544164 🦕
1 parent 763edeb commit ae0e3ea

File tree

9 files changed

+44
-8
lines changed

9 files changed

+44
-8
lines changed

bigframes/core/__init__.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def from_ibis(
6969
return cls(node)
7070

7171
@classmethod
72-
def from_pandas(cls, pd_df: pandas.DataFrame):
72+
def from_pandas(cls, pd_df: pandas.DataFrame, session: bigframes.Session):
7373
iobytes = io.BytesIO()
7474
# Use alphanumeric identifiers, to avoid downstream problems with escaping.
7575
as_ids = [
@@ -78,7 +78,7 @@ def from_pandas(cls, pd_df: pandas.DataFrame):
7878
]
7979
unique_ids = tuple(bigframes.core.utils.disambiguate_ids(as_ids))
8080
pd_df.reset_index(drop=True).set_axis(unique_ids, axis=1).to_feather(iobytes)
81-
node = nodes.ReadLocalNode(iobytes.getvalue())
81+
node = nodes.ReadLocalNode(feather_bytes=iobytes.getvalue(), session=session)
8282
return cls(node)
8383

8484
@property

bigframes/core/blocks.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ def __init__(
140140
self._stats_cache[" ".join(self.index_columns)] = {}
141141

142142
@classmethod
143-
def from_local(cls, data) -> Block:
143+
def from_local(cls, data, session: bigframes.Session) -> Block:
144144
pd_data = pd.DataFrame(data)
145145
columns = pd_data.columns
146146

@@ -162,7 +162,7 @@ def from_local(cls, data) -> Block:
162162
)
163163
index_ids = pd_data.columns[: len(index_labels)]
164164

165-
keys_expr = core.ArrayValue.from_pandas(pd_data)
165+
keys_expr = core.ArrayValue.from_pandas(pd_data, session)
166166
return cls(
167167
keys_expr,
168168
column_labels=columns,

bigframes/core/nodes.py

+1
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ def __hash__(self):
155155
@dataclass(frozen=True)
156156
class ReadLocalNode(BigFrameNode):
157157
feather_bytes: bytes
158+
session: typing.Optional[bigframes.session.Session] = None
158159

159160
def __hash__(self):
160161
return self._node_hash

bigframes/dataframe.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1646,7 +1646,7 @@ def _reindex_rows(
16461646
raise NotImplementedError(
16471647
"Cannot reindex with index with different nlevels"
16481648
)
1649-
new_indexer = DataFrame(index=index)[[]]
1649+
new_indexer = DataFrame(index=index, session=self._session)[[]]
16501650
# multiindex join is sensitive to index names, so we will set all these
16511651
result = new_indexer.rename_axis(range(new_indexer.index.nlevels)).join(
16521652
self.rename_axis(range(self.index.nlevels)),

bigframes/session/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -916,7 +916,7 @@ def _read_pandas(
916916
def _read_pandas_inline(
917917
self, pandas_dataframe: pandas.DataFrame
918918
) -> dataframe.DataFrame:
919-
return dataframe.DataFrame(blocks.Block.from_local(pandas_dataframe))
919+
return dataframe.DataFrame(blocks.Block.from_local(pandas_dataframe, self))
920920

921921
def _read_pandas_load_job(
922922
self, pandas_dataframe: pandas.DataFrame, api_name: str

notebooks/location/regionalized.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -2791,7 +2791,7 @@
27912791
"name": "python",
27922792
"nbconvert_exporter": "python",
27932793
"pygments_lexer": "ipython3",
2794-
"version": "3.10.12"
2794+
"version": "3.10.9"
27952795
},
27962796
"orig_nbformat": 4
27972797
},

tests/system/small/test_dataframe.py

+17
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,23 @@ def test_df_construct_from_dict():
9393
)
9494

9595

96+
def test_df_construct_inline_respects_location():
97+
import bigframes.pandas as bpd
98+
99+
bpd.close_session()
100+
bpd.options.bigquery.location = "europe-west1"
101+
102+
df = bpd.DataFrame([[1, 2, 3], [4, 5, 6]])
103+
repr(df)
104+
105+
table = bpd.get_global_session().bqclient.get_table(df.query_job.destination)
106+
assert table.location == "europe-west1"
107+
108+
# Reset global session
109+
bpd.close_session()
110+
bpd.options.bigquery.location = "us"
111+
112+
96113
def test_get_column(scalars_dfs):
97114
scalars_df, scalars_pandas_df = scalars_dfs
98115
col_name = "int64_col"

tests/system/small/test_session.py

+11
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,17 @@ def test_read_pandas(session, scalars_dfs):
369369
pd.testing.assert_frame_equal(result, expected)
370370

371371

372+
def test_read_pandas_inline_respects_location():
373+
options = bigframes.BigQueryOptions(location="europe-west1")
374+
session = bigframes.Session(options)
375+
376+
df = session.read_pandas(pd.DataFrame([[1, 2, 3], [4, 5, 6]]))
377+
repr(df)
378+
379+
table = session.bqclient.get_table(df.query_job.destination)
380+
assert table.location == "europe-west1"
381+
382+
372383
def test_read_pandas_col_label_w_space(session: bigframes.Session):
373384
expected = pd.DataFrame(
374385
{

tests/unit/core/test_blocks.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,13 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
from unittest import mock
16+
1517
import pandas
1618
import pandas.testing
1719
import pytest
1820

21+
import bigframes
1922
import bigframes.core.blocks as blocks
2023

2124

@@ -74,8 +77,12 @@
7477
)
7578
def test_block_from_local(data):
7679
expected = pandas.DataFrame(data)
80+
mock_session = mock.create_autospec(spec=bigframes.Session)
81+
82+
# Hard-code the dimension returned by the session, because each of the test cases contains 3 rows.
83+
mock_session._execute.return_value = (iter([[3]]), None)
7784

78-
block = blocks.Block.from_local(data)
85+
block = blocks.Block.from_local(data, mock_session)
7986

8087
pandas.testing.assert_index_equal(block.column_labels, expected.columns)
8188
assert tuple(block.index.names) == tuple(expected.index.names)

0 commit comments

Comments
 (0)