Skip to content

Commit 2d7128d

Browse files
authored
test: refactor remote function tests (#147)
This change moves the tests that deploy a cloud function into the large remote function tests, and the tests that do not make calls to the BigQuery service into unit tests. Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea - [ ] Ensure the tests and linter pass - [ ] Code coverage does not decrease (if any source code was changed) - [ ] Appropriate docs were updated (if necessary) Fixes #<issue_number_goes_here> 🦕
1 parent 29032d0 commit 2d7128d

File tree

3 files changed

+148
-126
lines changed

3 files changed

+148
-126
lines changed

tests/system/large/test_remote_function.py

+90
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import pytest
2828
import test_utils.prefixer
2929

30+
import bigframes
3031
from bigframes.remote_function import (
3132
get_cloud_function_name,
3233
get_remote_function_locations,
@@ -1120,3 +1121,92 @@ def plusone(x):
11201121
)
11211122
for dir_ in dirs_to_cleanup:
11221123
shutil.rmtree(dir_)
1124+
1125+
1126+
@pytest.mark.flaky(retries=2, delay=120)
1127+
def test_remote_function_via_session_context_connection_setter(
1128+
scalars_dfs, dataset_id, bq_cf_connection
1129+
):
1130+
# Creating a session scoped only to this test as we would be setting a
1131+
# property in it
1132+
context = bigframes.BigQueryOptions()
1133+
context.bq_connection = bq_cf_connection
1134+
session = bigframes.connect(context)
1135+
1136+
try:
1137+
# Without an explicit bigquery connection, the one present in Session,
1138+
# set via context setter would be used. Without an explicit `reuse` the
1139+
# default behavior of reuse=True will take effect. Please note that the
1140+
# udf is same as the one used in other tests in this file so the underlying
1141+
# cloud function would be common with reuse=True. Since we are using a
1142+
# unique dataset_id, even though the cloud function would be reused, the bq
1143+
# remote function would still be created, making use of the bq connection
1144+
# set in the BigQueryOptions above.
1145+
@session.remote_function([int], int, dataset=dataset_id)
1146+
def square(x):
1147+
return x * x
1148+
1149+
scalars_df, scalars_pandas_df = scalars_dfs
1150+
1151+
bf_int64_col = scalars_df["int64_col"]
1152+
bf_int64_col_filter = bf_int64_col.notnull()
1153+
bf_int64_col_filtered = bf_int64_col[bf_int64_col_filter]
1154+
bf_result_col = bf_int64_col_filtered.apply(square)
1155+
bf_result = (
1156+
bf_int64_col_filtered.to_frame().assign(result=bf_result_col).to_pandas()
1157+
)
1158+
1159+
pd_int64_col = scalars_pandas_df["int64_col"]
1160+
pd_int64_col_filter = pd_int64_col.notnull()
1161+
pd_int64_col_filtered = pd_int64_col[pd_int64_col_filter]
1162+
pd_result_col = pd_int64_col_filtered.apply(lambda x: x * x)
1163+
# TODO(shobs): Figure why pandas .apply() changes the dtype, i.e.
1164+
# pd_int64_col_filtered.dtype is Int64Dtype()
1165+
# pd_int64_col_filtered.apply(lambda x: x * x).dtype is int64.
1166+
# For this test let's force the pandas dtype to be same as bigframes' dtype.
1167+
pd_result_col = pd_result_col.astype(pandas.Int64Dtype())
1168+
pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col)
1169+
1170+
assert_pandas_df_equal_ignore_ordering(bf_result, pd_result)
1171+
finally:
1172+
# clean up the gcp assets created for the remote function
1173+
cleanup_remote_function_assets(
1174+
session.bqclient, session.cloudfunctionsclient, square
1175+
)
1176+
1177+
1178+
@pytest.mark.flaky(retries=2, delay=120)
1179+
def test_remote_function_default_connection(session, scalars_dfs, dataset_id):
1180+
try:
1181+
1182+
@session.remote_function([int], int, dataset=dataset_id)
1183+
def square(x):
1184+
return x * x
1185+
1186+
scalars_df, scalars_pandas_df = scalars_dfs
1187+
1188+
bf_int64_col = scalars_df["int64_col"]
1189+
bf_int64_col_filter = bf_int64_col.notnull()
1190+
bf_int64_col_filtered = bf_int64_col[bf_int64_col_filter]
1191+
bf_result_col = bf_int64_col_filtered.apply(square)
1192+
bf_result = (
1193+
bf_int64_col_filtered.to_frame().assign(result=bf_result_col).to_pandas()
1194+
)
1195+
1196+
pd_int64_col = scalars_pandas_df["int64_col"]
1197+
pd_int64_col_filter = pd_int64_col.notnull()
1198+
pd_int64_col_filtered = pd_int64_col[pd_int64_col_filter]
1199+
pd_result_col = pd_int64_col_filtered.apply(lambda x: x * x)
1200+
# TODO(shobs): Figure why pandas .apply() changes the dtype, i.e.
1201+
# pd_int64_col_filtered.dtype is Int64Dtype()
1202+
# pd_int64_col_filtered.apply(lambda x: x * x).dtype is int64.
1203+
# For this test let's force the pandas dtype to be same as bigframes' dtype.
1204+
pd_result_col = pd_result_col.astype(pandas.Int64Dtype())
1205+
pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col)
1206+
1207+
assert_pandas_df_equal_ignore_ordering(bf_result, pd_result)
1208+
finally:
1209+
# clean up the gcp assets created for the remote function
1210+
cleanup_remote_function_assets(
1211+
session.bqclient, session.cloudfunctionsclient, square
1212+
)

tests/system/small/test_remote_function.py

+30-126
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,11 @@
1313
# limitations under the License.
1414

1515
from google.cloud import bigquery
16-
from ibis.backends.bigquery import datatypes as bq_types
17-
from ibis.expr import datatypes as ibis_types
1816
import pandas as pd
1917
import pytest
2018

2119
import bigframes
2220
from bigframes import remote_function as rf
23-
import bigframes.pandas as bpd
2421
from tests.system.utils import assert_pandas_df_equal_ignore_ordering
2522

2623

@@ -65,45 +62,14 @@ def bq_cf_connection_location_project_mismatched() -> str:
6562

6663

6764
@pytest.fixture(scope="module")
68-
def session_with_bq_connection(bq_cf_connection) -> bigframes.Session:
69-
return bigframes.Session(bigframes.BigQueryOptions(bq_connection=bq_cf_connection))
70-
71-
72-
@pytest.fixture(scope="module")
73-
def session_with_bq_connection_location_specified(
74-
bq_cf_connection_location,
75-
) -> bigframes.Session:
76-
return bigframes.Session(
77-
bigframes.BigQueryOptions(bq_connection=bq_cf_connection_location)
78-
)
79-
80-
81-
@pytest.fixture(scope="module")
82-
def session_with_bq_connection_location_mistached(
83-
bq_cf_connection_location_mistached,
84-
) -> bigframes.Session:
85-
return bigframes.Session(
86-
bigframes.BigQueryOptions(bq_connection=bq_cf_connection_location_mistached)
87-
)
88-
89-
90-
@pytest.fixture(scope="module")
91-
def session_with_bq_connection_location_project_specified(
92-
bq_cf_connection_location_project,
65+
def session_with_bq_connection_and_permanent_dataset(
66+
bq_cf_connection, dataset_id_permanent
9367
) -> bigframes.Session:
94-
return bigframes.Session(
95-
bigframes.BigQueryOptions(bq_connection=bq_cf_connection_location_project)
68+
session = bigframes.Session(
69+
bigframes.BigQueryOptions(bq_connection=bq_cf_connection)
9670
)
97-
98-
99-
def test_supported_types_correspond():
100-
# The same types should be representable by the supported Python and BigQuery types.
101-
ibis_types_from_python = {ibis_types.dtype(t) for t in rf.SUPPORTED_IO_PYTHON_TYPES}
102-
ibis_types_from_bigquery = {
103-
bq_types.BigQueryType.to_ibis(tk) for tk in rf.SUPPORTED_IO_BIGQUERY_TYPEKINDS
104-
}
105-
106-
assert ibis_types_from_python == ibis_types_from_bigquery
71+
session._session_dataset = bigquery.Dataset(dataset_id_permanent)
72+
return session
10773

10874

10975
@pytest.mark.flaky(retries=2, delay=120)
@@ -311,11 +277,13 @@ def square(x):
311277

312278

313279
@pytest.mark.flaky(retries=2, delay=120)
314-
def test_remote_function_direct_session_param(session_with_bq_connection, scalars_dfs):
280+
def test_remote_function_direct_session_param(
281+
session_with_bq_connection_and_permanent_dataset, scalars_dfs
282+
):
315283
@rf.remote_function(
316284
[int],
317285
int,
318-
session=session_with_bq_connection,
286+
session=session_with_bq_connection_and_permanent_dataset,
319287
)
320288
def square(x):
321289
return x * x
@@ -345,15 +313,17 @@ def square(x):
345313

346314

347315
@pytest.mark.flaky(retries=2, delay=120)
348-
def test_remote_function_via_session_default(session_with_bq_connection, scalars_dfs):
316+
def test_remote_function_via_session_default(
317+
session_with_bq_connection_and_permanent_dataset, scalars_dfs
318+
):
349319
# Session has bigquery connection initialized via context. Without an
350320
# explicit dataset the default dataset from the session would be used.
351321
# Without an explicit bigquery connection, the one present in Session set
352322
# through the explicit BigQueryOptions would be used. Without an explicit `reuse`
353323
# the default behavior of reuse=True will take effect. Please note that the
354324
# udf is same as the one used in other tests in this file so the underlying
355325
# cloud function would be common and quickly reused.
356-
@session_with_bq_connection.remote_function([int], int)
326+
@session_with_bq_connection_and_permanent_dataset.remote_function([int], int)
357327
def square(x):
358328
return x * x
359329

@@ -421,87 +391,15 @@ def square(x):
421391

422392

423393
@pytest.mark.flaky(retries=2, delay=120)
424-
def test_remote_function_via_session_context_connection_setter(
425-
scalars_dfs, dataset_id, bq_cf_connection
394+
def test_dataframe_applymap(
395+
session_with_bq_connection_and_permanent_dataset, scalars_dfs
426396
):
427-
# Creating a session scoped only to this test as we would be setting a
428-
# property in it
429-
context = bigframes.BigQueryOptions()
430-
context.bq_connection = bq_cf_connection
431-
session = bigframes.connect(context)
432-
433-
# Without an explicit bigquery connection, the one present in Session,
434-
# set via context setter would be used. Without an explicit `reuse` the
435-
# default behavior of reuse=True will take effect. Please note that the
436-
# udf is same as the one used in other tests in this file so the underlying
437-
# cloud function would be common with reuse=True. Since we are using a
438-
# unique dataset_id, even though the cloud function would be reused, the bq
439-
# remote function would still be created, making use of the bq connection
440-
# set in the BigQueryOptions above.
441-
@session.remote_function([int], int, dataset=dataset_id)
442-
def square(x):
443-
return x * x
444-
445-
scalars_df, scalars_pandas_df = scalars_dfs
446-
447-
bf_int64_col = scalars_df["int64_col"]
448-
bf_int64_col_filter = bf_int64_col.notnull()
449-
bf_int64_col_filtered = bf_int64_col[bf_int64_col_filter]
450-
bf_result_col = bf_int64_col_filtered.apply(square)
451-
bf_result = (
452-
bf_int64_col_filtered.to_frame().assign(result=bf_result_col).to_pandas()
453-
)
454-
455-
pd_int64_col = scalars_pandas_df["int64_col"]
456-
pd_int64_col_filter = pd_int64_col.notnull()
457-
pd_int64_col_filtered = pd_int64_col[pd_int64_col_filter]
458-
pd_result_col = pd_int64_col_filtered.apply(lambda x: x * x)
459-
# TODO(shobs): Figure why pandas .apply() changes the dtype, i.e.
460-
# pd_int64_col_filtered.dtype is Int64Dtype()
461-
# pd_int64_col_filtered.apply(lambda x: x * x).dtype is int64.
462-
# For this test let's force the pandas dtype to be same as bigframes' dtype.
463-
pd_result_col = pd_result_col.astype(pd.Int64Dtype())
464-
pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col)
465-
466-
assert_pandas_df_equal_ignore_ordering(bf_result, pd_result)
467-
468-
469-
@pytest.mark.flaky(retries=2, delay=120)
470-
def test_remote_function_default_connection(scalars_dfs, dataset_id):
471-
@bpd.remote_function([int], int, dataset=dataset_id)
472-
def square(x):
473-
return x * x
474-
475-
scalars_df, scalars_pandas_df = scalars_dfs
476-
477-
bf_int64_col = scalars_df["int64_col"]
478-
bf_int64_col_filter = bf_int64_col.notnull()
479-
bf_int64_col_filtered = bf_int64_col[bf_int64_col_filter]
480-
bf_result_col = bf_int64_col_filtered.apply(square)
481-
bf_result = (
482-
bf_int64_col_filtered.to_frame().assign(result=bf_result_col).to_pandas()
483-
)
484-
485-
pd_int64_col = scalars_pandas_df["int64_col"]
486-
pd_int64_col_filter = pd_int64_col.notnull()
487-
pd_int64_col_filtered = pd_int64_col[pd_int64_col_filter]
488-
pd_result_col = pd_int64_col_filtered.apply(lambda x: x * x)
489-
# TODO(shobs): Figure why pandas .apply() changes the dtype, i.e.
490-
# pd_int64_col_filtered.dtype is Int64Dtype()
491-
# pd_int64_col_filtered.apply(lambda x: x * x).dtype is int64.
492-
# For this test let's force the pandas dtype to be same as bigframes' dtype.
493-
pd_result_col = pd_result_col.astype(pd.Int64Dtype())
494-
pd_result = pd_int64_col_filtered.to_frame().assign(result=pd_result_col)
495-
496-
assert_pandas_df_equal_ignore_ordering(bf_result, pd_result)
497-
498-
499-
@pytest.mark.flaky(retries=2, delay=120)
500-
def test_dataframe_applymap(session_with_bq_connection, scalars_dfs):
501397
def add_one(x):
502398
return x + 1
503399

504-
remote_add_one = session_with_bq_connection.remote_function([int], int)(add_one)
400+
remote_add_one = session_with_bq_connection_and_permanent_dataset.remote_function(
401+
[int], int
402+
)(add_one)
505403

506404
scalars_df, scalars_pandas_df = scalars_dfs
507405
int64_cols = ["int64_col", "int64_too"]
@@ -524,11 +422,15 @@ def add_one(x):
524422

525423

526424
@pytest.mark.flaky(retries=2, delay=120)
527-
def test_dataframe_applymap_na_ignore(session_with_bq_connection, scalars_dfs):
425+
def test_dataframe_applymap_na_ignore(
426+
session_with_bq_connection_and_permanent_dataset, scalars_dfs
427+
):
528428
def add_one(x):
529429
return x + 1
530430

531-
remote_add_one = session_with_bq_connection.remote_function([int], int)(add_one)
431+
remote_add_one = session_with_bq_connection_and_permanent_dataset.remote_function(
432+
[int], int
433+
)(add_one)
532434

533435
scalars_df, scalars_pandas_df = scalars_dfs
534436
int64_cols = ["int64_col", "int64_too"]
@@ -549,11 +451,13 @@ def add_one(x):
549451

550452

551453
@pytest.mark.flaky(retries=2, delay=120)
552-
def test_series_map(session_with_bq_connection, scalars_dfs):
454+
def test_series_map(session_with_bq_connection_and_permanent_dataset, scalars_dfs):
553455
def add_one(x):
554456
return x + 1
555457

556-
remote_add_one = session_with_bq_connection.remote_function([int], int)(add_one)
458+
remote_add_one = session_with_bq_connection_and_permanent_dataset.remote_function(
459+
[int], int
460+
)(add_one)
557461

558462
scalars_df, scalars_pandas_df = scalars_dfs
559463

@@ -635,7 +539,7 @@ def square1(x):
635539

636540

637541
@pytest.mark.flaky(retries=2, delay=120)
638-
def test_read_gbq_function_reads_udfs(bigquery_client, scalars_dfs, dataset_id):
542+
def test_read_gbq_function_reads_udfs(bigquery_client, dataset_id):
639543
dataset_ref = bigquery.DatasetReference.from_string(dataset_id)
640544
arg = bigquery.RoutineArgument(
641545
name="x",

tests/unit/test_remote_function.py

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from ibis.backends.bigquery import datatypes as bq_types
16+
from ibis.expr import datatypes as ibis_types
17+
18+
from bigframes import remote_function as rf
19+
20+
21+
def test_supported_types_correspond():
22+
# The same types should be representable by the supported Python and BigQuery types.
23+
ibis_types_from_python = {ibis_types.dtype(t) for t in rf.SUPPORTED_IO_PYTHON_TYPES}
24+
ibis_types_from_bigquery = {
25+
bq_types.BigQueryType.to_ibis(tk) for tk in rf.SUPPORTED_IO_BIGQUERY_TYPEKINDS
26+
}
27+
28+
assert ibis_types_from_python == ibis_types_from_bigquery

0 commit comments

Comments
 (0)