13
13
# limitations under the License.
14
14
15
15
from google .cloud import bigquery
16
- from ibis .backends .bigquery import datatypes as bq_types
17
- from ibis .expr import datatypes as ibis_types
18
16
import pandas as pd
19
17
import pytest
20
18
21
19
import bigframes
22
20
from bigframes import remote_function as rf
23
- import bigframes .pandas as bpd
24
21
from tests .system .utils import assert_pandas_df_equal_ignore_ordering
25
22
26
23
@@ -65,45 +62,14 @@ def bq_cf_connection_location_project_mismatched() -> str:
65
62
66
63
67
64
@pytest .fixture (scope = "module" )
68
- def session_with_bq_connection (bq_cf_connection ) -> bigframes .Session :
69
- return bigframes .Session (bigframes .BigQueryOptions (bq_connection = bq_cf_connection ))
70
-
71
-
72
- @pytest .fixture (scope = "module" )
73
- def session_with_bq_connection_location_specified (
74
- bq_cf_connection_location ,
75
- ) -> bigframes .Session :
76
- return bigframes .Session (
77
- bigframes .BigQueryOptions (bq_connection = bq_cf_connection_location )
78
- )
79
-
80
-
81
- @pytest .fixture (scope = "module" )
82
- def session_with_bq_connection_location_mistached (
83
- bq_cf_connection_location_mistached ,
84
- ) -> bigframes .Session :
85
- return bigframes .Session (
86
- bigframes .BigQueryOptions (bq_connection = bq_cf_connection_location_mistached )
87
- )
88
-
89
-
90
- @pytest .fixture (scope = "module" )
91
- def session_with_bq_connection_location_project_specified (
92
- bq_cf_connection_location_project ,
65
+ def session_with_bq_connection_and_permanent_dataset (
66
+ bq_cf_connection , dataset_id_permanent
93
67
) -> bigframes .Session :
94
- return bigframes .Session (
95
- bigframes .BigQueryOptions (bq_connection = bq_cf_connection_location_project )
68
+ session = bigframes .Session (
69
+ bigframes .BigQueryOptions (bq_connection = bq_cf_connection )
96
70
)
97
-
98
-
99
- def test_supported_types_correspond ():
100
- # The same types should be representable by the supported Python and BigQuery types.
101
- ibis_types_from_python = {ibis_types .dtype (t ) for t in rf .SUPPORTED_IO_PYTHON_TYPES }
102
- ibis_types_from_bigquery = {
103
- bq_types .BigQueryType .to_ibis (tk ) for tk in rf .SUPPORTED_IO_BIGQUERY_TYPEKINDS
104
- }
105
-
106
- assert ibis_types_from_python == ibis_types_from_bigquery
71
+ session ._session_dataset = bigquery .Dataset (dataset_id_permanent )
72
+ return session
107
73
108
74
109
75
@pytest .mark .flaky (retries = 2 , delay = 120 )
@@ -311,11 +277,13 @@ def square(x):
311
277
312
278
313
279
@pytest .mark .flaky (retries = 2 , delay = 120 )
314
- def test_remote_function_direct_session_param (session_with_bq_connection , scalars_dfs ):
280
+ def test_remote_function_direct_session_param (
281
+ session_with_bq_connection_and_permanent_dataset , scalars_dfs
282
+ ):
315
283
@rf .remote_function (
316
284
[int ],
317
285
int ,
318
- session = session_with_bq_connection ,
286
+ session = session_with_bq_connection_and_permanent_dataset ,
319
287
)
320
288
def square (x ):
321
289
return x * x
@@ -345,15 +313,17 @@ def square(x):
345
313
346
314
347
315
@pytest .mark .flaky (retries = 2 , delay = 120 )
348
- def test_remote_function_via_session_default (session_with_bq_connection , scalars_dfs ):
316
+ def test_remote_function_via_session_default (
317
+ session_with_bq_connection_and_permanent_dataset , scalars_dfs
318
+ ):
349
319
# Session has bigquery connection initialized via context. Without an
350
320
# explicit dataset the default dataset from the session would be used.
351
321
# Without an explicit bigquery connection, the one present in Session set
352
322
# through the explicit BigQueryOptions would be used. Without an explicit `reuse`
353
323
# the default behavior of reuse=True will take effect. Please note that the
354
324
# udf is same as the one used in other tests in this file so the underlying
355
325
# cloud function would be common and quickly reused.
356
- @session_with_bq_connection .remote_function ([int ], int )
326
+ @session_with_bq_connection_and_permanent_dataset .remote_function ([int ], int )
357
327
def square (x ):
358
328
return x * x
359
329
@@ -421,87 +391,15 @@ def square(x):
421
391
422
392
423
393
@pytest .mark .flaky (retries = 2 , delay = 120 )
424
- def test_remote_function_via_session_context_connection_setter (
425
- scalars_dfs , dataset_id , bq_cf_connection
394
+ def test_dataframe_applymap (
395
+ session_with_bq_connection_and_permanent_dataset , scalars_dfs
426
396
):
427
- # Creating a session scoped only to this test as we would be setting a
428
- # property in it
429
- context = bigframes .BigQueryOptions ()
430
- context .bq_connection = bq_cf_connection
431
- session = bigframes .connect (context )
432
-
433
- # Without an explicit bigquery connection, the one present in Session,
434
- # set via context setter would be used. Without an explicit `reuse` the
435
- # default behavior of reuse=True will take effect. Please note that the
436
- # udf is same as the one used in other tests in this file so the underlying
437
- # cloud function would be common with reuse=True. Since we are using a
438
- # unique dataset_id, even though the cloud function would be reused, the bq
439
- # remote function would still be created, making use of the bq connection
440
- # set in the BigQueryOptions above.
441
- @session .remote_function ([int ], int , dataset = dataset_id )
442
- def square (x ):
443
- return x * x
444
-
445
- scalars_df , scalars_pandas_df = scalars_dfs
446
-
447
- bf_int64_col = scalars_df ["int64_col" ]
448
- bf_int64_col_filter = bf_int64_col .notnull ()
449
- bf_int64_col_filtered = bf_int64_col [bf_int64_col_filter ]
450
- bf_result_col = bf_int64_col_filtered .apply (square )
451
- bf_result = (
452
- bf_int64_col_filtered .to_frame ().assign (result = bf_result_col ).to_pandas ()
453
- )
454
-
455
- pd_int64_col = scalars_pandas_df ["int64_col" ]
456
- pd_int64_col_filter = pd_int64_col .notnull ()
457
- pd_int64_col_filtered = pd_int64_col [pd_int64_col_filter ]
458
- pd_result_col = pd_int64_col_filtered .apply (lambda x : x * x )
459
- # TODO(shobs): Figure why pandas .apply() changes the dtype, i.e.
460
- # pd_int64_col_filtered.dtype is Int64Dtype()
461
- # pd_int64_col_filtered.apply(lambda x: x * x).dtype is int64.
462
- # For this test let's force the pandas dtype to be same as bigframes' dtype.
463
- pd_result_col = pd_result_col .astype (pd .Int64Dtype ())
464
- pd_result = pd_int64_col_filtered .to_frame ().assign (result = pd_result_col )
465
-
466
- assert_pandas_df_equal_ignore_ordering (bf_result , pd_result )
467
-
468
-
469
- @pytest .mark .flaky (retries = 2 , delay = 120 )
470
- def test_remote_function_default_connection (scalars_dfs , dataset_id ):
471
- @bpd .remote_function ([int ], int , dataset = dataset_id )
472
- def square (x ):
473
- return x * x
474
-
475
- scalars_df , scalars_pandas_df = scalars_dfs
476
-
477
- bf_int64_col = scalars_df ["int64_col" ]
478
- bf_int64_col_filter = bf_int64_col .notnull ()
479
- bf_int64_col_filtered = bf_int64_col [bf_int64_col_filter ]
480
- bf_result_col = bf_int64_col_filtered .apply (square )
481
- bf_result = (
482
- bf_int64_col_filtered .to_frame ().assign (result = bf_result_col ).to_pandas ()
483
- )
484
-
485
- pd_int64_col = scalars_pandas_df ["int64_col" ]
486
- pd_int64_col_filter = pd_int64_col .notnull ()
487
- pd_int64_col_filtered = pd_int64_col [pd_int64_col_filter ]
488
- pd_result_col = pd_int64_col_filtered .apply (lambda x : x * x )
489
- # TODO(shobs): Figure why pandas .apply() changes the dtype, i.e.
490
- # pd_int64_col_filtered.dtype is Int64Dtype()
491
- # pd_int64_col_filtered.apply(lambda x: x * x).dtype is int64.
492
- # For this test let's force the pandas dtype to be same as bigframes' dtype.
493
- pd_result_col = pd_result_col .astype (pd .Int64Dtype ())
494
- pd_result = pd_int64_col_filtered .to_frame ().assign (result = pd_result_col )
495
-
496
- assert_pandas_df_equal_ignore_ordering (bf_result , pd_result )
497
-
498
-
499
- @pytest .mark .flaky (retries = 2 , delay = 120 )
500
- def test_dataframe_applymap (session_with_bq_connection , scalars_dfs ):
501
397
def add_one (x ):
502
398
return x + 1
503
399
504
- remote_add_one = session_with_bq_connection .remote_function ([int ], int )(add_one )
400
+ remote_add_one = session_with_bq_connection_and_permanent_dataset .remote_function (
401
+ [int ], int
402
+ )(add_one )
505
403
506
404
scalars_df , scalars_pandas_df = scalars_dfs
507
405
int64_cols = ["int64_col" , "int64_too" ]
@@ -524,11 +422,15 @@ def add_one(x):
524
422
525
423
526
424
@pytest .mark .flaky (retries = 2 , delay = 120 )
527
- def test_dataframe_applymap_na_ignore (session_with_bq_connection , scalars_dfs ):
425
+ def test_dataframe_applymap_na_ignore (
426
+ session_with_bq_connection_and_permanent_dataset , scalars_dfs
427
+ ):
528
428
def add_one (x ):
529
429
return x + 1
530
430
531
- remote_add_one = session_with_bq_connection .remote_function ([int ], int )(add_one )
431
+ remote_add_one = session_with_bq_connection_and_permanent_dataset .remote_function (
432
+ [int ], int
433
+ )(add_one )
532
434
533
435
scalars_df , scalars_pandas_df = scalars_dfs
534
436
int64_cols = ["int64_col" , "int64_too" ]
@@ -549,11 +451,13 @@ def add_one(x):
549
451
550
452
551
453
@pytest .mark .flaky (retries = 2 , delay = 120 )
552
- def test_series_map (session_with_bq_connection , scalars_dfs ):
454
+ def test_series_map (session_with_bq_connection_and_permanent_dataset , scalars_dfs ):
553
455
def add_one (x ):
554
456
return x + 1
555
457
556
- remote_add_one = session_with_bq_connection .remote_function ([int ], int )(add_one )
458
+ remote_add_one = session_with_bq_connection_and_permanent_dataset .remote_function (
459
+ [int ], int
460
+ )(add_one )
557
461
558
462
scalars_df , scalars_pandas_df = scalars_dfs
559
463
@@ -635,7 +539,7 @@ def square1(x):
635
539
636
540
637
541
@pytest .mark .flaky (retries = 2 , delay = 120 )
638
- def test_read_gbq_function_reads_udfs (bigquery_client , scalars_dfs , dataset_id ):
542
+ def test_read_gbq_function_reads_udfs (bigquery_client , dataset_id ):
639
543
dataset_ref = bigquery .DatasetReference .from_string (dataset_id )
640
544
arg = bigquery .RoutineArgument (
641
545
name = "x" ,
0 commit comments