Skip to content

Commit bac62f7

Browse files
Genesis929 and shobsi authored
fix: dataframes to_gbq now creates dataset if it doesn't exist (#222)
* fix: dataframes to_gbq now creates dataset if it doesn't exist
* fix: dataframes to_gbq now creates dataset if it doesn't exist
* fix: dataframes to_gbq now creates dataset if it doesn't exist
* update test
* update create dataset method.
* fix

---------

Co-authored-by: Shobhit Singh <[email protected]>
1 parent 6c1969a commit bac62f7

File tree

2 files changed

+30
-2
lines changed

2 files changed

+30
-2
lines changed

bigframes/dataframe.py

+15-2
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@
3434
Union,
3535
)
3636

37+
import google.api_core.exceptions
3738
import google.cloud.bigquery as bigquery
3839
import numpy
3940
import pandas
@@ -2508,7 +2509,14 @@ def to_gbq(
25082509
)
25092510
if_exists = "replace"
25102511

2511-
if "." not in destination_table:
2512+
table_parts = destination_table.split(".")
2513+
default_project = self._block.expr.session.bqclient.project
2514+
2515+
if len(table_parts) == 2:
2516+
destination_dataset = f"{default_project}.{table_parts[0]}"
2517+
elif len(table_parts) == 3:
2518+
destination_dataset = f"{table_parts[0]}.{table_parts[1]}"
2519+
else:
25122520
raise ValueError(
25132521
f"Got invalid value for destination_table {repr(destination_table)}. "
25142522
"Should be of the form 'datasetId.tableId' or 'projectId.datasetId.tableId'."
@@ -2523,11 +2531,16 @@ def to_gbq(
25232531
f"Valid options include None or one of {dispositions.keys()}."
25242532
)
25252533

2534+
try:
2535+
self._session.bqclient.get_dataset(destination_dataset)
2536+
except google.api_core.exceptions.NotFound:
2537+
self._session.bqclient.create_dataset(destination_dataset, exists_ok=True)
2538+
25262539
job_config = bigquery.QueryJobConfig(
25272540
write_disposition=dispositions[if_exists],
25282541
destination=bigquery.table.TableReference.from_string(
25292542
destination_table,
2530-
default_project=self._block.expr.session.bqclient.project,
2543+
default_project=default_project,
25312544
),
25322545
)
25332546

tests/system/small/test_dataframe.py

+15
Original file line number | Diff line number | Diff line change
@@ -3683,3 +3683,18 @@ def test_to_pandas_downsampling_option_override(session):
36833683
total_memory_bytes = df.memory_usage(deep=True).sum()
36843684
total_memory_mb = total_memory_bytes / (1024 * 1024)
36853685
assert total_memory_mb == pytest.approx(download_size, rel=0.3)
3686+
3687+
3688+
def test_to_gbq_and_create_dataset(session, scalars_df_index, dataset_id_not_created):
3689+
dataset_id = dataset_id_not_created
3690+
destination_table = f"{dataset_id}.scalars_df"
3691+
3692+
result_table = scalars_df_index.to_gbq(destination_table)
3693+
assert (
3694+
result_table == destination_table
3695+
if destination_table
3696+
else result_table is not None
3697+
)
3698+
3699+
loaded_scalars_df_index = session.read_gbq(result_table)
3700+
assert not loaded_scalars_df_index.empty

0 commit comments

Comments
 (0)