@@ -498,6 +498,8 @@ def read_gbq_query(
498
498
499
499
See also: :meth:`Session.read_gbq`.
500
500
"""
501
+ # NOTE: This method doesn't (yet) exist in pandas or pandas-gbq, so
502
+ # these docstrings are inline.
501
503
return self ._read_gbq_query (
502
504
query = query ,
503
505
index_col = index_col ,
@@ -515,8 +517,6 @@ def _read_gbq_query(
515
517
max_results : Optional [int ] = None ,
516
518
api_name : str ,
517
519
) -> dataframe .DataFrame :
518
- # NOTE: This method doesn't (yet) exist in pandas or pandas-gbq, so
519
- # these docstrings are inline.
520
520
if isinstance (index_col , str ):
521
521
index_cols = [index_col ]
522
522
else :
@@ -561,6 +561,8 @@ def read_gbq_table(
561
561
562
562
See also: :meth:`Session.read_gbq`.
563
563
"""
564
+ # NOTE: This method doesn't (yet) exist in pandas or pandas-gbq, so
565
+ # these docstrings are inline.
564
566
return self ._read_gbq_table (
565
567
query = query ,
566
568
index_col = index_col ,
@@ -569,6 +571,62 @@ def read_gbq_table(
569
571
api_name = "read_gbq_table" ,
570
572
)
571
573
574
def _read_gbq_table_to_ibis_with_total_ordering(
    self,
    table_ref: bigquery.table.TableReference,
    *,
    api_name: str,
) -> Tuple[ibis_types.Table, Optional[Sequence[str]]]:
    """Build a read-only Ibis table expression for ``table_ref``.

    Args:
        table_ref: Reference to the table to read.
        api_name: Value stored under the ``bigframes-api`` label of the
            query job issued when a snapshot timestamp is needed.

    Returns:
        A ``(table_expression, total_ordering_columns)`` pair. The second
        item holds the table's primary key column(s) when they are
        defined, so the caller can skip generating an ordering; it is
        ``None`` for ``_SESSION`` tables and for tables without primary
        keys.
    """
    is_session_table = table_ref.dataset_id.upper() == "_SESSION"
    if is_session_table:
        # The tables.get REST API can't describe _SESSION tables, so go
        # through SQL and report that no total ordering is known.
        session_sql = f"SELECT * FROM `_SESSION`.`{table_ref.table_id}`"
        return self.ibis_client.sql(session_sql), None

    qualified_dataset = f"{table_ref.project}.{table_ref.dataset_id}"
    ibis_table = self.ibis_client.table(
        table_ref.table_id,
        database=qualified_dataset,
    )

    # The query engine treats primary key columns as unique even when the
    # constraint is not enforced. The same assumption is made here: those
    # columns serve as the total ordering keys.
    table_metadata = self.bqclient.get_table(table_ref)

    # TODO(b/305264153): Use public properties to fetch primary keys once
    # added to google-cloud-bigquery.
    constraints = table_metadata._properties.get("tableConstraints", {})
    primary_keys = constraints.get("primaryKey", {}).get("columns")

    if not primary_keys:
        return ibis_table, None

    # With a total ordering available, read from a snapshot: the table data
    # doesn't have to be copied just to create an ordering.
    query_config = bigquery.QueryJobConfig()
    query_config.labels["bigframes-api"] = api_name
    timestamp_job = self.bqclient.query(
        "SELECT CURRENT_TIMESTAMP() AS `current_timestamp`",
        job_config=query_config,
    )
    timestamp_rows = list(timestamp_job.result())
    snapshot_time = timestamp_rows[0][0]
    snapshot_sql = bigframes_io.create_snapshot_sql(table_ref, snapshot_time)
    return self.ibis_client.sql(snapshot_sql), primary_keys
629
+
572
630
def _read_gbq_table (
573
631
self ,
574
632
query : str ,
@@ -581,24 +639,19 @@ def _read_gbq_table(
581
639
if max_results and max_results <= 0 :
582
640
raise ValueError ("`max_results` should be a positive number." )
583
641
584
- # NOTE: This method doesn't (yet) exist in pandas or pandas-gbq, so
585
- # these docstrings are inline.
586
642
# TODO(swast): Can we re-use the temp table from other reads in the
587
643
# session, if the original table wasn't modified?
588
644
table_ref = bigquery .table .TableReference .from_string (
589
645
query , default_project = self .bqclient .project
590
646
)
591
647
592
- if table_ref .dataset_id .upper () == "_SESSION" :
593
- # _SESSION tables aren't supported by the tables.get REST API.
594
- table_expression = self .ibis_client .sql (
595
- f"SELECT * FROM `_SESSION`.`{ table_ref .table_id } `"
596
- )
597
- else :
598
- table_expression = self .ibis_client .table (
599
- table_ref .table_id ,
600
- database = f"{ table_ref .project } .{ table_ref .dataset_id } " ,
601
- )
648
+ (
649
+ table_expression ,
650
+ total_ordering_cols ,
651
+ ) = self ._read_gbq_table_to_ibis_with_total_ordering (
652
+ table_ref ,
653
+ api_name = api_name ,
654
+ )
602
655
603
656
for key in col_order :
604
657
if key not in table_expression .columns :
@@ -624,7 +677,34 @@ def _read_gbq_table(
624
677
ordering = None
625
678
is_total_ordering = False
626
679
627
- if len (index_cols ) != 0 :
680
+ if total_ordering_cols is not None :
681
+ # Note: currently, a table has a total ordering only when the
682
+ # primary key(s) are set on a table. The query engine assumes such
683
+ # columns are unique, even if not enforced.
684
+ is_total_ordering = True
685
+ ordering = core .ExpressionOrdering (
686
+ ordering_value_columns = [
687
+ core .OrderingColumnReference (column_id )
688
+ for column_id in total_ordering_cols
689
+ ],
690
+ total_ordering_columns = frozenset (total_ordering_cols ),
691
+ )
692
+
693
+ if len (index_cols ) != 0 :
694
+ index_labels = typing .cast (List [Optional [str ]], index_cols )
695
+ else :
696
+ # Use the total_ordering_cols to project offsets to use as the default index.
697
+ table_expression = table_expression .order_by (index_cols )
698
+ default_index_id = guid .generate_guid ("bigframes_index_" )
699
+ default_index_col = (
700
+ ibis .row_number ().cast (ibis_dtypes .int64 ).name (default_index_id )
701
+ )
702
+ table_expression = table_expression .mutate (
703
+ ** {default_index_id : default_index_col }
704
+ )
705
+ index_cols = [default_index_id ]
706
+ index_labels = [None ]
707
+ elif len (index_cols ) != 0 :
628
708
index_labels = typing .cast (List [Optional [str ]], index_cols )
629
709
distinct_table = table_expression .select (* index_cols ).distinct ()
630
710
is_unique_sql = f"""WITH full_table AS (
0 commit comments