Skip to content

Commit 55cfdd0

Browse files
fix: Fix 'sql' property for null index
1 parent ed06436 commit 55cfdd0

File tree

4 files changed

+18
-1
lines changed

4 files changed

+18
-1
lines changed

bigframes/dataframe.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -385,7 +385,9 @@ def _to_sql_query(
385385
@property
386386
def sql(self) -> str:
387387
"""Compiles this DataFrame's expression tree to SQL."""
388-
include_index = self.index.name is not None or len(self.index.names) > 1
388+
include_index = self._has_index and (
389+
self.index.name is not None or len(self.index.names) > 1
390+
)
389391
sql, _, _ = self._to_sql_query(include_index=include_index)
390392
return sql
391393

bigframes/session/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -884,6 +884,8 @@ def _read_gbq_table(
884884
table=table,
885885
index_cols=index_cols,
886886
api_name=api_name,
887+
# If not in strict ordering mode, skip the overhead of scanning the index column(s) to determine whether they are unique
888+
metadata_only=not self._strictly_ordered,
887889
)
888890
schema = schemata.ArraySchema.from_bq_table(table)
889891
if columns:

bigframes/session/_io/bigquery/read_gbq_table.py

+4
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ def are_index_cols_unique(
152152
table: bigquery.table.Table,
153153
index_cols: List[str],
154154
api_name: str,
155+
metadata_only: bool,
155156
) -> bool:
156157
if len(index_cols) == 0:
157158
return False
@@ -161,6 +162,9 @@ def are_index_cols_unique(
161162
if (len(primary_keys) > 0) and primary_keys <= frozenset(index_cols):
162163
return True
163164

165+
if metadata_only:
166+
# Sometimes not worth scanning data to check uniqueness
167+
return False
164168
# TODO(b/337925142): Avoid a "SELECT *" subquery here by ensuring
165169
# table_expression only selects just index_cols.
166170
is_unique_sql = bigframes.core.sql.is_distinct_sql(index_cols, table.reference)

tests/system/small/test_unordered.py

+9
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,15 @@
2020
from tests.system.utils import assert_pandas_df_equal, skip_legacy_pandas
2121

2222

23+
def test_unordered_mode_sql_no_hash(unordered_session):
24+
bf_df = unordered_session.read_gbq(
25+
"bigquery-public-data.ethereum_blockchain.blocks"
26+
)
27+
sql = bf_df.sql
28+
assert "ORDER BY" not in sql.upper()
29+
assert "farm_fingerprint" not in sql
30+
31+
2332
def test_unordered_mode_cache_aggregate(unordered_session):
2433
pd_df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, dtype=pd.Int64Dtype())
2534
df = bpd.DataFrame(pd_df, session=unordered_session)

0 commit comments

Comments
 (0)