Skip to content

Commit 1b6a556

Browse files
fix: Fix 'sql' property for null index (#844)
1 parent f9e4435 commit 1b6a556

File tree

4 files changed

+18
-1
lines changed

4 files changed

+18
-1
lines changed

bigframes/dataframe.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -385,7 +385,9 @@ def _to_sql_query(
385385
@property
386386
def sql(self) -> str:
387387
"""Compiles this DataFrame's expression tree to SQL."""
388-
include_index = self.index.name is not None or len(self.index.names) > 1
388+
include_index = self._has_index and (
389+
self.index.name is not None or len(self.index.names) > 1
390+
)
389391
sql, _, _ = self._to_sql_query(include_index=include_index)
390392
return sql
391393

bigframes/session/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -889,6 +889,8 @@ def _read_gbq_table(
889889
table=table,
890890
index_cols=index_cols,
891891
api_name=api_name,
892+
# If not in strict ordering mode, don't go through the overhead of scanning index column(s) to determine if they are unique
893+
metadata_only=not self._strictly_ordered,
892894
)
893895
schema = schemata.ArraySchema.from_bq_table(table)
894896
if columns:

bigframes/session/_io/bigquery/read_gbq_table.py

+4
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ def are_index_cols_unique(
152152
table: bigquery.table.Table,
153153
index_cols: List[str],
154154
api_name: str,
155+
metadata_only: bool = False,
155156
) -> bool:
156157
if len(index_cols) == 0:
157158
return False
@@ -161,6 +162,9 @@ def are_index_cols_unique(
161162
if (len(primary_keys) > 0) and primary_keys <= frozenset(index_cols):
162163
return True
163164

165+
if metadata_only:
166+
# Sometimes not worth scanning data to check uniqueness
167+
return False
164168
# TODO(b/337925142): Avoid a "SELECT *" subquery here by ensuring
165169
# table_expression only selects just index_cols.
166170
is_unique_sql = bigframes.core.sql.is_distinct_sql(index_cols, table.reference)

tests/system/small/test_unordered.py

+9
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,15 @@
2020
from tests.system.utils import assert_pandas_df_equal, skip_legacy_pandas
2121

2222

23+
def test_unordered_mode_sql_no_hash(unordered_session):
24+
bf_df = unordered_session.read_gbq(
25+
"bigquery-public-data.ethereum_blockchain.blocks"
26+
)
27+
sql = bf_df.sql
28+
assert "ORDER BY".casefold() not in sql.casefold()
29+
assert "farm_fingerprint".casefold() not in sql.casefold()
30+
31+
2332
def test_unordered_mode_job_label(unordered_session):
2433
pd_df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, dtype=pd.Int64Dtype())
2534
df = bpd.DataFrame(pd_df, session=unordered_session)

0 commit comments

Comments
 (0)