
Commit ffb3915

feat: add databoost enabled property for batch transactions (#892)
* proto changes
* changes
* changes
* linting
* changes
* changes
* changes
* changes
* changes
* Changes
* Update google/cloud/spanner_v1/snapshot.py

Co-authored-by: Rajat Bhatta <[email protected]>

* Update google/cloud/spanner_v1/database.py

Co-authored-by: Rajat Bhatta <[email protected]>

---------

Co-authored-by: Rajat Bhatta <[email protected]>
1 parent 1f4a3ca commit ffb3915

File tree

5 files changed: +133 −7 lines changed


google/cloud/spanner_v1/database.py

+17 −1
@@ -1101,6 +1101,7 @@ def generate_read_batches(
         index="",
         partition_size_bytes=None,
         max_partitions=None,
+        data_boost_enabled=False,
         *,
         retry=gapic_v1.method.DEFAULT,
         timeout=gapic_v1.method.DEFAULT,
@@ -1135,6 +1136,11 @@ def generate_read_batches(
             service uses this as a hint, the actual number of partitions may
             differ.
 
+        :type data_boost_enabled:
+        :param data_boost_enabled:
+            (Optional) If this is for a partitioned read and this field is
+            set ``true``, the request will be executed via offline access.
+
         :type retry: :class:`~google.api_core.retry.Retry`
         :param retry: (Optional) The retry settings for this request.
@@ -1162,6 +1168,7 @@ def generate_read_batches(
             "columns": columns,
             "keyset": keyset._to_dict(),
             "index": index,
+            "data_boost_enabled": data_boost_enabled,
         }
         for partition in partitions:
             yield {"partition": partition, "read": read_info.copy()}
@@ -1205,6 +1212,7 @@ def generate_query_batches(
         partition_size_bytes=None,
         max_partitions=None,
         query_options=None,
+        data_boost_enabled=False,
         *,
         retry=gapic_v1.method.DEFAULT,
         timeout=gapic_v1.method.DEFAULT,
@@ -1251,6 +1259,11 @@ def generate_query_batches(
             If a dict is provided, it must be of the same form as the protobuf
             message :class:`~google.cloud.spanner_v1.types.QueryOptions`
 
+        :type data_boost_enabled:
+        :param data_boost_enabled:
+            (Optional) If this is for a partitioned query and this field is
+            set ``true``, the request will be executed via offline access.
+
         :type retry: :class:`~google.api_core.retry.Retry`
         :param retry: (Optional) The retry settings for this request.
@@ -1272,7 +1285,10 @@ def generate_query_batches(
             timeout=timeout,
         )
 
-        query_info = {"sql": sql}
+        query_info = {
+            "sql": sql,
+            "data_boost_enabled": data_boost_enabled,
+        }
         if params:
             query_info["params"] = params
             query_info["param_types"] = param_types

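For orientation, a minimal usage sketch of the new parameter at the BatchSnapshot level; the instance and database IDs are placeholders, and Data Boost must be available for the target project. Each generated batch carries the flag in its "query" dict, which process() forwards to the streaming request:

# Minimal sketch of the batch-query path with Data Boost (placeholder IDs).
from google.cloud import spanner

client = spanner.Client()
database = client.instance("my-instance").database("my-database")

batch_snapshot = database.batch_snapshot()
batches = batch_snapshot.generate_query_batches(
    "SELECT SingerId, FirstName, LastName FROM Singers",
    data_boost_enabled=True,  # carried in each batch's "query" dict, as shown above
)
for batch in batches:
    for row in batch_snapshot.process(batch):
        print(row)
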
google/cloud/spanner_v1/snapshot.py

+20
@@ -167,6 +167,7 @@ def read(
         limit=0,
         partition=None,
         request_options=None,
+        data_boost_enabled=False,
         *,
         retry=gapic_v1.method.DEFAULT,
         timeout=gapic_v1.method.DEFAULT,
@@ -210,6 +211,14 @@ def read(
         :type timeout: float
         :param timeout: (Optional) The timeout for this request.
 
+        :type data_boost_enabled:
+        :param data_boost_enabled:
+            (Optional) If this is for a partitioned read and this field is
+            set ``true``, the request will be executed via offline access.
+            If the field is set to ``true`` but the request does not set
+            ``partition_token``, the API will return an
+            ``INVALID_ARGUMENT`` error.
+
         :rtype: :class:`~google.cloud.spanner_v1.streamed.StreamedResultSet`
         :returns: a result set instance which can be used to consume rows.
 
@@ -247,6 +256,7 @@ def read(
             limit=limit,
             partition_token=partition,
             request_options=request_options,
+            data_boost_enabled=data_boost_enabled,
         )
         restart = functools.partial(
             api.streaming_read,
@@ -302,6 +312,7 @@ def execute_sql(
         partition=None,
         retry=gapic_v1.method.DEFAULT,
         timeout=gapic_v1.method.DEFAULT,
+        data_boost_enabled=False,
     ):
         """Perform an ``ExecuteStreamingSql`` API request.
 
@@ -351,6 +362,14 @@ def execute_sql(
         :type timeout: float
         :param timeout: (Optional) The timeout for this request.
 
+        :type data_boost_enabled:
+        :param data_boost_enabled:
+            (Optional) If this is for a partitioned query and this field is
+            set ``true``, the request will be executed via offline access.
+            If the field is set to ``true`` but the request does not set
+            ``partition_token``, the API will return an
+            ``INVALID_ARGUMENT`` error.
+
         :raises ValueError:
             for reuse of single-use snapshots, or if a transaction ID is
             already pending for multiple-use snapshots.
@@ -400,6 +419,7 @@ def execute_sql(
             seqno=self._execute_sql_count,
             query_options=query_options,
             request_options=request_options,
+            data_boost_enabled=data_boost_enabled,
         )
         restart = functools.partial(
             api.execute_streaming_sql,

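The docstrings above tie data_boost_enabled to ``partition_token``: setting the flag without a partition yields INVALID_ARGUMENT. A hedged sketch of the snapshot-level pairing follows; the table, columns, and IDs are illustrative, and the multi-use snapshot is begun before partitioning:

# Illustrative pairing of data_boost_enabled with a partition token.
from google.cloud import spanner

database = spanner.Client().instance("my-instance").database("my-database")
with database.snapshot(multi_use=True) as snapshot:
    snapshot.begin()  # partition_read() expects an already-begun multi-use snapshot
    columns = ("SingerId", "FirstName")
    keyset = spanner.KeySet(all_=True)
    for token in snapshot.partition_read("Singers", columns, keyset):
        results = snapshot.read(
            "Singers",
            columns,
            keyset,
            partition=token,
            data_boost_enabled=True,  # run this partition on independent compute resources
        )
        for row in results:
            print(row)
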
samples/samples/batch_sample.py

+6 −1
@@ -47,6 +47,10 @@ def run_batch_query(instance_id, database_id):
         table="Singers",
         columns=("SingerId", "FirstName", "LastName"),
         keyset=spanner.KeySet(all_=True),
+        # A Partition object is serializable and can be used from a different process.
+        # DataBoost option is an optional parameter which can also be used for partition read
+        # and query to execute the request via spanner independent compute resources.
+        data_boost_enabled=True,
     )
 
     # Create a pool of workers for the tasks
@@ -87,4 +91,5 @@ def process(snapshot, partition):
 
     args = parser.parse_args()
 
-    run_batch_query(args.instance_id, args.database_id)
+    if args.command == "run_batch_query":
+        run_batch_query(args.instance_id, args.database_id)

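The new comment points out that a Partition (the batch dict) is serializable and usable from a different process. As an illustration only, a worker could rehydrate both the snapshot state (from BatchSnapshot.to_dict()) and the batch; the helper name and IDs below are hypothetical, not part of the sample:

# Hedged sketch: rebuilding a serialized batch snapshot inside a worker process.
from google.cloud import spanner
from google.cloud.spanner_v1.database import BatchSnapshot

def process_partition(snapshot_dict, batch):
    """Rebuild the batch snapshot from its to_dict() state and stream one batch."""
    database = spanner.Client().instance("my-instance").database("my-database")
    snapshot = BatchSnapshot.from_dict(database, snapshot_dict)
    return [tuple(row) for row in snapshot.process(batch)]

In the parent process, snapshot_dict would come from batch_snapshot.to_dict(), and each batch produced by generate_read_batches() or generate_query_batches() can be pickled and handed to the worker as-is.
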
tests/system/test_session_api.py

+8 −4
@@ -1875,7 +1875,7 @@ def test_read_with_range_keys_and_index_open_open(sessions_database):
18751875
assert rows == expected
18761876

18771877

1878-
def test_partition_read_w_index(sessions_database):
1878+
def test_partition_read_w_index(sessions_database, not_emulator):
18791879
sd = _sample_data
18801880
row_count = 10
18811881
columns = sd.COLUMNS[1], sd.COLUMNS[2]
@@ -1886,7 +1886,11 @@ def test_partition_read_w_index(sessions_database):
18861886

18871887
batch_txn = sessions_database.batch_snapshot(read_timestamp=committed)
18881888
batches = batch_txn.generate_read_batches(
1889-
sd.TABLE, columns, spanner_v1.KeySet(all_=True), index="name"
1889+
sd.TABLE,
1890+
columns,
1891+
spanner_v1.KeySet(all_=True),
1892+
index="name",
1893+
data_boost_enabled=True,
18901894
)
18911895
for batch in batches:
18921896
p_results_iter = batch_txn.process(batch)
@@ -2494,7 +2498,7 @@ def test_execute_sql_returning_transfinite_floats(sessions_database, not_postgre
24942498
assert math.isnan(float_array[2])
24952499

24962500

2497-
def test_partition_query(sessions_database):
2501+
def test_partition_query(sessions_database, not_emulator):
24982502
row_count = 40
24992503
sql = f"SELECT * FROM {_sample_data.TABLE}"
25002504
committed = _set_up_table(sessions_database, row_count)
@@ -2503,7 +2507,7 @@ def test_partition_query(sessions_database):
25032507
all_data_rows = set(_row_data(row_count))
25042508
union = set()
25052509
batch_txn = sessions_database.batch_snapshot(read_timestamp=committed)
2506-
for batch in batch_txn.generate_query_batches(sql):
2510+
for batch in batch_txn.generate_query_batches(sql, data_boost_enabled=True):
25072511
p_results_iter = batch_txn.process(batch)
25082512
# Lists aren't hashable so the results need to be converted
25092513
rows = [tuple(result) for result in p_results_iter]

tests/unit/test_database.py

+82 −1
@@ -2114,6 +2114,7 @@ def test_generate_read_batches_w_max_partitions(self):
             "columns": self.COLUMNS,
             "keyset": {"all": True},
             "index": "",
+            "data_boost_enabled": False,
         }
         self.assertEqual(len(batches), len(self.TOKENS))
         for batch, token in zip(batches, self.TOKENS):
@@ -2155,6 +2156,7 @@ def test_generate_read_batches_w_retry_and_timeout_params(self):
             "columns": self.COLUMNS,
             "keyset": {"all": True},
             "index": "",
+            "data_boost_enabled": False,
         }
         self.assertEqual(len(batches), len(self.TOKENS))
         for batch, token in zip(batches, self.TOKENS):
@@ -2195,6 +2197,7 @@ def test_generate_read_batches_w_index_w_partition_size_bytes(self):
             "columns": self.COLUMNS,
             "keyset": {"all": True},
             "index": self.INDEX,
+            "data_boost_enabled": False,
         }
         self.assertEqual(len(batches), len(self.TOKENS))
         for batch, token in zip(batches, self.TOKENS):
@@ -2212,6 +2215,47 @@ def test_generate_read_batches_w_index_w_partition_size_bytes(self):
             timeout=gapic_v1.method.DEFAULT,
         )
 
+    def test_generate_read_batches_w_data_boost_enabled(self):
+        data_boost_enabled = True
+        keyset = self._make_keyset()
+        database = self._make_database()
+        batch_txn = self._make_one(database)
+        snapshot = batch_txn._snapshot = self._make_snapshot()
+        snapshot.partition_read.return_value = self.TOKENS
+
+        batches = list(
+            batch_txn.generate_read_batches(
+                self.TABLE,
+                self.COLUMNS,
+                keyset,
+                index=self.INDEX,
+                data_boost_enabled=data_boost_enabled,
+            )
+        )
+
+        expected_read = {
+            "table": self.TABLE,
+            "columns": self.COLUMNS,
+            "keyset": {"all": True},
+            "index": self.INDEX,
+            "data_boost_enabled": True,
+        }
+        self.assertEqual(len(batches), len(self.TOKENS))
+        for batch, token in zip(batches, self.TOKENS):
+            self.assertEqual(batch["partition"], token)
+            self.assertEqual(batch["read"], expected_read)
+
+        snapshot.partition_read.assert_called_once_with(
+            table=self.TABLE,
+            columns=self.COLUMNS,
+            keyset=keyset,
+            index=self.INDEX,
+            partition_size_bytes=None,
+            max_partitions=None,
+            retry=gapic_v1.method.DEFAULT,
+            timeout=gapic_v1.method.DEFAULT,
+        )
+
     def test_process_read_batch(self):
         keyset = self._make_keyset()
         token = b"TOKEN"
@@ -2288,7 +2332,11 @@ def test_generate_query_batches_w_max_partitions(self):
             batch_txn.generate_query_batches(sql, max_partitions=max_partitions)
         )
 
-        expected_query = {"sql": sql, "query_options": client._query_options}
+        expected_query = {
+            "sql": sql,
+            "data_boost_enabled": False,
+            "query_options": client._query_options,
+        }
         self.assertEqual(len(batches), len(self.TOKENS))
         for batch, token in zip(batches, self.TOKENS):
             self.assertEqual(batch["partition"], token)
@@ -2326,6 +2374,7 @@ def test_generate_query_batches_w_params_w_partition_size_bytes(self):
 
         expected_query = {
             "sql": sql,
+            "data_boost_enabled": False,
             "params": params,
             "param_types": param_types,
             "query_options": client._query_options,
@@ -2372,6 +2421,7 @@ def test_generate_query_batches_w_retry_and_timeout_params(self):
 
         expected_query = {
             "sql": sql,
+            "data_boost_enabled": False,
             "params": params,
             "param_types": param_types,
             "query_options": client._query_options,
@@ -2391,6 +2441,37 @@ def test_generate_query_batches_w_retry_and_timeout_params(self):
             timeout=2.0,
         )
 
+    def test_generate_query_batches_w_data_boost_enabled(self):
+        sql = "SELECT COUNT(*) FROM table_name"
+        client = _Client(self.PROJECT_ID)
+        instance = _Instance(self.INSTANCE_NAME, client=client)
+        database = _Database(self.DATABASE_NAME, instance=instance)
+        batch_txn = self._make_one(database)
+        snapshot = batch_txn._snapshot = self._make_snapshot()
+        snapshot.partition_query.return_value = self.TOKENS
+
+        batches = list(batch_txn.generate_query_batches(sql, data_boost_enabled=True))
+
+        expected_query = {
+            "sql": sql,
+            "data_boost_enabled": True,
+            "query_options": client._query_options,
+        }
+        self.assertEqual(len(batches), len(self.TOKENS))
+        for batch, token in zip(batches, self.TOKENS):
+            self.assertEqual(batch["partition"], token)
+            self.assertEqual(batch["query"], expected_query)
+
+        snapshot.partition_query.assert_called_once_with(
+            sql=sql,
+            params=None,
+            param_types=None,
+            partition_size_bytes=None,
+            max_partitions=None,
+            retry=gapic_v1.method.DEFAULT,
+            timeout=gapic_v1.method.DEFAULT,
+        )
+
     def test_process_query_batch(self):
         sql = (
             "SELECT first_name, last_name, email FROM citizens " "WHERE age <= @max_age"
