Skip to content

Commit e87179e

Browse files
rahulKQL and igorbernstein2
authored and committed
feat: introducing bulk read API through Batcher (#99)
* feat: introducing bulk read API through Batcher This change introduces BulkReadAPI on BigtableDataClient. This operation accepts row keys in a batch mode and behind the scene fetch rows based on configurable batches. * Added Query.clone test case and some formatting changes * Address feedback comments * Address more feedback comments * Updated QueryTest with asserts * Moved TODO into getDefaultChannelPoolSize() * Minor changes to address feedback changes
1 parent b375e87 commit e87179e

12 files changed

+911
-14
lines changed

google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/BigtableDataClient.java

+80
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import com.google.api.gax.rpc.UnaryCallable;
2727
import com.google.cloud.bigtable.data.v2.models.BulkMutation;
2828
import com.google.cloud.bigtable.data.v2.models.ConditionalRowMutation;
29+
import com.google.cloud.bigtable.data.v2.models.Filters;
2930
import com.google.cloud.bigtable.data.v2.models.Filters.Filter;
3031
import com.google.cloud.bigtable.data.v2.models.KeyOffset;
3132
import com.google.cloud.bigtable.data.v2.models.Query;
@@ -936,6 +937,85 @@ public Batcher<RowMutationEntry, Void> newBulkMutationBatcher(@Nonnull String ta
936937
return stub.newMutateRowsBatcher(tableId);
937938
}
938939

940+
/**
 * Reads rows for the given tableId in a batch. If a requested row does not exist, its future
 * resolves to {@code null}. This operation should be called from within a single thread.
 *
 * <p>Sample Code:
 *
 * <pre>{@code
 * try (BigtableDataClient bigtableDataClient = BigtableDataClient.create("[PROJECT]", "[INSTANCE]")) {
 *   List<ApiFuture<Row>> rows = new ArrayList<>();
 *
 *   try (Batcher<ByteString, Row> batcher = bigtableDataClient.newBulkReadRowsBatcher("[TABLE]")) {
 *     for (String someValue : someCollection) {
 *       ApiFuture<Row> rowFuture =
 *           batcher.add(ByteString.copyFromUtf8("[ROW KEY]"));
 *       rows.add(rowFuture);
 *     }
 *
 *     // [Optional] Sends collected elements for batching asynchronously.
 *     batcher.sendOutstanding();
 *
 *     // [Optional] Invokes sendOutstanding() and awaits until all pending entries are resolved.
 *     batcher.flush();
 *   }
 *   // batcher.close() invokes flush(), which in turn invokes sendOutstanding() and awaits
 *   // pending batches until they are resolved.
 *
 *   List<Row> actualRows = ApiFutures.allAsList(rows).get();
 * }
 * }</pre>
 */
public Batcher<ByteString, Row> newBulkReadRowsBatcher(String tableId) {
  // Delegates to the filtered variant; a null filter means rows are returned unfiltered.
  return newBulkReadRowsBatcher(tableId, null);
}
973+
974+
/**
975+
* Reads rows for given tableId and filter criteria in a batch. If the row does not exist, the
976+
* value will be null. This operation should be called with in a single thread.
977+
*
978+
* <p>Sample Code:
979+
*
980+
* <pre>{@code
981+
* try (BigtableDataClient bigtableDataClient = BigtableDataClient.create("[PROJECT]", "[INSTANCE]")) {
982+
*
983+
* // Build the filter expression
984+
* Filter filter = FILTERS.chain()
985+
* .filter(FILTERS.key().regex("prefix.*"))
986+
* .filter(FILTERS.limit().cellsPerRow(10));
987+
*
988+
* List<ApiFuture<Row>> rows = new ArrayList<>();
989+
*
990+
* try (Batcher<ByteString, Row> batcher = bigtableDataClient.newBulkReadRowsBatcher("[TABLE]", filter)) {
991+
* for (String someValue : someCollection) {
992+
* ApiFuture<Row> rowFuture =
993+
* batcher.add(ByteString.copyFromUtf8("[ROW KEY]"));
994+
* rows.add(rowFuture);
995+
* }
996+
*
997+
* // [Optional] Sends collected elements for batching asynchronously.
998+
* batcher.sendOutstanding();
999+
*
1000+
* // [Optional] Invokes sendOutstanding() and awaits until all pending entries are resolved.
1001+
* batcher.flush();
1002+
* }
1003+
* // batcher.close() invokes `flush()` which will in turn invoke `sendOutstanding()` with await for
1004+
* pending batches until its resolved.
1005+
*
1006+
* List<Row> actualRows = ApiFutures.allAsList(rows).get();
1007+
* }
1008+
* }</pre>
1009+
*/
1010+
public Batcher<ByteString, Row> newBulkReadRowsBatcher(
1011+
String tableId, @Nullable Filters.Filter filter) {
1012+
Query query = Query.create(tableId);
1013+
if (filter != null) {
1014+
query = query.filter(filter);
1015+
}
1016+
return stub.newBulkReadRowsBatcher(query);
1017+
}
1018+
9391019
/**
9401020
* Convenience method to mutate multiple rows in a batch. Each individual row is mutated
9411021
* atomically as in MutateRow, but the entire batch is not executed atomically. This method

google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/models/Query.java

+6
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,12 @@ public static Query fromProto(@Nonnull ReadRowsRequest request) {
274274
return query;
275275
}
276276

277+
public Query clone() {
278+
Query query = Query.create(tableId);
279+
query.builder = this.builder.clone();
280+
return query;
281+
}
282+
277283
private static ByteString wrapKey(String key) {
278284
if (key == null) {
279285
return null;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
/*
2+
* Copyright 2020 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* https://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package com.google.cloud.bigtable.data.v2.stub;
17+
18+
import com.google.api.core.BetaApi;
19+
import com.google.api.gax.batching.BatchingCallSettings;
20+
import com.google.api.gax.batching.BatchingDescriptor;
21+
import com.google.api.gax.batching.BatchingSettings;
22+
import com.google.api.gax.retrying.RetrySettings;
23+
import com.google.api.gax.rpc.StatusCode;
24+
import com.google.api.gax.rpc.UnaryCallSettings;
25+
import com.google.cloud.bigtable.data.v2.models.Query;
26+
import com.google.cloud.bigtable.data.v2.models.Row;
27+
import com.google.common.base.Preconditions;
28+
import com.google.protobuf.ByteString;
29+
import java.util.List;
30+
import java.util.Set;
31+
import javax.annotation.Nonnull;
32+
33+
/**
 * These settings hold the batching thresholds as well as the retry configuration for the bulk
 * read API.
 *
 * <p>Sample configuration:
 *
 * <pre>{@code
 * BigtableBulkReadRowsCallSettings defaultBulkReadCallSettings =
 *     bigtableDataCallSettings.getStubSettings().bulkReadRowsSettings();
 *
 * BigtableBulkReadRowsCallSettings customBulkReadCallSettings = defaultBulkReadCallSettings
 *     .toBuilder()
 *     .setBatchingSettings(
 *         defaultBulkReadCallSettings.getBatchingSettings().toBuilder()
 *             .setDelayThreshold(Duration.ofSeconds(10))
 *             .build())
 *     .setRetryableCodes(Code.DEADLINE_EXCEEDED)
 *     .build();
 * }</pre>
 *
 * @see BatchingSettings for an explanation of the batching thresholds.
 * @see RetrySettings for retry configuration.
 */
@BetaApi("This surface is likely to change as the batching surface evolves.")
public class BigtableBulkReadRowsCallSettings extends UnaryCallSettings<Query, List<Row>> {

  // Delegate that bundles the batching descriptor, thresholds and retry configuration.
  private final BatchingCallSettings<ByteString, Row, Query, List<Row>> batchingCallSettings;

  private BigtableBulkReadRowsCallSettings(Builder builder) {
    super(builder);
    // Mirror the retry configuration held by the UnaryCallSettings base into the
    // batching delegate so both views of the settings stay consistent.
    batchingCallSettings =
        BatchingCallSettings.newBuilder(builder.batchingDescriptor)
            .setBatchingSettings(builder.batchingSettings)
            .setRetrySettings(builder.getRetrySettings())
            .setRetryableCodes(builder.getRetryableCodes())
            .build();
  }

  /** Returns the batching settings, which contain the batch threshold levels. */
  public BatchingSettings getBatchingSettings() {
    return batchingCallSettings.getBatchingSettings();
  }

  /** Returns the adapter that packs row keys into queries and unpacks row responses. */
  BatchingDescriptor<ByteString, Row, Query, List<Row>> getBatchingDescriptor() {
    return batchingCallSettings.getBatchingDescriptor();
  }

  /** Creates a new builder wrapping the given batching descriptor. */
  static BigtableBulkReadRowsCallSettings.Builder newBuilder(
      BatchingDescriptor<ByteString, Row, Query, List<Row>> batchingDescriptor) {
    return new Builder(batchingDescriptor);
  }

  /**
   * Get a builder with the same values as this object. See the class documentation of {@link
   * BigtableBulkReadRowsCallSettings} for a sample settings configuration.
   */
  @Override
  public final BigtableBulkReadRowsCallSettings.Builder toBuilder() {
    return new BigtableBulkReadRowsCallSettings.Builder(this);
  }

  /** Builder for {@link BigtableBulkReadRowsCallSettings}. */
  public static class Builder extends UnaryCallSettings.Builder<Query, List<Row>> {

    private BatchingDescriptor<ByteString, Row, Query, List<Row>> batchingDescriptor;
    private BatchingSettings batchingSettings;

    private Builder(
        @Nonnull BatchingDescriptor<ByteString, Row, Query, List<Row>> batchingDescriptor) {
      this.batchingDescriptor =
          Preconditions.checkNotNull(batchingDescriptor, "batching descriptor can't be null");
    }

    private Builder(@Nonnull BigtableBulkReadRowsCallSettings settings) {
      super(settings);
      this.batchingDescriptor = settings.getBatchingDescriptor();
      this.batchingSettings = settings.getBatchingSettings();
    }

    /** Sets the batching settings with various thresholds. */
    public Builder setBatchingSettings(@Nonnull BatchingSettings batchingSettings) {
      Preconditions.checkNotNull(batchingSettings, "batching settings can't be null");
      this.batchingSettings = batchingSettings;
      return this;
    }

    /** Returns the {@link BatchingSettings}. */
    public BatchingSettings getBatchingSettings() {
      return batchingSettings;
    }

    /** Sets the rpc failure {@link StatusCode.Code code}s for which retries should be performed. */
    @Override
    public Builder setRetryableCodes(StatusCode.Code... codes) {
      super.setRetryableCodes(codes);
      return this;
    }

    /** Sets the rpc failure {@link StatusCode.Code code}s for which retries should be performed. */
    @Override
    public Builder setRetryableCodes(Set<StatusCode.Code> retryableCodes) {
      super.setRetryableCodes(retryableCodes);
      return this;
    }

    /** Sets the {@link RetrySettings} values for each retry attempt. */
    @Override
    public Builder setRetrySettings(@Nonnull RetrySettings retrySettings) {
      super.setRetrySettings(retrySettings);
      return this;
    }

    /** Builds the {@link BigtableBulkReadRowsCallSettings} object with the provided configuration. */
    @Override
    public BigtableBulkReadRowsCallSettings build() {
      return new BigtableBulkReadRowsCallSettings(this);
    }
  }
}

google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/data/v2/stub/EnhancedBigtableStub.java

+30-3
Original file line numberDiff line numberDiff line change
@@ -52,12 +52,15 @@
5252
import com.google.cloud.bigtable.data.v2.stub.mutaterows.MutateRowsBatchingDescriptor;
5353
import com.google.cloud.bigtable.data.v2.stub.mutaterows.MutateRowsRetryingCallable;
5454
import com.google.cloud.bigtable.data.v2.stub.readrows.FilterMarkerRowsCallable;
55+
import com.google.cloud.bigtable.data.v2.stub.readrows.ReadRowsBatchingDescriptor;
5556
import com.google.cloud.bigtable.data.v2.stub.readrows.ReadRowsResumptionStrategy;
5657
import com.google.cloud.bigtable.data.v2.stub.readrows.ReadRowsRetryCompletedCallable;
5758
import com.google.cloud.bigtable.data.v2.stub.readrows.ReadRowsUserCallable;
5859
import com.google.cloud.bigtable.data.v2.stub.readrows.RowMergingCallable;
5960
import com.google.cloud.bigtable.gaxx.retrying.ApiResultRetryAlgorithm;
6061
import com.google.cloud.bigtable.gaxx.tracing.WrappedTracerFactory;
62+
import com.google.common.base.Preconditions;
63+
import com.google.protobuf.ByteString;
6164
import io.opencensus.stats.Stats;
6265
import io.opencensus.stats.StatsRecorder;
6366
import io.opencensus.tags.Tagger;
@@ -381,9 +384,8 @@ private UnaryCallable<BulkMutation, Void> createBulkMutateRowsCallable() {
381384
}
382385

383386
/**
384-
* Creates a {@link com.google.api.gax.batching.BatcherImpl} to handle {@link
385-
* MutateRowsRequest.Entry} mutations. This is meant to be used for automatic batching with flow
386-
* control.
387+
* Creates a {@link BatcherImpl} to handle {@link MutateRowsRequest.Entry} mutations. This is
388+
* meant to be used for automatic batching with flow control.
387389
*
388390
* <ul>
389391
* <li>Uses {@link MutateRowsBatchingDescriptor} to spool the {@link RowMutationEntry} mutations
@@ -409,6 +411,31 @@ public Batcher<RowMutationEntry, Void> newMutateRowsBatcher(@Nonnull String tabl
409411
clientContext.getExecutor());
410412
}
411413

414+
/**
415+
* Creates a {@link BatcherImpl} to handle {@link Query#rowKey(String)}. This is meant for bulk
416+
* read with flow control.
417+
*
418+
* <ul>
419+
* <li>Uses {@link ReadRowsBatchingDescriptor} to merge the row-keys and send them out as {@link
420+
* Query}.
421+
* <li>Uses {@link #readRowsCallable()} to perform RPC.
422+
* <li>Batching thresholds can be configured from {@link
423+
* EnhancedBigtableStubSettings#bulkReadRowsSettings()}.
424+
* <li>Schedule retries for retryable exceptions until there are no more entries or there are no
425+
* more retry attempts left.
426+
* <li>Split the responses using {@link ReadRowsBatchingDescriptor}.
427+
* </ul>
428+
*/
429+
public Batcher<ByteString, Row> newBulkReadRowsBatcher(@Nonnull Query query) {
430+
Preconditions.checkNotNull(query, "query cannot be null");
431+
return new BatcherImpl<>(
432+
settings.bulkReadRowsSettings().getBatchingDescriptor(),
433+
readRowsCallable().all(),
434+
query,
435+
settings.bulkReadRowsSettings().getBatchingSettings(),
436+
clientContext.getExecutor());
437+
}
438+
412439
/**
413440
* Internal helper to create the base MutateRows callable chain. The chain is responsible for
414441
* retrying individual entry in case of error.

0 commit comments

Comments
 (0)