Skip to content

Commit 440c88b

Browse files
authored
fix: make sure commitAsync always finishes (#3216)
The future that is returned by commitAsync() seems to never finish in some cases. The exact cause is unknown, so this change adds a number of safety precautions that should ensure that the future always completes eventually, either with a result or with an exception. A future that fails with an error is always easier to debug and handle than one that never returns a value.
1 parent e859b29 commit 440c88b

File tree

7 files changed

+72
-19
lines changed

7 files changed

+72
-19
lines changed

google-cloud-spanner/src/main/java/com/google/cloud/spanner/TransactionRunnerImpl.java

+43-19
Original file line numberDiff line numberDiff line change
@@ -306,12 +306,23 @@ private void createTxnAsync(final SettableApiFuture<Void> res) {
306306

307307
void commit() {
308308
try {
309-
commitResponse = commitAsync().get();
310-
} catch (InterruptedException e) {
309+
// Normally, Gax will take care of any timeouts, but we add a timeout for getting the value
310+
// from the future here as well to make sure the call always finishes, even if the future
311+
// never resolves.
312+
commitResponse =
313+
commitAsync()
314+
.get(
315+
rpc.getCommitRetrySettings().getTotalTimeout().getSeconds() + 5,
316+
TimeUnit.SECONDS);
317+
} catch (InterruptedException | TimeoutException e) {
311318
if (commitFuture != null) {
312319
commitFuture.cancel(true);
313320
}
314-
throw SpannerExceptionFactory.propagateInterrupt(e);
321+
if (e instanceof InterruptedException) {
322+
throw SpannerExceptionFactory.propagateInterrupt((InterruptedException) e);
323+
} else {
324+
throw SpannerExceptionFactory.propagateTimeout((TimeoutException) e);
325+
}
315326
} catch (ExecutionException e) {
316327
throw SpannerExceptionFactory.newSpannerException(e.getCause() == null ? e : e.getCause());
317328
}
@@ -422,6 +433,14 @@ public void run() {
422433
commitFuture.addListener(
423434
() -> {
424435
try (IScope ignore = tracer.withSpan(opSpan)) {
436+
if (!commitFuture.isDone()) {
437+
// This should not be possible, considering that we are in a listener for the
438+
// future, but we add a result here as well as a safety precaution.
439+
res.setException(
440+
SpannerExceptionFactory.newSpannerException(
441+
ErrorCode.INTERNAL, "commitFuture is not done"));
442+
return;
443+
}
425444
com.google.spanner.v1.CommitResponse proto = commitFuture.get();
426445
if (!proto.hasCommitTimestamp()) {
427446
throw newSpannerException(
@@ -430,30 +449,35 @@ public void run() {
430449
span.addAnnotation("Commit Done");
431450
opSpan.end();
432451
res.set(new CommitResponse(proto));
433-
} catch (Throwable e) {
434-
if (e instanceof ExecutionException) {
435-
e =
436-
SpannerExceptionFactory.newSpannerException(
437-
e.getCause() == null ? e : e.getCause());
438-
} else if (e instanceof InterruptedException) {
439-
e = SpannerExceptionFactory.propagateInterrupt((InterruptedException) e);
440-
} else {
441-
e = SpannerExceptionFactory.newSpannerException(e);
452+
} catch (Throwable throwable) {
453+
SpannerException resultException;
454+
try {
455+
if (throwable instanceof ExecutionException) {
456+
resultException =
457+
SpannerExceptionFactory.asSpannerException(
458+
throwable.getCause() == null ? throwable : throwable.getCause());
459+
} else if (throwable instanceof InterruptedException) {
460+
resultException =
461+
SpannerExceptionFactory.propagateInterrupt(
462+
(InterruptedException) throwable);
463+
} else {
464+
resultException = SpannerExceptionFactory.asSpannerException(throwable);
465+
}
466+
span.addAnnotation("Commit Failed", resultException);
467+
opSpan.setStatus(resultException);
468+
opSpan.end();
469+
res.setException(onError(resultException, false));
470+
} catch (Throwable unexpectedError) {
471+
// This is a safety precaution to make sure that a result is always returned.
472+
res.setException(unexpectedError);
442473
}
443-
span.addAnnotation("Commit Failed", e);
444-
opSpan.setStatus(e);
445-
opSpan.end();
446-
res.setException(onError((SpannerException) e, false));
447474
}
448475
},
449476
MoreExecutors.directExecutor());
450477
} catch (InterruptedException e) {
451478
res.setException(SpannerExceptionFactory.propagateInterrupt(e));
452479
} catch (TimeoutException e) {
453480
res.setException(SpannerExceptionFactory.propagateTimeout(e));
454-
} catch (ExecutionException e) {
455-
res.setException(
456-
SpannerExceptionFactory.newSpannerException(e.getCause() == null ? e : e.getCause()));
457481
} catch (Throwable e) {
458482
res.setException(
459483
SpannerExceptionFactory.newSpannerException(e.getCause() == null ? e : e.getCause()));

google-cloud-spanner/src/main/java/com/google/cloud/spanner/spi/v1/GapicSpannerRpc.java

+10
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ public class GapicSpannerRpc implements SpannerRpc {
240240
private final Set<Code> executeQueryRetryableCodes;
241241
private final RetrySettings readRetrySettings;
242242
private final Set<Code> readRetryableCodes;
243+
private final RetrySettings commitRetrySettings;
243244
private final SpannerStub partitionedDmlStub;
244245
private final RetrySettings partitionedDmlRetrySettings;
245246
private final InstanceAdminStubSettings instanceAdminStubSettings;
@@ -398,6 +399,8 @@ public GapicSpannerRpc(final SpannerOptions options) {
398399
options.getSpannerStubSettings().executeStreamingSqlSettings().getRetrySettings();
399400
this.executeQueryRetryableCodes =
400401
options.getSpannerStubSettings().executeStreamingSqlSettings().getRetryableCodes();
402+
this.commitRetrySettings =
403+
options.getSpannerStubSettings().commitSettings().getRetrySettings();
401404
partitionedDmlRetrySettings =
402405
options
403406
.getSpannerStubSettings()
@@ -508,6 +511,8 @@ public <RequestT, ResponseT> UnaryCallable<RequestT, ResponseT> createUnaryCalla
508511
this.readRetryableCodes = null;
509512
this.executeQueryRetrySettings = null;
510513
this.executeQueryRetryableCodes = null;
514+
this.commitRetrySettings =
515+
SpannerStubSettings.newBuilder().commitSettings().getRetrySettings();
511516
this.partitionedDmlStub = null;
512517
this.databaseAdminStubSettings = null;
513518
this.instanceAdminStubSettings = null;
@@ -1801,6 +1806,11 @@ public CommitResponse commit(CommitRequest commitRequest, @Nullable Map<Option,
18011806
return get(commitAsync(commitRequest, options));
18021807
}
18031808

1809+
@Override
1810+
public RetrySettings getCommitRetrySettings() {
1811+
return commitRetrySettings;
1812+
}
1813+
18041814
@Override
18051815
public ApiFuture<Empty> rollbackAsync(RollbackRequest request, @Nullable Map<Option, ?> options) {
18061816
GrpcCallContext context =

google-cloud-spanner/src/main/java/com/google/cloud/spanner/spi/v1/SpannerRpc.java

+4
Original file line numberDiff line numberDiff line change
@@ -469,6 +469,10 @@ CommitResponse commit(CommitRequest commitRequest, @Nullable Map<Option, ?> opti
469469
ApiFuture<CommitResponse> commitAsync(
470470
CommitRequest commitRequest, @Nullable Map<Option, ?> options);
471471

472+
default RetrySettings getCommitRetrySettings() {
473+
return SpannerStubSettings.newBuilder().commitSettings().getRetrySettings();
474+
}
475+
472476
void rollback(RollbackRequest request, @Nullable Map<Option, ?> options) throws SpannerException;
473477

474478
ApiFuture<Empty> rollbackAsync(RollbackRequest request, @Nullable Map<Option, ?> options);

google-cloud-spanner/src/test/java/com/google/cloud/spanner/SessionImplTest.java

+2
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,8 @@ public void setUp() {
138138
when(rpc.getExecuteQueryRetryableCodes())
139139
.thenReturn(
140140
SpannerStubSettings.newBuilder().executeStreamingSqlSettings().getRetryableCodes());
141+
when(rpc.getCommitRetrySettings())
142+
.thenReturn(SpannerStubSettings.newBuilder().commitSettings().getRetrySettings());
141143
session = spanner.getSessionClient(db).createSession();
142144
Span oTspan = mock(Span.class);
143145
ISpan span = new OpenTelemetrySpan(oTspan);

google-cloud-spanner/src/test/java/com/google/cloud/spanner/TransactionContextImplTest.java

+3
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import com.google.api.core.ApiFutures;
2929
import com.google.cloud.spanner.TransactionRunnerImpl.TransactionContextImpl;
3030
import com.google.cloud.spanner.spi.v1.SpannerRpc;
31+
import com.google.cloud.spanner.v1.stub.SpannerStubSettings;
3132
import com.google.protobuf.ByteString;
3233
import com.google.protobuf.Timestamp;
3334
import com.google.rpc.Code;
@@ -80,6 +81,8 @@ public void setup() {
8081
when(tracer.spanBuilderWithExplicitParent(
8182
eq(SpannerImpl.BATCH_UPDATE), eq(span), any(Attributes.class)))
8283
.thenReturn(span);
84+
when(rpc.getCommitRetrySettings())
85+
.thenReturn(SpannerStubSettings.newBuilder().commitSettings().getRetrySettings());
8386
}
8487

8588
private TransactionContextImpl createContext() {

google-cloud-spanner/src/test/java/com/google/cloud/spanner/TransactionManagerImplTest.java

+5
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import com.google.cloud.grpc.GrpcTransportOptions.ExecutorFactory;
3636
import com.google.cloud.spanner.TransactionManager.TransactionState;
3737
import com.google.cloud.spanner.spi.v1.SpannerRpc;
38+
import com.google.cloud.spanner.v1.stub.SpannerStubSettings;
3839
import com.google.protobuf.ByteString;
3940
import com.google.protobuf.Empty;
4041
import com.google.spanner.v1.BeginTransactionRequest;
@@ -248,6 +249,8 @@ public void usesPreparedTransaction() {
248249
com.google.protobuf.Timestamp.newBuilder()
249250
.setSeconds(System.currentTimeMillis() * 1000))
250251
.build()));
252+
when(rpc.getCommitRetrySettings())
253+
.thenReturn(SpannerStubSettings.newBuilder().commitSettings().getRetrySettings());
251254
DatabaseId db = DatabaseId.of("test", "test", "test");
252255
try (SpannerImpl spanner = new SpannerImpl(rpc, options)) {
253256
DatabaseClient client = spanner.getDatabaseClient(db);
@@ -332,6 +335,8 @@ public void inlineBegin() {
332335
com.google.protobuf.Timestamp.newBuilder()
333336
.setSeconds(System.currentTimeMillis() * 1000))
334337
.build()));
338+
when(rpc.getCommitRetrySettings())
339+
.thenReturn(SpannerStubSettings.newBuilder().commitSettings().getRetrySettings());
335340
DatabaseId db = DatabaseId.of("test", "test", "test");
336341
try (SpannerImpl spanner = new SpannerImpl(rpc, options)) {
337342
DatabaseClient client = spanner.getDatabaseClient(db);

google-cloud-spanner/src/test/java/com/google/cloud/spanner/TransactionRunnerImplTest.java

+5
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import com.google.cloud.spanner.SessionClient.SessionId;
3636
import com.google.cloud.spanner.TransactionRunnerImpl.TransactionContextImpl;
3737
import com.google.cloud.spanner.spi.v1.SpannerRpc;
38+
import com.google.cloud.spanner.v1.stub.SpannerStubSettings;
3839
import com.google.common.base.Preconditions;
3940
import com.google.protobuf.ByteString;
4041
import com.google.protobuf.Duration;
@@ -141,6 +142,8 @@ public void setUp() {
141142
CommitResponse.newBuilder()
142143
.setCommitTimestamp(Timestamp.getDefaultInstance())
143144
.build()));
145+
when(rpc.getCommitRetrySettings())
146+
.thenReturn(SpannerStubSettings.newBuilder().commitSettings().getRetrySettings());
144147
when(rpc.rollbackAsync(Mockito.any(RollbackRequest.class), Mockito.anyMap()))
145148
.thenReturn(ApiFutures.immediateFuture(Empty.getDefaultInstance()));
146149
Span oTspan = mock(Span.class);
@@ -196,6 +199,8 @@ public void usesPreparedTransaction() {
196199
.setCommitTimestamp(
197200
Timestamp.newBuilder().setSeconds(System.currentTimeMillis() * 1000))
198201
.build()));
202+
when(rpc.getCommitRetrySettings())
203+
.thenReturn(SpannerStubSettings.newBuilder().commitSettings().getRetrySettings());
199204
DatabaseId db = DatabaseId.of("test", "test", "test");
200205
try (SpannerImpl spanner = new SpannerImpl(rpc, options)) {
201206
DatabaseClient client = spanner.getDatabaseClient(db);

0 commit comments

Comments
 (0)