Skip to content

Commit 31b084a

Browse files
mutianfigorbernstein2gcf-owl-bot[bot]
authored
fix: a rare race condition in the row merger (#1939) (#1988)
* fix: a rare race condition in the row merger (#1939) * fix: a rare race condition in the row merger This would manifest as a hang when iterating over a ServerStream from ReadRows Change-Id: I74533c6714b40a68ec0ef81dadac747e10bee39d * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com> * 🦉 Updates from OwlBot post-processor See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --------- Co-authored-by: Igor Bernstein <[email protected]> Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 0285425 commit 31b084a

File tree

3 files changed

+126
-5
lines changed

3 files changed

+126
-5
lines changed

README.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -50,20 +50,20 @@ If you are using Maven without the BOM, add this to your dependencies:
5050
If you are using Gradle 5.x or later, add this to your dependencies:
5151

5252
```Groovy
53-
implementation platform('com.google.cloud:libraries-bom:26.16.0')
53+
implementation platform('com.google.cloud:libraries-bom:26.26.0')
5454
5555
implementation 'com.google.cloud:google-cloud-bigtable'
5656
```
5757
If you are using Gradle without BOM, add this to your dependencies:
5858

5959
```Groovy
60-
implementation 'com.google.cloud:google-cloud-bigtable:2.23.2'
60+
implementation 'com.google.cloud:google-cloud-bigtable:2.29.0'
6161
```
6262

6363
If you are using SBT, add this to your dependencies:
6464

6565
```Scala
66-
libraryDependencies += "com.google.cloud" % "google-cloud-bigtable" % "2.23.2"
66+
libraryDependencies += "com.google.cloud" % "google-cloud-bigtable" % "2.29.0"
6767
```
6868
<!-- {x-version-update-end} -->
6969

@@ -609,7 +609,7 @@ Java is a registered trademark of Oracle and/or its affiliates.
609609
[kokoro-badge-link-5]: http://storage.googleapis.com/cloud-devrel-public/java/badges/java-bigtable/java11.html
610610
[stability-image]: https://img.shields.io/badge/stability-stable-green
611611
[maven-version-image]: https://img.shields.io/maven-central/v/com.google.cloud/google-cloud-bigtable.svg
612-
[maven-version-link]: https://central.sonatype.com/artifact/com.google.cloud/google-cloud-bigtable/2.23.2
612+
[maven-version-link]: https://central.sonatype.com/artifact/com.google.cloud/google-cloud-bigtable/2.29.0
613613
[authentication]: https://github.com/googleapis/google-cloud-java#authentication
614614
[auth-scopes]: https://developers.google.com/identity/protocols/oauth2/scopes
615615
[predefined-iam-roles]: https://cloud.google.com/iam/docs/understanding-roles#predefined_roles

google-cloud-bigtable/src/main/java/com/google/cloud/bigtable/gaxx/reframing/ReframingResponseObserver.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -277,7 +277,7 @@ private void deliverUnsafe() {
277277
// Optimization: the inner loop will eager process any accumulated state, so reset the lock
278278
// for just this iteration. (If another event occurs during processing, it can increment the
279279
// lock to enqueue another iteration).
280-
lock.lazySet(1);
280+
lock.set(1);
281281

282282
// Process the upstream message if one exists.
283283
pollUpstream();

google-cloud-bigtable/src/test/java/com/google/cloud/bigtable/gaxx/reframing/ReframingResponseObserverTest.java

+121
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,12 @@
1515
*/
1616
package com.google.cloud.bigtable.gaxx.reframing;
1717

18+
import static com.google.common.truth.Truth.assertWithMessage;
19+
1820
import com.google.api.gax.rpc.StreamController;
1921
import com.google.cloud.bigtable.gaxx.testing.FakeStreamingApi.ServerStreamingStashCallable;
2022
import com.google.cloud.bigtable.gaxx.testing.FakeStreamingApi.ServerStreamingStashCallable.StreamControllerStash;
23+
import com.google.cloud.bigtable.gaxx.testing.MockStreamingApi;
2124
import com.google.cloud.bigtable.gaxx.testing.MockStreamingApi.MockResponseObserver;
2225
import com.google.cloud.bigtable.gaxx.testing.MockStreamingApi.MockServerStreamingCall;
2326
import com.google.cloud.bigtable.gaxx.testing.MockStreamingApi.MockServerStreamingCallable;
@@ -27,9 +30,13 @@
2730
import com.google.common.collect.ImmutableList;
2831
import com.google.common.collect.Queues;
2932
import com.google.common.truth.Truth;
33+
import java.util.ArrayList;
3034
import java.util.Arrays;
35+
import java.util.List;
3136
import java.util.Queue;
37+
import java.util.concurrent.Callable;
3238
import java.util.concurrent.CancellationException;
39+
import java.util.concurrent.CompletableFuture;
3340
import java.util.concurrent.CountDownLatch;
3441
import java.util.concurrent.ExecutionException;
3542
import java.util.concurrent.ExecutorService;
@@ -431,6 +438,120 @@ public String pop() {
431438
Truth.assertThat(finalError.getSuppressed()[0].getCause()).isSameInstanceAs(fakeCancelError);
432439
}
433440

441+
/**
442+
* Test race between a request() and onComplete (b/295866356). This will stress the concurrency
443+
* primitives in deliver() by running many iterations across many threads. Some race conditions
444+
* are very subtle and are very rare, so bugs in the implementation would present themselves as
445+
* flakes in this test. All flakes of this test should be investigated as a failure.
446+
*/
447+
@Test
448+
public void testRequestAndCompleteRaceCondition() throws Throwable {
449+
int concurrency = 20;
450+
int iterations = 20_000;
451+
452+
ExecutorService executor = Executors.newFixedThreadPool(concurrency);
453+
454+
List<Future<?>> results = new ArrayList<>();
455+
456+
for (int i = 0; i < concurrency; i++) {
457+
Future<?> result =
458+
executor.submit(
459+
(Callable<Void>)
460+
() -> {
461+
for (int j = 0; j < iterations; j++) {
462+
requestAndCompleteRaceConditionIteration();
463+
}
464+
return null;
465+
});
466+
results.add(result);
467+
}
468+
469+
executor.shutdown();
470+
471+
for (Future<?> result : results) {
472+
try {
473+
result.get();
474+
} catch (ExecutionException e) {
475+
throw e.getCause();
476+
}
477+
}
478+
}
479+
480+
private static void requestAndCompleteRaceConditionIteration()
481+
throws InterruptedException, ExecutionException {
482+
MockStreamingApi.MockResponseObserver<String> observer =
483+
new MockStreamingApi.MockResponseObserver<>(false);
484+
ReframingResponseObserver<String, String> underTest =
485+
new ReframingResponseObserver<>(
486+
observer, new ReframingResponseObserverTest.DasherizingReframer(1));
487+
488+
// This is intentionally not a Phaser: a Phaser seems to drastically reduce the reproduction
489+
// rate of the
490+
// original race condition.
491+
CountDownLatch readySignal = new CountDownLatch(2);
492+
CompletableFuture<Void> startSignal = new CompletableFuture<>();
493+
494+
ExecutorService executor = Executors.newFixedThreadPool(2);
495+
496+
Future<Void> f1 =
497+
executor.submit(
498+
() -> {
499+
// no setup, tell controller thread we are ready and wait for the start signal
500+
readySignal.countDown();
501+
startSignal.get();
502+
503+
// Race start
504+
underTest.onComplete();
505+
// Race end
506+
507+
return null;
508+
});
509+
510+
Future<Void> f2 =
511+
executor.submit(
512+
() -> {
513+
// Setup before race - simulate that the ServerStream iterator got one row and is now
514+
// checking if there
515+
// is another. This is the lead-up to the race with gRPC's onComplete
516+
underTest.onStart(
517+
new StreamController() {
518+
@Override
519+
public void cancel() {}
520+
521+
@Override
522+
public void disableAutoInboundFlowControl() {}
523+
524+
@Override
525+
public void request(int count) {}
526+
});
527+
observer.getController().request(1);
528+
underTest.onResponse("moo");
529+
530+
// Setup complete, tell controller thread we are ready and wait for the start signal
531+
readySignal.countDown();
532+
startSignal.get();
533+
534+
// Race start
535+
observer.getController().request(1);
536+
// Race end
537+
538+
return null;
539+
});
540+
executor.shutdown();
541+
542+
// Wait for worker setup
543+
readySignal.await();
544+
// Tell workers to race
545+
startSignal.complete(null);
546+
547+
// Wait for workers to finish
548+
f1.get();
549+
f2.get();
550+
551+
// the outer observer should be told of the completion of rpc
552+
assertWithMessage("outer observer should not hang").that(observer.isDone()).isTrue();
553+
}
554+
434555
/**
435556
* A simple implementation of a {@link Reframer}. The input string is split by dash, and the
436557
* output is concatenated by dashes. The test can verify M:N behavior by adjusting the

0 commit comments

Comments
 (0)