Skip to content

Connect to any stream member for consumers when using a load balancer #660

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 26, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Refine node picking for consumers
  • Loading branch information
acogoluegnes committed Nov 26, 2024
commit d8bdf7f8fa96e9780685b3148c93c3ba98b09e67
82 changes: 48 additions & 34 deletions src/main/java/com/rabbitmq/stream/impl/ConsumersCoordinator.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import static com.rabbitmq.stream.impl.Utils.*;
import static java.lang.String.format;
import static java.util.stream.Collectors.toList;

import com.rabbitmq.stream.*;
import com.rabbitmq.stream.Consumer;
Expand All @@ -35,7 +36,6 @@
import java.util.Map.Entry;
import java.util.NavigableSet;
import java.util.Objects;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
Expand All @@ -53,7 +53,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

class ConsumersCoordinator {
final class ConsumersCoordinator implements AutoCloseable {

static final int MAX_SUBSCRIPTIONS_PER_CLIENT = 256;
static final int MAX_ATTEMPT_BEFORE_FALLING_BACK_TO_LEADER = 5;
Expand All @@ -62,7 +62,6 @@ class ConsumersCoordinator {
static final OffsetSpecification DEFAULT_OFFSET_SPECIFICATION = OffsetSpecification.next();

private static final Logger LOGGER = LoggerFactory.getLogger(ConsumersCoordinator.class);
private final Random random = new Random();
private final StreamEnvironment environment;
private final ClientFactory clientFactory;
private final int maxConsumersByConnection;
Expand Down Expand Up @@ -115,8 +114,8 @@ Runnable subscribe(
return lock(
this.coordinatorLock,
() -> {
List<Client.Broker> candidates = findBrokersForStream(stream, forceReplica);
Client.Broker newNode = pickBroker(candidates);
List<BrokerWrapper> candidates = findCandidateNodes(stream, forceReplica);
Broker newNode = pickBroker(this.brokerPicker, candidates);
if (newNode == null) {
throw new IllegalStateException("No available node to subscribe to");
}
Expand Down Expand Up @@ -161,7 +160,7 @@ Runnable subscribe(

private void addToManager(
Broker node,
List<Broker> candidates,
List<BrokerWrapper> candidates,
SubscriptionTracker tracker,
OffsetSpecification offsetSpecification,
boolean isInitialSubscription) {
Expand Down Expand Up @@ -231,7 +230,7 @@ int managerCount() {
}

// package protected for testing
List<Client.Broker> findBrokersForStream(String stream, boolean forceReplica) {
List<BrokerWrapper> findCandidateNodes(String stream, boolean forceReplica) {
LOGGER.debug(
"Candidate lookup to consumer from '{}', forcing replica? {}", stream, forceReplica);
Map<String, Client.StreamMetadata> metadata =
Expand All @@ -254,12 +253,13 @@ List<Client.Broker> findBrokersForStream(String stream, boolean forceReplica) {
}
}

List<Client.Broker> replicas = streamMetadata.getReplicas();
if ((replicas == null || replicas.isEmpty()) && streamMetadata.getLeader() == null) {
Broker leader = streamMetadata.getLeader();
List<Broker> replicas = streamMetadata.getReplicas();
if ((replicas == null || replicas.isEmpty()) && leader == null) {
throw new IllegalStateException("No node available to consume from stream " + stream);
}

List<Client.Broker> brokers;
List<BrokerWrapper> brokers;
if (replicas == null || replicas.isEmpty()) {
if (forceReplica) {
throw new IllegalStateException(
Expand All @@ -268,21 +268,26 @@ List<Client.Broker> findBrokersForStream(String stream, boolean forceReplica) {
+ "consuming from leader has been deactivated for this consumer",
stream));
} else {
brokers = Collections.singletonList(streamMetadata.getLeader());
LOGGER.debug(
"Only leader node {} for consuming from {}", streamMetadata.getLeader(), stream);
brokers = Collections.singletonList(new BrokerWrapper(leader, true));
LOGGER.debug("Only leader node {} for consuming from {}", leader, stream);
}
} else {
LOGGER.debug("Replicas for consuming from {}: {}", stream, replicas);
brokers = new ArrayList<>(replicas);
brokers =
replicas.stream()
.map(b -> new BrokerWrapper(b, false))
.collect(Collectors.toCollection(ArrayList::new));
if (!forceReplica && leader != null) {
brokers.add(new BrokerWrapper(leader, true));
}
}

LOGGER.debug("Candidates to consume from {}: {}", stream, brokers);

return brokers;
}

private Callable<List<Broker>> findBrokersForStream(String stream) {
private Callable<List<BrokerWrapper>> findCandidateNodes(String stream) {
AtomicInteger attemptNumber = new AtomicInteger();
return () -> {
boolean mustUseReplica;
Expand All @@ -294,20 +299,10 @@ private Callable<List<Broker>> findBrokersForStream(String stream) {
}
LOGGER.debug(
"Looking for broker(s) for stream {}, forcing replica {}", stream, mustUseReplica);
return findBrokersForStream(stream, mustUseReplica);
return findCandidateNodes(stream, mustUseReplica);
};
}

/**
 * Selects one broker from the given list.
 *
 * <p>An empty list yields {@code null}, a single-element list yields that element, and a longer
 * list yields an element at an index drawn from the {@code random} field.
 *
 * @param brokers the brokers to choose from
 * @return the chosen broker, or {@code null} when the list is empty
 */
private Client.Broker pickBroker(List<Client.Broker> brokers) {
  int count = brokers.size();
  switch (count) {
    case 0:
      return null;
    case 1:
      // no need to draw a random index for a single candidate
      return brokers.get(0);
    default:
      return brokers.get(random.nextInt(count));
  }
}

public void close() {
Iterator<ClientSubscriptionsManager> iterator = this.managers.iterator();
while (iterator.hasNext()) {
Expand Down Expand Up @@ -584,7 +579,9 @@ private class ClientSubscriptionsManager implements Comparable<ClientSubscriptio
private final AtomicBoolean closed = new AtomicBoolean(false);

private ClientSubscriptionsManager(
Broker targetNode, List<Broker> candidates, Client.ClientParameters clientParameters) {
Broker targetNode,
List<BrokerWrapper> candidates,
Client.ClientParameters clientParameters) {
this.id = managerIdSequence.getAndIncrement();
this.trackerCount = 0;
AtomicReference<String> nameReference = new AtomicReference<>();
Expand Down Expand Up @@ -804,7 +801,7 @@ private ClientSubscriptionsManager(
.metadataListener(metadataListener)
.consumerUpdateListener(consumerUpdateListener),
keyForNode(targetNode),
candidates);
candidates.stream().map(BrokerWrapper::broker).collect(toList()));
this.client = clientFactory.client(clientFactoryContext);
this.node = brokerFromClient(this.client);
this.name = keyForNode(this.node);
Expand Down Expand Up @@ -834,15 +831,15 @@ private void assignConsumersToStream(
}
};

AsyncRetry.asyncRetry(findBrokersForStream(stream))
AsyncRetry.asyncRetry(findCandidateNodes(stream))
.description("Candidate lookup to consume from '%s'", stream)
.scheduler(environment.scheduledExecutorService())
.retry(ex -> !(ex instanceof StreamDoesNotExistException))
.delayPolicy(delayPolicy)
.build()
.thenAccept(
candidateNodes -> {
List<Broker> candidates = candidateNodes;
List<BrokerWrapper> candidates = candidateNodes;
if (candidates == null) {
LOGGER.debug("No candidate nodes to consume from '{}'", stream);
consumersClosingCallback.run();
Expand Down Expand Up @@ -876,7 +873,8 @@ private List<SubscriptionTracker> createSubscriptionTrackerList() {
return newSubscriptions;
}

private void maybeRecoverSubscription(List<Broker> candidates, SubscriptionTracker tracker) {
private void maybeRecoverSubscription(
List<BrokerWrapper> candidates, SubscriptionTracker tracker) {
if (tracker.compareAndSet(SubscriptionState.ACTIVE, SubscriptionState.RECOVERING)) {
try {
recoverSubscription(candidates, tracker);
Expand All @@ -897,12 +895,12 @@ private void maybeRecoverSubscription(List<Broker> candidates, SubscriptionTrack
}
}

private void recoverSubscription(List<Broker> candidates, SubscriptionTracker tracker) {
private void recoverSubscription(List<BrokerWrapper> candidates, SubscriptionTracker tracker) {
boolean reassignmentCompleted = false;
while (!reassignmentCompleted) {
try {
if (tracker.consumer.isOpen()) {
Broker broker = pickBroker(candidates);
Broker broker = pickBroker(brokerPicker, candidates);
LOGGER.debug("Using {} to resume consuming from {}", broker, tracker.stream);
synchronized (tracker.consumer) {
if (tracker.consumer.isOpen()) {
Expand Down Expand Up @@ -933,7 +931,7 @@ private void recoverSubscription(List<Broker> candidates, SubscriptionTracker tr
// maybe not a good candidate, let's refresh and retry for this one
candidates =
Utils.callAndMaybeRetry(
findBrokersForStream(tracker.stream),
findCandidateNodes(tracker.stream),
ex -> !(ex instanceof StreamDoesNotExistException),
recoveryBackOffDelayPolicy(),
"Candidate lookup to consume from '%s' (subscription recovery)",
Expand Down Expand Up @@ -1301,4 +1299,20 @@ static <T> int pickSlot(List<T> list, AtomicInteger sequence) {
}
return index;
}

/**
 * Unwraps the candidates into plain brokers.
 *
 * <p>When there is more than one candidate, entries flagged as leader are dropped; with zero or
 * one candidate, every entry is kept as is.
 *
 * @param brokers the wrapped candidates
 * @return the unwrapped brokers that remain after the optional leader filtering
 */
private static List<Broker> keepReplicasIfPossible(Collection<BrokerWrapper> brokers) {
  // leaders are only filtered out when there is an alternative to choose from
  boolean severalCandidates = brokers.size() > 1;
  return brokers.stream()
      .filter(wrapper -> !severalCandidates || !wrapper.isLeader())
      .map(wrapper -> wrapper.broker())
      .collect(toList());
}

/**
 * Applies the picking function to the candidates once they have been reduced by {@code
 * keepReplicasIfPossible}.
 *
 * @param picker the function that selects one broker out of a list
 * @param candidates the wrapped candidates to choose from
 * @return whatever the picking function returns for the reduced list
 */
static Broker pickBroker(
    Function<List<Broker>, Broker> picker, Collection<BrokerWrapper> candidates) {
  List<Broker> eligible = keepReplicasIfPossible(candidates);
  return picker.apply(eligible);
}
}
49 changes: 43 additions & 6 deletions src/main/java/com/rabbitmq/stream/impl/Utils.java
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,11 @@ static short encodeResponseCode(Short code) {
}

/**
 * Creates the coordinator client factory with {@code ConditionalClientFactory.RETRY_INTERVAL} as
 * the retry interval.
 *
 * @param environment the environment passed on to the two-argument overload
 * @return the client factory built by the two-argument overload
 */
static ClientFactory coordinatorClientFactory(StreamEnvironment environment) {
  Duration defaultRetryInterval = ConditionalClientFactory.RETRY_INTERVAL;
  return coordinatorClientFactory(environment, defaultRetryInterval);
}

static ClientFactory coordinatorClientFactory(
StreamEnvironment environment, Duration retryInterval) {
String messageFormat =
"%s. %s. "
+ "This may be due to the usage of a load balancer that makes topology discovery fail. "
Expand All @@ -156,7 +161,8 @@ static ClientFactory coordinatorClientFactory(StreamEnvironment environment) {
ClientFactory delegate = context1 -> new Client(context1.parameters());
ClientFactoryContext clientFactoryContext =
new ClientFactoryContext(parametersCopy, context.targetKey(), context.candidates());
return Utils.connectToAdvertisedNodeClientFactory(delegate).client(clientFactoryContext);
return Utils.connectToAdvertisedNodeClientFactory(delegate, retryInterval)
.client(clientFactoryContext);
} catch (TimeoutStreamException e) {
throw new TimeoutStreamException(
format(messageFormat, e.getMessage(), e.getCause().getMessage(), e.getCause()));
Expand All @@ -173,11 +179,6 @@ static ClientFactory coordinatorClientFactory(StreamEnvironment environment) {
};
}

/**
 * Wraps the given client factory using {@code ConditionalClientFactory.RETRY_INTERVAL} as the
 * retry interval.
 *
 * @param clientFactory the factory passed on to the two-argument overload
 * @return the client factory built by the two-argument overload
 */
static ClientFactory connectToAdvertisedNodeClientFactory(ClientFactory clientFactory) {
  Duration defaultRetryInterval = ConditionalClientFactory.RETRY_INTERVAL;
  return connectToAdvertisedNodeClientFactory(clientFactory, defaultRetryInterval);
}

static ClientFactory connectToAdvertisedNodeClientFactory(
ClientFactory clientFactory, Duration retryInterval) {
return new ConditionalClientFactory(
Expand Down Expand Up @@ -692,4 +693,40 @@ static <T> T lock(Lock lock, Supplier<T> action) {
lock.unlock();
}
}

/**
 * Immutable pair of a broker and a flag telling whether that broker was registered as a leader.
 *
 * <p>Equality and hash code are based on both the broker and the flag.
 */
static class BrokerWrapper {

  private final Client.Broker broker;
  private final boolean leader;

  /**
   * @param broker the wrapped broker
   * @param leader {@code true} to flag the broker as a leader
   */
  BrokerWrapper(Client.Broker broker, boolean leader) {
    this.broker = broker;
    this.leader = leader;
  }

  /** Returns the wrapped broker. */
  Client.Broker broker() {
    return this.broker;
  }

  /** Returns the leader flag given at construction. */
  boolean isLeader() {
    return this.leader;
  }

  @Override
  public boolean equals(Object o) {
    if (o == null) {
      return false;
    }
    if (getClass() != o.getClass()) {
      return false;
    }
    BrokerWrapper other = (BrokerWrapper) o;
    if (this.leader != other.leader) {
      return false;
    }
    return Objects.equals(this.broker, other.broker);
  }

  @Override
  public int hashCode() {
    return Objects.hash(this.broker, this.leader);
  }

  @Override
  public String toString() {
    return "BrokerWrapper{" + "broker=" + this.broker + ", leader=" + this.leader + '}';
  }
}
}
4 changes: 2 additions & 2 deletions src/test/java/com/rabbitmq/stream/Host.java
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2020-2023 Broadcom. All Rights Reserved.
// Copyright (c) 2020-2024 Broadcom. All Rights Reserved.
// The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries.
//
// This software, the RabbitMQ Stream Java client library, is dual-licensed under the
Expand Down Expand Up @@ -118,7 +118,7 @@ static Process rabbitmqStreams(String command) {
return executeCommand(rabbitmqStreamsCommand() + " " + command);
}

public static Process rabbitmqctlIgnoreError(String command) throws IOException {
public static Process rabbitmqctlIgnoreError(String command) {
return executeCommand(rabbitmqctlCommand() + " " + command, true);
}

Expand Down
Loading