Skip to content
This repository was archived by the owner on Sep 26, 2023. It is now read-only.

Commit fff2bab

Browse files
feat: dynamic channel pool scaled by number of outstanding requests (#1569)
* merge RefreshingManagedChannel & SafeShutdownManagedChannel into ChannelPool. The eventual goal is to allow channel pool to safely add and remove channels. The code has been refactored roughly as: - SafeShutdownManagedChannel is now ChannelPool.Entry and ReleasingClientCall - RefreshingManagedChannel has been merged into ChannelPool as a pair of functions scheduleNextRefresh & refresh * migrate SafeShutdownManagedChannel tests * migrate old tests and remove RefreshingManagedChannel and SafeShutdownManagedChannel * fix test * address feedback * fix race condition on refresh() * fix warnings in test * Update gax-grpc/src/test/java/com/google/api/gax/grpc/ChannelPoolTest.java Co-authored-by: Chanseok Oh <[email protected]> * Update gax-grpc/src/test/java/com/google/api/gax/grpc/ChannelPoolTest.java Co-authored-by: Chanseok Oh <[email protected]> * handle race condition * Update gax-grpc/src/main/java/com/google/api/gax/grpc/ChannelPool.java Co-authored-by: Chanseok Oh <[email protected]> * introduce dynamic channel pool * fix test after broken merge * format * address feedback * remove unused import * inline old factory methods Co-authored-by: Chanseok Oh <[email protected]>
1 parent 7141860 commit fff2bab

File tree

9 files changed

+648
-188
lines changed

9 files changed

+648
-188
lines changed

gax-grpc/src/main/java/com/google/api/gax/grpc/ChannelPool.java

Lines changed: 171 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131

3232
import com.google.api.core.InternalApi;
3333
import com.google.common.annotations.VisibleForTesting;
34+
import com.google.common.base.Preconditions;
3435
import com.google.common.collect.ImmutableList;
3536
import io.grpc.CallOptions;
3637
import io.grpc.Channel;
@@ -46,14 +47,12 @@
4647
import java.util.List;
4748
import java.util.concurrent.Executors;
4849
import java.util.concurrent.ScheduledExecutorService;
49-
import java.util.concurrent.ScheduledFuture;
5050
import java.util.concurrent.TimeUnit;
5151
import java.util.concurrent.atomic.AtomicBoolean;
5252
import java.util.concurrent.atomic.AtomicInteger;
5353
import java.util.concurrent.atomic.AtomicReference;
5454
import java.util.logging.Level;
5555
import java.util.logging.Logger;
56-
import javax.annotation.Nullable;
5756
import org.threeten.bp.Duration;
5857

5958
/**
@@ -68,93 +67,61 @@
6867
*/
6968
class ChannelPool extends ManagedChannel {
7069
private static final Logger LOG = Logger.getLogger(ChannelPool.class.getName());
71-
72-
// size greater than 1 to allow multiple channel to refresh at the same time
73-
// size not too large so refreshing channels doesn't use too many threads
74-
private static final int CHANNEL_REFRESH_EXECUTOR_SIZE = 2;
7570
private static final Duration REFRESH_PERIOD = Duration.ofMinutes(50);
76-
private static final double JITTER_PERCENTAGE = 0.15;
71+
72+
private final ChannelPoolSettings settings;
73+
private final ChannelFactory channelFactory;
74+
private final ScheduledExecutorService executor;
7775

7876
private final Object entryWriteLock = new Object();
79-
private final AtomicReference<ImmutableList<Entry>> entries = new AtomicReference<>();
77+
@VisibleForTesting final AtomicReference<ImmutableList<Entry>> entries = new AtomicReference<>();
8078
private final AtomicInteger indexTicker = new AtomicInteger();
8179
private final String authority;
82-
// if set, ChannelPool will manage the life cycle of channelRefreshExecutorService
83-
@Nullable private final ScheduledExecutorService channelRefreshExecutorService;
84-
private final ChannelFactory channelFactory;
85-
86-
private volatile ScheduledFuture<?> nextScheduledRefresh = null;
87-
88-
/**
89-
* Factory method to create a non-refreshing channel pool
90-
*
91-
* @param poolSize number of channels in the pool
92-
* @param channelFactory method to create the channels
93-
* @return ChannelPool of non-refreshing channels
94-
*/
95-
static ChannelPool create(int poolSize, ChannelFactory channelFactory) throws IOException {
96-
return new ChannelPool(channelFactory, poolSize, null);
97-
}
9880

99-
/**
100-
* Factory method to create a refreshing channel pool
101-
*
102-
* <p>Package-private for testing purposes only
103-
*
104-
* @param poolSize number of channels in the pool
105-
* @param channelFactory method to create the channels
106-
* @param channelRefreshExecutorService periodically refreshes the channels; its life cycle will
107-
* be managed by ChannelPool
108-
* @return ChannelPool of refreshing channels
109-
*/
110-
@VisibleForTesting
111-
static ChannelPool createRefreshing(
112-
int poolSize,
113-
ChannelFactory channelFactory,
114-
ScheduledExecutorService channelRefreshExecutorService)
81+
static ChannelPool create(ChannelPoolSettings settings, ChannelFactory channelFactory)
11582
throws IOException {
116-
return new ChannelPool(channelFactory, poolSize, channelRefreshExecutorService);
117-
}
118-
119-
/**
120-
* Factory method to create a refreshing channel pool
121-
*
122-
* @param poolSize number of channels in the pool
123-
* @param channelFactory method to create the channels
124-
* @return ChannelPool of refreshing channels
125-
*/
126-
static ChannelPool createRefreshing(int poolSize, final ChannelFactory channelFactory)
127-
throws IOException {
128-
return createRefreshing(
129-
poolSize, channelFactory, Executors.newScheduledThreadPool(CHANNEL_REFRESH_EXECUTOR_SIZE));
83+
return new ChannelPool(settings, channelFactory, Executors.newSingleThreadScheduledExecutor());
13084
}
13185

13286
/**
13387
* Initializes the channel pool. Assumes that all channels have the same authority.
13488
*
89+
* @param settings options for controlling the ChannelPool sizing behavior
13590
* @param channelFactory method to create the channels
136-
* @param poolSize number of channels in the pool
137-
* @param channelRefreshExecutorService periodically refreshes the channels
91+
* @param executor schedules the periodic pool resizing and channel refresh tasks
13892
*/
139-
private ChannelPool(
93+
@VisibleForTesting
94+
ChannelPool(
95+
ChannelPoolSettings settings,
14096
ChannelFactory channelFactory,
141-
int poolSize,
142-
@Nullable ScheduledExecutorService channelRefreshExecutorService)
97+
ScheduledExecutorService executor)
14398
throws IOException {
99+
this.settings = settings;
144100
this.channelFactory = channelFactory;
145101

146102
ImmutableList.Builder<Entry> initialListBuilder = ImmutableList.builder();
147103

148-
for (int i = 0; i < poolSize; i++) {
104+
for (int i = 0; i < settings.getInitialChannelCount(); i++) {
149105
initialListBuilder.add(new Entry(channelFactory.createSingleChannel()));
150106
}
151107

152108
entries.set(initialListBuilder.build());
153109
authority = entries.get().get(0).channel.authority();
154-
this.channelRefreshExecutorService = channelRefreshExecutorService;
155-
156-
if (channelRefreshExecutorService != null) {
157-
nextScheduledRefresh = scheduleNextRefresh();
110+
this.executor = executor;
111+
112+
if (!settings.isStaticSize()) {
113+
executor.scheduleAtFixedRate(
114+
this::resizeSafely,
115+
ChannelPoolSettings.RESIZE_INTERVAL.getSeconds(),
116+
ChannelPoolSettings.RESIZE_INTERVAL.getSeconds(),
117+
TimeUnit.SECONDS);
118+
}
119+
if (settings.isPreemptiveRefreshEnabled()) {
120+
executor.scheduleAtFixedRate(
121+
this::refreshSafely,
122+
REFRESH_PERIOD.getSeconds(),
123+
REFRESH_PERIOD.getSeconds(),
124+
TimeUnit.SECONDS);
158125
}
159126
}
160127

@@ -187,12 +154,9 @@ public ManagedChannel shutdown() {
187154
for (Entry entry : localEntries) {
188155
entry.channel.shutdown();
189156
}
190-
if (nextScheduledRefresh != null) {
191-
nextScheduledRefresh.cancel(true);
192-
}
193-
if (channelRefreshExecutorService != null) {
157+
if (executor != null) {
194158
// shutdownNow will cancel scheduled tasks
195-
channelRefreshExecutorService.shutdownNow();
159+
executor.shutdownNow();
196160
}
197161
return this;
198162
}
@@ -206,7 +170,7 @@ public boolean isShutdown() {
206170
return false;
207171
}
208172
}
209-
return channelRefreshExecutorService == null || channelRefreshExecutorService.isShutdown();
173+
return executor == null || executor.isShutdown();
210174
}
211175

212176
/** {@inheritDoc} */
@@ -218,7 +182,8 @@ public boolean isTerminated() {
218182
return false;
219183
}
220184
}
221-
return channelRefreshExecutorService == null || channelRefreshExecutorService.isTerminated();
185+
186+
return executor == null || executor.isTerminated();
222187
}
223188

224189
/** {@inheritDoc} */
@@ -228,11 +193,8 @@ public ManagedChannel shutdownNow() {
228193
for (Entry entry : localEntries) {
229194
entry.channel.shutdownNow();
230195
}
231-
if (nextScheduledRefresh != null) {
232-
nextScheduledRefresh.cancel(true);
233-
}
234-
if (channelRefreshExecutorService != null) {
235-
channelRefreshExecutorService.shutdownNow();
196+
if (executor != null) {
197+
executor.shutdownNow();
236198
}
237199
return this;
238200
}
@@ -249,25 +211,131 @@ public boolean awaitTermination(long timeout, TimeUnit unit) throws InterruptedE
249211
}
250212
entry.channel.awaitTermination(awaitTimeNanos, TimeUnit.NANOSECONDS);
251213
}
252-
if (channelRefreshExecutorService != null) {
214+
if (executor != null) {
253215
long awaitTimeNanos = endTimeNanos - System.nanoTime();
254-
channelRefreshExecutorService.awaitTermination(awaitTimeNanos, TimeUnit.NANOSECONDS);
216+
executor.awaitTermination(awaitTimeNanos, TimeUnit.NANOSECONDS);
255217
}
256218
return isTerminated();
257219
}
258220

259-
/** Scheduling loop. */
260-
private ScheduledFuture<?> scheduleNextRefresh() {
261-
long delayPeriod = REFRESH_PERIOD.toMillis();
262-
long jitter = (long) ((Math.random() - 0.5) * JITTER_PERCENTAGE * delayPeriod);
263-
long delay = jitter + delayPeriod;
264-
return channelRefreshExecutorService.schedule(
265-
() -> {
266-
scheduleNextRefresh();
267-
refresh();
268-
},
269-
delay,
270-
TimeUnit.MILLISECONDS);
221+
private void resizeSafely() {
222+
try {
223+
synchronized (entryWriteLock) {
224+
resize();
225+
}
226+
} catch (Exception e) {
227+
LOG.log(Level.WARNING, "Failed to resize channel pool", e);
228+
}
229+
}
230+
231+
/**
232+
* Resize the number of channels based on the number of outstanding RPCs.
233+
*
234+
* <p>This method is expected to be called on a fixed interval. On every invocation it will:
235+
*
236+
* <ul>
237+
* <li>Get the maximum number of outstanding RPCs since last invocation
238+
* <li>Determine a valid range of number of channels to handle that many outstanding RPCs
239+
* <li>If the current number of channel falls outside of that range, add or remove at most
240+
* {@link ChannelPoolSettings#MAX_RESIZE_DELTA} to get closer to middle of that range.
241+
* </ul>
242+
*
243+
* <p>Not threadsafe, must be called under the entryWriteLock monitor
244+
*/
245+
@VisibleForTesting
246+
void resize() {
247+
List<Entry> localEntries = entries.get();
248+
// Estimate the peak of RPCs in the last interval by summing the peak of RPCs per channel
249+
int actualOutstandingRpcs =
250+
localEntries.stream().mapToInt(Entry::getAndResetMaxOutstanding).sum();
251+
252+
// Number of channels if each channel operated at max capacity
253+
int minChannels =
254+
(int) Math.ceil(actualOutstandingRpcs / (double) settings.getMaxRpcsPerChannel());
255+
// Limit the threshold to absolute range
256+
if (minChannels < settings.getMinChannelCount()) {
257+
minChannels = settings.getMinChannelCount();
258+
}
259+
260+
// Number of channels if each channel operated at minimum capacity
261+
// Note: getMinRpcsPerChannel() can return 0, but division by 0 shouldn't cause a problem.
262+
int maxChannels =
263+
(int) Math.ceil(actualOutstandingRpcs / (double) settings.getMinRpcsPerChannel());
264+
// Limit the threshold to absolute range
265+
if (maxChannels > settings.getMaxChannelCount()) {
266+
maxChannels = settings.getMaxChannelCount();
267+
}
268+
if (maxChannels < minChannels) {
269+
maxChannels = minChannels;
270+
}
271+
272+
// If the pool were to be resized, try to aim for the middle of the bound, but limit rate of
273+
// change.
274+
int tentativeTarget = (maxChannels + minChannels) / 2;
275+
int currentSize = localEntries.size();
276+
int delta = tentativeTarget - currentSize;
277+
int dampenedTarget = tentativeTarget;
278+
if (Math.abs(delta) > ChannelPoolSettings.MAX_RESIZE_DELTA) {
279+
dampenedTarget =
280+
currentSize + (int) Math.copySign(ChannelPoolSettings.MAX_RESIZE_DELTA, delta);
281+
}
282+
283+
// Only resize the pool when thresholds are crossed
284+
if (localEntries.size() < minChannels) {
285+
LOG.fine(
286+
String.format(
287+
"Detected throughput peak of %d, expanding channel pool size: %d -> %d.",
288+
actualOutstandingRpcs, currentSize, dampenedTarget));
289+
290+
expand(dampenedTarget);
291+
} else if (localEntries.size() > maxChannels) {
292+
LOG.fine(
293+
String.format(
294+
"Detected throughput drop to %d, shrinking channel pool size: %d -> %d.",
295+
actualOutstandingRpcs, currentSize, dampenedTarget));
296+
297+
shrink(dampenedTarget);
298+
}
299+
}
300+
301+
/** Not threadsafe, must be called under the entryWriteLock monitor */
302+
private void shrink(int desiredSize) {
303+
ImmutableList<Entry> localEntries = entries.get();
304+
Preconditions.checkState(
305+
localEntries.size() >= desiredSize, "current size is already smaller than the desired");
306+
307+
// Set the new list
308+
entries.set(localEntries.subList(0, desiredSize));
309+
// clean up removed entries
310+
List<Entry> removed = localEntries.subList(desiredSize, localEntries.size());
311+
removed.forEach(Entry::requestShutdown);
312+
}
313+
314+
/** Not threadsafe, must be called under the entryWriteLock monitor */
315+
private void expand(int desiredSize) {
316+
List<Entry> localEntries = entries.get();
317+
Preconditions.checkState(
318+
localEntries.size() <= desiredSize, "current size is already bigger than the desired");
319+
320+
ImmutableList.Builder<Entry> newEntries = ImmutableList.<Entry>builder().addAll(localEntries);
321+
322+
for (int i = 0; i < desiredSize - localEntries.size(); i++) {
323+
try {
324+
newEntries.add(new Entry(channelFactory.createSingleChannel()));
325+
} catch (IOException e) {
326+
LOG.log(Level.WARNING, "Failed to add channel", e);
327+
}
328+
}
329+
330+
entries.set(newEntries.build());
331+
}
332+
333+
private void refreshSafely() {
334+
try {
335+
refresh();
336+
} catch (Exception e) {
337+
LOG.log(Level.WARNING, "Failed to pre-emptively refresh channnels", e);
338+
}
271339
}
272340

273341
/**
@@ -341,13 +409,15 @@ private Entry getEntry(int affinity) {
341409
List<Entry> localEntries = entries.get();
342410

343411
int index = Math.abs(affinity % localEntries.size());
412+
344413
return localEntries.get(index);
345414
}
346415

347416
/** Bundles a gRPC {@link ManagedChannel} with some usage accounting. */
348417
private static class Entry {
349418
private final ManagedChannel channel;
350419
private final AtomicInteger outstandingRpcs = new AtomicInteger(0);
420+
private final AtomicInteger maxOutstanding = new AtomicInteger();
351421

352422
// Flag that the channel should be closed once all of the outstanding RPC complete.
353423
private final AtomicBoolean shutdownRequested = new AtomicBoolean();
@@ -358,6 +428,10 @@ private Entry(ManagedChannel channel) {
358428
this.channel = channel;
359429
}
360430

431+
int getAndResetMaxOutstanding() {
432+
return maxOutstanding.getAndSet(outstandingRpcs.get());
433+
}
434+
361435
/**
362436
* Try to increment the outstanding RPC count. The method will return false if the channel is
363437
* closing and the caller should pick a different channel. If the method returned true, the
@@ -366,7 +440,13 @@ private Entry(ManagedChannel channel) {
366440
*/
367441
private boolean retain() {
368442
// register desire to start RPC
369-
outstandingRpcs.incrementAndGet();
443+
int currentOutstanding = outstandingRpcs.incrementAndGet();
444+
445+
// Rough book keeping
446+
int prevMax = maxOutstanding.get();
447+
if (currentOutstanding > prevMax) {
448+
maxOutstanding.incrementAndGet();
449+
}
370450

371451
// abort if the channel is closing
372452
if (shutdownRequested.get()) {

0 commit comments

Comments
 (0)