CASSANDRASC-86 Add Random Delays Between Retry Attempts for Health Checks
Patch by Yuriy Semchyshyn; Reviewed by Yifan Cai and Francisco Guerrero for CASSANDRASC-86
diff --git a/CHANGES.txt b/CHANGES.txt
index 3f1b4cc..27fba7e 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,6 @@
1.0.0
-----
+ * Startup Validation Failures when Checking Sidecar Connectivity (CASSANDRASC-86)
* Add support for additional digest validation during SSTable upload (CASSANDRASC-97)
* Add sidecar client changes for restore from S3 (CASSANDRASC-95)
* Add restore SSTables from S3 into Cassandra feature to Cassandra Sidecar (CASSANDRASC-92)
diff --git a/client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClient.java b/client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClient.java
index 42b7093..c89fc9a 100644
--- a/client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClient.java
+++ b/client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClient.java
@@ -69,6 +69,7 @@
protected RequestExecutor executor;
protected final RetryPolicy defaultRetryPolicy;
protected final RetryPolicy ignoreConflictRetryPolicy;
+ protected final RetryPolicy oncePerInstanceRetryPolicy;
protected RequestContext.Builder baseBuilder;
public SidecarClient(SidecarInstancesProvider instancesProvider,
@@ -80,6 +81,8 @@
ignoreConflictRetryPolicy = new IgnoreConflictRetryPolicy(sidecarClientConfig.maxRetries(),
sidecarClientConfig.retryDelayMillis(),
sidecarClientConfig.maxRetryDelayMillis());
+ oncePerInstanceRetryPolicy = new OncePerInstanceRetryPolicy(sidecarClientConfig.minimumHealthRetryDelay(),
+ sidecarClientConfig.maximumHealthRetryDelay());
baseBuilder = new RequestContext.Builder()
.instanceSelectionPolicy(new RandomInstanceSelectionPolicy(instancesProvider))
.retryPolicy(defaultRetryPolicy);
@@ -95,7 +98,7 @@
{
return executor.executeRequestAsync(requestBuilder()
.sidecarHealthRequest()
- .retryPolicy(new OncePerInstanceRetryPolicy())
+ .retryPolicy(oncePerInstanceRetryPolicy)
.build());
}
@@ -110,7 +113,7 @@
{
return executor.executeRequestAsync(requestBuilder()
.cassandraHealthRequest()
- .retryPolicy(new OncePerInstanceRetryPolicy())
+ .retryPolicy(oncePerInstanceRetryPolicy)
.build());
}
diff --git a/client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClientConfig.java b/client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClientConfig.java
index fd138d1..a3d9fc9 100644
--- a/client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClientConfig.java
+++ b/client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClientConfig.java
@@ -19,6 +19,8 @@
package org.apache.cassandra.sidecar.client;
+import java.time.Duration;
+
/**
* Encapsulates configurations for the {@link SidecarClient}
*/
@@ -38,4 +40,14 @@
* @return the maximum amount of time to wait before retrying a failed request
*/
long maxRetryDelayMillis();
+
+ /**
+ * @return the minimum amount of time to wait before retrying a failed health check
+ */
+ Duration minimumHealthRetryDelay();
+
+ /**
+ * @return the maximum amount of time to wait before retrying a failed health check
+ */
+ Duration maximumHealthRetryDelay();
}
diff --git a/client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClientConfigImpl.java b/client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClientConfigImpl.java
index 3a3a4c9..5c4a6ce 100644
--- a/client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClientConfigImpl.java
+++ b/client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClientConfigImpl.java
@@ -19,6 +19,8 @@
package org.apache.cassandra.sidecar.client;
+import java.time.Duration;
+
import org.apache.cassandra.sidecar.common.DataObjectBuilder;
/**
@@ -29,16 +31,22 @@
public static final int DEFAULT_MAX_RETRIES = 3;
public static final long DEFAULT_RETRY_DELAY_MILLIS = 500L;
public static final long DEFAULT_MAX_RETRY_DELAY_MILLIS = 60_000L;
+ public static final Duration DEFAULT_MINIMUM_HEALTH_RETRY_DELAY = Duration.ofSeconds(1L);
+ public static final Duration DEFAULT_MAXIMUM_HEALTH_RETRY_DELAY = Duration.ofSeconds(5L);
protected final int maxRetries;
protected final long retryDelayMillis;
protected final long maxRetryDelayMillis;
+ protected final Duration minimumHealthRetryDelay;
+ protected final Duration maximumHealthRetryDelay;
private SidecarClientConfigImpl(Builder builder)
{
maxRetries = builder.maxRetries;
retryDelayMillis = builder.retryDelayMillis;
maxRetryDelayMillis = builder.maxRetryDelayMillis;
+ minimumHealthRetryDelay = builder.minimumHealthRetryDelay;
+ maximumHealthRetryDelay = builder.maximumHealthRetryDelay;
}
/**
@@ -68,6 +76,24 @@
return maxRetryDelayMillis;
}
+ /**
+ * @return the minimum amount of time to wait before retrying a failed health check
+ */
+ @Override
+ public Duration minimumHealthRetryDelay()
+ {
+ return minimumHealthRetryDelay;
+ }
+
+ /**
+ * @return the maximum amount of time to wait before retrying a failed health check
+ */
+ @Override
+ public Duration maximumHealthRetryDelay()
+ {
+ return maximumHealthRetryDelay;
+ }
+
public static Builder builder()
{
return new Builder();
@@ -81,6 +107,8 @@
protected int maxRetries = DEFAULT_MAX_RETRIES;
protected long retryDelayMillis = DEFAULT_RETRY_DELAY_MILLIS;
protected long maxRetryDelayMillis = DEFAULT_MAX_RETRY_DELAY_MILLIS;
+ protected Duration minimumHealthRetryDelay = DEFAULT_MINIMUM_HEALTH_RETRY_DELAY;
+ protected Duration maximumHealthRetryDelay = DEFAULT_MAXIMUM_HEALTH_RETRY_DELAY;
protected Builder()
{
@@ -126,6 +154,28 @@
}
/**
+ * Sets the {@code minimumHealthRetryDelay} and returns a reference to this Builder enabling method chaining
+ *
+ * @param minimumHealthRetryDelay the {@code minimumHealthRetryDelay} to set
+ * @return a reference to this Builder
+ */
+ public Builder minimumHealthRetryDelay(Duration minimumHealthRetryDelay)
+ {
+ return update(builder -> builder.minimumHealthRetryDelay = minimumHealthRetryDelay);
+ }
+
+ /**
+ * Sets the {@code maximumHealthRetryDelay} and returns a reference to this Builder enabling method chaining
+ *
+ * @param maximumHealthRetryDelay the {@code maximumHealthRetryDelay} to set
+ * @return a reference to this Builder
+ */
+ public Builder maximumHealthRetryDelay(Duration maximumHealthRetryDelay)
+ {
+ return update(builder -> builder.maximumHealthRetryDelay = maximumHealthRetryDelay);
+ }
+
+ /**
* Returns a {@code SidecarConfig} built from the parameters previously set.
*
* @return a {@code SidecarConfig} built with parameters of this {@code SidecarConfig.Builder}
diff --git a/client/src/main/java/org/apache/cassandra/sidecar/client/retry/OncePerInstanceRetryPolicy.java b/client/src/main/java/org/apache/cassandra/sidecar/client/retry/OncePerInstanceRetryPolicy.java
index cb14429..7c229f4 100644
--- a/client/src/main/java/org/apache/cassandra/sidecar/client/retry/OncePerInstanceRetryPolicy.java
+++ b/client/src/main/java/org/apache/cassandra/sidecar/client/retry/OncePerInstanceRetryPolicy.java
@@ -18,19 +18,50 @@
package org.apache.cassandra.sidecar.client.retry;
+import java.time.Duration;
import java.util.concurrent.CompletableFuture;
import io.netty.handler.codec.http.HttpResponseStatus;
import org.apache.cassandra.sidecar.client.HttpResponse;
import org.apache.cassandra.sidecar.client.exception.RetriesExhaustedException;
import org.apache.cassandra.sidecar.client.request.Request;
+import org.apache.cassandra.sidecar.common.utils.TimeUtils;
/**
* A retry policy that attempts to execute the request once on each instance
- * until the first successful response is received, or fails if none were successful
+ * until the first successful response is received, or fails if none were successful.
+ *
+ * Accepts optional minimum and maximum durations used to calculate random delay
+ * before each retry attempt in order to avoid the thundering herd problem.
+ *
+ * Retries immediately without delay if minimum and maximum durations are not specified.
*/
public class OncePerInstanceRetryPolicy extends RetryPolicy
{
+ private final Duration minimumDelay;
+ private final Duration maximumDelay;
+
+ /**
+ * Instantiates {@link OncePerInstanceRetryPolicy} with no delay between retry attempts
+ */
+ public OncePerInstanceRetryPolicy()
+ {
+ this(Duration.ZERO, Duration.ZERO);
+ }
+
+ /**
+ * Instantiates {@link OncePerInstanceRetryPolicy} with random delays between retry attempts
+ *
+ * @param minimumDelay duration of minimum possible retry delay, inclusive
+ * @param maximumDelay duration of maximum possible retry delay, inclusive
+ */
+ public OncePerInstanceRetryPolicy(Duration minimumDelay, Duration maximumDelay)
+ {
+ super();
+ this.minimumDelay = minimumDelay;
+ this.maximumDelay = maximumDelay;
+ }
+
/**
* {@inheritDoc}
*/
@@ -49,7 +80,7 @@
}
else if (canRetryOnADifferentHost)
{
- retryAction.retry(attempts + 1, 0L);
+ retryAction.retry(attempts + 1, TimeUtils.randomDuration(minimumDelay, maximumDelay).toMillis());
}
else
{
diff --git a/common/src/main/java/org/apache/cassandra/sidecar/common/utils/TimeUtils.java b/common/src/main/java/org/apache/cassandra/sidecar/common/utils/TimeUtils.java
new file mode 100644
index 0000000..a44f1f6
--- /dev/null
+++ b/common/src/main/java/org/apache/cassandra/sidecar/common/utils/TimeUtils.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.cassandra.sidecar.common.utils;
+
+import java.time.Duration;
+import java.util.concurrent.ThreadLocalRandom;
+
+/**
+ * Utility class for manipulating dates, times, and durations
+ */
+public final class TimeUtils
+{
+ /**
+ * Private constructor that prevents unnecessary instantiation
+ *
+ * @throws IllegalStateException when called
+ */
+ private TimeUtils()
+ {
+ throw new IllegalStateException(getClass() + " is a static utility class and shall not be instantiated");
+ }
+
+ /**
+ * Returns a random duration with millisecond precision that is uniformly distributed between two provided durations
+ *
+ * @param minimum minimum possible duration, inclusive
+ * @param maximum maximum possible duration, inclusive
+ *
+ * @return random duration uniformly distributed between two provided durations, with millisecond precision
+ *
+ * @throws IllegalArgumentException if minimum duration is greater than maximum duration
+ */
+ public static Duration randomDuration(Duration minimum, Duration maximum)
+ {
+ Preconditions.checkArgument(minimum.compareTo(maximum) <= 0,
+ "Minimum duration must be less or equal to maximum duration");
+ return Duration.ofMillis(ThreadLocalRandom.current().nextLong(minimum.toMillis(), maximum.toMillis() + 1L));
+ }
+}
diff --git a/common/src/test/java/org/apache/cassandra/sidecar/common/utils/TimeUtilsTest.java b/common/src/test/java/org/apache/cassandra/sidecar/common/utils/TimeUtilsTest.java
new file mode 100644
index 0000000..0f8d5bb
--- /dev/null
+++ b/common/src/test/java/org/apache/cassandra/sidecar/common/utils/TimeUtilsTest.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.cassandra.sidecar.common.utils;
+
+import java.time.Duration;
+
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Unit tests for {@link TimeUtils} class
+ */
+public class TimeUtilsTest
+{
+ @Test
+ public void testRandomDurationWithMinimumAboveMaximum()
+ {
+ assertThrows(IllegalArgumentException.class,
+ () -> TimeUtils.randomDuration(Duration.ofSeconds(2L), Duration.ofSeconds(1L)));
+ }
+
+ @Test
+ public void testRandomDurationWithMinimumEqualToMaximum()
+ {
+ assertEquals(Duration.ofSeconds(1L),
+ TimeUtils.randomDuration(Duration.ofSeconds(1L), Duration.ofSeconds(1L)));
+ }
+
+ @Test
+ public void testRandomDurationWithMinimumBelowMaximum()
+ {
+ for (int test = 0; test < 3600; test++)
+ {
+ Duration minimum = Duration.ofSeconds(test);
+ Duration maximum = Duration.ofSeconds(test + 1);
+ Duration random = TimeUtils.randomDuration(minimum, maximum);
+
+ assertTrue(minimum.compareTo(random) <= 0);
+ assertTrue(random.compareTo(maximum) <= 0);
+ }
+ }
+}