CASSANDRASC-86 Add Random Delays Between Retry Attempts for Health Checks

Patch by Yuriy Semchyshyn; Reviewed by Yifan Cai and Francisco Guerrero for CASSANDRASC-86
diff --git a/CHANGES.txt b/CHANGES.txt
index 3f1b4cc..27fba7e 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,6 @@
 1.0.0
 -----
+ * Startup Validation Failures when Checking Sidecar Connectivity (CASSANDRASC-86)
  * Add support for additional digest validation during SSTable upload (CASSANDRASC-97)
  * Add sidecar client changes for restore from S3 (CASSANDRASC-95)
  * Add restore SSTables from S3 into Cassandra feature to Cassandra Sidecar (CASSANDRASC-92)
diff --git a/client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClient.java b/client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClient.java
index 42b7093..c89fc9a 100644
--- a/client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClient.java
+++ b/client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClient.java
@@ -69,6 +69,7 @@
     protected RequestExecutor executor;
     protected final RetryPolicy defaultRetryPolicy;
     protected final RetryPolicy ignoreConflictRetryPolicy;
+    protected final RetryPolicy oncePerInstanceRetryPolicy;
     protected RequestContext.Builder baseBuilder;
 
     public SidecarClient(SidecarInstancesProvider instancesProvider,
@@ -80,6 +81,8 @@
         ignoreConflictRetryPolicy = new IgnoreConflictRetryPolicy(sidecarClientConfig.maxRetries(),
                                                                   sidecarClientConfig.retryDelayMillis(),
                                                                   sidecarClientConfig.maxRetryDelayMillis());
+        oncePerInstanceRetryPolicy = new OncePerInstanceRetryPolicy(sidecarClientConfig.minimumHealthRetryDelay(),
+                                                                    sidecarClientConfig.maximumHealthRetryDelay());
         baseBuilder = new RequestContext.Builder()
                       .instanceSelectionPolicy(new RandomInstanceSelectionPolicy(instancesProvider))
                       .retryPolicy(defaultRetryPolicy);
@@ -95,7 +98,7 @@
     {
         return executor.executeRequestAsync(requestBuilder()
                                             .sidecarHealthRequest()
-                                            .retryPolicy(new OncePerInstanceRetryPolicy())
+                                            .retryPolicy(oncePerInstanceRetryPolicy)
                                             .build());
     }
 
@@ -110,7 +113,7 @@
     {
         return executor.executeRequestAsync(requestBuilder()
                                             .cassandraHealthRequest()
-                                            .retryPolicy(new OncePerInstanceRetryPolicy())
+                                            .retryPolicy(oncePerInstanceRetryPolicy)
                                             .build());
     }
 
diff --git a/client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClientConfig.java b/client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClientConfig.java
index fd138d1..a3d9fc9 100644
--- a/client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClientConfig.java
+++ b/client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClientConfig.java
@@ -19,6 +19,8 @@
 
 package org.apache.cassandra.sidecar.client;
 
+import java.time.Duration;
+
 /**
  * Encapsulates configurations for the {@link SidecarClient}
  */
@@ -38,4 +40,14 @@
      * @return the maximum amount of time to wait before retrying a failed request
      */
      long maxRetryDelayMillis();
+
+    /**
+     * @return the minimum amount of time to wait before retrying a failed health check
+     */
+     Duration minimumHealthRetryDelay();
+
+    /**
+     * @return the maximum amount of time to wait before retrying a failed health check
+     */
+     Duration maximumHealthRetryDelay();
 }
diff --git a/client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClientConfigImpl.java b/client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClientConfigImpl.java
index 3a3a4c9..5c4a6ce 100644
--- a/client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClientConfigImpl.java
+++ b/client/src/main/java/org/apache/cassandra/sidecar/client/SidecarClientConfigImpl.java
@@ -19,6 +19,8 @@
 
 package org.apache.cassandra.sidecar.client;
 
+import java.time.Duration;
+
 import org.apache.cassandra.sidecar.common.DataObjectBuilder;
 
 /**
@@ -29,16 +31,22 @@
     public static final int DEFAULT_MAX_RETRIES = 3;
     public static final long DEFAULT_RETRY_DELAY_MILLIS = 500L;
     public static final long DEFAULT_MAX_RETRY_DELAY_MILLIS = 60_000L;
+    public static final Duration DEFAULT_MINIMUM_HEALTH_RETRY_DELAY = Duration.ofSeconds(1L);
+    public static final Duration DEFAULT_MAXIMUM_HEALTH_RETRY_DELAY = Duration.ofSeconds(5L);
 
     protected final int maxRetries;
     protected final long retryDelayMillis;
     protected final long maxRetryDelayMillis;
+    protected final Duration minimumHealthRetryDelay;
+    protected final Duration maximumHealthRetryDelay;
 
     private SidecarClientConfigImpl(Builder builder)
     {
         maxRetries = builder.maxRetries;
         retryDelayMillis = builder.retryDelayMillis;
         maxRetryDelayMillis = builder.maxRetryDelayMillis;
+        minimumHealthRetryDelay = builder.minimumHealthRetryDelay;
+        maximumHealthRetryDelay = builder.maximumHealthRetryDelay;
     }
 
     /**
@@ -68,6 +76,24 @@
         return maxRetryDelayMillis;
     }
 
+    /**
+     * @return the minimum amount of time to wait before retrying a failed health check
+     */
+     @Override
+     public Duration minimumHealthRetryDelay()
+     {
+        return minimumHealthRetryDelay;
+     }
+
+    /**
+     * @return the maximum amount of time to wait before retrying a failed health check
+     */
+     @Override
+     public Duration maximumHealthRetryDelay()
+     {
+        return maximumHealthRetryDelay;
+     }
+
     public static Builder builder()
     {
         return new Builder();
@@ -81,6 +107,8 @@
         protected int maxRetries = DEFAULT_MAX_RETRIES;
         protected long retryDelayMillis = DEFAULT_RETRY_DELAY_MILLIS;
         protected long maxRetryDelayMillis = DEFAULT_MAX_RETRY_DELAY_MILLIS;
+        protected Duration minimumHealthRetryDelay = DEFAULT_MINIMUM_HEALTH_RETRY_DELAY;
+        protected Duration maximumHealthRetryDelay = DEFAULT_MAXIMUM_HEALTH_RETRY_DELAY;
 
         protected Builder()
         {
@@ -126,6 +154,28 @@
         }
 
         /**
+         * Sets the {@code minimumHealthRetryDelay} and returns a reference to this Builder enabling method chaining
+         *
+         * @param minimumHealthRetryDelay the {@code minimumHealthRetryDelay} to set
+         * @return a reference to this Builder
+         */
+        public Builder minimumHealthRetryDelay(Duration minimumHealthRetryDelay)
+        {
+            return update(builder -> builder.minimumHealthRetryDelay = minimumHealthRetryDelay);
+        }
+
+        /**
+         * Sets the {@code maximumHealthRetryDelay} and returns a reference to this Builder enabling method chaining
+         *
+         * @param maximumHealthRetryDelay the {@code maximumHealthRetryDelay} to set
+         * @return a reference to this Builder
+         */
+        public Builder maximumHealthRetryDelay(Duration maximumHealthRetryDelay)
+        {
+            return update(builder -> builder.maximumHealthRetryDelay = maximumHealthRetryDelay);
+        }
+
+        /**
          * Returns a {@code SidecarConfig} built from the parameters previously set.
          *
          * @return a {@code SidecarConfig} built with parameters of this {@code SidecarConfig.Builder}
diff --git a/client/src/main/java/org/apache/cassandra/sidecar/client/retry/OncePerInstanceRetryPolicy.java b/client/src/main/java/org/apache/cassandra/sidecar/client/retry/OncePerInstanceRetryPolicy.java
index cb14429..7c229f4 100644
--- a/client/src/main/java/org/apache/cassandra/sidecar/client/retry/OncePerInstanceRetryPolicy.java
+++ b/client/src/main/java/org/apache/cassandra/sidecar/client/retry/OncePerInstanceRetryPolicy.java
@@ -18,19 +18,50 @@
 
 package org.apache.cassandra.sidecar.client.retry;
 
+import java.time.Duration;
 import java.util.concurrent.CompletableFuture;
 
 import io.netty.handler.codec.http.HttpResponseStatus;
 import org.apache.cassandra.sidecar.client.HttpResponse;
 import org.apache.cassandra.sidecar.client.exception.RetriesExhaustedException;
 import org.apache.cassandra.sidecar.client.request.Request;
+import org.apache.cassandra.sidecar.common.utils.TimeUtils;
 
 /**
  * A retry policy that attempts to execute the request once on each instance
- * until the first successful response is received, or fails if none were successful
+ * until the first successful response is received, or fails if none were successful.
+ *
+ * Accepts optional minimum and maximum durations used to calculate random delay
+ * before each retry attempt in order to avoid the thundering herd problem.
+ *
+ * Retries immediately without delay if minimum and maximum durations are not specified.
  */
 public class OncePerInstanceRetryPolicy extends RetryPolicy
 {
+    private final Duration minimumDelay;
+    private final Duration maximumDelay;
+
+    /**
+     * Instantiates {@link OncePerInstanceRetryPolicy} with no delay between retry attempts
+     */
+    public OncePerInstanceRetryPolicy()
+    {
+        this(Duration.ZERO, Duration.ZERO);
+    }
+
+    /**
+     * Instantiates {@link OncePerInstanceRetryPolicy} with random delays between retry attempts
+     *
+     * @param minimumDelay duration of minimum possible retry delay, inclusive
+     * @param maximumDelay duration of maximum possible retry delay, inclusive
+     */
+    public OncePerInstanceRetryPolicy(Duration minimumDelay, Duration maximumDelay)
+    {
+        super();
+        this.minimumDelay = minimumDelay;
+        this.maximumDelay = maximumDelay;
+    }
+
     /**
      * {@inheritDoc}
      */
@@ -49,7 +80,7 @@
         }
         else if (canRetryOnADifferentHost)
         {
-            retryAction.retry(attempts + 1, 0L);
+            retryAction.retry(attempts + 1, TimeUtils.randomDuration(minimumDelay, maximumDelay).toMillis());
         }
         else
         {
diff --git a/common/src/main/java/org/apache/cassandra/sidecar/common/utils/TimeUtils.java b/common/src/main/java/org/apache/cassandra/sidecar/common/utils/TimeUtils.java
new file mode 100644
index 0000000..a44f1f6
--- /dev/null
+++ b/common/src/main/java/org/apache/cassandra/sidecar/common/utils/TimeUtils.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.cassandra.sidecar.common.utils;
+
+import java.time.Duration;
+import java.util.concurrent.ThreadLocalRandom;
+
+/**
+ * Utility class for manipulating dates, times, and durations
+ */
+public final class TimeUtils
+{
+    /**
+     * Private constructor that prevents unnecessary instantiation
+     *
+     * @throws IllegalStateException when called
+     */
+    private TimeUtils()
+    {
+        throw new IllegalStateException(getClass() + " is a static utility class and shall not be instantiated");
+    }
+
+    /**
+     * Returns a random duration with millisecond precision that is uniformly distributed between two provided durations
+     *
+     * @param minimum minimum possible duration, inclusive
+     * @param maximum maximum possible duration, inclusive
+     *
+     * @return random duration uniformly distributed between two provided durations, with millisecond precision
+     *
+     * @throws IllegalArgumentException if minimum duration is greater than maximum duration
+     */
+    public static Duration randomDuration(Duration minimum, Duration maximum)
+    {
+        Preconditions.checkArgument(minimum.compareTo(maximum) <= 0,
+                                    "Minimum duration must be less or equal to maximum duration");
+        return Duration.ofMillis(ThreadLocalRandom.current().nextLong(minimum.toMillis(), maximum.toMillis() + 1L));
+    }
+}
diff --git a/common/src/test/java/org/apache/cassandra/sidecar/common/utils/TimeUtilsTest.java b/common/src/test/java/org/apache/cassandra/sidecar/common/utils/TimeUtilsTest.java
new file mode 100644
index 0000000..0f8d5bb
--- /dev/null
+++ b/common/src/test/java/org/apache/cassandra/sidecar/common/utils/TimeUtilsTest.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.cassandra.sidecar.common.utils;
+
+import java.time.Duration;
+
+import org.junit.jupiter.api.Test;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Unit tests for {@link TimeUtils} class
+ */
+public class TimeUtilsTest
+{
+    @Test
+    public void testRandomDurationWithMinimumAboveMaximum()
+    {
+        assertThrows(IllegalArgumentException.class,
+                     () -> TimeUtils.randomDuration(Duration.ofSeconds(2L), Duration.ofSeconds(1L)));
+    }
+
+    @Test
+    public void testRandomDurationWithMinimumEqualToMaximum()
+    {
+        assertEquals(Duration.ofSeconds(1L),
+                     TimeUtils.randomDuration(Duration.ofSeconds(1L), Duration.ofSeconds(1L)));
+    }
+
+    @Test
+    public void testRandomDurationWithMinimumBelowMaximum()
+    {
+        for (int test = 0; test < 3600; test++)
+        {
+            Duration minimum = Duration.ofSeconds(test);
+            Duration maximum = Duration.ofSeconds(test + 1);
+            Duration random = TimeUtils.randomDuration(minimum, maximum);
+
+            assertTrue(minimum.compareTo(random) <= 0);
+            assertTrue(random.compareTo(maximum) <= 0);
+        }
+    }
+}