CASSANDRASC-58: Support retries in Sidecar Client on Invalid Checksum
In rare occasions an SSTable upload will receive a corrupted SSTable. Bit flips
are expected to occur occassionally while transmitting SSTables from client to
server.
This commit adds support for retries in Sidecar Client when a checksum mismatch
is encountered during SSTable upload. Allowing for clients to retry
patch by Francisco Guerrero; reviewed by Dinesh Joshi, Yifan Cai for CASSANDRASC-58
diff --git a/CHANGES.txt b/CHANGES.txt
index b4f59b6..f7626e1 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,6 @@
1.0.0
-----
+ * Support retries in Sidecar Client on Invalid Checksum (CASSANDRASC-58)
* Ignore unknown properties during Sidecar client deserialization (CASSANDRASC-53)
* Create staging directory if it doesn't exists (CASSANDRASC-56)
* Remove RESTEasy (CASSANDRASC-57)
diff --git a/build.gradle b/build.gradle
index 055cd28..dd07dfe 100644
--- a/build.gradle
+++ b/build.gradle
@@ -22,6 +22,7 @@
id "nebula.ospackage" version "8.3.0"
id 'nebula.ospackage-application' version "8.3.0"
id 'com.google.cloud.tools.jib' version '2.2.0'
+ id 'org.asciidoctor.jvm.convert' version '3.1.0'
}
ext.dtestJar = System.getenv("DTEST_JAR") ?: "dtest-5.0.jar" // trunk is currently 5.0.jar - update when trunk moves
diff --git a/client/src/main/java/org/apache/cassandra/sidecar/client/retry/BasicRetryPolicy.java b/client/src/main/java/org/apache/cassandra/sidecar/client/retry/BasicRetryPolicy.java
index 6b8d3dd..1eb3719 100644
--- a/client/src/main/java/org/apache/cassandra/sidecar/client/retry/BasicRetryPolicy.java
+++ b/client/src/main/java/org/apache/cassandra/sidecar/client/retry/BasicRetryPolicy.java
@@ -27,6 +27,7 @@
import org.apache.cassandra.sidecar.client.HttpResponse;
import org.apache.cassandra.sidecar.client.exception.ResourceNotFoundException;
import org.apache.cassandra.sidecar.client.request.Request;
+import org.apache.cassandra.sidecar.common.http.SidecarHttpResponseStatus;
/**
* A basic {@link RetryPolicy} supporting standard status codes
@@ -145,6 +146,21 @@
return;
}
+ if (response.statusCode() == SidecarHttpResponseStatus.CHECKSUM_MISMATCH.code())
+ {
+ // assume that the uploaded payload might have been corrupted, so allow for retries when an invalid
+ // checksum is encountered
+ if (canRetryOnADifferentHost)
+ {
+ retryImmediately(responseFuture, request, retryAction, attempts, throwable);
+ }
+ else
+ {
+ retry(responseFuture, request, retryAction, attempts, throwable);
+ }
+ return;
+ }
+
// 4xx Client Errors - 5xx Server Errors
if (HttpStatusClass.CLIENT_ERROR.contains(response.statusCode()) ||
HttpStatusClass.SERVER_ERROR.contains(response.statusCode()))
diff --git a/client/src/test/java/org/apache/cassandra/sidecar/client/retry/BasicRetryPolicyTest.java b/client/src/test/java/org/apache/cassandra/sidecar/client/retry/BasicRetryPolicyTest.java
index 591e364..58b9c3b 100644
--- a/client/src/test/java/org/apache/cassandra/sidecar/client/retry/BasicRetryPolicyTest.java
+++ b/client/src/test/java/org/apache/cassandra/sidecar/client/retry/BasicRetryPolicyTest.java
@@ -36,7 +36,6 @@
import org.junit.jupiter.params.provider.MethodSource;
import org.junit.jupiter.params.provider.ValueSource;
-import io.netty.handler.codec.http.HttpResponseStatus;
import org.apache.cassandra.sidecar.client.HttpResponse;
import org.apache.cassandra.sidecar.client.exception.ResourceNotFoundException;
import org.apache.cassandra.sidecar.client.exception.RetriesExhaustedException;
@@ -50,6 +49,7 @@
import static io.netty.handler.codec.http.HttpResponseStatus.NOT_IMPLEMENTED;
import static io.netty.handler.codec.http.HttpResponseStatus.OK;
import static io.netty.handler.codec.http.HttpResponseStatus.SERVICE_UNAVAILABLE;
+import static org.apache.cassandra.sidecar.common.http.SidecarHttpResponseStatus.CHECKSUM_MISMATCH;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatExceptionOfType;
import static org.assertj.core.api.Fail.fail;
@@ -159,6 +159,14 @@
testWithRetries(mockRequest, mockResponse, null, 5, 300, canRetryOnADifferentHost);
}
+ @ParameterizedTest(name = "{index} => canRetryOnADifferentHost={0}")
+ @ValueSource(booleans = { true, false })
+ void testRetriesWithChecksumMismatchStatusCode(boolean canRetryOnADifferentHost)
+ {
+ when(mockResponse.statusCode()).thenReturn(CHECKSUM_MISMATCH.code());
+ testWithRetries(mockRequest, mockResponse, null, 5, 300, canRetryOnADifferentHost);
+ }
+
@Test
void testRetriesWithServiceUnavailableStatusCodeWithRetryAfterHeader()
{
@@ -208,7 +216,8 @@
private static Stream<Arguments> clientStatusCodeArguments()
{
return IntStream.range(400, 500)
- .filter(statusCode -> statusCode != HttpResponseStatus.NOT_FOUND.code())
+ .filter(statusCode -> statusCode != NOT_FOUND.code()
+ && statusCode != CHECKSUM_MISMATCH.code())
.boxed()
.map(Arguments::of);
}
diff --git a/common/build.gradle b/common/build.gradle
index e9e9f97..aa42bea 100644
--- a/common/build.gradle
+++ b/common/build.gradle
@@ -28,12 +28,14 @@
compileOnly('org.jetbrains:annotations:23.0.0')
compileOnly('com.google.code.findbugs:jsr305:3.0.2') // required for the @NotThreadSafe annotation
compileOnly(group: 'com.datastax.cassandra', name: 'cassandra-driver-core', version: '3.11.3')
+ compileOnly(group: 'io.netty', name: 'netty-codec-http', version: '4.1.69.Final')
testImplementation("com.google.guava:guava:${project.rootProject.guavaVersion}")
testImplementation('org.mockito:mockito-inline:4.10.0')
testImplementation("org.assertj:assertj-core:3.24.2")
testImplementation("org.junit.jupiter:junit-jupiter-api:${project.junitVersion}")
testImplementation("org.junit.jupiter:junit-jupiter-params:${project.junitVersion}")
+ testImplementation(group: 'io.netty', name: 'netty-codec-http', version: '4.1.69.Final')
testRuntimeOnly("org.junit.jupiter:junit-jupiter-engine:${project.junitVersion}")
}
diff --git a/common/src/main/java/org/apache/cassandra/sidecar/common/http/SidecarHttpResponseStatus.java b/common/src/main/java/org/apache/cassandra/sidecar/common/http/SidecarHttpResponseStatus.java
new file mode 100644
index 0000000..3524daa
--- /dev/null
+++ b/common/src/main/java/org/apache/cassandra/sidecar/common/http/SidecarHttpResponseStatus.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.sidecar.common.http;
+
+import io.netty.handler.codec.http.HttpResponseStatus;
+
+/**
+ * Non-standard HTTP response status codes used in Sidecar
+ */
+public class SidecarHttpResponseStatus extends HttpResponseStatus
+{
+ /**
+ * 455 INVALID CHECKSUM
+ */
+ public static final HttpResponseStatus CHECKSUM_MISMATCH = newStatus(455, "Checksum Mismatch");
+
+ /**
+ * Creates a new instance with the specified {@code code} and its {@code reasonPhrase}.
+ *
+ * @param code the HTTP status code
+ * @param reasonPhrase the HTTP status reason
+ */
+ public SidecarHttpResponseStatus(int code, String reasonPhrase)
+ {
+ super(code, reasonPhrase);
+ }
+
+ private static HttpResponseStatus newStatus(int statusCode, String reasonPhrase)
+ {
+ return new SidecarHttpResponseStatus(statusCode, reasonPhrase);
+ }
+}
diff --git a/common/src/test/java/org/apache/cassandra/sidecar/common/http/SidecarHttpResponseStatusTest.java b/common/src/test/java/org/apache/cassandra/sidecar/common/http/SidecarHttpResponseStatusTest.java
new file mode 100644
index 0000000..b8dcfbb
--- /dev/null
+++ b/common/src/test/java/org/apache/cassandra/sidecar/common/http/SidecarHttpResponseStatusTest.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.sidecar.common.http;
+
+import org.junit.jupiter.api.Test;
+
+import static org.apache.cassandra.sidecar.common.http.SidecarHttpResponseStatus.CHECKSUM_MISMATCH;
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * Unit tests for {@link SidecarHttpResponseStatus}
+ */
+class SidecarHttpResponseStatusTest
+{
+
+ @Test
+ void testInvalidChecksum()
+ {
+ assertThat(CHECKSUM_MISMATCH.code()).isEqualTo(455);
+ assertThat(CHECKSUM_MISMATCH.reasonPhrase()).isEqualTo("Checksum Mismatch");
+ }
+}
diff --git a/docs/build.gradle b/docs/build.gradle
index 3eb6a9f..bd76db0 100644
--- a/docs/build.gradle
+++ b/docs/build.gradle
@@ -1,14 +1,4 @@
-buildscript {
- repositories {
- jcenter()
- }
-
- dependencies {
- classpath 'org.asciidoctor:asciidoctor-gradle-plugin:1.5.9.2'
- }
-}
-
-apply plugin: 'org.asciidoctor.convert'
+apply plugin: 'org.asciidoctor.jvm.convert'
asciidoctor {
sourceDir = file("src")
diff --git a/src/main/java/org/apache/cassandra/sidecar/utils/MD5ChecksumVerifier.java b/src/main/java/org/apache/cassandra/sidecar/utils/MD5ChecksumVerifier.java
index a74b5ec..378b1ab 100644
--- a/src/main/java/org/apache/cassandra/sidecar/utils/MD5ChecksumVerifier.java
+++ b/src/main/java/org/apache/cassandra/sidecar/utils/MD5ChecksumVerifier.java
@@ -25,7 +25,6 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import io.netty.handler.codec.http.HttpResponseStatus;
import io.vertx.core.Future;
import io.vertx.core.Promise;
import io.vertx.core.file.AsyncFile;
@@ -33,6 +32,8 @@
import io.vertx.core.file.OpenOptions;
import io.vertx.ext.web.handler.HttpException;
+import static org.apache.cassandra.sidecar.common.http.SidecarHttpResponseStatus.CHECKSUM_MISMATCH;
+
/**
* Implementation of {@link ChecksumVerifier}. Here we use MD5 implementation of {@link java.security.MessageDigest}
* for calculating checksum. And match the calculated checksum with expected checksum obtained from request.
@@ -61,7 +62,7 @@
.compose(this::calculateMD5)
.compose(computedChecksum -> {
if (!expectedChecksum.equals(computedChecksum))
- return Future.failedFuture(new HttpException(HttpResponseStatus.BAD_REQUEST.code(),
+ return Future.failedFuture(new HttpException(CHECKSUM_MISMATCH.code(),
String.format("Checksum mismatch. "
+ "computed_checksum=%s, "
+ "expected_checksum=%s, "
diff --git a/src/test/java/org/apache/cassandra/sidecar/routes/sstableuploads/SSTableUploadHandlerTest.java b/src/test/java/org/apache/cassandra/sidecar/routes/sstableuploads/SSTableUploadHandlerTest.java
index e29f055..6ed3d2c 100644
--- a/src/test/java/org/apache/cassandra/sidecar/routes/sstableuploads/SSTableUploadHandlerTest.java
+++ b/src/test/java/org/apache/cassandra/sidecar/routes/sstableuploads/SSTableUploadHandlerTest.java
@@ -38,6 +38,7 @@
import io.vertx.ext.web.client.WebClient;
import io.vertx.junit5.VertxExtension;
import io.vertx.junit5.VertxTestContext;
+import org.apache.cassandra.sidecar.common.http.SidecarHttpResponseStatus;
import org.apache.cassandra.sidecar.snapshots.SnapshotUtils;
import static org.assertj.core.api.Assertions.assertThat;
@@ -74,7 +75,8 @@
{
UUID uploadId = UUID.randomUUID();
sendUploadRequestAndVerify(context, uploadId, "ks", "tbl", "with-incorrect-md5.db", "incorrectMd5",
- Files.size(Paths.get(FILE_TO_BE_UPLOADED)), HttpResponseStatus.BAD_REQUEST.code(),
+ Files.size(Paths.get(FILE_TO_BE_UPLOADED)),
+ SidecarHttpResponseStatus.CHECKSUM_MISMATCH.code(),
false);
}