HDDS-13594. Use a different endpoint for fetching the OM checkpoint tarball. (#8963)
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java
index 70b723b..fab011b 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java
@@ -75,6 +75,8 @@ public final class OzoneConsts {
"/serviceList";
public static final String OZONE_DB_CHECKPOINT_HTTP_ENDPOINT =
"/dbCheckpoint";
+ public static final String OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2 =
+ "/v2/dbCheckpoint";
// Ozone File System scheme
public static final String OZONE_URI_SCHEME = "o3fs";
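Note: keeping the v1 constant next to the new one lets the server register both endpoints during the transition. A minimal sketch of the URLs the two constants yield (the `om:9874` address is a placeholder, not from this patch):

```java
// Sketch only, not part of the patch; "om:9874" is a hypothetical OM HTTP address.
String omHttpAddress = "om:9874";
String v1Url = "http://" + omHttpAddress + OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;    // http://om:9874/dbCheckpoint
String v2Url = "http://" + omHttpAddress + OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2; // http://om:9874/v2/dbCheckpoint
```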
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java
index 26c93cd..dae2df9 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java
@@ -20,6 +20,7 @@
import static org.apache.hadoop.hdds.utils.HddsServerUtil.writeDBCheckpointToStream;
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_REQUEST_FLUSH;
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_REQUEST_TO_EXCLUDE_SST;
+import static org.apache.hadoop.ozone.OzoneConsts.ROCKSDB_SST_SUFFIX;
import com.google.common.annotations.VisibleForTesting;
import java.io.File;
@@ -277,7 +278,18 @@ public void processMetadataSnapshotRequest(HttpServletRequest request, HttpServl
}
}
- protected static Set<String> extractSstFilesToExclude(String[] sstParam) {
+ protected static Set<String> extractSstFilesToExclude(String[] filesInExclusionParam) {
+ Set<String> sstFilesToExclude = new HashSet<>();
+ if (filesInExclusionParam != null) {
+ sstFilesToExclude.addAll(
+ Arrays.stream(filesInExclusionParam).filter(s -> s.endsWith(ROCKSDB_SST_SUFFIX))
+ .distinct().collect(Collectors.toList()));
+ logSstFileList(sstFilesToExclude, "Received list of {} SST files to be excluded{}: {}", 5);
+ }
+ return sstFilesToExclude;
+ }
+
+ protected static Set<String> extractFilesToExclude(String[] sstParam) {
Set<String> receivedSstFiles = new HashSet<>();
if (sstParam != null) {
receivedSstFiles.addAll(
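The split keeps two behaviors apart: `extractSstFilesToExclude` now filters the exclusion list down to RocksDB `.sst` files for the v1 servlet, while the renamed `extractFilesToExclude` (used by the inode-based v2 servlet below) accepts any file name. A hedged sketch of the intended contract, assuming the renamed method keeps the original unfiltered body:

```java
// Illustrative only; the expected results are assumptions based on this patch.
String[] param = {"000123.sst", "000124.sst", "CURRENT", "000123.sst"};

// v1 path: only names ending in ROCKSDB_SST_SUFFIX (".sst") survive,
// and the HashSet collapses the duplicate.
Set<String> sstOnly = extractSstFilesToExclude(param);
// sstOnly -> {"000123.sst", "000124.sst"}

// v2 (inode-based) path: every distinct name is kept, including "CURRENT".
Set<String> anyFile = extractFilesToExclude(param);
// anyFile -> {"000123.sst", "000124.sst", "CURRENT"}
```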
diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OMNodeDetails.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OMNodeDetails.java
index adcf07d..ff51cfa 100644
--- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OMNodeDetails.java
+++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OMNodeDetails.java
@@ -17,7 +17,7 @@
package org.apache.hadoop.ozone.om.helpers;
-import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
+import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_INCLUDE_SNAPSHOT_DATA;
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_REQUEST_FLUSH;
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_ADDRESS_KEY;
@@ -189,7 +189,7 @@ public URL getOMDBCheckpointEndpointUrl(boolean isHttp, boolean flush)
URIBuilder urlBuilder = new URIBuilder().
setScheme(isHttp ? "http" : "https").
setHost(isHttp ? getHttpAddress() : getHttpsAddress()).
- setPath(OZONE_DB_CHECKPOINT_HTTP_ENDPOINT).
+ setPath(OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2).
addParameter(OZONE_DB_CHECKPOINT_INCLUDE_SNAPSHOT_DATA, "true").
addParameter(OZONE_DB_CHECKPOINT_REQUEST_FLUSH,
flush ? "true" : "false");
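With the constant swapped, the checkpoint URL a bootstrapping OM requests now targets the v2 servlet. A sketch of what the patched builder produces (the follower address is a placeholder; parameter names come from the constants imported above):

```java
// Sketch mirroring the patched builder; "om1:9874" is hypothetical.
URL url = new URIBuilder()
    .setScheme("http")
    .setHost("om1:9874")
    .setPath(OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2)
    .addParameter(OZONE_DB_CHECKPOINT_INCLUDE_SNAPSHOT_DATA, "true")
    .addParameter(OZONE_DB_CHECKPOINT_REQUEST_FLUSH, "true")
    .build()
    .toURL();
// e.g. http://om1:9874/v2/dbCheckpoint?includeSnapshotData=true&flushBeforeCheckpoint=true
```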
diff --git a/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh b/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh
index 5e53515..80bcbd2 100755
--- a/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh
+++ b/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh
@@ -73,6 +73,15 @@
compatibility/read.robot
}
+_test_checkpoint_compatibility() {
+ _kinit
+ execute_robot_test ${container} -N "xcompat-cluster-${cluster_version}-client-${client_version}-checkpoint" \
+ -v CLIENT_VERSION:${client_version} \
+ -v CLUSTER_VERSION:${cluster_version} \
+ -v TEST_DATA_DIR:/testdata \
+ compatibility/checkpoint.robot
+}
+
test_cross_compatibility() {
echo "Starting ${cluster_version} cluster with COMPOSE_FILE=${COMPOSE_FILE}"
@@ -107,6 +116,35 @@
done
done
+ # Checkpoint compatibility tests: only run against clusters that expose the checkpoint endpoints.
+ # Note: [[ ... < ... ]] compares version strings lexicographically, which is adequate while all versions keep the single-digit x.y.z form.
+ if [[ "${cluster_version}" < "2.0.0" ]]; then
+ echo "Skipping checkpoint compatibility tests for cluster ${cluster_version} (checkpoint endpoints not available)"
+ else
+ echo ""
+ echo "=========================================="
+ echo "Running checkpoint compatibility tests"
+ echo "=========================================="
+
+ # Test 2.0.0 client (if available)
+ for client_version in "$@"; do
+ if [[ "${client_version}" == "2.0.0" ]]; then
+ echo "Testing 2.0.0 client against ${cluster_version} cluster"
+ client _test_checkpoint_compatibility
+ break # Only test the 2.0.0 client once
+ fi
+ done
+
+ # Test current client (if different from 2.0.0 and available)
+ for client_version in "$@"; do
+ if [[ "${client_version}" == "${current_version}" ]]; then
+ echo "Testing ${current_version} client against ${cluster_version} cluster"
+ client _test_checkpoint_compatibility
+ break # Only test current version once
+ fi
+ done
+ fi
+
KEEP_RUNNING=false stop_docker_env
}
diff --git a/hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot b/hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot
new file mode 100644
index 0000000..e1776ef
--- /dev/null
+++ b/hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot
@@ -0,0 +1,110 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+*** Settings ***
+Documentation Checkpoint Compatibility
+Resource ../ozone-lib/shell.robot
+Resource setup.robot
+Test Timeout 5 minutes
+
+*** Variables ***
+${CHECKPOINT_V2_VERSION} 2.1.0
+${OM_HOST} om
+${OM_PORT} 9874
+
+*** Keywords ***
+Download Checkpoint V1
+ [Documentation] Download checkpoint using v1 endpoint (/dbCheckpoint)
+ [Arguments] ${expected_result}
+
+ Log Testing v1 checkpoint endpoint with authentication
+
+ # Try different keytabs based on client version/container
+ ${download_file} = Set Variable /tmp/checkpoint_v1_${CLIENT_VERSION}.tar.gz
+
+ # Debug: Check keytab availability first
+ ${keytab_check} = Execute ls -la /etc/security/keytabs/ 2>&1 | head -5 || echo "No keytabs directory"
+ Log Keytab directory: ${keytab_check}
+
+ # Combine kinit and curl in a single command to preserve Kerberos session
+ ${combined_cmd} = Set Variable kinit -k -t /etc/security/keytabs/testuser.keytab testuser/scm@EXAMPLE.COM && curl -f --negotiate -u : --connect-timeout 10 --max-time 30 -o ${download_file} http://${OM_HOST}:${OM_PORT}/dbCheckpoint
+
+ Log Executing: ${combined_cmd}
+ ${result} = Execute and checkrc ${combined_cmd} ${expected_result}
+
+ IF ${expected_result} == 0
+ # If we expect success, verify the file was created and has content
+ ${file_check} = Execute ls -la ${download_file} 2>/dev/null || echo "File not found"
+ Should Not Contain ${file_check} File not found
+ Should Contain ${file_check} checkpoint_v1_${CLIENT_VERSION}.tar.gz
+ Log Successfully downloaded checkpoint via v1 endpoint: ${file_check}
+ ELSE
+ Log v1 endpoint failed as expected for this version combination
+ END
+
+Download Checkpoint V2
+ [Documentation] Download checkpoint using v2 endpoint (/v2/dbCheckpoint)
+ [Arguments] ${expected_result}
+
+ Log Testing v2 checkpoint endpoint with authentication
+
+ # Debug: Check keytab availability first
+ ${keytab_check} = Execute ls -la /etc/security/keytabs/ 2>&1 | head -5 || echo "No keytabs directory"
+ Log Keytab directory: ${keytab_check}
+
+ # Combine kinit and curl in a single command to preserve Kerberos session
+ ${download_file} = Set Variable /tmp/checkpoint_v2_${CLIENT_VERSION}.tar.gz
+ ${combined_cmd} = Set Variable kinit -k -t /etc/security/keytabs/testuser.keytab testuser/scm@EXAMPLE.COM && curl -f --negotiate -u : --connect-timeout 10 --max-time 30 -o ${download_file} http://${OM_HOST}:${OM_PORT}/v2/dbCheckpoint
+
+ Log Executing: ${combined_cmd}
+ ${result} = Execute and checkrc ${combined_cmd} ${expected_result}
+
+ IF ${expected_result} == 0
+ # If we expect success, verify the file was created and has content
+ ${file_check} = Execute ls -la ${download_file} 2>/dev/null || echo "File not found"
+ Should Not Contain ${file_check} File not found
+ Should Contain ${file_check} checkpoint_v2_${CLIENT_VERSION}.tar.gz
+ Log Successfully downloaded checkpoint via v2 endpoint: ${file_check}
+ ELSE
+ Log v2 endpoint failed as expected for this version combination
+ END
+
+*** Test Cases ***
+Checkpoint V1 Endpoint Compatibility
+ [Documentation] Test v1 checkpoint endpoint (/dbCheckpoint) - should work for all versions (backward compatibility)
+
+ Log Testing v1 checkpoint endpoint: CLIENT=${CLIENT_VERSION}, CLUSTER=${CLUSTER_VERSION}
+
+ # Both old and new clusters should serve v1 endpoint for backward compatibility
+ Download Checkpoint V1 0
+
+Checkpoint V2 Endpoint Compatibility
+ [Documentation] Test v2 checkpoint endpoint (/v2/dbCheckpoint) - should only work with new cluster
+
+ Log Testing v2 checkpoint endpoint: CLIENT=${CLIENT_VERSION}, CLUSTER=${CLUSTER_VERSION}
+
+ IF '${CLUSTER_VERSION}' < '${CHECKPOINT_V2_VERSION}'
+ # Old cluster doesn't have the v2 endpoint - the download keyword (which expects rc 0) should fail
+ ${result} = Run Keyword And Return Status Download Checkpoint V2 0
+ IF not ${result}
+ Log v2 endpoint correctly failed on old cluster ${CLUSTER_VERSION} (expected failure)
+ ELSE
+ Fail v2 endpoint unexpectedly succeeded on old cluster ${CLUSTER_VERSION}
+ END
+ ELSE
+ # New cluster has v2 endpoint - should succeed
+ Download Checkpoint V2 0
+ Log v2 endpoint correctly succeeded on new cluster ${CLUSTER_VERSION}
+ END
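For reference, the robot keywords above boil down to an authenticated HTTP GET; on an unsecured cluster the same download is a few lines of Java. A minimal hedged sketch (host, port, and output path are placeholders; secured clusters need SPNEGO, which the test covers with `curl --negotiate`):

```java
// Sketch only: plain HTTP, no Kerberos.
URL url = new URL("http://om:9874/v2/dbCheckpoint?flushBeforeCheckpoint=true");
try (InputStream in = url.openStream();
     OutputStream out = Files.newOutputStream(Paths.get("/tmp/om-checkpoint.tar"))) {
  byte[] buf = new byte[8192];
  int n;
  while ((n = in.read(buf)) != -1) {
    out.write(buf, 0, n);
  }
}
```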
diff --git a/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerHA.java b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerHA.java
index 790932f..254a9f3 100644
--- a/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerHA.java
+++ b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerHA.java
@@ -18,7 +18,7 @@
package org.apache.hadoop.ozone.recon;
import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
+import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -119,7 +119,7 @@ public void testReconGetsSnapshotFromLeader() throws Exception {
String expectedUrl = "http://" +
(hostname.equals("0.0.0.0") ? "localhost" : hostname) + ":" +
ozoneManager.get().getHttpServer().getHttpAddress().getPort() +
- OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
+ OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
String snapshotUrl = impl.getOzoneManagerSnapshotUrl();
assertEquals(expectedUrl, snapshotUrl);
// Write some data
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java
index 42a28e4..1acd959 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java
@@ -131,7 +131,7 @@ public void processMetadataSnapshotRequest(HttpServletRequest request, HttpServl
String[] sstParam = isFormData ?
parseFormDataParameters(request) : request.getParameterValues(
OZONE_DB_CHECKPOINT_REQUEST_TO_EXCLUDE_SST);
- Set<String> receivedSstFiles = extractSstFilesToExclude(sstParam);
+ Set<String> receivedSstFiles = extractFilesToExclude(sstParam);
Path tmpdir = null;
try (BootstrapStateHandler.Lock lock = getBootstrapStateLock().lock()) {
tmpdir = Files.createTempDirectory(getBootstrapTempData().toPath(),
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerHttpServer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerHttpServer.java
index 8836324..9c2688d 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerHttpServer.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerHttpServer.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.ozone.om;
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
+import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_OM_SERVICE_LIST_HTTP_ENDPOINT;
import java.io.IOException;
@@ -36,6 +37,8 @@ public OzoneManagerHttpServer(MutableConfigurationSource conf,
addServlet("serviceList", OZONE_OM_SERVICE_LIST_HTTP_ENDPOINT,
ServiceListJSONServlet.class);
addServlet("dbCheckpoint", OZONE_DB_CHECKPOINT_HTTP_ENDPOINT,
+ OMDBCheckpointServlet.class);
+ addServlet("dbCheckpointv2", OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2,
OMDBCheckpointServletInodeBasedXfer.class);
getWebAppContext().setAttribute(OzoneConsts.OM_CONTEXT_ATTRIBUTE, om);
}
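Registering both servlets keeps `/dbCheckpoint` answering for older clients while new clients move to `/v2/dbCheckpoint`. A hedged sketch of a client-side fallback during a rolling upgrade (`omAddress` is a hypothetical variable; this is not code from the patch):

```java
// Try v2 first; fall back to v1 when the OM predates this change (HTTP 404).
URL v2 = new URL("http://" + omAddress + OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2);
HttpURLConnection conn = (HttpURLConnection) v2.openConnection();
if (conn.getResponseCode() == HttpURLConnection.HTTP_NOT_FOUND) {
  conn.disconnect();
  URL v1 = new URL("http://" + omAddress + OZONE_DB_CHECKPOINT_HTTP_ENDPOINT);
  conn = (HttpURLConnection) v1.openConnection();
}
```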
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java
index a748250..cf3d039 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java
@@ -18,7 +18,7 @@
package org.apache.hadoop.ozone.recon.spi.impl;
import static org.apache.hadoop.hdds.recon.ReconConfigKeys.OZONE_RECON_DB_DIRS_PERMISSIONS_DEFAULT;
-import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
+import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_REQUEST_FLUSH;
import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_HTTP_AUTH_TYPE;
import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_OM_SNAPSHOT_DB;
@@ -195,11 +195,11 @@ public OzoneManagerServiceProviderImpl(
HttpConfig.Policy policy = HttpConfig.getHttpPolicy(configuration);
omDBSnapshotUrl = "http://" + ozoneManagerHttpAddress +
- OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
+ OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
if (policy.isHttpsEnabled()) {
omDBSnapshotUrl = "https://" + ozoneManagerHttpsAddress +
- OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
+ OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
}
boolean flushParam = configuration.getBoolean(
@@ -394,7 +394,7 @@ public String getOzoneManagerSnapshotUrl() throws IOException {
omLeaderUrl = (policy.isHttpsEnabled() ?
"https://" + info.getServiceAddress(Type.HTTPS) :
"http://" + info.getServiceAddress(Type.HTTP)) +
- OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
+ OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
}
}
}
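After this change Recon always pulls the OM snapshot through v2, both for the configured address and for the HA leader lookup. A sketch of the resulting URL selection, mirroring the patched logic (addresses are placeholders):

```java
boolean httpsEnabled = false; // HttpConfig.getHttpPolicy(conf).isHttpsEnabled() in the real code
String omDBSnapshotUrl = httpsEnabled
    ? "https://" + "om:9875" + OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2
    : "http://" + "om:9874" + OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
// -> http://om:9874/v2/dbCheckpoint
```

Note this also means a new Recon pointed at an OM without the v2 servlet will get a 404 — the v2-against-old-cluster case the xcompat checkpoint tests above probe.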