HDDS-13594. Use a different endpoint for fetching the OM checkpoint tarball. (#8963)

diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java
index 70b723b..fab011b 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/ozone/OzoneConsts.java
@@ -75,6 +75,8 @@ public final class OzoneConsts {
       "/serviceList";
   public static final String OZONE_DB_CHECKPOINT_HTTP_ENDPOINT =
       "/dbCheckpoint";
+  public static final String OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2 =
+      "/v2/dbCheckpoint";
 
   // Ozone File System scheme
   public static final String OZONE_URI_SCHEME = "o3fs";
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java
index 26c93cd..dae2df9 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/DBCheckpointServlet.java
@@ -20,6 +20,7 @@
 import static org.apache.hadoop.hdds.utils.HddsServerUtil.writeDBCheckpointToStream;
 import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_REQUEST_FLUSH;
 import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_REQUEST_TO_EXCLUDE_SST;
+import static org.apache.hadoop.ozone.OzoneConsts.ROCKSDB_SST_SUFFIX;
 
 import com.google.common.annotations.VisibleForTesting;
 import java.io.File;
@@ -277,7 +278,18 @@ public void processMetadataSnapshotRequest(HttpServletRequest request, HttpServl
     }
   }
 
-  protected static Set<String> extractSstFilesToExclude(String[] sstParam) {
+  protected static Set<String> extractSstFilesToExclude(String[] filesInExclusionParam) {
+    Set<String> sstFilesToExclude = new HashSet<>();
+    if (filesInExclusionParam != null) {
+      sstFilesToExclude.addAll(
+          Arrays.stream(filesInExclusionParam).filter(s -> s.endsWith(ROCKSDB_SST_SUFFIX))
+              .distinct().collect(Collectors.toList()));
+      logSstFileList(sstFilesToExclude, "Received list of {} SST files to be excluded{}: {}", 5);
+    }
+    return sstFilesToExclude;
+  }
+
+  protected static Set<String> extractFilesToExclude(String[] sstParam) {
     Set<String> receivedSstFiles = new HashSet<>();
     if (sstParam != null) {
       receivedSstFiles.addAll(
diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OMNodeDetails.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OMNodeDetails.java
index adcf07d..ff51cfa 100644
--- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OMNodeDetails.java
+++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/helpers/OMNodeDetails.java
@@ -17,7 +17,7 @@
 
 package org.apache.hadoop.ozone.om.helpers;
 
-import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
+import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
 import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_INCLUDE_SNAPSHOT_DATA;
 import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_REQUEST_FLUSH;
 import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_ADDRESS_KEY;
@@ -189,7 +189,7 @@ public URL getOMDBCheckpointEndpointUrl(boolean isHttp, boolean flush)
       URIBuilder urlBuilder = new URIBuilder().
           setScheme(isHttp ? "http" : "https").
           setHost(isHttp ? getHttpAddress() : getHttpsAddress()).
-          setPath(OZONE_DB_CHECKPOINT_HTTP_ENDPOINT).
+          setPath(OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2).
           addParameter(OZONE_DB_CHECKPOINT_INCLUDE_SNAPSHOT_DATA, "true").
           addParameter(OZONE_DB_CHECKPOINT_REQUEST_FLUSH,
               flush ? "true" : "false");
diff --git a/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh b/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh
index 5e53515..80bcbd2 100755
--- a/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh
+++ b/hadoop-ozone/dist/src/main/compose/xcompat/lib.sh
@@ -73,6 +73,15 @@
     compatibility/read.robot
 }
 
+_test_checkpoint_compatibility() {
+  _kinit
+  execute_robot_test ${container} -N "xcompat-cluster-${cluster_version}-client-${client_version}-checkpoint" \
+    -v CLIENT_VERSION:${client_version} \
+    -v CLUSTER_VERSION:${cluster_version} \
+    -v TEST_DATA_DIR:/testdata \
+    compatibility/checkpoint.robot
+}
+
 test_cross_compatibility() {
   echo "Starting ${cluster_version} cluster with COMPOSE_FILE=${COMPOSE_FILE}"
 
@@ -107,6 +116,35 @@
     done
   done
 
+  # Add checkpoint compatibility tests (only for clusters that support checkpoint endpoints)
+  # Skip checkpoint tests for very old clusters that don't have the endpoints
+  if [[ "$(printf '%s\n' "${cluster_version%%-*}" "2.0.0" | sort -V | head -n1)" != "2.0.0" ]]; then
+    echo "Skipping checkpoint compatibility tests for cluster ${cluster_version} (checkpoint endpoints not available)"
+  else
+    echo ""
+    echo "=========================================="
+    echo "Running checkpoint compatibility tests"
+    echo "=========================================="
+    
+    # Test 2.0.0 client (if available)
+    for client_version in "$@"; do
+      if [[ "${client_version}" == "2.0.0" ]]; then
+        echo "Testing 2.0.0 client against ${cluster_version} cluster"
+        client _test_checkpoint_compatibility
+        break  # Only test 2.0 once
+      fi
+    done
+    
+    # Test current client (if different from 2.0.0 and available)
+    for client_version in "$@"; do
+      if [[ "${client_version}" == "${current_version}" ]]; then
+        echo "Testing ${current_version} client against ${cluster_version} cluster"
+        client _test_checkpoint_compatibility
+        break  # Only test current version once
+      fi
+    done
+  fi
+
   KEEP_RUNNING=false stop_docker_env
 }
 
diff --git a/hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot b/hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot
new file mode 100644
index 0000000..e1776ef
--- /dev/null
+++ b/hadoop-ozone/dist/src/main/smoketest/compatibility/checkpoint.robot
@@ -0,0 +1,110 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+*** Settings ***
+Documentation       Checkpoint Compatibility
+Resource            ../ozone-lib/shell.robot
+Resource            setup.robot
+Test Timeout        5 minutes
+
+*** Variables ***
+${CHECKPOINT_V2_VERSION}    2.1.0
+${OM_HOST}                  om
+${OM_PORT}                  9874
+
+*** Keywords ***
+Download Checkpoint V1
+    [Documentation]    Download checkpoint using v1 endpoint (/dbCheckpoint)
+    [Arguments]        ${expected_result}
+    
+    Log                   Testing v1 checkpoint endpoint with authentication
+    
+    # Try different keytabs based on client version/container
+    ${download_file} =    Set Variable    /tmp/checkpoint_v1_${CLIENT_VERSION}.tar.gz
+    
+    # Debug: Check keytab availability first
+    ${keytab_check} =     Execute    ls -la /etc/security/keytabs/ 2>&1 | head -5 || echo "No keytabs directory"
+    Log                   Keytab directory: ${keytab_check}
+    
+    # Combine kinit and curl in a single command to preserve Kerberos session
+    ${combined_cmd} =     Set Variable    kinit -k -t /etc/security/keytabs/testuser.keytab testuser/scm@EXAMPLE.COM && curl -f --negotiate -u : --connect-timeout 10 --max-time 30 -o ${download_file} http://${OM_HOST}:${OM_PORT}/dbCheckpoint
+    
+    Log                   Executing: ${combined_cmd}
+    ${result} =           Execute and checkrc    ${combined_cmd}    ${expected_result}
+    
+    IF    ${expected_result} == 0
+        # If we expect success, verify the file was created and has content
+        ${file_check} =    Execute    ls -la ${download_file} 2>/dev/null || echo "File not found"
+        Should Not Contain    ${file_check}    File not found
+        Should Contain        ${file_check}    checkpoint_v1_${CLIENT_VERSION}.tar.gz
+        Log                   Successfully downloaded checkpoint via v1 endpoint: ${file_check}
+    ELSE
+        Log                   v1 endpoint failed as expected for this version combination
+    END
+
+Download Checkpoint V2
+    [Documentation]    Download checkpoint using v2 endpoint (/v2/dbCheckpoint)
+    [Arguments]        ${expected_result}
+    
+    Log                   Testing v2 checkpoint endpoint with authentication
+    
+    # Debug: Check keytab availability first (reuse from V1 if already checked)
+    ${keytab_check} =     Execute    ls -la /etc/security/keytabs/ 2>&1 | head -5 || echo "No keytabs directory"
+    Log                   Keytab directory: ${keytab_check}
+    
+    # Combine kinit and curl in a single command to preserve Kerberos session
+    ${download_file} =    Set Variable    /tmp/checkpoint_v2_${CLIENT_VERSION}.tar.gz
+    ${combined_cmd} =     Set Variable    kinit -k -t /etc/security/keytabs/testuser.keytab testuser/scm@EXAMPLE.COM && curl -f --negotiate -u : --connect-timeout 10 --max-time 30 -o ${download_file} http://${OM_HOST}:${OM_PORT}/v2/dbCheckpoint
+    
+    Log                   Executing: ${combined_cmd}
+    ${result} =           Execute and checkrc    ${combined_cmd}    ${expected_result}
+    
+    IF    ${expected_result} == 0
+        # If we expect success, verify the file was created and has content
+        ${file_check} =    Execute    ls -la ${download_file} 2>/dev/null || echo "File not found"
+        Should Not Contain    ${file_check}    File not found
+        Should Contain        ${file_check}    checkpoint_v2_${CLIENT_VERSION}.tar.gz
+        Log                   Successfully downloaded checkpoint via v2 endpoint: ${file_check}
+    ELSE
+        Log                   v2 endpoint failed as expected for this version combination
+    END
+
+*** Test Cases ***
+Checkpoint V1 Endpoint Compatibility
+    [Documentation]    Test v1 checkpoint endpoint (/dbCheckpoint) - should work for all versions (backward compatibility)
+    
+    Log    Testing v1 checkpoint endpoint: CLIENT=${CLIENT_VERSION}, CLUSTER=${CLUSTER_VERSION}
+    
+    # Both old and new clusters should serve v1 endpoint for backward compatibility
+    Download Checkpoint V1    0
+
+Checkpoint V2 Endpoint Compatibility
+    [Documentation]    Test v2 checkpoint endpoint (/v2/dbCheckpoint) - should only work with new cluster
+    
+    Log    Testing v2 checkpoint endpoint: CLIENT=${CLIENT_VERSION}, CLUSTER=${CLUSTER_VERSION}
+    
+    IF    tuple(map(int, '${CLUSTER_VERSION}'.split('-')[0].split('.'))) < tuple(map(int, '${CHECKPOINT_V2_VERSION}'.split('.')))
+        # Old cluster doesn't have v2 endpoint - should fail with any non-zero exit code
+        ${result} =    Run Keyword And Return Status    Download Checkpoint V2    0
+        IF    not ${result}
+            Log    v2 endpoint correctly failed on old cluster ${CLUSTER_VERSION} (expected failure)
+        ELSE
+            Fail    v2 endpoint unexpectedly succeeded on old cluster ${CLUSTER_VERSION}
+        END
+    ELSE
+        # New cluster has v2 endpoint - should succeed
+        Download Checkpoint V2    0
+        Log    v2 endpoint correctly succeeded on new cluster ${CLUSTER_VERSION}
+    END
diff --git a/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerHA.java b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerHA.java
index 790932f..254a9f3 100644
--- a/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerHA.java
+++ b/hadoop-ozone/integration-test-recon/src/test/java/org/apache/hadoop/ozone/recon/TestReconWithOzoneManagerHA.java
@@ -18,7 +18,7 @@
 package org.apache.hadoop.ozone.recon;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
-import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
+import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -119,7 +119,7 @@ public void testReconGetsSnapshotFromLeader() throws Exception {
     String expectedUrl = "http://" +
         (hostname.equals("0.0.0.0") ? "localhost" : hostname) + ":" +
         ozoneManager.get().getHttpServer().getHttpAddress().getPort() +
-        OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
+        OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
     String snapshotUrl = impl.getOzoneManagerSnapshotUrl();
     assertEquals(expectedUrl, snapshotUrl);
     // Write some data
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java
index 42a28e4..1acd959 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServletInodeBasedXfer.java
@@ -131,7 +131,7 @@ public void processMetadataSnapshotRequest(HttpServletRequest request, HttpServl
     String[] sstParam = isFormData ?
         parseFormDataParameters(request) : request.getParameterValues(
         OZONE_DB_CHECKPOINT_REQUEST_TO_EXCLUDE_SST);
-    Set<String> receivedSstFiles = extractSstFilesToExclude(sstParam);
+    Set<String> receivedSstFiles = extractFilesToExclude(sstParam);
     Path tmpdir = null;
     try (BootstrapStateHandler.Lock lock = getBootstrapStateLock().lock()) {
       tmpdir = Files.createTempDirectory(getBootstrapTempData().toPath(),
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerHttpServer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerHttpServer.java
index 8836324..9c2688d 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerHttpServer.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerHttpServer.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.ozone.om;
 
 import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
+import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
 import static org.apache.hadoop.ozone.OzoneConsts.OZONE_OM_SERVICE_LIST_HTTP_ENDPOINT;
 
 import java.io.IOException;
@@ -36,6 +37,8 @@ public OzoneManagerHttpServer(MutableConfigurationSource conf,
     addServlet("serviceList", OZONE_OM_SERVICE_LIST_HTTP_ENDPOINT,
         ServiceListJSONServlet.class);
     addServlet("dbCheckpoint", OZONE_DB_CHECKPOINT_HTTP_ENDPOINT,
+        OMDBCheckpointServlet.class);
+    addServlet("dbCheckpointv2", OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2,
         OMDBCheckpointServletInodeBasedXfer.class);
     getWebAppContext().setAttribute(OzoneConsts.OM_CONTEXT_ATTRIBUTE, om);
   }
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java
index a748250..cf3d039 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java
@@ -18,7 +18,7 @@
 package org.apache.hadoop.ozone.recon.spi.impl;
 
 import static org.apache.hadoop.hdds.recon.ReconConfigKeys.OZONE_RECON_DB_DIRS_PERMISSIONS_DEFAULT;
-import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
+import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
 import static org.apache.hadoop.ozone.OzoneConsts.OZONE_DB_CHECKPOINT_REQUEST_FLUSH;
 import static org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_HTTP_AUTH_TYPE;
 import static org.apache.hadoop.ozone.recon.ReconConstants.RECON_OM_SNAPSHOT_DB;
@@ -195,11 +195,11 @@ public OzoneManagerServiceProviderImpl(
     HttpConfig.Policy policy = HttpConfig.getHttpPolicy(configuration);
 
     omDBSnapshotUrl = "http://" + ozoneManagerHttpAddress +
-        OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
+        OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
 
     if (policy.isHttpsEnabled()) {
       omDBSnapshotUrl = "https://" + ozoneManagerHttpsAddress +
-          OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
+          OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
     }
 
     boolean flushParam = configuration.getBoolean(
@@ -394,7 +394,7 @@ public String getOzoneManagerSnapshotUrl() throws IOException {
           omLeaderUrl = (policy.isHttpsEnabled() ?
               "https://" + info.getServiceAddress(Type.HTTPS) :
               "http://" + info.getServiceAddress(Type.HTTP)) +
-              OZONE_DB_CHECKPOINT_HTTP_ENDPOINT;
+              OZONE_DB_CHECKPOINT_HTTP_ENDPOINT_V2;
         }
       }
     }