HDDS-5058. Make getScmInfo retry for a duration.
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
index d96eb50..8f47756 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
@@ -510,6 +510,11 @@
       "hdds.scm.ha.security.enable";
   public static final boolean OZONE_SCM_HA_SECURITY_SUPPORTED_DEFAULT = false;
 
+  public static final String OZONE_SCM_INFO_WAIT_DURATION =
+      "ozone.scm.info.wait.duration";
+  public static final long OZONE_SCM_INFO_WAIT_DURATION_DEFAULT =
+      10 * 60; // in seconds (10 minutes); read with TimeUnit.SECONDS
+
   /**
    * Never constructed.
    */
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index 98684b8..8a5ebb5 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -2751,4 +2751,13 @@
       filesystem semantics.
     </description>
   </property>
+
+  <property>
+    <name>ozone.scm.info.wait.duration</name>
+    <tag>OZONE, SCM, OM</tag>
+    <value>10m</value>
+    <description> Maximum duration that OM/SCM waits to get SCM info
+      during OzoneManager init/SCM bootstrap.
+    </description>
+  </property>
 </configuration>
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMClientConfig.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMClientConfig.java
index 99dc446..65acfae 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMClientConfig.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/proxy/SCMClientConfig.java
@@ -68,7 +68,7 @@
       tags = {OZONE, SCM, CLIENT},
       timeUnit = TimeUnit.MILLISECONDS,
       description = "SCM Client timeout on waiting for the next connection " +
-          "retry to other SCM IP. The default value is set to 2 minutes. "
+          "retry to other SCM IP. The default value is set to 2 seconds. "
   )
   private long retryInterval = 2 * 1000;
 
diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java
index db129f4..f9f88ef 100644
--- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java
+++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/HAUtils.java
@@ -33,6 +33,7 @@
 import org.apache.hadoop.hdds.scm.protocolPB.ScmBlockLocationProtocolClientSideTranslatorPB;
 import org.apache.hadoop.hdds.scm.protocolPB.StorageContainerLocationProtocolClientSideTranslatorPB;
 import org.apache.hadoop.hdds.scm.proxy.SCMBlockLocationFailoverProxyProvider;
+import org.apache.hadoop.hdds.scm.proxy.SCMClientConfig;
 import org.apache.hadoop.hdds.scm.proxy.SCMContainerLocationFailoverProxyProvider;
 import org.apache.hadoop.hdds.security.exception.SCMSecurityException;
 import org.apache.hadoop.hdds.security.x509.certificate.client.CertificateClient;
@@ -63,7 +64,10 @@
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.List;
+import java.util.concurrent.TimeUnit;
 
+import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_INFO_WAIT_DURATION;
+import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_INFO_WAIT_DURATION_DEFAULT;
 import static org.apache.hadoop.hdds.server.ServerUtils.getOzoneMetaDirPath;
 import static org.apache.hadoop.ozone.OzoneConsts.DB_TRANSIENT_MARKER;
 import static org.apache.hadoop.ozone.OzoneConsts.TRANSACTION_INFO_KEY;
@@ -79,8 +83,23 @@
 
   public static ScmInfo getScmInfo(OzoneConfiguration conf)
       throws IOException {
+    OzoneConfiguration configuration = new OzoneConfiguration(conf);
     try {
-      return getScmBlockClient(conf).getScmInfo();
+      long duration = conf.getTimeDuration(OZONE_SCM_INFO_WAIT_DURATION,
+          OZONE_SCM_INFO_WAIT_DURATION_DEFAULT, TimeUnit.SECONDS);
+      SCMClientConfig scmClientConfig =
+          configuration.getObject(SCMClientConfig.class);
+      int retryCount =
+          (int) (duration / (scmClientConfig.getRetryInterval()/1000));
+
+      // Only raise the retry count: if the wait duration yields fewer retries
+      // than already configured, keep the configured retry count.
+      if (retryCount > scmClientConfig.getRetryCount()) {
+        scmClientConfig.setRetryCount(retryCount);
+        configuration.setFromObject(scmClientConfig);
+      }
+
+      return getScmBlockClient(configuration).getScmInfo();
     } catch (IOException e) {
       throw e;
     } catch (Exception e) {
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
index f0fbd22..d56499b 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
@@ -403,7 +403,7 @@
     // For testing purpose only, not hit scm from om as Hadoop UGI can't login
     // two principals in the same JVM.
     if (!testSecureOmFlag) {
-      ScmInfo scmInfo = getScmInfo(configuration);
+      ScmInfo scmInfo = HAUtils.getScmInfo(configuration);
       if (!(scmInfo.getClusterId().equals(omStorage.getClusterID()) && scmInfo
           .getScmId().equals(omStorage.getScmId()))) {
         logVersionMismatch(conf, scmInfo);
@@ -930,7 +930,7 @@
     StorageState state = omStorage.getState();
     if (state != StorageState.INITIALIZED) {
       try {
-        ScmInfo scmInfo = getScmInfo(conf);
+        ScmInfo scmInfo = HAUtils.getScmInfo(conf);
         String clusterId = scmInfo.getClusterId();
         String scmId = scmInfo.getScmId();
         if (clusterId == null || clusterId.isEmpty()) {
@@ -1008,11 +1008,6 @@
     }
   }
 
-  private static ScmInfo getScmInfo(OzoneConfiguration conf)
-      throws IOException {
-    return HAUtils.getScmInfo(conf);
-  }
-
   /**
    * Builds a message for logging startup information about an RPC server.
    *