HDDS-5049. Add timeout support for ratis requests in SCM HA. (#2099)
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
index 6dd3856..d96eb50 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
@@ -467,6 +467,11 @@
public static final TimeDuration OZONE_SCM_RATIS_MINIMUM_TIMEOUT_DEFAULT
= TimeDuration.valueOf(1, TimeUnit.SECONDS);
+ public static final String OZONE_SCM_RATIS_REQUEST_TIMEOUT_KEY
+ = "ozone.scm.ratis.request.timeout";
+ public static final String OZONE_SCM_RATIS_REQUEST_TIMEOUT_DEFAULT
+ = "30s";
+
// SCM Ratis Leader Election configurations
public static final String
OZONE_SCM_LEADER_ELECTION_MINIMUM_TIMEOUT_DURATION_KEY =
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index b67b53d..98684b8 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -2068,6 +2068,14 @@
</property>
<property>
+ <name>ozone.scm.ratis.request.timeout</name>
+ <value>30s</value>
+ <tag>OZONE, SCM, HA, RATIS</tag>
+ <description>The maximum time to wait for a reply to a request submitted to SCM's Ratis server.
+ </description>
+ </property>
+
+ <property>
<name>ozone.scm.ratis.leader.election.minimum.timeout.duration</name>
<value>1s</value>
<tag>OZONE, SCM, HA, RATIS</tag>
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMRatisServer.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMRatisServer.java
index b351c86..66607a0 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMRatisServer.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMRatisServer.java
@@ -25,6 +25,7 @@
import java.io.IOException;
import java.util.List;
import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeoutException;
/**
* TODO.
@@ -36,7 +37,8 @@
void registerStateMachineHandler(RequestType handlerType, Object handler);
SCMRatisResponse submitRequest(SCMRatisRequest request)
- throws IOException, ExecutionException, InterruptedException;
+ throws IOException, ExecutionException, InterruptedException,
+ TimeoutException;
void stop() throws IOException;
diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMRatisServerImpl.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMRatisServerImpl.java
index 79da583..d9d6595 100644
--- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMRatisServerImpl.java
+++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMRatisServerImpl.java
@@ -23,6 +23,8 @@
import java.util.UUID;
import java.util.Iterator;
import java.util.concurrent.ExecutionException;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
@@ -32,6 +34,7 @@
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.proto.SCMRatisProtocol.RequestType;
import org.apache.hadoop.hdds.scm.AddSCMRequest;
+import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
import org.apache.hadoop.ozone.OzoneConsts;
import org.apache.hadoop.util.Time;
@@ -195,7 +198,8 @@
@Override
public SCMRatisResponse submitRequest(SCMRatisRequest request)
- throws IOException, ExecutionException, InterruptedException {
+ throws IOException, ExecutionException, InterruptedException,
+ TimeoutException {
final RaftClientRequest raftClientRequest = RaftClientRequest.newBuilder()
.setClientId(clientId)
.setServerId(getDivision().getId())
@@ -204,8 +208,14 @@
.setMessage(request.encode())
.setType(RaftClientRequest.writeRequestType())
.build();
+ // Wait for the Ratis reply for at most the configured request timeout.
+ final long requestTimeout = scm.getConfiguration()
+ .getTimeDuration(ScmConfigKeys.OZONE_SCM_RATIS_REQUEST_TIMEOUT_KEY,
+ ScmConfigKeys.OZONE_SCM_RATIS_REQUEST_TIMEOUT_DEFAULT,
+ TimeUnit.MILLISECONDS);
final RaftClientReply raftClientReply =
- server.submitClientRequestAsync(raftClientRequest).get();
+ server.submitClientRequestAsync(raftClientRequest)
+ .get(requestTimeout, TimeUnit.MILLISECONDS);
if (LOG.isDebugEnabled()) {
LOG.info("request {} Reply {}", raftClientRequest, raftClientReply);
}