RATIS-2074. Intermittent fork timeout in TestRaftWithNetty#testBasicAppendEntriesKillLeader. (#1080)
diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/NettyClient.java b/ratis-netty/src/main/java/org/apache/ratis/netty/NettyClient.java
index 0cf4bd3..efea5fd 100644
--- a/ratis-netty/src/main/java/org/apache/ratis/netty/NettyClient.java
+++ b/ratis-netty/src/main/java/org/apache/ratis/netty/NettyClient.java
@@ -33,13 +33,17 @@
import java.net.InetSocketAddress;
public class NettyClient implements Closeable {
- private final LifeCycle lifeCycle = new LifeCycle(JavaUtils.getClassSimpleName(getClass()));
-
+ private final LifeCycle lifeCycle;
+ private final String serverAddress;
private Channel channel;
+ NettyClient(String serverAddress) {
+ this.lifeCycle = new LifeCycle(JavaUtils.getClassSimpleName(getClass()) + "-" + serverAddress);
+ this.serverAddress = serverAddress;
+ }
+
/** Connects to the given server address. */
- public void connect(String serverAddress, EventLoopGroup group,
- ChannelInitializer<SocketChannel> initializer)
+ public void connect(EventLoopGroup group, ChannelInitializer<SocketChannel> initializer)
throws InterruptedException {
final InetSocketAddress address = NetUtils.createSocketAddr(serverAddress);
@@ -57,13 +61,16 @@
@Override
public void close() {
- lifeCycle.checkStateAndClose(() -> {
- channel.close().syncUninterruptibly();
- });
+ lifeCycle.checkStateAndClose(() -> NettyUtils.closeChannel(channel, serverAddress));
}
public ChannelFuture writeAndFlush(Object msg) {
lifeCycle.assertCurrentState(LifeCycle.States.RUNNING);
return channel.writeAndFlush(msg);
}
+
+ @Override
+ public String toString() {
+ return lifeCycle.toString();
+ }
}
diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/NettyRpcProxy.java b/ratis-netty/src/main/java/org/apache/ratis/netty/NettyRpcProxy.java
index b7a04b0..b9788a8 100644
--- a/ratis-netty/src/main/java/org/apache/ratis/netty/NettyRpcProxy.java
+++ b/ratis-netty/src/main/java/org/apache/ratis/netty/NettyRpcProxy.java
@@ -99,7 +99,7 @@
class Connection implements Closeable {
- private final NettyClient client = new NettyClient();
+ private final NettyClient client = new NettyClient(peer.getAddress());
private final Queue<CompletableFuture<RaftNettyServerReplyProto>> replies
= new LinkedList<>();
@@ -137,7 +137,7 @@
}
};
- client.connect(peer.getAddress(), group, initializer);
+ client.connect(group, initializer);
}
synchronized ChannelFuture offer(RaftNettyServerRequestProto request,
diff --git a/ratis-netty/src/main/java/org/apache/ratis/netty/NettyUtils.java b/ratis-netty/src/main/java/org/apache/ratis/netty/NettyUtils.java
index 8cce291..37666bf 100644
--- a/ratis-netty/src/main/java/org/apache/ratis/netty/NettyUtils.java
+++ b/ratis-netty/src/main/java/org/apache/ratis/netty/NettyUtils.java
@@ -22,6 +22,8 @@
import org.apache.ratis.security.TlsConf.KeyManagerConf;
import org.apache.ratis.security.TlsConf.PrivateKeyConf;
import org.apache.ratis.security.TlsConf.TrustManagerConf;
+import org.apache.ratis.thirdparty.io.netty.channel.Channel;
+import org.apache.ratis.thirdparty.io.netty.channel.ChannelFuture;
import org.apache.ratis.thirdparty.io.netty.channel.EventLoopGroup;
import org.apache.ratis.thirdparty.io.netty.channel.ServerChannel;
import org.apache.ratis.thirdparty.io.netty.channel.epoll.Epoll;
@@ -35,16 +37,19 @@
import org.apache.ratis.thirdparty.io.netty.handler.ssl.SslContext;
import org.apache.ratis.thirdparty.io.netty.handler.ssl.SslContextBuilder;
import org.apache.ratis.util.ConcurrentUtils;
+import org.apache.ratis.util.TimeDuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.net.ssl.KeyManager;
import javax.net.ssl.TrustManager;
+import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.Function;
public interface NettyUtils {
Logger LOG = LoggerFactory.getLogger(NettyUtils.class);
+ TimeDuration CLOSE_TIMEOUT = TimeDuration.valueOf(5, TimeUnit.SECONDS);
class Print {
private static final AtomicBoolean PRINTED_EPOLL_UNAVAILABILITY_CAUSE = new AtomicBoolean();
@@ -176,4 +181,19 @@
return eventLoopGroup instanceof EpollEventLoopGroup ?
EpollServerSocketChannel.class : NioServerSocketChannel.class;
}
+
+ static void closeChannel(Channel channel, String name) {
+ final ChannelFuture f = channel.close();
+ final boolean completed;
+ try {
+ completed = f.await(CLOSE_TIMEOUT.getDuration(), CLOSE_TIMEOUT.getUnit());
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ LOG.info("Interrupted closeChannel {} ", name, e);
+ return;
+ }
+ if (!completed) {
+ LOG.warn("closeChannel {} is not yet completed in {}", name, CLOSE_TIMEOUT);
+ }
+ }
}
\ No newline at end of file
diff --git a/ratis-server/src/test/java/org/apache/ratis/RaftBasicTests.java b/ratis-server/src/test/java/org/apache/ratis/RaftBasicTests.java
index 13ee08c..9f360cb 100644
--- a/ratis-server/src/test/java/org/apache/ratis/RaftBasicTests.java
+++ b/ratis-server/src/test/java/org/apache/ratis/RaftBasicTests.java
@@ -45,6 +45,7 @@
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Assumptions;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.Timeout;
import org.slf4j.Logger;
import org.slf4j.event.Level;
@@ -338,6 +339,7 @@
}
@Test
+ @Timeout(value = 300)
public void testWithLoad() throws Exception {
runWithNewCluster(NUM_SERVERS, cluster -> testWithLoad(10, 300, false, cluster, LOG));
}