blob: 2e1901830e9f4946d69a26b07e938420c778c9d1 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.ignite.internal.processors.cache;
import java.util.concurrent.CountDownLatch;
import org.apache.ignite.Ignite;
import org.apache.ignite.IgniteCheckedException;
import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.configuration.IgniteConfiguration;
import org.apache.ignite.failure.StopNodeOrHaltFailureHandler;
import org.apache.ignite.internal.IgniteEx;
import org.apache.ignite.internal.IgniteInternalFuture;
import org.apache.ignite.internal.TestRecordingCommunicationSpi;
import org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsSingleMessage;
import org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionsSingleRequest;
import org.apache.ignite.lang.IgniteBiPredicate;
import org.apache.ignite.plugin.extensions.communication.Message;
import org.apache.ignite.spi.discovery.tcp.TcpDiscoverySpi;
import org.apache.ignite.spi.discovery.tcp.ipfinder.vm.TcpDiscoveryVmIpFinder;
import org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryAbstractMessage;
import org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryNodeFailedMessage;
import org.apache.ignite.spi.discovery.tcp.messages.TcpDiscoveryNodeLeftMessage;
import org.apache.ignite.testframework.GridTestUtils;
import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
import org.junit.After;
import org.junit.Test;
/**
 * Covers race with client join and instant successive coordinator change.
 * <p>
 * Both scenarios start three server nodes, then start a client and a fourth server whose
 * {@link GridDhtPartitionsSingleMessage}s addressed to the old coordinator are blocked, stop grid 0
 * and check that the joining nodes are still able to finish their start.
 */
public class ClientFastReplyCoordinatorFailureTest extends GridCommonAbstractTest {
    /** */
    private static final TcpDiscoveryVmIpFinder IP_FINDER = new TcpDiscoveryVmIpFinder(true);

    /** Least significant bits of old coordinator's node ID. */
    public static final int OLD_CRD_BITS = 0xFFFF;

    /** Maximum time, in milliseconds, the client is given to finish its start after the old coordinator is stopped. */
    private static final long CLIENT_START_TIMEOUT = 10_000;

    /** Latch that will be triggered after blocking message from client to old coordinator. */
    private final CountDownLatch clientSingleMsgLatch = new CountDownLatch(1);

    /** Latch that will be triggered after blocking message from new server to old coordinator. */
    private final CountDownLatch newSrvSingleMsgLatch = new CountDownLatch(1);

    /**
     * Released roughly one second after a server sends {@link GridDhtPartitionsSingleRequest} to a client node.
     * {@link TestDiscoverySpi} waits on it before processing node left / node failed discovery messages.
     * The latch is static (it is shared with the static nested SPI class) and is never reset,
     * so it can serve only one test per JVM.
     */
    private static final CountDownLatch PART_SINGLE_REQ_MSG_LATCH = new CountDownLatch(1);

    /** When {@code true}, the client uses {@link TestDiscoverySpi} and servers release {@link #PART_SINGLE_REQ_MSG_LATCH}. */
    private boolean delayNodeFailedMsg;

    /** {@inheritDoc} */
    @Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception {
        IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);

        cfg.setFailureHandler(new StopNodeOrHaltFailureHandler());

        cfg.setDiscoverySpi(new TcpDiscoverySpi().setIpFinder(IP_FINDER));

        TestRecordingCommunicationSpi commSpi = new TestRecordingCommunicationSpi();

        // Block messages to old coordinator right before killing it.
        if (igniteInstanceName.contains("client")) {
            blockSingleMessagesToOldCoordinator(commSpi, clientSingleMsgLatch);

            if (delayNodeFailedMsg) {
                // Replace the discovery SPI configured above with the one that postpones node left/failed events.
                TcpDiscoverySpi spi = new TestDiscoverySpi();

                spi.setIpFinder(IP_FINDER);

                cfg.setDiscoverySpi(spi);
            }
        }
        else if (getTestIgniteInstanceName(3).equals(igniteInstanceName))
            blockSingleMessagesToOldCoordinator(commSpi, newSrvSingleMsgLatch);
        else if (delayNodeFailedMsg) {
            commSpi.blockMessages((node, msg) -> {
                if (msg instanceof GridDhtPartitionsSingleRequest && node.isClient()) {
                    // Let the request travel to the client first, then allow the client to process discovery events.
                    GridTestUtils.runAsync(() -> {
                        try {
                            Thread.sleep(1_000);
                        }
                        catch (InterruptedException ignored) {
                            // Keep the interruption visible to the executing thread, but still release the latch.
                            Thread.currentThread().interrupt();
                        }

                        PART_SINGLE_REQ_MSG_LATCH.countDown();
                    });
                }

                // The predicate is used only as a hook: the message itself is never blocked.
                return false;
            });
        }

        cfg.setCommunicationSpi(commSpi);

        return cfg;
    }

    /**
     * Makes the given communication SPI block every {@link GridDhtPartitionsSingleMessage} addressed to the old
     * coordinator and counts the latch down each time such a message is blocked.
     *
     * @param commSpi Communication SPI of the node being configured.
     * @param latch Latch to count down once a message gets blocked.
     */
    private void blockSingleMessagesToOldCoordinator(TestRecordingCommunicationSpi commSpi, CountDownLatch latch) {
        commSpi.blockMessages((node, msg) -> {
            if (msg instanceof GridDhtPartitionsSingleMessage && isOldCoordinator(node)) {
                info("Going to block message [node=" + node + ", msg=" + msg + ']');

                latch.countDown();

                return true;
            }

            return false;
        });
    }

    /**
     * @param node Destination node of a message.
     * @return {@code True} if all bits selected by {@link #OLD_CRD_BITS} are cleared in the least significant
     *      half of the node ID, which this test treats as the marker of the old coordinator.
     */
    private static boolean isOldCoordinator(ClusterNode node) {
        return (node.id().getLeastSignificantBits() & OLD_CRD_BITS) == 0L;
    }

    /**
     * Cleanup after test.
     */
    @After
    public void cleanUp() {
        stopAllGrids();
    }

    /**
     * Checks that new coordinator will respond to client single partitions message.
     *
     * @throws Exception if failed.
     */
    @Test
    public void testClientFastReply() throws Exception {
        startGrids(3);

        awaitPartitionMapExchange();

        // Client join will be hanging on local join exchange.
        IgniteInternalFuture<Ignite> startFut = GridTestUtils.runAsync(() -> startClientGrid("client-1"));

        clientSingleMsgLatch.await();

        // Server start will be blocked.
        IgniteInternalFuture<IgniteEx> srvStartFut = GridTestUtils.runAsync(() -> startGrid(3));

        newSrvSingleMsgLatch.await();

        stopGrid(0);

        srvStartFut.get();

        startFut.get();
    }

    /**
     * Reproduces scenario of race between GridDhtPartitionsSingleRequest arriving at client and updating coordinator
     * and onNodeLeft event for the same coordinator change event
     * that should trigger resending SingleMessage from client to new coordinator node.
     * <p>
     * The test fails if the client does not finish its start within {@link #CLIENT_START_TIMEOUT} milliseconds
     * or if the start completes with an error.
     *
     * @throws Exception If failed.
     */
    @Test
    public void testClientRepeatedReply() throws Exception {
        delayNodeFailedMsg = true;

        startGrids(3);

        awaitPartitionMapExchange();

        // Server start will be blocked.
        GridTestUtils.runAsync(() -> startGrid(3));

        newSrvSingleMsgLatch.await();

        // Client join will be hanging on local join exchange.
        IgniteInternalFuture<Ignite> startFut =
            GridTestUtils.runAsync(() -> startClientGrid("client-1"));

        clientSingleMsgLatch.await();

        stopGrid(0);

        try {
            // Bounded wait: a timeout or a failed start must fail the test instead of being silently ignored.
            startFut.get(CLIENT_START_TIMEOUT);
        }
        catch (IgniteCheckedException e) {
            error("Failed when waiting for client start future to complete", e);

            throw e;
        }
    }

    /**
     * Used on client node, allows to postpone processing NODE_FAILED event
     * and give GridDhtPartitionsSingleRequest to arrive earlier than discovery event is processed.
     */
    public static class TestDiscoverySpi extends TcpDiscoverySpi {
        /** {@inheritDoc} */
        @Override protected void startMessageProcess(TcpDiscoveryAbstractMessage msg) {
            if (msg instanceof TcpDiscoveryNodeLeftMessage || msg instanceof TcpDiscoveryNodeFailedMessage) {
                try {
                    PART_SINGLE_REQ_MSG_LATCH.await();
                }
                catch (InterruptedException ignored) {
                    // Stop waiting, but preserve the interruption status for the calling thread.
                    Thread.currentThread().interrupt();
                }
            }

            super.startMessageProcess(msg);
        }
    }
}