blob: ceaa17b0449793f947910272cbfe06d0aaf32dd5 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.ignite.internal.processors.cache.distributed;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.ignite.IgniteCache;
import org.apache.ignite.IgniteDataStreamer;
import org.apache.ignite.cache.PartitionLossPolicy;
import org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction;
import org.apache.ignite.cluster.ClusterNode;
import org.apache.ignite.configuration.CacheConfiguration;
import org.apache.ignite.configuration.DataRegionConfiguration;
import org.apache.ignite.configuration.DataStorageConfiguration;
import org.apache.ignite.configuration.IgniteConfiguration;
import org.apache.ignite.configuration.WALMode;
import org.apache.ignite.internal.IgniteEx;
import org.apache.ignite.internal.TestRecordingCommunicationSpi;
import org.apache.ignite.internal.processors.cache.PartitionUpdateCounter;
import org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionDemandMessage;
import org.apache.ignite.internal.processors.cache.distributed.dht.preloader.GridDhtPartitionSupplyMessage;
import org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtLocalPartition;
import org.apache.ignite.internal.processors.cache.distributed.dht.topology.GridDhtPartitionState;
import org.apache.ignite.internal.util.typedef.internal.CU;
import org.apache.ignite.lang.IgniteBiPredicate;
import org.apache.ignite.plugin.extensions.communication.Message;
import org.apache.ignite.testframework.GridTestUtils;
import org.apache.ignite.testframework.junits.WithSystemProperty;
import org.apache.ignite.testframework.junits.common.GridCommonAbstractTest;
import org.junit.Ignore;
import org.junit.Test;
import static org.apache.ignite.IgniteSystemProperties.IGNITE_PREFER_WAL_REBALANCE;
import static org.apache.ignite.cache.CacheAtomicityMode.TRANSACTIONAL;
import static org.apache.ignite.cache.CacheMode.PARTITIONED;
/**
* Test scenario: last supplier has left while a partition on demander is cleared before sending first demand request.
*/
@WithSystemProperty(key = IGNITE_PREFER_WAL_REBALANCE, value = "true")
public class CachePartitionLostAfterSupplierHasLeftTest extends GridCommonAbstractTest {
/** */
private static final int PARTS_CNT = 64;
/** */
private PartitionLossPolicy lossPlc;
/** */
private boolean persistence;
/** {@inheritDoc} */
@Override protected IgniteConfiguration getConfiguration(String igniteInstanceName) throws Exception {
IgniteConfiguration cfg = super.getConfiguration(igniteInstanceName);
cfg.setCommunicationSpi(new TestRecordingCommunicationSpi());
cfg.setActiveOnStart(false);
cfg.setConsistentId(igniteInstanceName);
cfg.setDataStorageConfiguration(
new DataStorageConfiguration()
.setWalMode(WALMode.LOG_ONLY)
.setWalSegmentSize(4 * 1024 * 1024)
.setDefaultDataRegionConfiguration(
new DataRegionConfiguration()
.setPersistenceEnabled(persistence)
.setMaxSize(100L * 1024 * 1024))
);
cfg.setCacheConfiguration(new CacheConfiguration(DEFAULT_CACHE_NAME).
setAtomicityMode(TRANSACTIONAL).
setCacheMode(PARTITIONED).
setBackups(1).
setPartitionLossPolicy(lossPlc).
setAffinity(new RendezvousAffinityFunction(false, PARTS_CNT)));
return cfg;
}
/**
* {@inheritDoc}
*/
@Override protected void beforeTest() throws Exception {
super.beforeTest();
cleanPersistenceDir();
}
/**
* {@inheritDoc}
*/
@Override protected void afterTest() throws Exception {
super.afterTest();
stopAllGrids();
cleanPersistenceDir();
}
/**
* @throws Exception If failed.
*/
@Test
public void testPartitionLostWhileClearing_FailOnCrd() throws Exception {
lossPlc = PartitionLossPolicy.READ_WRITE_SAFE;
persistence = true;
doTestPartitionLostWhileClearing(2, 0);
}
/**
* @throws Exception If failed.
*/
@Test
public void testPartitionLostWhileClearing_FailOnCrd_Volatile() throws Exception {
lossPlc = PartitionLossPolicy.READ_WRITE_SAFE;
persistence = false;
doTestPartitionLostWhileClearing(2, 0);
}
/**
* @throws Exception If failed.
*/
@Test
@WithSystemProperty(key = IGNITE_PREFER_WAL_REBALANCE, value = "true")
public void testPartitionLostWhileClearing_FailOnFullMessage() throws Exception {
lossPlc = PartitionLossPolicy.READ_WRITE_SAFE;
persistence = true;
doTestPartitionLostWhileClearing(3, 0);
}
/**
* @throws Exception If failed.
*/
@Test
public void testPartitionLostWhileClearing_FailOnFullMessage_Volatile() throws Exception {
lossPlc = PartitionLossPolicy.READ_WRITE_SAFE;
persistence = false;
doTestPartitionLostWhileClearing(3, 0);
}
/**
* @throws Exception If failed.
*/
@Test
public void testPartitionLostWhileClearing_FailOnCrd_Unsafe() throws Exception {
lossPlc = PartitionLossPolicy.IGNORE; // READ_WRITE_SAFE is used instead of IGNORE.
persistence = true;
doTestPartitionLostWhileClearing(2, 0);
}
/**
* @throws Exception If failed.
*/
@Test
public void testPartitionLostWhileClearing_FailOnCrd_Unsafe_Volatile() throws Exception {
lossPlc = PartitionLossPolicy.IGNORE; // READ_WRITE_SAFE is used instead of IGNORE.
persistence = false;
doTestPartitionLostWhileClearing(2, 0);
}
/**
* @throws Exception If failed.
*/
@Test
public void testPartitionLostWhileClearing_FailOnFullMessage_Unsafe() throws Exception {
lossPlc = PartitionLossPolicy.IGNORE; // READ_WRITE_SAFE is used instead of IGNORE.
persistence = true;
doTestPartitionLostWhileClearing(3, 0);
}
/**
* @throws Exception If failed.
*/
@Test
public void testPartitionLostWhileClearing_FailOnFullMessage_Unsafe_Volatile() throws Exception {
lossPlc = PartitionLossPolicy.IGNORE; // READ_WRITE_SAFE is used instead of IGNORE.
persistence = false;
doTestPartitionLostWhileClearing(3, 0);
}
/**
* @throws Exception If failed.
*/
@Test
public void testPartitionLostWhileClearing_Restart() throws Exception {
lossPlc = PartitionLossPolicy.READ_WRITE_SAFE;
persistence = true;
doTestPartitionLostWhileClearing(2, 1);
}
/**
* @throws Exception If failed.
*/
@Test
@Ignore("https://issues.apache.org/jira/browse/IGNITE-13054")
public void testPartitionLostWhileClearing_Restart_2() throws Exception {
lossPlc = PartitionLossPolicy.READ_WRITE_SAFE;
persistence = true;
doTestPartitionLostWhileClearing(2, 2);
}
/**
*
*/
@Test
public void testPartitionConsistencyOnSupplierRestart() throws Exception {
lossPlc = PartitionLossPolicy.READ_ONLY_SAFE;
persistence = true;
int entryCnt = PARTS_CNT * 200;
IgniteEx crd = (IgniteEx)startGridsMultiThreaded(2);
crd.cluster().active(true);
IgniteCache<Integer, String> cache0 = crd.cache(DEFAULT_CACHE_NAME);
for (int i = 0; i < entryCnt / 2; i++)
cache0.put(i, String.valueOf(i));
forceCheckpoint();
stopGrid(1);
for (int i = entryCnt / 2; i < entryCnt; i++)
cache0.put(i, String.valueOf(i));
final IgniteConfiguration cfg = getConfiguration(getTestIgniteInstanceName(1));
final TestRecordingCommunicationSpi spi1 = (TestRecordingCommunicationSpi)cfg.getCommunicationSpi();
spi1.blockMessages(new IgniteBiPredicate<ClusterNode, Message>() {
@Override public boolean apply(ClusterNode node, Message msg) {
if (msg instanceof GridDhtPartitionDemandMessage) {
GridDhtPartitionDemandMessage msg0 = (GridDhtPartitionDemandMessage)msg;
return msg0.groupId() == CU.cacheId(DEFAULT_CACHE_NAME);
}
return false;
}
});
startGrid(cfg);
spi1.waitForBlocked(1);
// Will cause a cancellation of rebalancing because a supplier has left.
stopGrid(0);
awaitPartitionMapExchange();
final Collection<Integer> lostParts = grid(1).cache(DEFAULT_CACHE_NAME).lostPartitions();
assertEquals(PARTS_CNT, lostParts.size());
final IgniteEx g0 = startGrid(0);
final Collection<Integer> lostParts2 = g0.cache(DEFAULT_CACHE_NAME).lostPartitions();
assertEquals(PARTS_CNT, lostParts2.size());
spi1.stopBlock();
g0.resetLostPartitions(Collections.singletonList(DEFAULT_CACHE_NAME));
awaitPartitionMapExchange();
assertPartitionsSame(idleVerify(grid(0), DEFAULT_CACHE_NAME));
}
/**
* Test scenario: last supplier has left while a partition on demander is cleared before sending first demand request.
*
* @param cnt Nodes count.
* @param mode Test mode: 0 - reset while clearing, 1 - restart while clearing and activate all, 2 - restart while
* clearing and activate in wrong order.
* @throws Exception If failed.
*/
private void doTestPartitionLostWhileClearing(int cnt, int mode) throws Exception {
IgniteEx crd = startGrids(cnt);
crd.cluster().baselineAutoAdjustEnabled(false);
crd.cluster().active(true);
int partId = -1;
int idx0 = 0;
int idx1 = 1;
for (int p = 0; p < PARTS_CNT; p++) {
List<ClusterNode> nodes = new ArrayList<>(crd.affinity(DEFAULT_CACHE_NAME).mapPartitionToPrimaryAndBackups(p));
if (grid(nodes.get(0)) == grid(idx0) && grid(nodes.get(1)) == grid(idx1)) {
partId = p;
break;
}
}
assertTrue(partId >= 0);
final int keysCnt = 10_010;
List<Integer> keys = partitionKeys(grid(idx0).cache(DEFAULT_CACHE_NAME), partId, keysCnt, 0);
load(grid(idx0), DEFAULT_CACHE_NAME, keys.subList(0, keysCnt - 10));
stopGrid(idx1);
load(grid(idx0), DEFAULT_CACHE_NAME, keys.subList(keysCnt - 10, keysCnt));
TestRecordingCommunicationSpi.spi(grid(0)).blockMessages(new IgniteBiPredicate<ClusterNode, Message>() {
@Override public boolean apply(ClusterNode clusterNode, Message msg) {
if (msg instanceof GridDhtPartitionSupplyMessage) {
GridDhtPartitionSupplyMessage msg0 = (GridDhtPartitionSupplyMessage) msg;
return msg0.groupId() == CU.cacheId(DEFAULT_CACHE_NAME);
}
return false;
}
});
IgniteConfiguration cfg = getConfiguration(getTestIgniteInstanceName(idx1));
((TestRecordingCommunicationSpi)cfg.getCommunicationSpi()).blockMessages((node, msg) -> {
if (msg instanceof GridDhtPartitionDemandMessage) {
GridDhtPartitionDemandMessage demandMsg = (GridDhtPartitionDemandMessage)msg;
return CU.cacheId(DEFAULT_CACHE_NAME) == demandMsg.groupId();
}
return false;
});
IgniteEx g1 = startGrid(optimize(cfg));
stopGrid(idx0); // Stop supplier in the middle of rebalancing.
TestRecordingCommunicationSpi.spi(g1).stopBlock();
final GridDhtLocalPartition part = g1.cachex(DEFAULT_CACHE_NAME).context().topology().localPartition(partId);
assertTrue("Unexpected partition state [p=" + partId +
", expected=" + GridDhtPartitionState.LOST +
", actual=" + part.state() + ']',
GridTestUtils.waitForCondition(() -> part.state() == GridDhtPartitionState.LOST, 30_000));
assertTrue(g1.cachex(DEFAULT_CACHE_NAME).lostPartitions().contains(partId));
if (mode != 0) {
stopAllGrids();
if (mode == 1) {
crd = startGrids(cnt);
crd.cluster().active(true);
}
else if (mode == 2) {
crd = startGrid(idx1);
crd.cluster().active(true);
startGrid(idx0);
}
else
fail("Mode: " + mode);
awaitPartitionMapExchange();
assertPartitionsSame(idleVerify(crd, DEFAULT_CACHE_NAME));
}
else {
// Will own a clearing partition.
g1.resetLostPartitions(Collections.singletonList(DEFAULT_CACHE_NAME));
awaitPartitionMapExchange();
// Expecting partition in OWNING state.
final PartitionUpdateCounter cntr = counter(partId, DEFAULT_CACHE_NAME, g1.name());
assertNotNull(cntr);
// Counter must be reset.
assertEquals(0, cntr.get());
// Puts done concurrently with clearing after reset should not be lost.
g1.cache(DEFAULT_CACHE_NAME).putAll(keys.stream().collect(Collectors.toMap(k -> k, v -> -1)));
GridTestUtils.waitForCondition(() -> g1.context().cache().context().evict().total() == 0, 30_000);
for (Integer key : keys)
assertEquals("key=" + key.toString(), -1, g1.cache(DEFAULT_CACHE_NAME).get(key));
}
}
/**
* @param ignite Ignite.
* @param cache Cache.
* @param keys Keys.
*/
private void load(IgniteEx ignite, String cache, List<Integer> keys) {
try (IgniteDataStreamer<Object, Object> s = ignite.dataStreamer(cache)) {
for (Integer key : keys)
s.addData(key, key);
}
}
}