blob: 11a30e5762e9b0d00308de103d51a55ad68d60f7 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cassandra.distributed.test.hostreplacement;
import java.io.IOException;
import java.time.Duration;
import java.util.Arrays;
import java.util.List;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.cassandra.distributed.Cluster;
import org.apache.cassandra.distributed.api.ConsistencyLevel;
import org.apache.cassandra.distributed.api.Feature;
import org.apache.cassandra.distributed.api.IInvokableInstance;
import org.apache.cassandra.distributed.api.SimpleQueryResult;
import org.apache.cassandra.distributed.api.TokenSupplier;
import org.apache.cassandra.distributed.test.TestBaseImpl;
import static org.apache.cassandra.config.CassandraRelevantProperties.BOOTSTRAP_SKIP_SCHEMA_CHECK;
import static org.apache.cassandra.distributed.shared.ClusterUtils.assertRingIs;
import static org.apache.cassandra.distributed.shared.ClusterUtils.assertRingState;
import static org.apache.cassandra.distributed.shared.ClusterUtils.awaitRingHealthy;
import static org.apache.cassandra.distributed.shared.ClusterUtils.awaitRingJoin;
import static org.apache.cassandra.distributed.shared.ClusterUtils.replaceHostAndStart;
import static org.apache.cassandra.distributed.shared.ClusterUtils.stopAbrupt;
import static org.apache.cassandra.distributed.test.hostreplacement.HostReplacementTest.setupCluster;
import static org.apache.cassandra.distributed.test.hostreplacement.HostReplacementTest.validateRows;
public class HostReplacementAbruptDownedInstanceTest extends TestBaseImpl
{
private static final Logger logger = LoggerFactory.getLogger(HostReplacementAbruptDownedInstanceTest.class);
/**
* Can we maybe also test with an abrupt shutdown, that is when the shutdown state is not broadcast and the node to be replaced is on NORMAL state?
*/
@Test
public void hostReplaceAbruptShutdown() throws IOException
{
int numStartNodes = 3;
TokenSupplier even = TokenSupplier.evenlyDistributedTokens(numStartNodes);
try (Cluster cluster = Cluster.build(numStartNodes)
.withConfig(c -> c.with(Feature.GOSSIP, Feature.NETWORK))
.withTokenSupplier(node -> even.token(node == (numStartNodes + 1) ? 2 : node))
.start())
{
IInvokableInstance seed = cluster.get(1);
IInvokableInstance nodeToRemove = cluster.get(2);
IInvokableInstance peer = cluster.get(3);
List<IInvokableInstance> peers = Arrays.asList(seed, peer);
setupCluster(cluster);
// collect rows/tokens to detect issues later on if the state doesn't match
SimpleQueryResult expectedState = nodeToRemove.coordinator().executeWithResult("SELECT * FROM " + KEYSPACE + ".tbl", ConsistencyLevel.ALL);
stopAbrupt(cluster, nodeToRemove);
// at this point node 2 should still be NORMAL on all other nodes
peers.forEach(p -> assertRingState(p, nodeToRemove, "Normal"));
// node is down, but queries should still work
//TODO failing, but shouldn't!
// peers.forEach(p -> validateRows(p.coordinator(), expectedState));
// now create a new node to replace the other node
long startNanos = System.nanoTime();
IInvokableInstance replacingNode = replaceHostAndStart(cluster, nodeToRemove, properties -> {
// since node2 was killed abruptly its possible that node2's gossip state has an old schema version
// if this happens then bootstrap will fail waiting for a schema version it will never see; to avoid
// this, setting this property to log the warning rather than fail bootstrap
properties.set(BOOTSTRAP_SKIP_SCHEMA_CHECK, true);
});
logger.info("Host replacement of {} with {} took {}", nodeToRemove, replacingNode, Duration.ofNanos(System.nanoTime() - startNanos));
peers.forEach(p -> awaitRingJoin(p, replacingNode));
// make sure all nodes are healthy
awaitRingHealthy(seed);
List<IInvokableInstance> expectedRing = Arrays.asList(seed, peer, replacingNode);
expectedRing.forEach(p -> assertRingIs(p, expectedRing));
expectedRing.forEach(p -> validateRows(p.coordinator(), expectedState));
}
}
}