blob: 3207878f9a4847ca606d6e51a23a36d6f3063f35 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*
*/
package org.apache.hadoop.hdds.scm.pipeline;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import org.apache.hadoop.hdds.scm.container.ContainerManager;
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.apache.hadoop.ozone.OzoneConfigKeys;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import java.io.IOException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos
.ReplicationFactor.THREE;
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos
.ReplicationType.RATIS;
/**
* Test Node failure detection and handling in Ratis.
*/
public class TestNodeFailure {
private static MiniOzoneCluster cluster;
private static OzoneConfiguration conf;
private static Pipeline ratisPipelineOne;
private static Pipeline ratisPipelineTwo;
private static ContainerManager containerManager;
private static PipelineManager pipelineManager;
private static long timeForFailure;
/**
* Create a MiniDFSCluster for testing.
*
* @throws IOException
*/
@BeforeClass
public static void init() throws Exception {
conf = new OzoneConfiguration();
conf.setTimeDuration(OzoneConfigKeys.DFS_RATIS_SERVER_FAILURE_DURATION_KEY,
10, TimeUnit.SECONDS);
conf.setTimeDuration(
ScmConfigKeys.OZONE_SCM_CONTAINER_CREATION_LEASE_TIMEOUT,
10, TimeUnit.SECONDS);
cluster = MiniOzoneCluster.newBuilder(conf)
.setNumDatanodes(6)
.setHbInterval(1000)
.setHbProcessorInterval(1000)
.build();
cluster.waitForClusterToBeReady();
StorageContainerManager scm = cluster.getStorageContainerManager();
containerManager = scm.getContainerManager();
pipelineManager = scm.getPipelineManager();
ratisPipelineOne = pipelineManager.getPipeline(
containerManager.allocateContainer(
RATIS, THREE, "testOwner").getPipelineID());
ratisPipelineTwo = pipelineManager.getPipeline(
containerManager.allocateContainer(
RATIS, THREE, "testOwner").getPipelineID());
// At this stage, there should be 2 pipeline one with 1 open container each.
// Try closing the both the pipelines, one with a closed container and
// the other with an open container.
timeForFailure = conf.getTimeDuration(
OzoneConfigKeys.DFS_RATIS_SERVER_FAILURE_DURATION_KEY,
OzoneConfigKeys.DFS_RATIS_SERVER_FAILURE_DURATION_DEFAULT
.getDuration(), TimeUnit.MILLISECONDS);
}
/**
* Shutdown MiniDFSCluster.
*/
@AfterClass
public static void shutdown() {
if (cluster != null) {
cluster.shutdown();
}
}
@Ignore
// Enable this after we implement teardown pipeline logic once a datanode
// dies.
@Test(timeout = 300_000L)
public void testPipelineFail() throws InterruptedException, IOException,
TimeoutException {
Assert.assertEquals(ratisPipelineOne.getPipelineState(),
Pipeline.PipelineState.OPEN);
Pipeline pipelineToFail = ratisPipelineOne;
DatanodeDetails dnToFail = pipelineToFail.getFirstNode();
cluster.shutdownHddsDatanode(dnToFail);
// wait for sufficient time for the callback to be triggered
Thread.sleep(3 * timeForFailure);
Assert.assertEquals(Pipeline.PipelineState.CLOSED,
pipelineManager.getPipeline(ratisPipelineOne.getId())
.getPipelineState());
Assert.assertEquals(Pipeline.PipelineState.OPEN,
pipelineManager.getPipeline(ratisPipelineTwo.getId())
.getPipelineState());
// Now restart the datanode and make sure that a new pipeline is created.
cluster.setWaitForClusterToBeReadyTimeout(300000);
cluster.restartHddsDatanode(dnToFail, true);
Pipeline ratisPipelineThree = pipelineManager.getPipeline(
containerManager.allocateContainer(
RATIS, THREE, "testOwner").getPipelineID());
//Assert that new container is not created from the ratis 2 pipeline
Assert.assertNotEquals(ratisPipelineThree.getId(),
ratisPipelineTwo.getId());
}
}