blob: 665c3f77d362940d67a1611d35274294d2c7edf4 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdds.scm.node;
import org.apache.hadoop.hdds.HddsConfigKeys;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.TestUtils;
import org.apache.hadoop.hdds.scm.DatanodeAdminError;
import org.apache.hadoop.hdds.scm.ha.SCMContext;
import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
import org.apache.hadoop.hdds.server.events.EventQueue;
import org.apache.hadoop.security.authentication.client.AuthenticationException;
import org.apache.hadoop.test.GenericTestUtils;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.util.List;
import java.util.UUID;
import java.util.Arrays;
import java.util.ArrayList;
import static junit.framework.TestCase.assertEquals;
import static org.assertj.core.api.Fail.fail;
import static org.junit.Assert.assertNotEquals;
/**
* Unit tests for the decommission manager.
*/
public class TestNodeDecommissionManager {
private NodeDecommissionManager decom;
private StorageContainerManager scm;
private NodeManager nodeManager;
private OzoneConfiguration conf;
private String storageDir;
@Before
public void setup() throws Exception {
conf = new OzoneConfiguration();
storageDir = GenericTestUtils.getTempPath(
TestDeadNodeHandler.class.getSimpleName() + UUID.randomUUID());
conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, storageDir);
nodeManager = createNodeManager(conf);
decom = new NodeDecommissionManager(conf, nodeManager, null,
SCMContext.emptyContext(), new EventQueue(), null);
}
@Test
public void testHostStringsParseCorrectly()
throws InvalidHostStringException {
NodeDecommissionManager.HostDefinition def =
new NodeDecommissionManager.HostDefinition("foobar");
assertEquals("foobar", def.getHostname());
assertEquals(-1, def.getPort());
def = new NodeDecommissionManager.HostDefinition(" foobar ");
assertEquals("foobar", def.getHostname());
assertEquals(-1, def.getPort());
def = new NodeDecommissionManager.HostDefinition("foobar:1234");
assertEquals("foobar", def.getHostname());
assertEquals(1234, def.getPort());
def = new NodeDecommissionManager.HostDefinition(
"foobar.mycompany.com:1234");
assertEquals("foobar.mycompany.com", def.getHostname());
assertEquals(1234, def.getPort());
try {
new NodeDecommissionManager.HostDefinition("foobar:abcd");
fail("InvalidHostStringException should have been thrown");
} catch (InvalidHostStringException e) {
}
}
@Test
public void testAnyInvalidHostThrowsException()
throws InvalidHostStringException{
List<DatanodeDetails> dns = generateDatanodes();
// Try to decommission a host that does exist, but give incorrect port
try {
decom.decommissionNodes(Arrays.asList(dns.get(1).getIpAddress()+":10"));
fail("InvalidHostStringException expected");
} catch (InvalidHostStringException e) {
}
// Try to decommission a host that does not exist
try {
decom.decommissionNodes(Arrays.asList("123.123.123.123"));
fail("InvalidHostStringException expected");
} catch (InvalidHostStringException e) {
}
// Try to decommission a host that does exist and a host that does not
try {
decom.decommissionNodes(Arrays.asList(
dns.get(1).getIpAddress(), "123,123,123,123"));
fail("InvalidHostStringException expected");
} catch (InvalidHostStringException e) {
}
// Try to decommission a host with many DNs on the address with no port
try {
decom.decommissionNodes(Arrays.asList(
dns.get(0).getIpAddress()));
fail("InvalidHostStringException expected");
} catch (InvalidHostStringException e) {
}
// Try to decommission a host with many DNs on the address with a port
// that does not exist
try {
decom.decommissionNodes(Arrays.asList(
dns.get(0).getIpAddress()+":10"));
fail("InvalidHostStringException expected");
} catch (InvalidHostStringException e) {
}
}
@Test
public void testNodesCanBeDecommissionedAndRecommissioned()
throws InvalidHostStringException, NodeNotFoundException {
List<DatanodeDetails> dns = generateDatanodes();
// Decommission 2 valid nodes
decom.decommissionNodes(Arrays.asList(dns.get(1).getIpAddress(),
dns.get(2).getIpAddress()));
assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONING,
nodeManager.getNodeStatus(dns.get(1)).getOperationalState());
assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONING,
nodeManager.getNodeStatus(dns.get(2)).getOperationalState());
// Running the command again gives no error - nodes already decommissioning
// are silently ignored.
decom.decommissionNodes(Arrays.asList(dns.get(1).getIpAddress(),
dns.get(2).getIpAddress()));
// Attempt to decommission dn(10) which has multiple hosts on the same IP
// and we hardcoded ports to 3456, 4567, 5678
DatanodeDetails multiDn = dns.get(10);
String multiAddr =
multiDn.getIpAddress()+":"+multiDn.getPorts().get(0).getValue();
decom.decommissionNodes(Arrays.asList(multiAddr));
assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONING,
nodeManager.getNodeStatus(multiDn).getOperationalState());
// Recommission all 3 hosts
decom.recommissionNodes(Arrays.asList(
multiAddr, dns.get(1).getIpAddress(), dns.get(2).getIpAddress()));
decom.getMonitor().run();
assertEquals(HddsProtos.NodeOperationalState.IN_SERVICE,
nodeManager.getNodeStatus(dns.get(1)).getOperationalState());
assertEquals(HddsProtos.NodeOperationalState.IN_SERVICE,
nodeManager.getNodeStatus(dns.get(2)).getOperationalState());
assertEquals(HddsProtos.NodeOperationalState.IN_SERVICE,
nodeManager.getNodeStatus(dns.get(10)).getOperationalState());
}
@Test
public void testNodesCanBePutIntoMaintenanceAndRecommissioned()
throws InvalidHostStringException, NodeNotFoundException {
List<DatanodeDetails> dns = generateDatanodes();
// Put 2 valid nodes into maintenance
decom.startMaintenanceNodes(Arrays.asList(dns.get(1).getIpAddress(),
dns.get(2).getIpAddress()), 100);
assertEquals(HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE,
nodeManager.getNodeStatus(dns.get(1)).getOperationalState());
assertNotEquals(0, nodeManager.getNodeStatus(
dns.get(1)).getOpStateExpiryEpochSeconds());
assertEquals(HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE,
nodeManager.getNodeStatus(dns.get(2)).getOperationalState());
assertNotEquals(0, nodeManager.getNodeStatus(
dns.get(2)).getOpStateExpiryEpochSeconds());
// Running the command again gives no error - nodes already decommissioning
// are silently ignored.
decom.startMaintenanceNodes(Arrays.asList(dns.get(1).getIpAddress(),
dns.get(2).getIpAddress()), 100);
// Attempt to decommission dn(10) which has multiple hosts on the same IP
// and we hardcoded ports to 3456, 4567, 5678
DatanodeDetails multiDn = dns.get(10);
String multiAddr =
multiDn.getIpAddress()+":"+multiDn.getPorts().get(0).getValue();
decom.startMaintenanceNodes(Arrays.asList(multiAddr), 100);
assertEquals(HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE,
nodeManager.getNodeStatus(multiDn).getOperationalState());
// Recommission all 3 hosts
decom.recommissionNodes(Arrays.asList(
multiAddr, dns.get(1).getIpAddress(), dns.get(2).getIpAddress()));
decom.getMonitor().run();
assertEquals(HddsProtos.NodeOperationalState.IN_SERVICE,
nodeManager.getNodeStatus(dns.get(1)).getOperationalState());
assertEquals(HddsProtos.NodeOperationalState.IN_SERVICE,
nodeManager.getNodeStatus(dns.get(2)).getOperationalState());
assertEquals(HddsProtos.NodeOperationalState.IN_SERVICE,
nodeManager.getNodeStatus(dns.get(10)).getOperationalState());
}
@Test
public void testNodesCannotTransitionFromDecomToMaint() throws Exception {
List<DatanodeDetails> dns = generateDatanodes();
// Put 1 node into maintenance and another into decom
decom.startMaintenance(dns.get(1), 100);
decom.startDecommission(dns.get(2));
assertEquals(HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE,
nodeManager.getNodeStatus(dns.get(1)).getOperationalState());
assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONING,
nodeManager.getNodeStatus(dns.get(2)).getOperationalState());
// Try to go from maint to decom:
List<String> dn = new ArrayList<>();
dn.add(dns.get(1).getIpAddress());
List<DatanodeAdminError> errors = decom.decommissionNodes(dn);
assertEquals(1, errors.size());
assertEquals(dns.get(1).getHostName(), errors.get(0).getHostname());
// Try to go from decom to maint:
dn = new ArrayList<>();
dn.add(dns.get(2).getIpAddress());
errors = decom.startMaintenanceNodes(dn, 100);
assertEquals(1, errors.size());
assertEquals(dns.get(2).getHostName(), errors.get(0).getHostname());
// Ensure the states are still as before
assertEquals(HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE,
nodeManager.getNodeStatus(dns.get(1)).getOperationalState());
assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONING,
nodeManager.getNodeStatus(dns.get(2)).getOperationalState());
}
@Test
public void testNodeDecommissionManagerOnBecomeLeader() throws Exception {
List<DatanodeDetails> dns = generateDatanodes();
long maintenanceEnd =
(System.currentTimeMillis() / 1000L) + (100 * 60L * 60L);
// Put 1 node into entering_maintenance, 1 node into decommissioning
// and 1 node into in_maintenance.
nodeManager.setNodeOperationalState(dns.get(1),
HddsProtos.NodeOperationalState.ENTERING_MAINTENANCE, maintenanceEnd);
nodeManager.setNodeOperationalState(dns.get(2),
HddsProtos.NodeOperationalState.DECOMMISSIONING, 0);
nodeManager.setNodeOperationalState(dns.get(3),
HddsProtos.NodeOperationalState.IN_MAINTENANCE, maintenanceEnd);
// trackedNodes should be empty now.
assertEquals(decom.getMonitor().getTrackedNodes().size(), 0);
// all nodes with decommissioning, entering_maintenance and in_maintenance
// should be added to trackedNodes
decom.onBecomeLeader();
decom.getMonitor().run();
// so size of trackedNodes will be 3.
assertEquals(decom.getMonitor().getTrackedNodes().size(), 3);
}
private SCMNodeManager createNodeManager(OzoneConfiguration config)
throws IOException, AuthenticationException {
scm = TestUtils.getScm(config);
return (SCMNodeManager) scm.getScmNodeManager();
}
/**
* Generate a list of random DNs and return the list. A total of 11 DNs will
* be generated and registered with the node manager. Index 0 and 10 will
* have the same IP and host and the rest will have unique IPs and Hosts.
* The DN at index 10, has 3 hard coded ports of 3456, 4567, 5678. All other
* DNs will have ports set to 0.
* @return The list of DatanodeDetails Generated
*/
private List<DatanodeDetails> generateDatanodes() {
List<DatanodeDetails> dns = new ArrayList<>();
for (int i=0; i<10; i++) {
DatanodeDetails dn = MockDatanodeDetails.randomDatanodeDetails();
dns.add(dn);
nodeManager.register(dn, null, null);
}
// We have 10 random DNs, we want to create another one that is on the same
// host as some of the others.
DatanodeDetails multiDn = dns.get(0);
DatanodeDetails.Builder builder = DatanodeDetails.newBuilder();
builder.setUuid(UUID.randomUUID())
.setHostName(multiDn.getHostName())
.setIpAddress(multiDn.getIpAddress())
.addPort(DatanodeDetails.newPort(
DatanodeDetails.Port.Name.STANDALONE, 3456))
.addPort(DatanodeDetails.newPort(
DatanodeDetails.Port.Name.RATIS, 4567))
.addPort(DatanodeDetails.newPort(
DatanodeDetails.Port.Name.REST, 5678))
.setNetworkLocation(multiDn.getNetworkLocation());
DatanodeDetails dn = builder.build();
nodeManager.register(dn, null, null);
dns.add(dn);
return dns;
}
}