blob: 67936c8becda722bd4b53c6876ff5516f6b28604 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.hadoop.hdds.scm.container.replication.health;
import org.apache.hadoop.hdds.client.ECReplicationConfig;
import org.apache.hadoop.hdds.scm.container.ContainerID;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerReplica;
import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport;
import org.apache.hadoop.hdds.scm.container.replication.ContainerCheckRequest;
import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult;
import org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaOp;
import org.apache.hadoop.hdds.scm.container.replication.ECContainerReplicaCount;
import java.util.List;
import java.util.Set;
import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType.EC;
/**
* Container Check handler to check the under / over replication state for
* EC containers. If any containers are found to be over or under replicated
* they are added to the queue passed within the request object.
*/
public class ECReplicationCheckHandler extends AbstractCheck {
public ECReplicationCheckHandler() {
}
@Override
public boolean handle(ContainerCheckRequest request) {
if (request.getContainerInfo().getReplicationType() != EC) {
// This handler is only for EC containers.
return false;
}
ReplicationManagerReport report = request.getReport();
ContainerInfo container = request.getContainerInfo();
ContainerID containerID = container.containerID();
ContainerHealthResult health = checkHealth(request);
if (health.getHealthState() == ContainerHealthResult.HealthState.HEALTHY) {
// If the container is healthy, there is nothing else to do in this
// handler so return as unhandled so any further handlers will be tried.
return false;
}
// TODO - should the report have a HEALTHY state, rather than just bad
// states? It would need to be added to legacy RM too.
if (health.getHealthState()
== ContainerHealthResult.HealthState.UNDER_REPLICATED) {
report.incrementAndSample(
ReplicationManagerReport.HealthState.UNDER_REPLICATED, containerID);
ContainerHealthResult.UnderReplicatedHealthResult underHealth
= ((ContainerHealthResult.UnderReplicatedHealthResult) health);
if (underHealth.isUnrecoverable()) {
// TODO - do we need a new health state for unrecoverable EC?
report.incrementAndSample(
ReplicationManagerReport.HealthState.MISSING, containerID);
}
// TODO - if it is unrecoverable, should we return false to other
// handlers can be tried?
if (!underHealth.isSufficientlyReplicatedAfterPending() &&
!underHealth.isUnrecoverable()) {
request.getUnderRepQueue().add(underHealth);
}
return true;
} else if (health.getHealthState()
== ContainerHealthResult.HealthState.OVER_REPLICATED) {
report.incrementAndSample(
ReplicationManagerReport.HealthState.OVER_REPLICATED, containerID);
ContainerHealthResult.OverReplicatedHealthResult overHealth
= ((ContainerHealthResult.OverReplicatedHealthResult) health);
if (!overHealth.isSufficientlyReplicatedAfterPending()) {
request.getOverRepQueue().add(overHealth);
}
return true;
}
// Should not get here, but incase it does the container is not healthy,
// but is also not under or over replicated.
return false;
}
public ContainerHealthResult checkHealth(ContainerCheckRequest request) {
ContainerInfo container = request.getContainerInfo();
Set<ContainerReplica> replicas = request.getContainerReplicas();
List<ContainerReplicaOp> replicaPendingOps = request.getPendingOps();
ECContainerReplicaCount replicaCount =
new ECContainerReplicaCount(container, replicas, replicaPendingOps,
request.getMaintenanceRedundancy());
ECReplicationConfig repConfig =
(ECReplicationConfig) container.getReplicationConfig();
if (!replicaCount.isSufficientlyReplicated(false)) {
List<Integer> missingIndexes = replicaCount.unavailableIndexes(false);
int remainingRedundancy = repConfig.getParity();
boolean dueToDecommission = true;
if (missingIndexes.size() > 0) {
// The container has reduced redundancy and will need reconstructed
// via an EC reconstruction command. Note that it may also have some
// replicas in decommission / maintenance states, but as the under
// replication is not caused only by decommission, we say it is not
// due to decommission/
dueToDecommission = false;
remainingRedundancy = repConfig.getParity() - missingIndexes.size();
}
return new ContainerHealthResult.UnderReplicatedHealthResult(
container, remainingRedundancy, dueToDecommission,
replicaCount.isSufficientlyReplicated(true),
replicaCount.isUnrecoverable());
}
if (replicaCount.isOverReplicated(false)) {
List<Integer> overRepIndexes = replicaCount.overReplicatedIndexes(false);
return new ContainerHealthResult
.OverReplicatedHealthResult(container, overRepIndexes.size(),
!replicaCount.isOverReplicated(true));
}
// No issues detected, so return healthy.
return new ContainerHealthResult.HealthyResult(container);
}
}