hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ECReplicationCheckHandler.java - ozone - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with this
  * work for additional information regarding copyright ownership.  The ASF
  * licenses this file to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  * <p/>
  * http://www.apache.org/licenses/LICENSE-2.0
  * <p/>
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  * License for the specific language governing permissions and limitations under
  * the License.
  */
 package org.apache.hadoop.hdds.scm.container.replication.health;

 import org.apache.hadoop.hdds.client.ECReplicationConfig;
 import org.apache.hadoop.hdds.scm.container.ContainerID;
 import org.apache.hadoop.hdds.scm.container.ContainerInfo;
 import org.apache.hadoop.hdds.scm.container.ContainerReplica;
 import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport;
 import org.apache.hadoop.hdds.scm.container.replication.ContainerCheckRequest;
 import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult;
 import org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaOp;
 import org.apache.hadoop.hdds.scm.container.replication.ECContainerReplicaCount;

 import java.util.List;
 import java.util.Set;

 import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType.EC;

 /**
  * Container Check handler to check the under / over replication state for
  * EC containers. If any containers are found to be over or under replicated
  * they are added to the queue passed within the request object.
  */
 public class ECReplicationCheckHandler extends AbstractCheck {

   public ECReplicationCheckHandler() {
   }

   @Override
   public boolean handle(ContainerCheckRequest request) {
     if (request.getContainerInfo().getReplicationType() != EC) {
       // This handler is only for EC containers.
       return false;
     }
     ReplicationManagerReport report = request.getReport();
     ContainerInfo container = request.getContainerInfo();
     ContainerID containerID = container.containerID();
     ContainerHealthResult health = checkHealth(request);
     if (health.getHealthState() == ContainerHealthResult.HealthState.HEALTHY) {
       // If the container is healthy, there is nothing else to do in this
       // handler so return as unhandled so any further handlers will be tried.
       return false;
     }
     // TODO - should the report have a HEALTHY state, rather than just bad
     //        states? It would need to be added to legacy RM too.
     if (health.getHealthState()
         == ContainerHealthResult.HealthState.UNDER_REPLICATED) {
       report.incrementAndSample(
           ReplicationManagerReport.HealthState.UNDER_REPLICATED, containerID);
       ContainerHealthResult.UnderReplicatedHealthResult underHealth
           = ((ContainerHealthResult.UnderReplicatedHealthResult) health);
       if (underHealth.isUnrecoverable()) {
         // TODO - do we need a new health state for unrecoverable EC?
         report.incrementAndSample(
             ReplicationManagerReport.HealthState.MISSING, containerID);
       }
       // TODO - if it is unrecoverable, should we return false to other
       //        handlers can be tried?
       if (!underHealth.isSufficientlyReplicatedAfterPending() &&
           !underHealth.isUnrecoverable()) {
         request.getUnderRepQueue().add(underHealth);
       }
       return true;
     } else if (health.getHealthState()
         == ContainerHealthResult.HealthState.OVER_REPLICATED) {
       report.incrementAndSample(
           ReplicationManagerReport.HealthState.OVER_REPLICATED, containerID);
       ContainerHealthResult.OverReplicatedHealthResult overHealth
           = ((ContainerHealthResult.OverReplicatedHealthResult) health);
       if (!overHealth.isSufficientlyReplicatedAfterPending()) {
         request.getOverRepQueue().add(overHealth);
       }
       return true;
     }
     // Should not get here, but incase it does the container is not healthy,
     // but is also not under or over replicated.
     return false;
   }

   public ContainerHealthResult checkHealth(ContainerCheckRequest request) {
     ContainerInfo container = request.getContainerInfo();
     Set<ContainerReplica> replicas = request.getContainerReplicas();
     List<ContainerReplicaOp> replicaPendingOps = request.getPendingOps();
     ECContainerReplicaCount replicaCount =
         new ECContainerReplicaCount(container, replicas, replicaPendingOps,
             request.getMaintenanceRedundancy());

     ECReplicationConfig repConfig =
         (ECReplicationConfig) container.getReplicationConfig();

     if (!replicaCount.isSufficientlyReplicated(false)) {
       List<Integer> missingIndexes = replicaCount.unavailableIndexes(false);
       int remainingRedundancy = repConfig.getParity();
       boolean dueToDecommission = true;
       if (missingIndexes.size() > 0) {
         // The container has reduced redundancy and will need reconstructed
         // via an EC reconstruction command. Note that it may also have some
         // replicas in decommission / maintenance states, but as the under
         // replication is not caused only by decommission, we say it is not
         // due to decommission/
         dueToDecommission = false;
         remainingRedundancy = repConfig.getParity() - missingIndexes.size();
       }
       return new ContainerHealthResult.UnderReplicatedHealthResult(
           container, remainingRedundancy, dueToDecommission,
           replicaCount.isSufficientlyReplicated(true),
           replicaCount.isUnrecoverable());
     }

     if (replicaCount.isOverReplicated(false)) {
       List<Integer> overRepIndexes = replicaCount.overReplicatedIndexes(false);
       return new ContainerHealthResult
           .OverReplicatedHealthResult(container, overRepIndexes.size(),
           !replicaCount.isOverReplicated(true));
     }
     // No issues detected, so return healthy.
     return new ContainerHealthResult.HealthyResult(container);
   }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with this
	* work for additional information regarding copyright ownership. The ASF
	* licenses this file to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	* <p/>
	* http://www.apache.org/licenses/LICENSE-2.0
	* <p/>
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
	* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
	* License for the specific language governing permissions and limitations under
	* the License.
	*/
	package org.apache.hadoop.hdds.scm.container.replication.health;

	import org.apache.hadoop.hdds.client.ECReplicationConfig;
	import org.apache.hadoop.hdds.scm.container.ContainerID;
	import org.apache.hadoop.hdds.scm.container.ContainerInfo;
	import org.apache.hadoop.hdds.scm.container.ContainerReplica;
	import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport;
	import org.apache.hadoop.hdds.scm.container.replication.ContainerCheckRequest;
	import org.apache.hadoop.hdds.scm.container.replication.ContainerHealthResult;
	import org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaOp;
	import org.apache.hadoop.hdds.scm.container.replication.ECContainerReplicaCount;

	import java.util.List;
	import java.util.Set;

	import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType.EC;

	/**
	* Container Check handler to check the under / over replication state for
	* EC containers. If any containers are found to be over or under replicated
	* they are added to the queue passed within the request object.
	*/
	public class ECReplicationCheckHandler extends AbstractCheck {

	public ECReplicationCheckHandler() {
	}

	@Override
	public boolean handle(ContainerCheckRequest request) {
	if (request.getContainerInfo().getReplicationType() != EC) {
	// This handler is only for EC containers.
	return false;
	}
	ReplicationManagerReport report = request.getReport();
	ContainerInfo container = request.getContainerInfo();
	ContainerID containerID = container.containerID();
	ContainerHealthResult health = checkHealth(request);
	if (health.getHealthState() == ContainerHealthResult.HealthState.HEALTHY) {
	// If the container is healthy, there is nothing else to do in this
	// handler so return as unhandled so any further handlers will be tried.
	return false;
	}
	// TODO - should the report have a HEALTHY state, rather than just bad
	// states? It would need to be added to legacy RM too.
	if (health.getHealthState()
	== ContainerHealthResult.HealthState.UNDER_REPLICATED) {
	report.incrementAndSample(
	ReplicationManagerReport.HealthState.UNDER_REPLICATED, containerID);
	ContainerHealthResult.UnderReplicatedHealthResult underHealth
	= ((ContainerHealthResult.UnderReplicatedHealthResult) health);
	if (underHealth.isUnrecoverable()) {
	// TODO - do we need a new health state for unrecoverable EC?
	report.incrementAndSample(
	ReplicationManagerReport.HealthState.MISSING, containerID);
	}
	// TODO - if it is unrecoverable, should we return false to other
	// handlers can be tried?
	if (!underHealth.isSufficientlyReplicatedAfterPending() &&
	!underHealth.isUnrecoverable()) {
	request.getUnderRepQueue().add(underHealth);
	}
	return true;
	} else if (health.getHealthState()
	== ContainerHealthResult.HealthState.OVER_REPLICATED) {
	report.incrementAndSample(
	ReplicationManagerReport.HealthState.OVER_REPLICATED, containerID);
	ContainerHealthResult.OverReplicatedHealthResult overHealth
	= ((ContainerHealthResult.OverReplicatedHealthResult) health);
	if (!overHealth.isSufficientlyReplicatedAfterPending()) {
	request.getOverRepQueue().add(overHealth);
	}
	return true;
	}
	// Should not get here, but incase it does the container is not healthy,
	// but is also not under or over replicated.
	return false;
	}

	public ContainerHealthResult checkHealth(ContainerCheckRequest request) {
	ContainerInfo container = request.getContainerInfo();
	Set<ContainerReplica> replicas = request.getContainerReplicas();
	List<ContainerReplicaOp> replicaPendingOps = request.getPendingOps();
	ECContainerReplicaCount replicaCount =
	new ECContainerReplicaCount(container, replicas, replicaPendingOps,
	request.getMaintenanceRedundancy());

	ECReplicationConfig repConfig =
	(ECReplicationConfig) container.getReplicationConfig();

	if (!replicaCount.isSufficientlyReplicated(false)) {
	List<Integer> missingIndexes = replicaCount.unavailableIndexes(false);
	int remainingRedundancy = repConfig.getParity();
	boolean dueToDecommission = true;
	if (missingIndexes.size() > 0) {
	// The container has reduced redundancy and will need reconstructed
	// via an EC reconstruction command. Note that it may also have some
	// replicas in decommission / maintenance states, but as the under
	// replication is not caused only by decommission, we say it is not
	// due to decommission/
	dueToDecommission = false;
	remainingRedundancy = repConfig.getParity() - missingIndexes.size();
	}
	return new ContainerHealthResult.UnderReplicatedHealthResult(
	container, remainingRedundancy, dueToDecommission,
	replicaCount.isSufficientlyReplicated(true),
	replicaCount.isUnrecoverable());
	}

	if (replicaCount.isOverReplicated(false)) {
	List<Integer> overRepIndexes = replicaCount.overReplicatedIndexes(false);
	return new ContainerHealthResult
	.OverReplicatedHealthResult(container, overRepIndexes.size(),
	!replicaCount.isOverReplicated(true));
	}
	// No issues detected, so return healthy.
	return new ContainerHealthResult.HealthyResult(container);
	}
	}