blob: e29052a0dd3b043663eff592c904812ad3da2527 [file] [log] [blame]
package org.apache.helix.controller.rebalancer.waged.model;
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import org.apache.helix.HelixException;
import org.apache.helix.controller.rebalancer.topology.Topology;
import org.apache.helix.controller.rebalancer.util.WagedValidationUtil;
import org.apache.helix.model.ClusterConfig;
import org.apache.helix.model.InstanceConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This class represents a possible allocation of the replication.
* Note that any usage updates to the AssignableNode are not thread safe.
*/
public class AssignableNode implements Comparable<AssignableNode> {
  private static final Logger LOG = LoggerFactory.getLogger(AssignableNode.class.getName());

  // Immutable Instance Properties
  private final String _instanceName;
  private final String _faultZone;
  // Maximum number of the partitions that can be assigned to the instance.
  private final int _maxPartition;
  private final ImmutableSet<String> _instanceTags;
  private final ImmutableMap<String, List<String>> _disabledPartitionsMap;
  private final ImmutableMap<String, Integer> _maxAllowedCapacity;

  // Mutable (Dynamic) Instance Properties
  // A map of <resource name, <partition name, replica>> that tracks the replicas assigned to the
  // node.
  private Map<String, Map<String, AssignableReplica>> _currentAssignedReplicaMap;
  // A map of <capacity key, capacity value> that tracks the currently available node capacity.
  private Map<String, Integer> _remainingCapacity;
  // Same shape as _remainingCapacity, but only the usage of top-state replicas is deducted.
  private Map<String, Integer> _remainingTopStateCapacity;

  /**
   * Update the node with a ClusterDataCache. This resets the current assignment and recalculates
   * currentCapacity.
   * NOTE: While this is required to be used in the constructor, this can also be used when the
   * clusterCache needs to be refreshed. This is under the assumption that the capacity mappings of
   * InstanceConfig and ResourceConfig could be subject to change. If the assumption is no longer
   * true, this function should become private.
   */
  AssignableNode(ClusterConfig clusterConfig, InstanceConfig instanceConfig, String instanceName) {
    _instanceName = instanceName;
    Map<String, Integer> instanceCapacity = fetchInstanceCapacity(clusterConfig, instanceConfig);
    _faultZone = computeFaultZone(clusterConfig, instanceConfig);
    _instanceTags = ImmutableSet.copyOf(instanceConfig.getTags());
    _disabledPartitionsMap = ImmutableMap.copyOf(instanceConfig.getDisabledPartitionsMap());
    // Make an immutable copy of the max capacity; the remaining-capacity maps start fully free.
    _maxAllowedCapacity = ImmutableMap.copyOf(instanceCapacity);
    _remainingCapacity = new HashMap<>(instanceCapacity);
    _remainingTopStateCapacity = new HashMap<>(instanceCapacity);
    _maxPartition = clusterConfig.getMaxPartitionsPerInstance();
    _currentAssignedReplicaMap = new HashMap<>();
  }

  /**
   * This function should only be used to assign a set of new partitions that are not allocated on
   * this node. It's because any exception could occur in the middle of the batch assignment and the
   * previously finished assignments cannot be reverted.
   * Using this function avoids the overhead of updating capacity repeatedly.
   */
  void assignInitBatch(Collection<AssignableReplica> replicas) {
    Map<String, Integer> totalTopStatePartitionCapacity = new HashMap<>();
    Map<String, Integer> totalPartitionCapacity = new HashMap<>();
    for (AssignableReplica replica : replicas) {
      // TODO: the exception could occur in the middle of the for loop and the previously added
      // records cannot be reverted
      addToAssignmentRecord(replica);
      // Accumulate the capacity requirement according to the partition's capacity configuration.
      for (Map.Entry<String, Integer> capacity : replica.getCapacity().entrySet()) {
        if (replica.isReplicaTopState()) {
          totalTopStatePartitionCapacity.compute(capacity.getKey(),
              (key, totalValue) -> (totalValue == null) ? capacity.getValue()
                  : totalValue + capacity.getValue());
        }
        totalPartitionCapacity.compute(capacity.getKey(),
            (key, totalValue) -> (totalValue == null) ? capacity.getValue()
                : totalValue + capacity.getValue());
      }
    }
    // Update the global state after all single replications' calculation is done.
    updateRemainingCapacity(totalTopStatePartitionCapacity, _remainingTopStateCapacity, false);
    updateRemainingCapacity(totalPartitionCapacity, _remainingCapacity, false);
  }

  /**
   * Assign a replica to the node and deduct its capacity requirement from the remaining capacity.
   * @param assignableReplica - the replica to be assigned
   * @throws HelixException if the replica has already been assigned to the node
   */
  void assign(AssignableReplica assignableReplica) {
    addToAssignmentRecord(assignableReplica);
    updateRemainingCapacity(assignableReplica.getCapacity(), _remainingCapacity, false);
    if (assignableReplica.isReplicaTopState()) {
      updateRemainingCapacity(assignableReplica.getCapacity(), _remainingTopStateCapacity, false);
    }
  }

  /**
   * Release a replica from the node and return its capacity requirement to the remaining capacity.
   * If the replica is not on this node, the assignable node is not updated (a warning is logged).
   * @param replica - the replica to be released
   */
  void release(AssignableReplica replica)
      throws IllegalArgumentException {
    String resourceName = replica.getResourceName();
    String partitionName = replica.getPartitionName();
    // Check if the release is necessary
    if (!_currentAssignedReplicaMap.containsKey(resourceName)) {
      LOG.warn("Resource {} is not on node {}. Ignore the release call.", resourceName,
          getInstanceName());
      return;
    }
    Map<String, AssignableReplica> partitionMap = _currentAssignedReplicaMap.get(resourceName);
    if (!partitionMap.containsKey(partitionName) || !partitionMap.get(partitionName)
        .equals(replica)) {
      LOG.warn("Replica {} is not assigned to node {}. Ignore the release call.",
          replica.toString(), getInstanceName());
      return;
    }
    AssignableReplica removedReplica = partitionMap.remove(partitionName);
    updateRemainingCapacity(removedReplica.getCapacity(), _remainingCapacity, true);
    if (removedReplica.isReplicaTopState()) {
      updateRemainingCapacity(removedReplica.getCapacity(), _remainingTopStateCapacity, true);
    }
  }

  /**
   * @return A set of all assigned replicas on the node.
   */
  Set<AssignableReplica> getAssignedReplicas() {
    return _currentAssignedReplicaMap.values().stream()
        .flatMap(replicaMap -> replicaMap.values().stream()).collect(Collectors.toSet());
  }

  /**
   * @return The current assignment in a map of <resource name, set of partition names>
   */
  Map<String, Set<String>> getAssignedPartitionsMap() {
    Map<String, Set<String>> assignmentMap = new HashMap<>();
    for (String resourceName : _currentAssignedReplicaMap.keySet()) {
      // NOTE: the returned sets are live keySet views of the internal maps, not copies.
      assignmentMap.put(resourceName, _currentAssignedReplicaMap.get(resourceName).keySet());
    }
    return assignmentMap;
  }

  /**
   * @param resource Resource name
   * @return A set of the current assigned replicas' partition names in the specified resource.
   */
  public Set<String> getAssignedPartitionsByResource(String resource) {
    return _currentAssignedReplicaMap.getOrDefault(resource, Collections.emptyMap()).keySet();
  }

  /**
   * @param resource Resource name
   * @return A set of the current assigned replicas' partition names with the top state in the
   *         specified resource.
   */
  Set<String> getAssignedTopStatePartitionsByResource(String resource) {
    return _currentAssignedReplicaMap.getOrDefault(resource, Collections.emptyMap()).entrySet()
        .stream().filter(partitionEntry -> partitionEntry.getValue().isReplicaTopState())
        .map(partitionEntry -> partitionEntry.getKey()).collect(Collectors.toSet());
  }

  /**
   * @return The total count of assigned top state partitions.
   */
  public int getAssignedTopStatePartitionsCount() {
    return (int) _currentAssignedReplicaMap.values().stream()
        .flatMap(replicaMap -> replicaMap.values().stream())
        .filter(AssignableReplica::isReplicaTopState).count();
  }

  /**
   * @return The total count of assigned replicas.
   */
  public int getAssignedReplicaCount() {
    return _currentAssignedReplicaMap.values().stream().mapToInt(Map::size).sum();
  }

  /**
   * @return The current available capacity.
   *         NOTE(review): this exposes the internal mutable map; callers are expected not to
   *         modify it — confirm before hardening with an unmodifiable view.
   */
  public Map<String, Integer> getRemainingCapacity() {
    return _remainingCapacity;
  }

  /**
   * @return A map of <capacity category, capacity number> that describes the max capacity of the
   *         node.
   */
  public Map<String, Integer> getMaxCapacity() {
    return _maxAllowedCapacity;
  }

  /**
   * Return the most concerning capacity utilization number for evenly partition assignment.
   * The method dynamically calculates the projected highest utilization number among all the
   * capacity categories assuming the new capacity usage is added to the node.
   * For example, if the current node usage is {CPU: 0.9, MEM: 0.4, DISK: 0.6}. Then this call shall
   * return 0.9.
   * @param newUsage the proposed new additional capacity usage.
   * @return The highest utilization number of the node among all the capacity category.
   */
  public float getGeneralProjectedHighestUtilization(Map<String, Integer> newUsage) {
    return getProjectedHighestUtilization(newUsage, _remainingCapacity);
  }

  /**
   * Return the most concerning capacity utilization number for evenly partition assignment.
   * The method dynamically calculates the projected highest utilization number among all the
   * capacity categories assuming the new capacity usage is added to the node.
   * For example, if the current node usage is {CPU: 0.9, MEM: 0.4, DISK: 0.6}. Then this call shall
   * return 0.9.
   * This function returns projected highest utilization for only top state partitions.
   * @param newUsage the proposed new additional capacity usage.
   * @return The highest utilization number of the node among all the capacity category.
   */
  public float getTopStateProjectedHighestUtilization(Map<String, Integer> newUsage) {
    return getProjectedHighestUtilization(newUsage, _remainingTopStateCapacity);
  }

  // Shared implementation: utilization of key k = (max(k) - remaining(k) + newUsage(k)) / max(k);
  // return the maximum over all configured capacity keys.
  // NOTE(review): a capacity key configured with value 0 yields Infinity/NaN here — confirm that
  // WagedValidationUtil rejects zero capacities upstream.
  private float getProjectedHighestUtilization(Map<String, Integer> newUsage,
      Map<String, Integer> remainingCapacity) {
    float highestCapacityUtilization = 0;
    for (String capacityKey : _maxAllowedCapacity.keySet()) {
      float capacityValue = _maxAllowedCapacity.get(capacityKey);
      float utilization = (capacityValue - remainingCapacity.get(capacityKey) + newUsage
          .getOrDefault(capacityKey, 0)) / capacityValue;
      highestCapacityUtilization = Math.max(highestCapacityUtilization, utilization);
    }
    return highestCapacityUtilization;
  }

  public String getInstanceName() {
    return _instanceName;
  }

  public Set<String> getInstanceTags() {
    return _instanceTags;
  }

  public String getFaultZone() {
    return _faultZone;
  }

  public boolean hasFaultZone() {
    return _faultZone != null;
  }

  /**
   * @return A map of <resource name, set of partition names> contains all the partitions that are
   *         disabled on the node.
   */
  public Map<String, List<String>> getDisabledPartitionsMap() {
    return _disabledPartitionsMap;
  }

  /**
   * @return The max partition count that are allowed to be allocated on the node.
   */
  public int getMaxPartition() {
    return _maxPartition;
  }

  /**
   * Computes the fault zone id based on the domain and fault zone type when topology is enabled.
   * For example, when the domain is "zone=2, instance=testInstance" and the fault zone type is
   * "zone", this function returns "2".
   * If cannot find the fault zone type, this function leaves the fault zone id as the instance
   * name.
   * Note the WAGED rebalancer does not require full topology tree to be created. So this logic is
   * simpler than the CRUSH based rebalancer.
   */
  private String computeFaultZone(ClusterConfig clusterConfig, InstanceConfig instanceConfig) {
    LinkedHashMap<String, String> instanceTopologyMap = Topology
        .computeInstanceTopologyMap(clusterConfig, instanceConfig.getInstanceName(), instanceConfig,
            true /*earlyQuitTillFaultZone*/);
    // Join the topology path values with '/'. Joining (instead of the previous StringBuilder +
    // setLength(length - 1)) avoids a StringIndexOutOfBoundsException when the map is empty.
    return instanceTopologyMap.values().stream().collect(Collectors.joining("/"));
  }

  /**
   * Record the replica in the assignment map.
   * @throws HelixException if the replica has already been assigned to the node.
   */
  private void addToAssignmentRecord(AssignableReplica replica) {
    String resourceName = replica.getResourceName();
    String partitionName = replica.getPartitionName();
    if (_currentAssignedReplicaMap.containsKey(resourceName) && _currentAssignedReplicaMap
        .get(resourceName).containsKey(partitionName)) {
      throw new HelixException(String
          .format("Resource %s already has a replica with state %s from partition %s on node %s",
              replica.getResourceName(), replica.getReplicaState(), replica.getPartitionName(),
              getInstanceName()));
    } else {
      _currentAssignedReplicaMap.computeIfAbsent(resourceName, key -> new HashMap<>())
          .put(partitionName, replica);
    }
  }

  /**
   * Deduct (or, on release, restore) the used capacity from the remaining capacity map.
   * If a used capacity key does not exist in the node's capacity, it is ignored.
   */
  private void updateRemainingCapacity(Map<String, Integer> usedCapacity,
      Map<String, Integer> remainingCapacity, boolean isRelease) {
    int multiplier = isRelease ? -1 : 1;
    usedCapacity.forEach((capacityKey, capacityValue) -> remainingCapacity.compute(capacityKey,
        (key, value) -> value == null ? null : value - multiplier * capacityValue));
  }

  /**
   * Get and validate the instance capacity from instance config.
   * @throws HelixException if any required capacity key is not configured in the instance config.
   */
  private Map<String, Integer> fetchInstanceCapacity(ClusterConfig clusterConfig,
      InstanceConfig instanceConfig) {
    Map<String, Integer> instanceCapacity =
        WagedValidationUtil.validateAndGetInstanceCapacity(clusterConfig, instanceConfig);
    // Remove all the non-required capacity items from the map.
    instanceCapacity.keySet().retainAll(clusterConfig.getInstanceCapacityKeys());
    return instanceCapacity;
  }

  @Override
  public int hashCode() {
    return _instanceName.hashCode();
  }

  // Added to satisfy the equals/hashCode contract: hashCode() was already keyed on the instance
  // name, and compareTo() orders by it, so equality is defined on the instance name as well.
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (!(obj instanceof AssignableNode)) {
      return false;
    }
    return _instanceName.equals(((AssignableNode) obj).getInstanceName());
  }

  @Override
  public int compareTo(AssignableNode o) {
    return _instanceName.compareTo(o.getInstanceName());
  }

  @Override
  public String toString() {
    return _instanceName;
  }
}