blob: 1db0d07fdd1ed7ba628701cea7f01b1846a30ed1 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <map>
#include <random>
#include <string>
#include <vector>
#include <gtest/gtest_prod.h>
#include "kudu/util/status.h"
namespace boost {
template <class T> class optional;
} // namespace boost
namespace kudu {
namespace tools {
// A map from a count of replicas to a server id. The "reversed" relationship
// facilitates finding the servers with the maximum and minimum counts.
typedef std::multimap<int32_t, std::string> ServersByCountMap;
// Balance information for a table.
struct TableBalanceInfo {
std::string table_id;
// Mapping table replica count -> tablet server.
//
// The table replica count of a tablet server is defined as the number of
// replicas belonging to the table hosted on the tablet server.
ServersByCountMap servers_by_replica_count;
};
// Balance information for a cluster.
struct ClusterBalanceInfo {
// Mapping table skew -> table balance information. The "reversed"
// relationship facilitates finding the most and least skewed tables.
//
// The skew of a table is defined as the difference between its most
// occupied and least occupied tablet servers. Skew is considered to be
// improved both when the number of pairs of tablet servers exhibiting max
// skew between them decreases, or when the skew decreases.
std::multimap<int32_t, TableBalanceInfo> table_info_by_skew;
// Mapping total replica count -> tablet server.
//
// The total replica count of a tablet server is defined as the total number
// of replicas hosted on the tablet server.
ServersByCountMap servers_by_total_replica_count;
};
// A directive to move some replica of a table between two tablet servers.
struct TableReplicaMove {
std::string table_id;
std::string from; // Unique identifier of the source tablet server.
std::string to; // Unique identifier of the target tablet server.
};
// A rebalancing algorithm, which orders replica moves aiming to balance a
// cluster. The definition of "balance" depends on the algorithm.
class RebalancingAlgo {
public:
virtual ~RebalancingAlgo() = default;
// The top-level method of the algorithm. Using information on the current
// balance state of the cluster in 'cluster_info', the algorithm populates
// the output parameter 'moves' with no more than 'max_moves_num' replica
// moves that aim to balance the cluster. Due to code conventions, 'int' is
// used instead of 'size_t' as 'max_moves_num' type; 'max_moves_num' must be
// non-negative value, where value of '0' is a shortcut for
// 'the possible maximum'.
//
// Once this method returns Status::OK() and leaves 'moves' empty, the cluster
// is considered balanced.
//
// 'moves' must be non-NULL.
virtual Status GetNextMoves(const ClusterBalanceInfo& cluster_info,
int max_moves_num,
std::vector<TableReplicaMove>* moves);
protected:
// Get the next rebalancing move from the algorithm. If there is no such move,
// the 'move' output parameter is set to 'boost::none'.
//
// 'move' must be non-NULL.
virtual Status GetNextMove(const ClusterBalanceInfo& cluster_info,
boost::optional<TableReplicaMove>* move) = 0;
// Update the balance state in 'cluster_info' with the outcome of the move
// 'move'. 'cluster_info' is an in-out parameter.
//
// 'cluster_info' must be non-NULL.
static Status ApplyMove(const TableReplicaMove& move,
ClusterBalanceInfo* cluster_info);
};
// A two-dimensional greedy rebalancing algorithm. From among moves that
// decrease the skew of a most skewed table, it prefers ones that reduce the
// skew of the cluster. A cluster is considered balanced when the skew of every
// table is <= 1 and the skew of the cluster is <= 1.
//
// The skew of the cluster is defined as the difference between the maximum
// total replica count over all tablet servers and the minimum total replica
// count over all tablet servers.
class TwoDimensionalGreedyAlgo : public RebalancingAlgo {
public:
// Policies for picking one element from equal-skew choices.
enum class EqualSkewOption {
PICK_FIRST,
PICK_RANDOM,
};
explicit TwoDimensionalGreedyAlgo(
EqualSkewOption opt = EqualSkewOption::PICK_RANDOM);
Status GetNextMove(const ClusterBalanceInfo& cluster_info,
boost::optional<TableReplicaMove>* move) override;
private:
enum class ExtremumType { MAX, MIN, };
FRIEND_TEST(RebalanceAlgoUnitTest, RandomizedTest);
FRIEND_TEST(RebalanceAlgoUnitTest, EmptyClusterBalanceInfoGetNextMove);
// Compute the intersection of the least or most loaded tablet servers for a
// table with the least or most loaded tablet servers in the cluster:
// 'intersection' is populated with the ids of the tablet servers in the
// intersection of those two sets. The minimum or maximum number of replicas
// per tablet server for the table and for the cluster are output into
// 'replica_count_table' and 'replica_count_total', respectively. Identifiers
// of the tablet servers with '*replica_count_table' replicas of the table are
// output into the 'server_uuids' parameter. Whether to consider most or least
// loaded servers is controlled by 'extremum'. An empty 'intersection' on
// return means no intersection was found for the mentioned sets of the
// extremally loaded servers: in that case optimizing the load by table would
// not affect the extreme load by server.
//
// None of the output parameters may be NULL.
Status GetIntersection(
ExtremumType extremum,
const ServersByCountMap& servers_by_table_replica_count,
const ServersByCountMap& servers_by_total_replica_count,
int32_t* replica_count_table,
int32_t* replica_count_total,
std::vector<std::string>* server_uuids,
std::vector<std::string>* intersection);
// As determined by 'extremum', find the tablet servers with the minimum or
// maximum count in 'server_by_replica_counts'. The extremal count will be set
// in 'replica_count', while 'server_uuids' will be populated with the servers
// that host the extremal replica count.
//
// Neither of the output parameters may be NULL.
Status GetMinMaxLoadedServers(
const ServersByCountMap& servers_by_replica_count,
ExtremumType extremum,
int32_t* replica_count,
std::vector<std::string>* server_uuids);
const EqualSkewOption equal_skew_opt_;
std::random_device random_device_;
std::mt19937 generator_;
};
} // namespace tools
} // namespace kudu