blob: 8228bc26d7f8f8df15db61b3b46f3ee5aa9a50e1 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
// This file is copied from
// https://github.com/apache/kudu/blob/master/src/kudu/tablet/rowset_tree.h
// and modified by Doris
#pragma once
#include <boost/optional/optional.hpp>
#include <cstdint>
#include <functional>
#include <memory>
#include <unordered_map>
#include <vector>
#include "common/status.h"
#include "gutil/map-util.h"
#include "olap/rowset/rowset.h"
#include "util/slice.h"
namespace doris {
template <class Traits>
class IntervalTree;
struct RowsetIntervalTraits;
struct RowsetWithBounds;
// Used often enough, may as well typedef it.
typedef std::vector<RowsetSharedPtr> RowsetVector;
// Class which encapsulates the set of rowsets which are active for a given
// Tablet. This provides efficient lookup by key for Rowsets which may overlap
// that key range.
//
// Additionally, the rowset tree maintains information about the implicit
// intervals generated by the row sets (for instance, if a tablet has
// rowsets [0, 2] and [1, 3] it has three implicit contiguous intervals:
// [0, 1], [1, 2], and [2, 3].
class RowsetTree {
public:
// An RSEndpoint is a POD which associates a rowset, an EndpointType
// (either the START or STOP of an interval), and the key at which the
// endpoint is located.
enum EndpointType { START, STOP };
struct RSEndpoint {
RSEndpoint(RowsetSharedPtr rowset, uint32_t segment_id, EndpointType endpoint, Slice slice)
: rowset_(rowset), segment_id_(segment_id), endpoint_(endpoint), slice_(slice) {}
RowsetSharedPtr rowset_;
uint32_t segment_id_;
enum EndpointType endpoint_;
Slice slice_;
};
RowsetTree();
Status Init(const RowsetVector& rowsets);
~RowsetTree();
// Return Rowsets whose id in rowset_ids and range may contain the given encoded key.
//
// The returned pointers are guaranteed to be valid at least until this
// RowsetTree object is Reset().
void FindRowsetsWithKeyInRange(const Slice& encoded_key, const RowsetIdUnorderedSet* rowset_ids,
vector<std::pair<RowsetSharedPtr, int32_t>>* rowsets) const;
// Call 'cb(rowset, index)' for each (rowset, index) pair such that
// 'encoded_keys[index]' may be within the bounds of 'rowset'.
//
// See IntervalTree::ForEachIntervalContainingPoints for additional
// information on the particular order in which the callback will be called.
//
// REQUIRES: 'encoded_keys' must be in sorted order.
void ForEachRowsetContainingKeys(const std::vector<Slice>& encoded_keys,
const std::function<void(RowsetSharedPtr, int)>& cb) const;
// When 'lower_bound' is boost::none, it means negative infinity.
// When 'upper_bound' is boost::none, it means positive infinity.
// So the query interval can be one of below:
// [-OO, +OO)
// [-OO, upper_bound)
// [lower_bound, +OO)
// [lower_bound, upper_bound)
void FindRowsetsIntersectingInterval(
const std::optional<Slice>& lower_bound, const std::optional<Slice>& upper_bound,
vector<std::pair<RowsetSharedPtr, int32_t>>* rowsets) const;
const RowsetVector& all_rowsets() const { return all_rowsets_; }
RowsetSharedPtr rs_by_id(RowsetId rs_id) const { return FindPtrOrNull(rs_by_id_, rs_id); }
// Iterates over RowsetTree::RSEndpoint, guaranteed to be ordered and for
// any rowset to appear exactly twice, once at its start slice and once at
// its stop slice, equivalent to its GetBounds() values.
const std::vector<RSEndpoint>& key_endpoints() const { return key_endpoints_; }
private:
// Interval tree of the rowsets. Used to efficiently find rowsets which might contain
// a probe row.
std::unique_ptr<IntervalTree<RowsetIntervalTraits>> tree_;
// Ordered map of all the interval endpoints, holding the implicit contiguous
// intervals
std::vector<RSEndpoint> key_endpoints_;
// Container for all of the entries in tree_. IntervalTree does
// not itself manage memory, so this provides a simple way to enumerate
// all the entry structs and free them in the destructor.
std::vector<RowsetWithBounds*> entries_;
// All of the rowsets which were put in this RowsetTree.
RowsetVector all_rowsets_;
// The Rowsets in this RowsetTree, keyed by their id.
std::unordered_map<RowsetId, RowsetSharedPtr, HashOfRowsetId> rs_by_id_;
bool initted_;
};
void ModifyRowSetTree(const RowsetTree& old_tree, const RowsetVector& rowsets_to_remove,
const RowsetVector& rowsets_to_add, RowsetTree* new_tree);
} // namespace doris