blob: 0af21d902c689a7c452186f18e98c1465d2e6b82 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <functional>
#include <memory>
#include <optional>
#include <unordered_map>
#include <vector>
#include "kudu/gutil/map-util.h"
#include "kudu/tablet/rowset.h"
#include "kudu/util/slice.h"
#include "kudu/util/status.h"
namespace kudu {
template<class Traits>
class IntervalTree;
namespace tablet {
struct RowSetIntervalTraits;
struct RowSetWithBounds;
// Class which encapsulates the set of rowsets which are active for a given
// Tablet. This provides efficient lookup by key for RowSets which may overlap
// that key range.
//
// Additionally, the rowset tree maintains information about the implicit
// intervals generated by the row sets (for instance, if a tablet has
// rowsets [0, 2] and [1, 3] it has three implicit contiguous intervals:
// [0, 1], [1, 2], and [2, 3].
class RowSetTree {
public:
// An RSEndpoint is a POD which associates a rowset, an EndpointType
// (either the START or STOP of an interval), and the key at which the
// endpoint is located.
enum EndpointType {
START,
STOP
};
struct RSEndpoint {
RSEndpoint(RowSet *rowset, EndpointType endpoint, Slice slice)
: rowset_(rowset), endpoint_(endpoint), slice_(slice) {}
RowSet* rowset_;
enum EndpointType endpoint_;
Slice slice_;
};
RowSetTree();
Status Reset(const RowSetVector &rowsets);
~RowSetTree();
// Return all RowSets whose range may contain the given encoded key.
//
// The returned pointers are guaranteed to be valid at least until this
// RowSetTree object is Reset().
void FindRowSetsWithKeyInRange(const Slice &encoded_key,
std::vector<RowSet *> *rowsets) const;
// Call 'cb(rowset, index)' for each (rowset, index) pair such that
// 'encoded_keys[index]' may be within the bounds of 'rowset'.
//
// See IntervalTree::ForEachIntervalContainingPoints for additional
// information on the particular order in which the callback will be called.
//
// REQUIRES: 'encoded_keys' must be in sorted order.
void ForEachRowSetContainingKeys(const std::vector<Slice>& encoded_keys,
const std::function<void(RowSet*, int)>& cb) const;
// When 'lower_bound' is std::nullopt, it means negative infinity.
// When 'upper_bound' is std::nullopt, it means positive infinity.
// So the query interval can be one of below:
// [-OO, +OO)
// [-OO, upper_bound)
// [lower_bound, +OO)
// [lower_bound, upper_bound)
void FindRowSetsIntersectingInterval(const std::optional<Slice>& lower_bound,
const std::optional<Slice>& upper_bound,
std::vector<RowSet*>* rowsets) const;
const RowSetVector &all_rowsets() const { return all_rowsets_; }
RowSet* drs_by_id(int64_t drs_id) const {
return FindPtrOrNull(drs_by_id_, drs_id);
}
// Iterates over RowSetTree::RSEndpoint, guaranteed to be ordered and for
// any rowset to appear exactly twice, once at its start slice and once at
// its stop slice, equivalent to its GetBounds() values.
const std::vector<RSEndpoint>& key_endpoints() const { return key_endpoints_; }
private:
// Interval tree of the rowsets. Used to efficiently find rowsets which might contain
// a probe row.
std::unique_ptr<IntervalTree<RowSetIntervalTraits>> tree_;
// Ordered map of all the interval endpoints, holding the implicit contiguous
// intervals
// TODO map to usage statistics as well. See KUDU-???
std::vector<RSEndpoint> key_endpoints_;
// Container for all of the entries in tree_. IntervalTree does
// not itself manage memory, so this provides a simple way to enumerate
// all the entry structs and free them in the destructor.
std::vector<RowSetWithBounds *> entries_;
// All of the rowsets which were put in this RowSetTree.
RowSetVector all_rowsets_;
// The DiskRowSets in this RowSetTree, keyed by their id.
std::unordered_map<int64_t, RowSet*> drs_by_id_;
// Rowsets for which the bounds are unknown -- e.g because they
// are mutable (MemRowSets).
//
// These have to be consulted for every access, so are not
// stored in the interval tree.
RowSetVector unbounded_rowsets_;
bool initted_;
};
} // namespace tablet
} // namespace kudu