blob: 08883fecf206b2a600415626b77d1acc4dabde96 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "kudu/common/timestamp.h"
#include "kudu/tablet/delta_key.h"
#include "kudu/tablet/mvcc.h"
namespace kudu {
namespace tablet {
// Functions that evaluate the relevancy of deltas against a snapshot or pair
// of snapshots.
//
// When constructing an iterator tree, one must specify either one or two MVCC
// snapshots. The first always serves as an upper bound, ensuring that the data
// may be further mutated by writers without affecting the results of the
// iteration. If present, the second serves as a lower bound, excluding any
// deltas that were committed before it in time.
//
// Together, the snapshots affect the behavior of deltas encountered during the
// iteration. Various criteria is used to determine whether a particular delta
// is relevant to the iteration; which criteria is used depends on the context.
//
// Selection criteria
// ==================
// The selection (or "select") criteria determines whether the particular delta
// should cause its row to be included in the iteration. It's only applicable to
// two snapshots. In detail, regardless of delta type, if the delta's timestamp
// is "between" the two snapshots (i.e. not committed in the lower bound
// snapshot but committed in the upper bound snapshot), its row should be included.
//
// Application criteria
// ====================
// The application (or "apply") criteria determines whether the logical contents
// of the UPDATE, DELETE, or REINSERT in a particular delta should be applied to
// a row block during a scan. It only considers the first snapshot. In detail:
// - For REDO deltas: if the delta's timestamp is committed in the snapshot,
// the mutation should be applied.
// - For UNDO deltas: if the delta's timestamp is not committed in the snapshot,
// the mutation should be applied.
// Returns whether a delta at 'delta_ts' is relevant under the apply criteria to 'snap'.
template<DeltaType Type>
inline bool IsDeltaRelevantForApply(const MvccSnapshot& snap,
const Timestamp& delta_ts) {
bool ignored;
return IsDeltaRelevantForApply<Type>(snap, delta_ts, &ignored);
}
// A variant of IsDeltaRelevantForApply that, if the delta is not relevant,
// further checks whether any remaining deltas for this row can be skipped; this
// is an optimization and not necessary for correctness.
template<DeltaType Type>
inline bool IsDeltaRelevantForApply(const MvccSnapshot& snap,
const Timestamp& delta_ts,
bool* finished_row);
template<>
inline bool IsDeltaRelevantForApply<REDO>(const MvccSnapshot& snap,
const Timestamp& delta_ts,
bool* finished_row) {
*finished_row = false;
if (snap.IsApplied(delta_ts)) {
return true;
}
if (!snap.MayHaveAppliedOpsAtOrAfter(delta_ts)) {
// REDO deltas are sorted first in ascending row ordinal order, then in
// ascending timestamp order. Thus, if we know that there are no more
// committed ops whose timestamps are >= 'delta_ts', we know that
// any future deltas belonging to this row aren't relevant (as per the apply
// criteria, REDOs are relevant if they are committed in the snapshot), and
// we can skip to the next row.
*finished_row = true;
}
return false;
}
template<>
inline bool IsDeltaRelevantForApply<UNDO>(const MvccSnapshot& snap,
const Timestamp& delta_ts,
bool* finished_row) {
*finished_row = false;
if (!snap.IsApplied(delta_ts)) {
return true;
}
if (!snap.MayHaveNonAppliedOpsAtOrBefore(delta_ts)) {
// UNDO deltas are sorted first in ascending row ordinal order, then in
// descending timestamp order. Thus, if we know that there are no more
// uncommitted ops whose timestamps are <= 'delta_ts', we know that
// any future deltas belonging to this row aren't relevant (as per the apply
// criteria, UNDOs are relevant if they are uncommitted in the snapshot),
// and we can skip to the next row.
*finished_row = true;
}
return false;
}
// Returns whether deltas within the time range 'delta_ts_start' to
// 'delta_ts_end' are relevant under the select criteria to 'snap_start' and 'snap_end'.
inline bool IsDeltaRelevantForSelect(const MvccSnapshot& snap_start,
const MvccSnapshot& snap_end,
const Timestamp& delta_ts_start,
const Timestamp& delta_ts_end) {
return !snap_start.IsApplied(delta_ts_end) &&
snap_end.IsApplied(delta_ts_start);
}
// A variant of IsDeltaRelevantForSelect that operates on a single delta's
// timestamp given by 'delta_ts', and if the delta is not relevant, further
// checks whether any remaining deltas for this row can be skipped; this is an
// optimization and not necessary for correctness.
template<DeltaType Type>
inline bool IsDeltaRelevantForSelect(const MvccSnapshot& snap_start,
const MvccSnapshot& snap_end,
const Timestamp& delta_ts,
bool* finished_row);
template<>
inline bool IsDeltaRelevantForSelect<REDO>(const MvccSnapshot& snap_start,
const MvccSnapshot& snap_end,
const Timestamp& delta_ts,
bool* finished_row) {
*finished_row = false;
if (snap_start.IsApplied(delta_ts)) {
// No short-circuit available here; because REDO deltas for a given row are
// sorted in ascending timestamp order, the next REDO may be uncommitted in
// 'snap_start'.
return false;
}
if (!snap_end.IsApplied(delta_ts)) {
if (!snap_end.MayHaveAppliedOpsAtOrAfter(delta_ts)) {
// But if 'delta_ts' is not committed in 'snap_end', all future REDOs may
// also be uncommitted in 'snap_end'.
*finished_row = true;
}
return false;
}
return true;
}
template<>
inline bool IsDeltaRelevantForSelect<UNDO>(const MvccSnapshot& snap_start,
const MvccSnapshot& snap_end,
const Timestamp& delta_ts,
bool* finished_row) {
*finished_row = false;
if (!snap_end.IsApplied(delta_ts)) {
// No short-circuit available here; because UNDO deltas for a given row are
// sorted in descending timestamp order, the next UNDO may be committed in
// 'snap_end'.
return false;
}
if (snap_start.IsApplied(delta_ts)) {
if (!snap_start.MayHaveNonAppliedOpsAtOrBefore(delta_ts)) {
// But if 'delta_ts' is committed in 'snap_start', all future UNDOs may
// also be committed in 'snap_start'.
*finished_row = true;
}
return false;
}
return true;
}
} // namespace tablet
} // namespace kudu