// blob: e53474e9e9d0b3d7bda1ca75e39ce7f792eeeaed [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <optional>
#include <string>
#include <unordered_map>
#include <vector>
#include <glog/logging.h>
#include "kudu/common/column_predicate.h" // IWYU pragma: keep
#include "kudu/common/partition.h"
namespace kudu {
class Arena;
class ColumnSchema;
class EncodedKey;
class Schema;
// Holds the parameters of a scan: per-column predicates, primary key bounds,
// partition key bounds, the cache_blocks flag, and an optional limit.
class ScanSpec {
 public:
  // Starts with no predicates, null primary key bounds, cache_blocks set to
  // true, and no limit.
  ScanSpec()
      : lower_bound_key_{nullptr},
        exclusive_upper_bound_key_{nullptr},
        cache_blocks_{true},
        limit_{std::nullopt} {}

  // Adds 'pred' as a predicate on its column. If the column already has a
  // predicate, the new one is merged into it.
  void AddPredicate(ColumnPredicate pred);

  // Drops the predicate on the column named 'column_name'.
  void RemovePredicate(const std::string& column_name);

  // Drops every column predicate.
  void RemovePredicates();

  // Returns true when the result set is known to be empty.
  bool CanShortCircuit() const;

  // Returns true when a Bloom filter predicate is present.
  bool ContainsBloomFilterPredicate() const;

  // Optimizes the scan by unifying the lower and upper bound constraints with
  // the column predicates.
  //
  // When 'remove_pushed_predicates' is true, column predicates that get pushed
  // into the upper or lower primary key bounds are removed.
  //
  // Idempotent.
  void UnifyPrimaryKeyBoundsAndColumnPredicates(const Schema& schema,
                                                Arena* arena,
                                                bool remove_pushed_predicates);

  // NOTE(review): no contract is documented for this method in this header.
  // Its parameters mirror UnifyPrimaryKeyBoundsAndColumnPredicates(); confirm
  // the exact behavior against the implementation.
  void OptimizeScan(const Schema& schema,
                    Arena* arena,
                    bool remove_pushed_predicates);

  // Filters the values of IN-list predicates using the given partition and
  // partition schema.
  //
  // Pruning is supported only for single-column hash and range schemas. The
  // values of an IN-list predicate are pruned for the following partitioning
  // patterns:
  //
  //   hash(onekey),                              # pruning on 'onekey'
  //   range(onekey),                             # pruning on 'onekey'
  //   hash(onekey), hash(anotherkey)             # pruning on either key
  //   hash(onekey), range(anotherkey)            # pruning on either key
  //   hash(key_one, key_two), hash(anotherkey)   # pruning on 'anotherkey'
  //
  // TODO(ningw) For IN list predicate on hash/range(key_one, key_two) or more
  //   columns, if one predicate is IN list, and the rest predicate(s)
  //   are EQUAL, could have IN list predicate values prune as well.
  void PruneInlistValuesIfPossible(const Schema& schema,
                                   const Partition& partition,
                                   const PartitionSchema& partition_schema);

  // Returns the columns that appear in the predicates but are absent from
  // 'projection'.
  std::vector<ColumnSchema> GetMissingColumns(const Schema& projection);

  // Sets the inclusive lower bound primary key of the scan.
  // 'key' is not owned by this object and must remain valid.
  // When called repeatedly, the most restrictive key is used.
  void SetLowerBoundKey(const EncodedKey* key);

  // Sets the exclusive upper bound primary key of the scan.
  // 'key' is not owned by this object and must remain valid.
  // When called repeatedly, the most restrictive key is used.
  void SetExclusiveUpperBoundKey(const EncodedKey* key);

  // Sets the inclusive lower bound partition key of the scan.
  //
  // Only used in the client.
  void SetLowerBoundPartitionKey(const PartitionKey& partition_key);

  // Sets the exclusive upper bound partition key of the scan.
  //
  // Only used in the client.
  void SetExclusiveUpperBoundPartitionKey(const PartitionKey& partition_key);

  // The scan predicates, keyed by a string (the column name, judging by
  // RemovePredicate()).
  const std::unordered_map<std::string, ColumnPredicate>& predicates() const {
    return predicates_;
  }

  // Primary key bounds; each is nullptr until set.
  const EncodedKey* lower_bound_key() const { return lower_bound_key_; }
  const EncodedKey* exclusive_upper_bound_key() const {
    return exclusive_upper_bound_key_;
  }

  // Partition key bounds.
  const PartitionKey& lower_bound_partition_key() const {
    return lower_bound_partition_key_;
  }
  const PartitionKey& exclusive_upper_bound_partition_key() const {
    return exclusive_upper_bound_partition_key_;
  }

  // Accessors for the cache_blocks flag (true by default).
  bool cache_blocks() const { return cache_blocks_; }
  void set_cache_blocks(bool cache_blocks) { cache_blocks_ = cache_blocks; }

  // Whether a limit has been set via set_limit().
  bool has_limit() const { return limit_.has_value(); }

  // Stores 'limit' as the limit of the scan.
  void set_limit(int64_t limit) { limit_ = limit; }

  // Returns the limit. CHECK-fails unless has_limit() is true.
  int64_t limit() const {
    CHECK(has_limit());
    return *limit_;
  }

  // Returns a string representation of this scan spec; 'schema' is supplied
  // by the caller.
  std::string ToString(const Schema& schema) const;

 private:
  // Lifts the predicates that are implicit in the lower and upper bound
  // primary key constraints into the simplified predicate bounds.
  //
  // If the lower and exclusive upper bound primary keys share a prefix of
  // equal components, those components can be lifted into equality predicates
  // on their associated columns. Optionally, a single range predicate (or a
  // pair of them) can be lifted from the key component that follows the
  // prefix of equal components.
  void LiftPrimaryKeyBounds(const Schema& schema, Arena* arena);

  // Encodes the column predicates into lower and upper primary key bounds and
  // replaces the existing bounds when the new ones are more constrained.
  //
  // When 'remove_pushed_predicates' is true, the predicates in the primary key
  // bound are removed if the bound is replaced.
  void PushPredicatesIntoPrimaryKeyBounds(const Schema& schema,
                                          Arena* arena,
                                          bool remove_pushed_predicates);

  // Column predicates of the scan.
  std::unordered_map<std::string, ColumnPredicate> predicates_;

  // Primary key bounds. Not owned.
  const EncodedKey* lower_bound_key_;
  const EncodedKey* exclusive_upper_bound_key_;

  // Partition key bounds.
  PartitionKey lower_bound_partition_key_;
  PartitionKey exclusive_upper_bound_partition_key_;

  // Value reported by cache_blocks().
  bool cache_blocks_;

  // Limit of the scan; empty when no limit has been set.
  std::optional<int64_t> limit_;
};
} // namespace kudu