blob: af33266697b09dd7ab21b5f0d5f60c4380002a7d [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "kudu/common/partition_pruner.h"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <optional>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
#include <gtest/gtest.h>
#include "kudu/common/column_predicate.h"
#include "kudu/common/common.pb.h"
#include "kudu/common/encoded_key.h"
#include "kudu/common/partial_row.h"
#include "kudu/common/partition.h"
#include "kudu/common/row.h"
#include "kudu/common/row_operations.h"
#include "kudu/common/row_operations.pb.h"
#include "kudu/common/scan_spec.h"
#include "kudu/common/schema.h"
#include "kudu/gutil/strings/substitute.h"
#include "kudu/util/memory/arena.h"
#include "kudu/util/slice.h"
#include "kudu/util/test_macros.h"
#include "kudu/util/test_util.h"
using std::count_if;
using std::get;
using std::make_tuple;
using std::nullopt;
using std::optional;
using std::ostream;
using std::pair;
using std::string;
using std::tuple;
using std::vector;
using strings::Substitute;
namespace kudu {
class PartitionPrunerTest : public KuduTest {
public:
typedef tuple<vector<string>, int32_t, uint32_t> ColumnNamesNumBucketsAndSeed;
typedef pair<string, string> ColumnNameAndStringValue;
typedef pair<string, int8_t> ColumnNameAndIntValue;
static void CreatePartitionSchemaPB(
const vector<string>& range_columns,
const vector<ColumnNamesNumBucketsAndSeed>& table_hash_schema,
PartitionSchemaPB* partition_schema_pb);
static void AddRangePartitionWithSchema(
const Schema& schema,
const vector<ColumnNameAndStringValue>& lower_string_cols,
const vector<ColumnNameAndStringValue>& upper_string_cols,
const vector<ColumnNameAndIntValue>& lower_int_cols,
const vector<ColumnNameAndIntValue>& upper_int_cols,
const vector<ColumnNamesNumBucketsAndSeed>& hash_schemas,
PartitionSchemaPB* pb);
static void CheckPrunedPartitions(const Schema& schema,
const PartitionSchema& partition_schema,
const vector<Partition>& partitions,
const ScanSpec& spec,
size_t remaining_tablets,
size_t pruner_ranges);
};
void PartitionPrunerTest::CreatePartitionSchemaPB(
const vector<string>& range_columns,
const vector<ColumnNamesNumBucketsAndSeed>& table_hash_schema,
PartitionSchemaPB* partition_schema_pb) {
auto* range_schema = partition_schema_pb->mutable_range_schema();
for (const auto& range_column : range_columns) {
range_schema->add_columns()->set_name(range_column);
}
for (const auto& hash_dimension : table_hash_schema) {
auto* hash_dimension_pb = partition_schema_pb->add_hash_schema();
for (const auto& hash_schema_column : get<0>(hash_dimension)) {
hash_dimension_pb->add_columns()->set_name(hash_schema_column);
}
hash_dimension_pb->set_num_buckets(get<1>(hash_dimension));
hash_dimension_pb->set_seed(get<2>(hash_dimension));
}
}
void PartitionPrunerTest::AddRangePartitionWithSchema(
const Schema& schema,
const vector<ColumnNameAndStringValue>& lower_string_cols,
const vector<ColumnNameAndStringValue>& upper_string_cols,
const vector<ColumnNameAndIntValue>& lower_int_cols,
const vector<ColumnNameAndIntValue>& upper_int_cols,
const vector<ColumnNamesNumBucketsAndSeed>& hash_buckets_info,
PartitionSchemaPB* pb) {
auto* range = pb->add_custom_hash_schema_ranges();
RowOperationsPBEncoder encoder(range->mutable_range_bounds());
KuduPartialRow lower(&schema);
KuduPartialRow upper(&schema);
for (const auto& bound : lower_string_cols) {
ASSERT_OK(lower.SetStringCopy(bound.first, bound.second));
}
for (const auto& bound : upper_string_cols) {
ASSERT_OK(upper.SetStringCopy(bound.first, bound.second));
}
for (const auto& bound : lower_int_cols) {
ASSERT_OK(lower.SetInt8(bound.first, bound.second));
}
for (const auto& bound : upper_int_cols) {
ASSERT_OK(upper.SetInt8(bound.first, bound.second));
}
encoder.Add(RowOperationsPB::RANGE_LOWER_BOUND, lower);
encoder.Add(RowOperationsPB::RANGE_UPPER_BOUND, upper);
PartitionSchema::HashSchema hash_schema;
for (const auto& hash_bucket_info : hash_buckets_info) {
auto* hash_dimension_pb = range->add_hash_schema();
PartitionSchema::HashDimension hash_dimension;
for (const auto& hash_schema_columns : get<0>(hash_bucket_info)) {
hash_dimension_pb->add_columns()->set_name(hash_schema_columns);
hash_dimension.column_ids.emplace_back(schema.find_column(hash_schema_columns));
}
hash_dimension_pb->set_num_buckets(get<1>(hash_bucket_info));
hash_dimension.num_buckets = get<1>(hash_bucket_info);
hash_dimension_pb->set_seed(get<2>(hash_bucket_info));
hash_dimension.seed = get<2>(hash_bucket_info);
hash_schema.emplace_back(hash_dimension);
}
}
void PartitionPrunerTest::CheckPrunedPartitions(
const Schema& schema,
const PartitionSchema& partition_schema,
const vector<Partition>& partitions,
const ScanSpec& spec,
size_t remaining_tablets,
size_t pruner_ranges) {
ScanSpec opt_spec(spec);
Arena arena(256);
opt_spec.OptimizeScan(schema, &arena, false);
PartitionPruner pruner;
pruner.Init(schema, partition_schema, opt_spec);
SCOPED_TRACE(strings::Substitute("schema: $0", schema.ToString()));
SCOPED_TRACE(strings::Substitute("partition schema: $0", partition_schema.DebugString(schema)));
SCOPED_TRACE(strings::Substitute("partition pruner: $0",
pruner.ToString(schema, partition_schema)));
SCOPED_TRACE(strings::Substitute("optimized scan spec: $0", opt_spec.ToString(schema)));
SCOPED_TRACE(strings::Substitute("original scan spec: $0", spec.ToString(schema)));
int pruned_partitions = count_if(partitions.begin(), partitions.end(),
[&] (const Partition& partition) {
return pruner.ShouldPrune(partition);
});
ASSERT_EQ(remaining_tablets, partitions.size() - pruned_partitions);
ASSERT_EQ(pruner_ranges, pruner.NumRangesRemaining());
}
TEST_F(PartitionPrunerTest, TestPrimaryKeyRangePruning) {
// CREATE TABLE t
// (a INT8, b INT8, c INT8)
// PRIMARY KEY (a, b, c)) SPLIT ROWS [(0, 0, 0), (10, 10, 10)]
// DISTRIBUTE BY RANGE(a, b, c);
Schema schema({ ColumnSchema("a", INT8),
ColumnSchema("b", INT8),
ColumnSchema("c", INT8) },
{ ColumnId(0), ColumnId(1), ColumnId(2) },
3);
Arena arena(1024);
PartitionSchema partition_schema;
ASSERT_OK(PartitionSchema::FromPB(PartitionSchemaPB(), schema, &partition_schema));
KuduPartialRow split1(&schema);
ASSERT_OK(split1.SetInt8("a", 0));
ASSERT_OK(split1.SetInt8("b", 0));
ASSERT_OK(split1.SetInt8("c", 0));
KuduPartialRow split2(&schema);
ASSERT_OK(split2.SetInt8("a", 10));
ASSERT_OK(split2.SetInt8("b", 10));
ASSERT_OK(split2.SetInt8("c", 10));
vector<Partition> partitions;
ASSERT_OK(partition_schema.CreatePartitions({ split1, split2 }, {}, schema, &partitions));
// Creates a scan with optional lower and upper bounds, and checks that the
// expected number of tablets are pruned.
const auto check = [&] (optional<tuple<int8_t, int8_t, int8_t>> lower,
optional<tuple<int8_t, int8_t, int8_t>> upper,
size_t remaining_tablets) {
ScanSpec spec;
KuduPartialRow lower_bound(&schema);
KuduPartialRow upper_bound(&schema);
EncodedKey* enc_lower_bound = nullptr;
EncodedKey* enc_upper_bound = nullptr;
if (lower) {
ASSERT_OK(lower_bound.SetInt8("a", get<0>(*lower)));
ASSERT_OK(lower_bound.SetInt8("b", get<1>(*lower)));
ASSERT_OK(lower_bound.SetInt8("c", get<2>(*lower)));
ConstContiguousRow row(lower_bound.schema(), lower_bound.row_data_);
enc_lower_bound = EncodedKey::FromContiguousRow(row, &arena);
spec.SetLowerBoundKey(enc_lower_bound);
}
if (upper) {
ASSERT_OK(upper_bound.SetInt8("a", get<0>(*upper)));
ASSERT_OK(upper_bound.SetInt8("b", get<1>(*upper)));
ASSERT_OK(upper_bound.SetInt8("c", get<2>(*upper)));
ConstContiguousRow row(upper_bound.schema(), upper_bound.row_data_);
enc_upper_bound = EncodedKey::FromContiguousRow(row, &arena);
spec.SetExclusiveUpperBoundKey(enc_upper_bound);
}
size_t pruner_ranges = remaining_tablets == 0 ? 0 : 1;
CheckPrunedPartitions(schema, partition_schema, partitions, spec,
remaining_tablets, pruner_ranges);
};
// No bounds
NO_FATALS(check(nullopt, nullopt, 3));
// PK < (-1, min, min)
NO_FATALS(check(nullopt,
make_tuple<int8_t, int8_t, int8_t>(-1, INT8_MIN, INT8_MIN),
1));
// PK < (10, 10, 10)
NO_FATALS(check(nullopt,
make_tuple<int8_t, int8_t, int8_t>(10, 10, 10),
2));
// PK < (100, min, min)
NO_FATALS(check(nullopt,
make_tuple<int8_t, int8_t, int8_t>(100, INT8_MIN, INT8_MIN),
3));
// PK >= (-10, -10, -10)
NO_FATALS(check(make_tuple<int8_t, int8_t, int8_t>(-10, -10, -10),
nullopt,
3));
// PK >= (0, 0, 0)
NO_FATALS(check(make_tuple<int8_t, int8_t, int8_t>(0, 0, 0),
nullopt,
2));
// PK >= (100, 0, 0)
NO_FATALS(check(make_tuple<int8_t, int8_t, int8_t>(100, 0, 0),
nullopt,
1));
// PK >= (-10, 0, 0)
// PK < (100, 0, 0)
NO_FATALS(check(make_tuple<int8_t, int8_t, int8_t>(-10, 0, 0),
make_tuple<int8_t, int8_t, int8_t>(100, 0, 0),
3));
// PK >= (0, 0, 0)
// PK < (10, 10, 10)
NO_FATALS(check(make_tuple<int8_t, int8_t, int8_t>(0, 0, 0),
make_tuple<int8_t, int8_t, int8_t>(10, 10, 10),
1));
// PK >= (0, 0, 0)
// PK < (10, 10, 11)
NO_FATALS(check(make_tuple<int8_t, int8_t, int8_t>(0, 0, 0),
make_tuple<int8_t, int8_t, int8_t>(10, 10, 11),
2));
// PK < (0, 0, 0)
// PK >= (10, 10, 11)
NO_FATALS(check(make_tuple<int8_t, int8_t, int8_t>(10, 10, 11),
make_tuple<int8_t, int8_t, int8_t>(0, 0, 0),
0));
}
TEST_F(PartitionPrunerTest, TestPartialPrimaryKeyRangePruning) {
// CREATE TABLE t
// (a INT8, b STRING, c STRING, PRIMARY KEY (a, b, c))
// DISTRIBUTE BY RANGE(a, b)
// SPLIT ROWS [(0, "m"), (10, "r"];
Schema schema({ ColumnSchema("a", INT8),
ColumnSchema("b", STRING),
ColumnSchema("c", STRING) },
{ ColumnId(0), ColumnId(1), ColumnId(2) },
3);
Arena arena(1024);
PartitionSchemaPB pb;
CreatePartitionSchemaPB({"a", "b"}, {}, &pb);
PartitionSchema partition_schema;
ASSERT_OK(PartitionSchema::FromPB(pb, schema, &partition_schema));
KuduPartialRow split1(&schema);
ASSERT_OK(split1.SetInt8("a", 0));
ASSERT_OK(split1.SetStringCopy("b", "m"));
KuduPartialRow split2(&schema);
ASSERT_OK(split2.SetInt8("a", 10));
ASSERT_OK(split2.SetStringCopy("b", "r"));
vector<Partition> partitions;
ASSERT_OK(partition_schema.CreatePartitions({ split1, split2 }, {}, schema, &partitions));
// Applies the specified lower and upper bound primary keys against the
// schema, and checks that the expected number of partitions are pruned.
const auto check = [&] (optional<tuple<int8_t, string, string>> lower,
optional<tuple<int8_t, string, string>> upper,
size_t remaining_tablets ) {
ScanSpec spec;
KuduPartialRow lower_bound(&schema);
KuduPartialRow upper_bound(&schema);
EncodedKey* enc_lower_bound = nullptr;
EncodedKey* enc_upper_bound = nullptr;
if (lower) {
ASSERT_OK(lower_bound.SetInt8("a", get<0>(*lower)));
ASSERT_OK(lower_bound.SetStringCopy("b", get<1>(*lower)));
ASSERT_OK(lower_bound.SetStringCopy("c", get<2>(*lower)));
ConstContiguousRow row(lower_bound.schema(), lower_bound.row_data_);
enc_lower_bound = EncodedKey::FromContiguousRow(row, &arena);
spec.SetLowerBoundKey(enc_lower_bound);
}
if (upper) {
ASSERT_OK(upper_bound.SetInt8("a", get<0>(*upper)));
ASSERT_OK(upper_bound.SetStringCopy("b", get<1>(*upper)));
ASSERT_OK(upper_bound.SetStringCopy("c", get<2>(*upper)));
ConstContiguousRow row(upper_bound.schema(), upper_bound.row_data_);
enc_upper_bound = EncodedKey::FromContiguousRow(row, &arena);
spec.SetExclusiveUpperBoundKey(enc_upper_bound);
}
size_t pruner_ranges = remaining_tablets == 0 ? 0 : 1;
CheckPrunedPartitions(schema, partition_schema, partitions, spec,
remaining_tablets, pruner_ranges);
};
// No bounds
NO_FATALS(check(nullopt, nullopt, 3));
// PK < (-1, "", "")
NO_FATALS(check(nullopt, make_tuple<int8_t, string, string>(-1, "", ""), 1));
// PK < (10, "r", "")
NO_FATALS(check(nullopt, make_tuple<int8_t, string, string>(10, "r", ""), 2));
// PK < (10, "r", "z")
NO_FATALS(check(nullopt, make_tuple<int8_t, string, string>(10, "r", "z"), 3));
// PK < (100, "", "")
NO_FATALS(check(nullopt, make_tuple<int8_t, string, string>(100, "", ""), 3));
// PK >= (-10, "m", "")
NO_FATALS(check(make_tuple<int8_t, string, string>(-10, "m", ""), nullopt, 3));
// PK >= (0, "", "")
NO_FATALS(check(make_tuple<int8_t, string, string>(0, "", ""), nullopt, 3));
// PK >= (0, "m", "")
NO_FATALS(check(make_tuple<int8_t, string, string>(0, "m", ""), nullopt, 2));
// PK >= (100, "", "")
NO_FATALS(check(make_tuple<int8_t, string, string>(100, "", ""), nullopt, 1));
// PK >= (-10, "", "")
// PK < (100, "", "")
NO_FATALS(check(make_tuple<int8_t, string, string>(-10, "", ""),
make_tuple<int8_t, string, string>(100, "", ""), 3));
// PK >= (0, "m", "")
// PK < (10, "r", "")
NO_FATALS(check(make_tuple<int8_t, string, string>(0, "m", ""),
make_tuple<int8_t, string, string>(10, "r", ""), 1));
// PK >= (0, "m", "")
// PK < (10, "r", "z")
NO_FATALS(check(make_tuple<int8_t, string, string>(0, "m", ""),
make_tuple<int8_t, string, string>(10, "r", "z"), 2));
// PK >= (0, "", "")
// PK < (10, "m", "z")
NO_FATALS(check(make_tuple<int8_t, string, string>(0, "", ""),
make_tuple<int8_t, string, string>(10, "m", "z"), 2));
// PK >= (10, "m", "")
// PK < (10, "m", "z")
NO_FATALS(check(make_tuple<int8_t, string, string>(10, "m", ""),
make_tuple<int8_t, string, string>(10, "m", "z"), 1));
}
TEST_F(PartitionPrunerTest, TestIntPartialPrimaryKeyRangePruning) {
// CREATE TABLE t
// (a INT8, b INT8, c INT8, PRIMARY KEY (a, b, c))
// DISTRIBUTE BY RANGE(a, b)
// SPLIT ROWS [(0, 0)];
Schema schema({ ColumnSchema("a", INT8),
ColumnSchema("b", INT8),
ColumnSchema("c", INT8) },
{ ColumnId(0), ColumnId(1), ColumnId(2) },
3);
Arena arena(1024);
PartitionSchemaPB pb;
CreatePartitionSchemaPB({"a", "b"}, {}, &pb);
PartitionSchema partition_schema;
ASSERT_OK(PartitionSchema::FromPB(pb, schema, &partition_schema));
KuduPartialRow split(&schema);
ASSERT_OK(split.SetInt8("a", 0));
ASSERT_OK(split.SetInt8("b", 0));
vector<Partition> partitions;
ASSERT_OK(partition_schema.CreatePartitions({ split }, {}, schema, &partitions));
// Applies the specified lower and upper bound primary keys against the
// schema, and checks that the expected number of partitions are pruned.
const auto check = [&] (optional<tuple<int8_t, int8_t, int8_t>> lower,
optional<tuple<int8_t, int8_t, int8_t>> upper,
size_t remaining_tablets ) {
ScanSpec spec;
KuduPartialRow lower_bound(&schema);
KuduPartialRow upper_bound(&schema);
EncodedKey* enc_lower_bound = nullptr;
EncodedKey* enc_upper_bound = nullptr;
if (lower) {
ASSERT_OK(lower_bound.SetInt8("a", get<0>(*lower)));
ASSERT_OK(lower_bound.SetInt8("b", get<1>(*lower)));
ASSERT_OK(lower_bound.SetInt8("c", get<2>(*lower)));
ConstContiguousRow row(lower_bound.schema(), lower_bound.row_data_);
enc_lower_bound = EncodedKey::FromContiguousRow(row, &arena);
spec.SetLowerBoundKey(enc_lower_bound);
}
if (upper) {
ASSERT_OK(upper_bound.SetInt8("a", get<0>(*upper)));
ASSERT_OK(upper_bound.SetInt8("b", get<1>(*upper)));
ASSERT_OK(upper_bound.SetInt8("c", get<2>(*upper)));
ConstContiguousRow row(upper_bound.schema(), upper_bound.row_data_);
enc_upper_bound = EncodedKey::FromContiguousRow(row, &arena);
spec.SetExclusiveUpperBoundKey(enc_upper_bound);
}
size_t pruner_ranges = remaining_tablets == 0 ? 0 : 1;
CheckPrunedPartitions(schema, partition_schema, partitions, spec,
remaining_tablets, pruner_ranges);
};
// No bounds
NO_FATALS(check(nullopt, nullopt, 2));
// PK < (0, 0, min)
NO_FATALS(check(nullopt, make_tuple<int8_t, int8_t, int8_t>(0, 0, INT8_MIN), 1));
// PK < (0, 0, 0);
NO_FATALS(check(nullopt, make_tuple<int8_t, int8_t, int8_t>(0, 0, 0), 2));
// PK < (0, max, 0);
NO_FATALS(check(nullopt, make_tuple<int8_t, int8_t, int8_t>(0, INT8_MAX, 0), 2));
// PK < (max, max, min);
NO_FATALS(check(nullopt,
make_tuple<int8_t, int8_t, int8_t>(INT8_MAX, INT8_MAX, INT8_MIN), 2));
// PK < (max, max, 0);
NO_FATALS(check(nullopt, make_tuple<int8_t, int8_t, int8_t>(INT8_MAX, INT8_MAX, 0), 2));
// PK >= (0, 0, 0);
NO_FATALS(check(make_tuple<int8_t, int8_t, int8_t>(0, 0, 0), nullopt, 1));
// PK >= (0, 0, -1);
NO_FATALS(check(make_tuple<int8_t, int8_t, int8_t>(0, 0, -1), nullopt, 1));
// PK >= (0, 0, min);
NO_FATALS(check(make_tuple<int8_t, int8_t, int8_t>(0, 0, INT8_MIN), nullopt, 1));
}
TEST_F(PartitionPrunerTest, TestRangePruning) {
// CREATE TABLE t
// (a INT8, b STRING, c INT8)
// PRIMARY KEY (a, b, c))
// DISTRIBUTE BY RANGE(c, b);
// SPLIT ROWS [(0, "m"), (10, "r")];
Schema schema({ ColumnSchema("a", INT8),
ColumnSchema("b", STRING),
ColumnSchema("c", INT8) },
{ ColumnId(0), ColumnId(1), ColumnId(2) },
3);
PartitionSchemaPB pb;
CreatePartitionSchemaPB({"c", "b"}, {}, &pb);
PartitionSchema partition_schema;
ASSERT_OK(PartitionSchema::FromPB(pb, schema, &partition_schema));
KuduPartialRow split1(&schema);
ASSERT_OK(split1.SetInt8("c", 0));
ASSERT_OK(split1.SetStringCopy("b", "m"));
KuduPartialRow split2(&schema);
ASSERT_OK(split2.SetInt8("c", 10));
ASSERT_OK(split2.SetStringCopy("b", "r"));
vector<Partition> partitions;
ASSERT_OK(partition_schema.CreatePartitions({ split1, split2 }, {}, schema, &partitions));
// Applies the specified predicates to a scan and checks that the expected
// number of partitions are pruned.
const auto check = [&] (const vector<ColumnPredicate>& predicates, size_t remaining_tablets) {
ScanSpec spec;
for (const auto& pred : predicates) {
spec.AddPredicate(pred);
}
size_t pruner_ranges = remaining_tablets == 0 ? 0 : 1;
CheckPrunedPartitions(schema, partition_schema, partitions, spec,
remaining_tablets, pruner_ranges);
};
constexpr int8_t neg_ten = -10;
constexpr int8_t zero = 0;
constexpr int8_t five = 5;
constexpr int8_t ten = 10;
constexpr int8_t hundred = 100;
constexpr int8_t min = INT8_MIN;
constexpr int8_t max = INT8_MAX;
Slice empty = "";
Slice a = "a";
Slice m = "m";
Slice m0 = Slice("m\0", 2);
Slice r = "r";
Slice z = "z";
// No Bounds
NO_FATALS(check({}, 3));
// c < -10
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), nullptr, &neg_ten) }, 1));
// c = -10
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(2), &neg_ten) }, 1));
// c < 10
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), nullptr, &ten) }, 2));
// c < 100
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), nullptr, &hundred) }, 3));
// c < MIN
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), nullptr, &min) }, 0));
// c < MAX
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), nullptr, &max) }, 3));
// c >= -10
NO_FATALS(check({ ColumnPredicate::Range(schema.column(0), &neg_ten, nullptr) }, 3));
// c >= 0
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), &zero, nullptr) }, 3));
// c >= 5
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), &five, nullptr) }, 2));
// c >= 10
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), &ten, nullptr) }, 2));
// c >= 100
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), &hundred, nullptr) }, 1));
// c >= MIN
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), &min, nullptr) }, 3));
// c >= MAX
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), &max, nullptr) }, 1));
// c = MIN
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(2), &min) }, 1));
// c = MAX
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(2), &max) }, 1));
// c >= -10
// c < 0
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), &neg_ten, &zero) }, 1));
// c >= 5
// c < 100
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), &five, &hundred) }, 2));
// b = ""
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(1), &empty) }, 3));
// b >= "z"
NO_FATALS(check({ ColumnPredicate::Range(schema.column(1), &z, nullptr) }, 3));
// b < "a"
NO_FATALS(check({ ColumnPredicate::Range(schema.column(1), nullptr, &a) }, 3));
// b >= "m"
// b < "z"
NO_FATALS(check({ ColumnPredicate::Range(schema.column(1), &m, &z) }, 3));
// c >= 10
// b >= "r"
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), &ten, nullptr),
ColumnPredicate::Range(schema.column(1), &r, nullptr) },
1));
// c >= 10
// b < "r"
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), &ten, nullptr),
ColumnPredicate::Range(schema.column(1), nullptr, &r) },
2));
// c = 10
// b < "r"
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(2), &ten),
ColumnPredicate::Range(schema.column(1), nullptr, &r) },
1));
// c < 0
// b < "m"
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), nullptr, &zero),
ColumnPredicate::Range(schema.column(1), nullptr, &m) },
1));
// c < 0
// b < "z"
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), nullptr, &zero),
ColumnPredicate::Range(schema.column(1), nullptr, &z) },
1));
// c = 0
// b = "m\0"
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(2), &zero),
ColumnPredicate::Equality(schema.column(1), &m0) },
1));
// c = 0
// b < "m"
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(2), &zero),
ColumnPredicate::Range(schema.column(1), nullptr, &m) },
1));
// c = 0
// b < "m\0"
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(2), &zero),
ColumnPredicate::Range(schema.column(1), nullptr, &m0) },
2));
// c IS NOT NULL
NO_FATALS(check({ ColumnPredicate::IsNotNull(schema.column(2)) }, 3));
}
TEST_F(PartitionPrunerTest, TestHashPruning) {
// CREATE TABLE t
// (a INT8, b INT8, c INT8)
// PRIMARY KEY (a, b, c)
// DISTRIBUTE BY HASH(a) INTO 2 BUCKETS,
// HASH(b, c) INTO 2 BUCKETS;
Schema schema({ ColumnSchema("a", INT8),
ColumnSchema("b", INT8),
ColumnSchema("c", INT8) },
{ ColumnId(0), ColumnId(1), ColumnId(2) },
3);
PartitionSchemaPB pb;
CreatePartitionSchemaPB({}, { {{"a"}, 2, 0}, {{"b", "c"}, 2, 0} }, &pb);
pb.mutable_range_schema()->Clear();
PartitionSchema partition_schema;
ASSERT_OK(PartitionSchema::FromPB(pb, schema, &partition_schema));
vector<Partition> partitions;
ASSERT_OK(partition_schema.CreatePartitions(vector<KuduPartialRow>(), {},
schema, &partitions));
// Applies the specified predicates to a scan and checks that the expected
// number of partitions are pruned.
const auto check = [&] (const vector<ColumnPredicate>& predicates,
size_t remaining_tablets,
size_t pruner_ranges) {
ScanSpec spec;
for (const auto& pred : predicates) {
spec.AddPredicate(pred);
}
CheckPrunedPartitions(schema, partition_schema, partitions, spec,
remaining_tablets, pruner_ranges);
};
constexpr int8_t zero = 0;
constexpr int8_t one = 1;
constexpr int8_t two = 2;
// No Bounds
NO_FATALS(check({}, 4, 1));
// a = 0;
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(0), &zero) }, 2, 1));
// a >= 0;
NO_FATALS(check({ ColumnPredicate::Range(schema.column(0), &zero, nullptr) }, 4, 1));
// a >= 0;
// a < 1;
NO_FATALS(check({ ColumnPredicate::Range(schema.column(0), &zero, &one) }, 2, 1));
// a >= 0;
// a < 2;
NO_FATALS(check({ ColumnPredicate::Range(schema.column(0), &zero, &two) }, 4, 1));
// b = 1;
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(1), &one) }, 4, 1));
// b = 1;
// c = 2;
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(1), &one),
ColumnPredicate::Equality(schema.column(2), &two) },
2, 2));
// a = 0;
// b = 1;
// c = 2;
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(0), &zero),
ColumnPredicate::Equality(schema.column(1), &one),
ColumnPredicate::Equality(schema.column(2), &two) },
1, 1));
}
TEST_F(PartitionPrunerTest, TestInListHashPruning) {
// CREATE TABLE t
// (a INT8, b INT8, c INT8)
// PRIMARY KEY (a, b, c)
// DISTRIBUTE BY HASH(a) INTO 3 BUCKETS,
// HASH(b) INTO 3 BUCKETS;
// HASH(c) INTO 3 BUCKETS;
Schema schema({ ColumnSchema("a", INT8),
ColumnSchema("b", INT8),
ColumnSchema("c", INT8) },
{ ColumnId(0), ColumnId(1), ColumnId(2) },
3);
PartitionSchemaPB pb;
CreatePartitionSchemaPB({}, { {{"a"}, 3, 0}, {{"b"}, 3, 0}, {{"c"}, 3, 0} }, &pb);
pb.mutable_range_schema()->clear_columns();
PartitionSchema partition_schema;
ASSERT_OK(PartitionSchema::FromPB(pb, schema, &partition_schema));
vector<Partition> partitions;
ASSERT_OK(partition_schema.CreatePartitions(vector<KuduPartialRow>(), {},
schema, &partitions));
// Applies the specified predicates to a scan and checks that the expected
// number of partitions are pruned.
const auto check = [&] (const vector<ColumnPredicate>& predicates,
size_t remaining_tablets,
size_t pruner_ranges) {
ScanSpec spec;
for (const auto& pred : predicates) {
spec.AddPredicate(pred);
}
CheckPrunedPartitions(schema, partition_schema, partitions, spec,
remaining_tablets, pruner_ranges);
};
// zero, one, eight are in different buckets when bucket number is 3 and seed is 0.
constexpr int8_t zero = 0;
constexpr int8_t one = 1;
constexpr int8_t eight = 8;
vector<const void*> a_values;
vector<const void*> b_values;
vector<const void*> c_values;
// a in [0, 1];
a_values = { &zero, &one };
NO_FATALS(check({ ColumnPredicate::InList(schema.column(0), &a_values) }, 18, 2));
// a in [0, 1, 8];
a_values = { &zero, &one, &eight };
NO_FATALS(check({ ColumnPredicate::InList(schema.column(0), &a_values) }, 27, 1));
// b in [0, 1]
b_values = { &zero, &one };
NO_FATALS(check({ ColumnPredicate::InList(schema.column(1), &b_values) }, 18, 6));
// c in [0, 1]
c_values = { &zero, &one };
NO_FATALS(check({ ColumnPredicate::InList(schema.column(2), &c_values) }, 18, 18));
// b in [0, 1], c in [0, 1]
b_values = { &zero, &one };
c_values = { &zero, &one };
NO_FATALS(check({ ColumnPredicate::InList(schema.column(1), &b_values),
ColumnPredicate::InList(schema.column(2), &c_values) },
12, 12));
// a in [0, 1], b in [0, 1], c in [0, 1]
a_values = { &zero, &one };
b_values = { &zero, &one };
c_values = { &zero, &one };
NO_FATALS(check({ ColumnPredicate::InList(schema.column(0), &a_values),
ColumnPredicate::InList(schema.column(1), &b_values),
ColumnPredicate::InList(schema.column(2), &c_values) },
8, 8));
}
TEST_F(PartitionPrunerTest, TestMultiColumnInListHashPruning) {
// CREATE TABLE t
// (a INT8, b INT8, c INT8)
// PRIMARY KEY (a, b, c)
// DISTRIBUTE BY HASH(a) INTO 3 BUCKETS,
// HASH(b, c) INTO 3 BUCKETS;
Schema schema({ ColumnSchema("a", INT8),
ColumnSchema("b", INT8),
ColumnSchema("c", INT8) },
{ ColumnId(0), ColumnId(1), ColumnId(2) },
3);
PartitionSchemaPB pb;
CreatePartitionSchemaPB({}, { {{"a"}, 3, 0}, {{"b", "c"}, 3, 0} }, &pb);
pb.mutable_range_schema()->clear_columns();
PartitionSchema partition_schema;
ASSERT_OK(PartitionSchema::FromPB(pb, schema, &partition_schema));
vector<Partition> partitions;
ASSERT_OK(partition_schema.CreatePartitions(vector<KuduPartialRow>(), {},
schema, &partitions));
// Applies the specified predicates to a scan and checks that the expected
// number of partitions are pruned.
const auto check = [&] (const vector<ColumnPredicate>& predicates,
size_t remaining_tablets,
size_t pruner_ranges) {
ScanSpec spec;
for (const auto& pred : predicates) {
spec.AddPredicate(pred);
}
CheckPrunedPartitions(schema, partition_schema, partitions, spec,
remaining_tablets, pruner_ranges);
};
// zero, one, eight are in different buckets when bucket number is 3 and seed is 0.
constexpr int8_t zero = 0;
constexpr int8_t one = 1;
constexpr int8_t eight = 8;
vector<const void*> a_values;
vector<const void*> b_values;
vector<const void*> c_values;
// a in [0, 1];
a_values = { &zero, &one };
NO_FATALS(check({ ColumnPredicate::InList(schema.column(0), &a_values) }, 6, 2));
// a in [0, 1, 8];
a_values = { &zero, &one, &eight };
NO_FATALS(check({ ColumnPredicate::InList(schema.column(0), &a_values) }, 9, 1));
// b in [0, 1]
b_values = { &zero, &one };
NO_FATALS(check({ ColumnPredicate::InList(schema.column(1), &b_values) }, 9, 1));
// c in [0, 1]
c_values = { &zero, &one };
NO_FATALS(check({ ColumnPredicate::InList(schema.column(2), &c_values) }, 9, 1));
// b in [0, 1], c in [0, 1]
// (0, 0) in bucket 2
// (0, 1) in bucket 2
// (1, 0) in bucket 1
// (1, 1) in bucket 0
b_values = { &zero, &one };
c_values = { &zero, &one };
NO_FATALS(check({ ColumnPredicate::InList(schema.column(1), &b_values),
ColumnPredicate::InList(schema.column(2), &c_values) },
9, 1));
// b = 0, c in [0, 1]
c_values = { &zero, &one };
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(1), &zero),
ColumnPredicate::InList(schema.column(2), &c_values) },
3, 3));
// b = 1, c in [0, 1]
c_values = { &zero, &one };
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(1), &one),
ColumnPredicate::InList(schema.column(2), &c_values) },
6, 6));
//a in [0, 1], b in [0, 1], c in [0, 1]
a_values = { &zero, &one };
b_values = { &zero, &one };
c_values = { &zero, &one };
NO_FATALS(check({ ColumnPredicate::InList(schema.column(0), &a_values),
ColumnPredicate::InList(schema.column(1), &b_values),
ColumnPredicate::InList(schema.column(2), &c_values) },
6, 2));
}
TEST_F(PartitionPrunerTest, TestPruning) {
// CREATE TABLE timeseries
// (host STRING, metric STRING, time UNIXTIME_MICROS, value DOUBLE)
// PRIMARY KEY (host, metric, time)
// PARTITION BY RANGE (time) (PARTITION VALUES < 10,
// PARTITION VALUES >= 10)
// HASH (host, metric) 2 PARTITIONS;
Schema schema({ ColumnSchema("host", STRING),
ColumnSchema("metric", STRING),
ColumnSchema("time", UNIXTIME_MICROS),
ColumnSchema("value", DOUBLE) },
{ ColumnId(0), ColumnId(1), ColumnId(2), ColumnId(3) },
3);
PartitionSchemaPB pb;
CreatePartitionSchemaPB({"time"}, { {{"host", "metric"}, 2, 0} }, &pb);
PartitionSchema partition_schema;
ASSERT_OK(PartitionSchema::FromPB(pb, schema, &partition_schema));
KuduPartialRow split(&schema);
ASSERT_OK(split.SetUnixTimeMicros("time", 10));
vector<Partition> partitions;
ASSERT_OK(partition_schema.CreatePartitions(
vector<KuduPartialRow>{ split }, {}, schema, &partitions));
ASSERT_EQ(4, partitions.size());
// Applies the specified predicates to a scan and checks that the expected
// number of partitions are pruned.
const auto check = [&] (const vector<ColumnPredicate>& predicates,
const PartitionKey& lower_bound_partition_key,
const PartitionKey& upper_bound_partition_key,
size_t remaining_tablets,
size_t pruner_ranges) {
ScanSpec spec;
spec.SetLowerBoundPartitionKey(lower_bound_partition_key);
spec.SetExclusiveUpperBoundPartitionKey(upper_bound_partition_key);
for (const auto& pred : predicates) {
spec.AddPredicate(pred);
}
CheckPrunedPartitions(schema, partition_schema, partitions, spec,
remaining_tablets, pruner_ranges);
};
Slice a = "a";
constexpr int64_t nine = 9;
constexpr int64_t ten = 10;
constexpr int64_t twenty = 20;
// host = "a"
// metric = "a"
// timestamp >= 9;
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(0), &a),
ColumnPredicate::Equality(schema.column(1), &a),
ColumnPredicate::Range(schema.column(2), &nine, nullptr) },
{}, {},
2, 1));
// host = "a"
// metric = "a"
// timestamp >= 10;
// timestamp < 20;
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(0), &a),
ColumnPredicate::Equality(schema.column(1), &a),
ColumnPredicate::Range(schema.column(2), &ten, &twenty) },
{}, {},
1, 1));
// host = "a"
// metric = "a"
// timestamp < 10;
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(0), &a),
ColumnPredicate::Equality(schema.column(1), &a),
ColumnPredicate::Range(schema.column(2), nullptr, &ten) },
{}, {},
1, 1));
// host = "a"
// metric = "a"
// timestamp >= 10;
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(0), &a),
ColumnPredicate::Equality(schema.column(1), &a),
ColumnPredicate::Range(schema.column(2), &ten, nullptr) },
{}, {},
1, 1));
// host = "a"
// metric = "a"
// timestamp = 10;
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(0), &a),
ColumnPredicate::Equality(schema.column(1), &a),
ColumnPredicate::Equality(schema.column(2), &ten) },
{}, {},
1, 1));
// partition key < (hash=1)
NO_FATALS(check({}, {}, { string("\0\0\0\1", 4), "" }, 2, 1));
// partition key >= (hash=1)
NO_FATALS(check({}, { string("\0\0\0\1", 4), "" }, {}, 2, 1));
// timestamp = 10
// partition key < (hash=1)
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(2), &ten) },
{}, { string("\0\0\0\1", 4), "" }, 1, 1));
// timestamp = 10
// partition key >= (hash=1)
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(2), &ten) },
{ string("\0\0\0\1", 4), "" }, {}, 1, 1));
}
TEST_F(PartitionPrunerTest, TestKudu2173) {
// CREATE TABLE t
// (a INT8, b INT8, PRIMARY KEY (a, b))
// DISTRIBUTE BY RANGE(a)
// SPLIT ROWS [(10)]
Schema schema({ ColumnSchema("a", INT8),
ColumnSchema("b", INT8)},
{ ColumnId(0), ColumnId(1) },
2);
PartitionSchemaPB pb;
CreatePartitionSchemaPB({"a"}, {}, &pb);
PartitionSchema partition_schema;
ASSERT_OK(PartitionSchema::FromPB(pb, schema, &partition_schema));
KuduPartialRow split1(&schema);
ASSERT_OK(split1.SetInt8("a", 10));
vector<Partition> partitions;
ASSERT_OK(partition_schema.CreatePartitions({ split1 }, {}, schema, &partitions));
// Applies the specified predicates to a scan and checks that the expected
// number of partitions are pruned.
const auto check = [&] (const vector<ColumnPredicate>& predicates, size_t remaining_tablets) {
ScanSpec spec;
for (const auto& pred : predicates) {
spec.AddPredicate(pred);
}
size_t pruner_ranges = remaining_tablets == 0 ? 0 : 1;
CheckPrunedPartitions(schema, partition_schema, partitions, spec,
remaining_tablets, pruner_ranges);
};
constexpr int8_t eleven = 11;
constexpr int8_t max = INT8_MAX;
// a < 11
NO_FATALS(check({ ColumnPredicate::Range(schema.column(0), nullptr, &eleven) }, 2));
// a < 11 AND b < 11
NO_FATALS(check({ ColumnPredicate::Range(schema.column(0), nullptr, &eleven),
ColumnPredicate::Range(schema.column(1), nullptr, &eleven) },
2));
// a < max
NO_FATALS(check({ ColumnPredicate::Range(schema.column(0), nullptr, &max) }, 2));
// a < max AND b < 11
NO_FATALS(check({ ColumnPredicate::Range(schema.column(0), nullptr, &max),
ColumnPredicate::Range(schema.column(1), nullptr, &eleven) },
2));
}
// TODO(aserbin): re-enable this scenario once varying hash dimensions per range
// are supported
TEST_F(PartitionPrunerTest, DISABLED_TestHashSchemasPerRangePruning) {
// CREATE TABLE t
// (A INT8, B INT8, C STRING)
// PRIMARY KEY (A, B, C)
// PARTITION BY RANGE (C)
// DISTRIBUTE BY HASH(A) INTO 2 BUCKETS
// HASH(B) INTO 2 BUCKETS;
Schema schema({ ColumnSchema("A", INT8),
ColumnSchema("B", INT8),
ColumnSchema("C", STRING) },
{ ColumnId(0), ColumnId(1), ColumnId(2) },
3);
PartitionSchemaPB pb;
CreatePartitionSchemaPB({"C"}, { {{"A"}, 2, 0}, {{"B"}, 2, 0} }, &pb);
// Need to add per range hash schema components to the field
// 'ranges_with_hash_schemas_' of PartitionSchema because PartitionPruner will
// use them to construct partition key ranges. Currently,
// PartitionSchema::CreatePartitions() does not leverage this field, so these
// components will have to be passed separately to the function as well.
// None of the ranges below uses the table-wide hash schema.
// [(_, _, a), (_, _, c))
AddRangePartitionWithSchema(schema, {{"C", "a"}}, {{"C", "c"}}, {}, {},
{ {{"A"}, 3, 0} }, &pb);
// [(_, _, d), (_, _, f))
AddRangePartitionWithSchema(schema, {{"C", "d"}}, {{"C", "f"}}, {}, {},
{ {{"A"}, 2, 0}, {{"B"}, 3, 0} }, &pb);
// [(_, _, h), (_, _, j))
AddRangePartitionWithSchema(schema, {{"C", "h"}}, {{"C", "j"}}, {}, {}, {}, &pb);
// [(_, _, k), (_, _, m))
AddRangePartitionWithSchema(schema, {{"C", "k"}}, {{"C", "m"}}, {}, {},
{ {{"B"}, 2, 0} }, &pb);
PartitionSchema partition_schema;
PartitionSchema::RangesWithHashSchemas ranges;
ASSERT_OK(PartitionSchema::FromPB(pb, schema, &partition_schema, &ranges));
vector<Partition> partitions;
ASSERT_OK(partition_schema.CreatePartitions(ranges, schema, &partitions));
ASSERT_EQ(12, partitions.size());
// Applies the specified predicates to a scan and checks that the expected
// number of partitions are pruned.
const auto check = [&] (const vector<ColumnPredicate>& predicates,
const PartitionKey& lower_bound_partition_key,
const PartitionKey& upper_bound_partition_key,
size_t remaining_tablets,
size_t pruner_ranges) {
ScanSpec spec;
spec.SetLowerBoundPartitionKey(lower_bound_partition_key);
spec.SetExclusiveUpperBoundPartitionKey(upper_bound_partition_key);
for (const auto& pred : predicates) {
spec.AddPredicate(pred);
}
CheckPrunedPartitions(schema, partition_schema, partitions, spec,
remaining_tablets, pruner_ranges);
};
constexpr int8_t zero = 0;
constexpr int8_t one = 1;
const Slice a = "a";
const Slice b = "b";
const Slice e = "e";
const Slice f = "f";
const Slice i = "i";
const Slice l = "l";
const Slice m = "m";
// No Bounds
NO_FATALS(check({}, {}, {}, 12, 12));
// A = 1
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(0), &one) },
{}, {}, 7, 7));
// B = 1
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(1), &one) },
{}, {}, 7, 7));
// A = 0
// B = 1
// C >= "e"
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(0), &zero),
ColumnPredicate::Equality(schema.column(1), &one),
ColumnPredicate::Range(schema.column(2), &e, nullptr) },
{}, {}, 3, 3));
// A = 0
// B = 1
// C = "e"
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(0), &zero),
ColumnPredicate::Equality(schema.column(1), &one),
ColumnPredicate::Equality(schema.column(2), &e) },
{}, {}, 1, 1));
// B = 1
// C >= "b"
// C < "j"
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(1), &one),
ColumnPredicate::Range(schema.column(2), &b, nullptr),
ColumnPredicate::Range(schema.column(2), nullptr, &i) },
{}, {}, 6, 6));
// B = 0
// C >= "e"
// C < "l"
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(1), &zero),
ColumnPredicate::Range(schema.column(2), &e, nullptr),
ColumnPredicate::Range(schema.column(2), nullptr, &l) },
{}, {}, 4, 4));
// C >= "a"
// C < "b"
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), &a, &b) },
{}, {}, 3, 3));
// C >= "a"
// C < "e"
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), &a, &e) },
{}, {}, 9, 9));
// C >= "e"
// C < "i"
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), &e, &i) },
{}, {}, 7, 7));
// C >= "a"
// C < "l"
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), &a, &l) },
{}, {}, 12, 12));
// C >= "i"
// C < "l"
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), &i, &l) },
{}, {}, 3, 3));
// C >= "e"
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), &e, nullptr) },
{}, {}, 9, 9));
// C < "f"
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), nullptr, &f) },
{}, {}, 9, 9));
// C >= "f"
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), &f, nullptr) },
{}, {}, 3, 3));
// C < "a"
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), nullptr, &a) },
{}, {}, 0, 0));
// C >= "m"
NO_FATALS(check({ ColumnPredicate::Range(schema.column(2), &m, nullptr) },
{}, {}, 0, 0));
// Uses None predicate to short circuit scan
NO_FATALS(check({ ColumnPredicate::None(schema.column(2))}, {}, {}, 0, 0));
// partition key >= (hash=1, hash=0)
NO_FATALS(check({}, {string("\0\0\0\1\0\0\0\0", 8), ""}, {}, 7, 7));
// partition key < (hash=1, hash=0)
NO_FATALS(check({}, {}, {string("\0\0\0\1\0\0\0\0", 8), ""}, 5, 5));
// C >= "e"
// C < "m"
// partition key >= (hash=1)
//
// The only non-pruned tablets are:
// * range d-f:
// hash keys:
// ** "\0\0\0\1\0\0\0\0"
// ** "\0\0\0\1\0\0\0\1"
// ** "\0\0\0\1\0\0\0\2"
// * range k-m:
// hash keys:
// ** "\0\0\0\1"
NO_FATALS(check({ColumnPredicate::Range(schema.column(2), &e, &m)},
{ string("\0\0\0\1", 4), "" }, {}, 5, 5));
// C >= "e"
// C < "m"
// partition key < (hash=1)
//
// The only non-pruned tablets are:
// * range d-f:
// hash keys:
// ** "\0\0\0\0\0\0\0\0"
// ** "\0\0\0\0\0\0\0\1"
// ** "\0\0\0\0\0\0\0\2"
// * range k-m:
// hash keys:
// ** "\0\0\0\0"
NO_FATALS(check({ColumnPredicate::Range(schema.column(2), &e, &m)},
{}, { string("\0\0\0\1", 4), "" }, 4, 4));
}
TEST_F(PartitionPrunerTest, TestHashSchemasPerRangeWithPartialPrimaryKeyRangePruning) {
// CREATE TABLE t
// (a INT8, b INT8, c INT8)
// PRIMARY KEY (a, b, c)
// PARTITION BY RANGE(a, b)
Schema schema({ ColumnSchema("a", INT8),
ColumnSchema("b", INT8),
ColumnSchema("c", INT8) },
{ ColumnId(0), ColumnId(1), ColumnId(2) },
3);
PartitionSchemaPB pb;
CreatePartitionSchemaPB({"a", "b"}, { {{"c"}, 2, 10} }, &pb);
// [(0, 0, _), (2, 2, _))
AddRangePartitionWithSchema(schema, {}, {}, {{"a", 0}, {"b", 0}}, {{"a", 2}, {"b", 2}},
{ {{"c"}, 2, 0} }, &pb);
// [(2, 2, _), (4, 4, _))
AddRangePartitionWithSchema(schema, {}, {}, {{"a", 2}, {"b", 2}}, {{"a", 4}, {"b", 4}},
{ {{"c"}, 3, 0} }, &pb);
// [(4, 4, _), (6, 6, _))
AddRangePartitionWithSchema(schema, {}, {}, {{"a", 4}, {"b", 4}}, {{"a", 6}, {"b", 6}},
{ {{"c"}, 4, 0} }, &pb);
PartitionSchema partition_schema;
PartitionSchema::RangesWithHashSchemas ranges;
ASSERT_OK(PartitionSchema::FromPB(pb, schema, &partition_schema, &ranges));
vector<Partition> partitions;
ASSERT_OK(partition_schema.CreatePartitions(ranges, schema, &partitions));
ASSERT_EQ(9, partitions.size());
Arena arena(1024);
// Applies the specified lower and upper bound primary keys against the
// schema, and checks that the expected number of partitions are pruned.
const auto check = [&] (optional<tuple<int8_t, int8_t, int8_t>> lower,
optional<tuple<int8_t, int8_t, int8_t>> upper,
size_t remaining_tablets,
size_t pruner_ranges) {
ScanSpec spec;
KuduPartialRow lower_bound(&schema);
KuduPartialRow upper_bound(&schema);
EncodedKey* enc_lower_bound = nullptr;
EncodedKey* enc_upper_bound = nullptr;
if (lower) {
ASSERT_OK(lower_bound.SetInt8("a", get<0>(*lower)));
ASSERT_OK(lower_bound.SetInt8("b", get<1>(*lower)));
ASSERT_OK(lower_bound.SetInt8("c", get<2>(*lower)));
ConstContiguousRow row(lower_bound.schema(), lower_bound.row_data_);
enc_lower_bound = EncodedKey::FromContiguousRow(row, &arena);
spec.SetLowerBoundKey(enc_lower_bound);
}
if (upper) {
ASSERT_OK(upper_bound.SetInt8("a", get<0>(*upper)));
ASSERT_OK(upper_bound.SetInt8("b", get<1>(*upper)));
ASSERT_OK(upper_bound.SetInt8("c", get<2>(*upper)));
ConstContiguousRow row(upper_bound.schema(), upper_bound.row_data_);
enc_upper_bound = EncodedKey::FromContiguousRow(row, &arena);
spec.SetExclusiveUpperBoundKey(enc_upper_bound);
}
CheckPrunedPartitions(schema, partition_schema, partitions, spec,
remaining_tablets, pruner_ranges);
};
// No bounds
NO_FATALS(check(nullopt, nullopt, 9, 13));
// PK < (2, 2, min)
NO_FATALS(check(nullopt, make_tuple<int8_t, int8_t, int8_t>(2, 2, INT8_MIN), 2, 4));
// PK < (2, 2, 0)
NO_FATALS(check(nullopt, make_tuple<int8_t, int8_t, int8_t>(2, 2, 0), 5, 7));
// PK >= (2, 2, 0)
NO_FATALS(check(make_tuple<int8_t, int8_t, int8_t>(2, 2, 0), nullopt, 7, 9));
// PK >= (2, 2, min)
// PK < (4, 4, min)
NO_FATALS(check(make_tuple<int8_t, int8_t, int8_t>(2, 2, INT8_MIN),
make_tuple<int8_t, int8_t, int8_t>(4, 4, INT8_MIN), 3, 3));
// PK >= (2, 2, min)
// PK < (4, 4, 0)
NO_FATALS(check(make_tuple<int8_t, int8_t, int8_t>(2, 2, INT8_MIN),
make_tuple<int8_t, int8_t, int8_t>(4, 4, 0), 7, 7));
// PK >= (2, 0, min)
// PK < (4, 2, min)
NO_FATALS(check(make_tuple<int8_t, int8_t, int8_t>(2, 0, INT8_MIN),
make_tuple<int8_t, int8_t, int8_t>(4, 2, INT8_MIN), 5, 5));
// PK >= (6, 6, min)
NO_FATALS(check(make_tuple<int8_t, int8_t, int8_t>(6, 6, INT8_MIN), nullopt, 0, 2));
// PK >= (4, 4, min)
// PK < (2, 2, min)
// Lower bound PK > Upper bound PK so scan is short-circuited.
NO_FATALS(check(make_tuple<int8_t, int8_t, int8_t>(4, 4, INT8_MIN),
make_tuple<int8_t, int8_t, int8_t>(2, 2, INT8_MIN), 0, 0));
}
TEST_F(PartitionPrunerTest, TestInListHashPruningPerRange) {
// CREATE TABLE t
// (A STRING, B INT8, C INT8)
// PRIMARY KEY (A, B, C)
// PARTITION BY RANGE (A)
// DISTRIBUTE HASH(B, C) INTO 3 BUCKETS;
Schema schema({ ColumnSchema("A", STRING),
ColumnSchema("B", INT8),
ColumnSchema("C", INT8) },
{ ColumnId(0), ColumnId(1), ColumnId(2) },
3);
PartitionSchemaPB pb;
CreatePartitionSchemaPB({"A"}, { {{"B", "C"}, 3, 0} }, &pb);
// None of the ranges below uses the table-wide hash schema.
// [(a, _, _), (c, _, _))
AddRangePartitionWithSchema(schema, {{"A", "a"}}, {{"A", "c"}}, {}, {},
{ {{"B"}, 3, 0} }, &pb);
// [(c, _, _), (e, _, _))
AddRangePartitionWithSchema(schema, {{"A", "c"}}, {{"A", "e"}}, {}, {},
{}, &pb);
// [(e, _, _), (g, _, _))
AddRangePartitionWithSchema(schema, {{"A", "e"}}, {{"A", "g"}}, {}, {},
{ {{"C"}, 3, 0} }, &pb);
PartitionSchema partition_schema;
PartitionSchema::RangesWithHashSchemas ranges;
ASSERT_OK(PartitionSchema::FromPB(pb, schema, &partition_schema, &ranges));
vector<Partition> partitions;
ASSERT_OK(partition_schema.CreatePartitions(ranges, schema, &partitions));
ASSERT_EQ(7, partitions.size());
// Applies the specified predicates to a scan and checks that the expected
// number of partitions are pruned.
const auto check = [&] (const vector<ColumnPredicate>& predicates,
size_t remaining_tablets,
size_t pruner_ranges) {
ScanSpec spec;
for (const auto& pred : predicates) {
spec.AddPredicate(pred);
}
CheckPrunedPartitions(schema, partition_schema, partitions, spec,
remaining_tablets, pruner_ranges);
};
// zero, one, eight are in different buckets when bucket number is 3 and seed is 0.
constexpr int8_t zero = 0;
constexpr int8_t one = 1;
constexpr int8_t eight = 8;
vector<const void*> B_values;
vector<const void*> C_values;
// B in [0, 1, 8];
B_values = { &zero, &one, &eight };
NO_FATALS(check({ ColumnPredicate::InList(schema.column(1), &B_values) },
7, 13));
// B in [0, 1];
B_values = { &zero, &one };
NO_FATALS(check({ ColumnPredicate::InList(schema.column(1), &B_values) },
6, 12));
// C in [0, 1];
C_values = { &zero, &one };
NO_FATALS(check({ ColumnPredicate::InList(schema.column(2), &C_values) },
6, 12));
// B in [0, 1], C in [0, 1]
// (0, 0) in bucket 2
// (0, 1) in bucket 2
// (1, 0) in bucket 1
// (1, 1) in bucket 0
B_values = { &zero, &one };
C_values = { &zero, &one };
NO_FATALS(check({ ColumnPredicate::InList(schema.column(1), &B_values),
ColumnPredicate::InList(schema.column(2), &C_values) },
5, 11));
// B = 0, C in [0, 1]
C_values = { &zero, &one };
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(1), &zero),
ColumnPredicate::InList(schema.column(2), &C_values) },
4, 6));
// B = 1, C in [0, 1]
C_values = { &zero, &one };
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(1), &one),
ColumnPredicate::InList(schema.column(2), &C_values) },
4, 8));
}
// TODO(aserbin): re-enable this scenario once varying hash dimensions per range
// are supported
TEST_F(PartitionPrunerTest, DISABLED_TestSingleRangeElementAndBoundaryCase) {
// CREATE TABLE t
// (A INT8, B INT8)
// PRIMARY KEY (A, B)
// PARTITION BY RANGE (A);
Schema schema({ ColumnSchema("A", INT8),
ColumnSchema("B", INT8) },
{ ColumnId(0), ColumnId(1) },
2);
PartitionSchemaPB pb;
CreatePartitionSchemaPB({"A"}, {}, &pb);
// [(_, _), (1, _))
AddRangePartitionWithSchema(schema, {}, {}, {}, {{"A", 1}},
{{{"B"}, 4, 0}}, &pb);
// [(1, _), (2, _))
AddRangePartitionWithSchema(schema, {}, {}, {{"A", 1}}, {{"A", 2}},
{ {{"B"}, 2, 0} }, &pb);
// [(2, _), (3, _))
AddRangePartitionWithSchema(schema, {}, {}, {{"A", 2}}, {{"A", 3}},
{ {{"B"}, 3, 0} }, &pb);
// [(3, _), (_, _))
AddRangePartitionWithSchema(schema, {}, {}, {{"A", 3}}, {}, {}, &pb);
PartitionSchema partition_schema;
PartitionSchema::RangesWithHashSchemas ranges;
ASSERT_OK(PartitionSchema::FromPB(pb, schema, &partition_schema, &ranges));
vector<Partition> partitions;
ASSERT_OK(partition_schema.CreatePartitions(ranges, schema, &partitions));
ASSERT_EQ(10, partitions.size());
// Applies the specified predicates to a scan and checks that the expected
// number of partitions are pruned.
const auto check = [&] (const vector<ColumnPredicate>& predicates,
size_t remaining_tablets,
size_t pruner_ranges) {
ScanSpec spec;
for (const auto& pred : predicates) {
spec.AddPredicate(pred);
}
CheckPrunedPartitions(schema, partition_schema, partitions, spec,
remaining_tablets, pruner_ranges);
};
constexpr int8_t neg_one = -1;
constexpr int8_t zero = 0;
constexpr int8_t one = 1;
constexpr int8_t two = 2;
constexpr int8_t three = 3;
constexpr int8_t four = 4;
constexpr int8_t five = 5;
// No Bounds
NO_FATALS(check({}, 10, 10));
// A >= 0
NO_FATALS(check({ ColumnPredicate::Range(schema.column(0), &zero, nullptr)}, 10, 10));
// A >= 1
NO_FATALS(check({ ColumnPredicate::Range(schema.column(0), &one, nullptr)}, 6, 6));
// A < 1
NO_FATALS(check({ ColumnPredicate::Range(schema.column(0), nullptr, &one)}, 4, 4));
// A >= 2
NO_FATALS(check({ ColumnPredicate::Range(schema.column(0), &two, nullptr)}, 4, 4));
// A < 2
NO_FATALS(check({ ColumnPredicate::Range(schema.column(0), nullptr, &two)}, 6, 6));
// A < 3
NO_FATALS(check({ ColumnPredicate::Range(schema.column(0), nullptr, &three)}, 9, 9));
// A >= 0
// A < 2
NO_FATALS(check({ ColumnPredicate::Range(schema.column(0), &zero, &two)}, 6, 6));
// A >= 1
// A < 2
NO_FATALS(check({ ColumnPredicate::Range(schema.column(0), &one, &two)}, 2, 2));
// A >= 1
// A < 3
NO_FATALS(check({ ColumnPredicate::Range(schema.column(0), &one, &three)}, 5, 5));
// A >= 3
NO_FATALS(check({ ColumnPredicate::Range(schema.column(0), &three, nullptr)}, 1, 1));
// A >= 4
// A < 5
NO_FATALS(check({ ColumnPredicate::Range(schema.column(0), &four, &five)}, 1, 1));
// A >= -1
// A < 0
NO_FATALS(check({ ColumnPredicate::Range(schema.column(0), &neg_one, &zero)}, 4, 4));
// A >= 5
NO_FATALS(check({ ColumnPredicate::Range(schema.column(0), &five, nullptr)}, 1, 1));
// A < -1
NO_FATALS(check({ ColumnPredicate::Range(schema.column(0), nullptr, &neg_one)}, 4, 4));
// B = 1
NO_FATALS(check({ ColumnPredicate::Equality(schema.column(1), &one)}, 4, 4));
// A >= 0
// A < 2
// B = 1
NO_FATALS(check({ ColumnPredicate::Range(schema.column(0), &zero, &two),
ColumnPredicate::Equality(schema.column(1), &one)}, 2, 2));
// A < 0
// B = 1
NO_FATALS(check({ ColumnPredicate::Range(schema.column(0), nullptr, &zero),
ColumnPredicate::Equality(schema.column(1), &one)}, 1, 1));
}
// Test for the functionality of PartitionPruner::PrepareRangeSet() method.
class PartitionPrunerRangeSetTest : public PartitionPrunerTest {
public:
struct TestStruct {
const string description;
const string scan_lower_bound;
const string scan_upper_bound;
const PartitionSchema::HashSchema& table_wide_hash_schema;
const PartitionSchema::RangesWithHashSchemas ranges_with_custom_hash_schemas;
const PartitionSchema::RangesWithHashSchemas expected_result;
};
static void DoCheck(const TestStruct& t) {
PartitionSchema::RangesWithHashSchemas result_ranges;
PartitionPruner::PrepareRangeSet(
t.scan_lower_bound, t.scan_upper_bound, t.table_wide_hash_schema,
t.ranges_with_custom_hash_schemas,
&result_ranges);
SCOPED_TRACE(t.description);
ASSERT_EQ(t.expected_result.size(), result_ranges.size())
<< result_ranges;
for (auto i = 0; i < result_ranges.size(); ++i) {
SCOPED_TRACE(Substitute("range $0", i));
const auto& lhs = t.expected_result[i];
const auto& rhs = result_ranges[i];
ASSERT_EQ(lhs.lower, rhs.lower);
ASSERT_EQ(lhs.upper, rhs.upper);
ASSERT_EQ(lhs.hash_schema.size(), rhs.hash_schema.size());
for (auto j = 0; j < lhs.hash_schema.size(); ++j) {
SCOPED_TRACE(Substitute("hash dimension $0", j));
ASSERT_EQ(lhs.hash_schema[j].num_buckets,
rhs.hash_schema[j].num_buckets);
}
}
}
};
ostream& operator<<(ostream& os,
const PartitionSchema::RangeWithHashSchema& range) {
os << "(" << (range.lower.empty() ? "*" : range.lower)
<< " " << (range.upper.empty() ? "*" : range.upper) << ")";
return os;
}
ostream& operator<<(ostream& os,
const PartitionSchema::RangesWithHashSchemas& ranges) {
for (const auto& range : ranges) {
os << range << " ";
}
return os;
}
TEST_F(PartitionPrunerRangeSetTest, PrepareRangeSetX) {
const PartitionSchema::HashSchema _2 = { { {ColumnId(0)}, 2, 0 } };
const PartitionSchema::HashSchema _3 = { { {ColumnId(0)}, 3, 0 } };
const PartitionSchema::HashSchema _4 = { { {ColumnId(0)}, 4, 0 } };
const PartitionSchema::HashSchema _5 = { { {ColumnId(0)}, 5, 0 } };
// For each element, there is a representation of the corresponding scenario
// in the 'description' field: the first line is for the scan boundaries, the
// second line is for the set of ranges with custom hash schemas. Two lines
// are properly aligned to express the disposition of the ranges with custom
// hash schemas vs the scan range. For the range bounds, the asterisk symbol
// means "unlimited", i.e. no bound. Square [] and regular () parentheses
// don't have the inclusivity/exclusivity semantics: they are rather to
// distinguish scan bounds from range bounds.
const vector<TestStruct> test_inputs_and_results = {
{
R"*(
"[a b]"
""
)*",
"a", "b", _2,
{},
{ {"a", "b", _2} }
},
{
R"*(
"[a *]"
"(a *)"
)*",
"a", "", _2,
{ {"a", "", _3} },
{ {"a", "", _3} }
},
{
R"*(
"[* b]"
"(* b)"
)*",
"", "b", _2,
{ {"", "b", _3} },
{ {"", "b", _3} }
},
{
R"*(
"[* c]"
" (b c)"
)*",
"", "c", _2,
{ {"b", "c", _3} },
{ {"", "b", _2}, {"b", "c", _3} }
},
{
R"*(
"[* *]"
"(* b)"
)*",
"", "", _2,
{ {"", "b", _3} },
{ {"", "b", _3}, {"b", "", _2} }
},
{
R"*(
"[* *]"
" (b *)"
)*",
"", "", _2,
{ {"b", "", _3} },
{ { "", "b", _2}, {"b", "", _3} }
},
{
R"*(
" [c d]"
"(a b)"
)*",
"c", "d", _2,
{ {"a", "b", _3} },
{ {"c", "d", _2} }
},
{
R"*(
" [c d]"
"(* b)"
)*",
"c", "d", _2,
{ { "", "b", _3} },
{ {"c", "d", _2} }
},
{
R"*(
" [c d]"
"(a b)(b c)"
)*",
"c", "d", _2,
{ {"a", "b", _3}, {"b", "c", _4} },
{ {"c", "d", _2} }
},
{
R"*(
" [d e]"
"(a b) (c d)"
)*",
"d", "e", _2,
{ {"a", "b", _3}, {"c", "d", _4} },
{ {"d", "e", _2} }
},
{
R"*(
"[a b]"
" (c d)"
)*",
"a", "b", _2,
{ {"c", "d", _3} },
{ {"a", "b", _2} }
},
{
R"*(
"[a b]"
" (c *)"
)*",
"a", "b", _2,
{ {"c", "", _3} },
{ {"a", "b", _2} }
},
{
R"*(
"[* b]"
" (c d)"
)*",
"", "b", _2,
{ {"c", "d", _3} },
{ { "", "b", _2} }
},
{
R"*(
"[* b]"
" (c *)"
)*",
"", "b", _2,
{ {"c", "", _3} },
{ { "", "b", _2} }
},
{
R"*(
" [c d]"
"(a b) (e f)"
)*",
"c", "d", _2,
{ { "a", "b", _3 }, { "e", "f", _4 } },
{ { "c", "d", _2 } }
},
{
R"*(
" [c d]"
"(* b) (e f)"
)*",
"c", "d", _2,
{ { "", "b", _3}, {"e", "f", _4} },
{ {"c", "d", _2} }
},
{
R"*(
" [c d]"
"(a b) (e *)"
)*",
"c", "d", _2,
{ {"a", "b", _3}, {"e", "", _4} },
{ {"c", "d", _2} }
},
{
R"*(
" [c d]"
"(* b) (e *)"
)*",
"c", "d", _2,
{ { "", "b", _3}, {"e", "", _4} },
{ {"c", "d", _2} }
},
{
R"*(
" [b c]"
"(a b) (c d)"
)*",
"b", "c", _2,
{ {"a", "b", _3}, {"c", "d", _4} },
{ {"b", "c", _2} }
},
{
R"*(
" [b c]"
"(* b) (c d)"
)*",
"b", "c", _2,
{ { "", "b", _3}, {"c", "d", _4} },
{ {"b", "c", _2} }
},
{
R"*(
" [b c]"
"(a b) (c *)"
)*",
"b", "c", _2,
{ {"a", "b", _3}, {"c", "", _4} },
{ {"b", "c", _2} }
},
{
R"*(
" [b c]"
"(* b) (c *)"
)*",
"b", "c", _2,
{ { "", "b", _3}, {"c", "", _4} },
{ {"b", "c", _2} }
},
{
R"*(
" [b c]"
"(* b)"
)*",
"b", "c", _2,
{ { "", "b", _3} },
{ {"b", "c", _2} }
},
{
R"*(
" [b c]"
"(a b)"
)*",
"b", "c", _2,
{ {"a", "b", _3} },
{ {"b", "c", _2} }
},
{
R"*(
"[a b]"
" (b c)"
)*",
"a", "b", _2,
{ {"b", "c", _3} },
{ {"a", "b", _2} }
},
{
R"*(
"[a b]"
" (b *)"
)*",
"a", "b", _2,
{ {"b", "", _3} },
{ {"a", "b", _2} }
},
{
R"*(
"[a c]"
"(a b)"
)*",
"a", "c", _2,
{ {"a", "b", _3} },
{ {"a", "b", _3}, {"b", "c", _2} }
},
{
R"*(
"[a c]"
" (b c)"
)*",
"a", "c", _2,
{ {"b", "c", _3} },
{ {"a", "b", _2}, {"b", "c", _3} }
},
{
R"*(
"[a *]"
" (b *)"
)*",
"a", "", _2,
{ {"b", "", _3} },
{ {"a", "b", _2}, {"b", "", _3} }
},
{
R"*(
"[* b]"
"(* a)"
)*",
"", "b", _2,
{ { "", "a", _3} },
{ { "", "a", _3}, {"a", "b", _2} }
},
{
R"*(
"[a d]"
" (b c)"
)*",
"a", "d", _2,
{ {"b", "c", _3} },
{ {"a", "b", _2}, {"b", "c", _3}, {"c", "d", _2} }
},
{
R"*(
"[* d]"
" (b c)"
)*",
"", "d", _2,
{ {"b", "c", _3} },
{ { "", "b", _2}, {"b", "c", _3}, {"c", "d", _2} }
},
{
R"*(
"[* *]"
" (b c)"
)*",
"", "", _2,
{ {"b", "c", _3} },
{ { "", "b", _2}, {"b", "c", _3}, {"c", "", _2} }
},
{
R"*(
" [b c]"
"(a d)"
)*",
"b", "c", _2,
{ {"a", "d", _3} },
{ {"b", "c", _3} }
},
{
R"*(
" [b c]"
"(a *)"
)*",
"b", "c", _2,
{ {"a", "", _3} },
{ {"b", "c", _3} }
},
{
R"*(
" [b c]"
"(* d)"
)*",
"b", "c", _2,
{ { "", "d", _3} },
{ {"b", "c", _3} }
},
{
R"*(
" [b c]"
"(* *)"
)*",
"b", "c", _2,
{ { "", "", _3} },
{ {"b", "c", _3} }
},
{
R"*(
"[a c]"
" (b *)"
)*",
"a", "c", _2,
{ {"b", "", _3} },
{ {"a", "b", _2}, {"b", "c", _3} }
},
{
R"*(
"[a c]"
"(a b)(b c)"
)*",
"a", "c", _2,
{ {"a", "b", _3}, {"b", "c", _4} },
{ {"a", "b", _3}, {"b", "c", _4} }
},
{
R"*(
"[a c]"
"(a b)(b *)"
)*",
"a", "c", _2,
{ {"a", "b", _3}, {"b", "", _4} },
{ {"a", "b", _3}, {"b", "c", _4} }
},
{
R"*(
"[a e]"
" (b c)(c d)"
)*",
"a", "e", _2,
{ {"b", "c", _3}, {"c", "d", _4} },
{ {"a", "b", _2}, {"b", "c", _3}, {"c", "d", _4}, {"d", "e", _2} }
},
{
R"*(
"[a *]"
" (b c) (d *)"
)*",
"a", "", _2,
{ {"b", "c", _3}, {"d", "", _4} },
{ {"a", "b", _2}, {"b", "c", _3}, {"c", "d", _2}, {"d", "", _4} }
},
{
R"*(
"[a f]"
" (b c) (d *)"
)*",
"a", "f", _2,
{ {"b", "c", _3}, {"d", "", _4} },
{ {"a", "b", _2}, {"b", "c", _3}, {"c", "d", _2}, {"d", "f", _4} }
},
{
R"*(
"[a f]"
" (b c) (d e)"
)*",
"a", "f", _2,
{ {"b", "c", _3}, {"d", "e", _4} },
{
{"a", "b", _2},
{"b", "c", _3},
{"c", "d", _2},
{"d", "e", _4},
{"e", "f", _2},
}
},
{
R"*(
"[a *]"
" (b c) (d e)"
)*",
"a", "", _2,
{ {"b", "c", _3}, {"d", "e", _4} },
{
{"a", "b", _2},
{"b", "c", _3},
{"c", "d", _2},
{"d", "e", _4},
{"e", "", _2},
}
},
{
R"*(
"[* f]"
" (b c) (d e)"
)*",
"", "f", _2,
{ {"b", "c", _3}, {"d", "e", _4} },
{
{ "", "b", _2},
{"b", "c", _3},
{"c", "d", _2},
{"d", "e", _4},
{"e", "f", _2},
}
},
{
R"*(
"[a d]"
" (b c) (e f)"
)*",
"a", "d", _2,
{ {"b", "c", _3}, {"e", "f", _4} },
{ {"a", "b", _2}, {"b", "c", _3}, {"c", "d", _2} }
},
{
R"*(
" [b e]"
"(a c) (d *)"
)*",
"b", "e", _2,
{ {"a", "c", _3}, {"d", "", _4} },
{ {"b", "c", _3}, {"c", "d", _2}, {"d", "e", _4} }
},
{
R"*(
"[* e]"
" (b c) (d *)"
)*",
"", "e", _2,
{ {"b", "c", _3}, {"d", "", _4} },
{ { "", "b", _2}, {"b", "c", _3}, {"c", "d", _2}, {"d", "e", _4} }
},
{
R"*(
" [b e]"
"(a b) (c d) (e f)"
)*",
"b", "e", _2,
{ {"a", "b", _3}, {"c", "d", _4}, {"e", "f", _5}, },
{ {"b", "c", _2}, {"c", "d", _4}, {"d", "e", _2}, }
},
{
R"*(
" [b e]"
"(* b) (c d) (e f)"
)*",
"b", "e", _2,
{ { "", "b", _3}, {"c", "d", _4}, {"e", "f", _5}, },
{ {"b", "c", _2}, {"c", "d", _4}, {"d", "e", _2}, }
},
{
R"*(
" [b e]"
"(a b) (c d) (e *)"
)*",
"b", "e", _2,
{ {"a", "b", _3}, {"c", "d", _4}, {"e", "", _5}, },
{ {"b", "c", _2}, {"c", "d", _4}, {"d", "e", _2}, }
},
{
R"*(
" [b e]"
"(* b) (c d) (e *)"
)*",
"b", "e", _2,
{ { "", "b", _3}, {"c", "d", _4}, {"e", "", _5}, },
{ {"b", "c", _2}, {"c", "d", _4}, {"d", "e", _2}, }
},
{
R"*(
"[* e]"
"(* b) (c d) (e *)"
)*",
"", "e", _2,
{ { "", "b", _3}, {"c", "d", _4}, {"e", "", _5}, },
{ { "", "b", _3}, {"b", "c", _2}, {"c", "d", _4}, {"d", "e", _2}, }
},
};
for (const auto& scenario : test_inputs_and_results) {
NO_FATALS(DoCheck(scenario));
}
}
} // namespace kudu