blob: 4f77c115e7bad5adbd2203eedad287291a49facb [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
**/
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
#include "catalog/CatalogAttribute.hpp"
#include "catalog/CatalogRelation.hpp"
#include "catalog/CatalogTypedefs.hpp"
#include "expressions/predicate/ComparisonPredicate.hpp"
#include "expressions/predicate/PredicateCost.hpp"
#include "expressions/scalar/ScalarAttribute.hpp"
#include "expressions/scalar/ScalarLiteral.hpp"
#include "storage/BloomFilterIndexSubBlock.hpp"
#include "storage/StorageBlockInfo.hpp"
#include "storage/StorageBlockLayout.pb.h"
#include "storage/TupleIdSequence.hpp"
#include "storage/tests/MockTupleStorageSubBlock.hpp"
#include "types/CharType.hpp"
#include "types/DoubleType.hpp"
#include "types/FloatType.hpp"
#include "types/LongType.hpp"
#include "types/TypeFactory.hpp"
#include "types/TypeID.hpp"
#include "types/TypedValue.hpp"
#include "types/VarCharType.hpp"
#include "types/containers/Tuple.hpp"
#include "types/operations/comparisons/Comparison.hpp"
#include "types/operations/comparisons/ComparisonFactory.hpp"
#include "types/operations/comparisons/ComparisonID.hpp"
#include "utility/ScopedBuffer.hpp"
#include "glog/logging.h"
#include "gtest/gtest.h"
using std::int64_t;
using std::ostringstream;
using std::size_t;
using std::string;
using std::vector;
namespace quickstep {
class BloomFilterIndexSubBlockTest : public ::testing::Test {
protected:
static const size_t kIndexSubBlockSize = 0x100000; // 100 KB
static const int64_t kLongAttrNullValue = -55555;
static const char kCharAttrNullValue[];
virtual void SetUp() {
// Create a sample relation with a variety of attribute types.
relation_.reset(new CatalogRelation(NULL, "TestRelation"));
CatalogAttribute *long_attr = new CatalogAttribute(relation_.get(),
"long_attr",
TypeFactory::GetType(kLong, false));
ASSERT_EQ(0, relation_->addAttribute(long_attr));
CatalogAttribute *nullable_long_attr = new CatalogAttribute(relation_.get(),
"nullable_long_attr",
TypeFactory::GetType(kLong, true));
ASSERT_EQ(1, relation_->addAttribute(nullable_long_attr));
CatalogAttribute *float_attr = new CatalogAttribute(relation_.get(),
"float_attr",
TypeFactory::GetType(kFloat, false));
ASSERT_EQ(2, relation_->addAttribute(float_attr));
CatalogAttribute *char_attr = new CatalogAttribute(relation_.get(),
"char_attr",
TypeFactory::GetType(kChar, 4, false));
ASSERT_EQ(3, relation_->addAttribute(char_attr));
CatalogAttribute *nullable_char_attr = new CatalogAttribute(relation_.get(),
"nullable_char_attr",
TypeFactory::GetType(kChar, 4, true));
ASSERT_EQ(4, relation_->addAttribute(nullable_char_attr));
CatalogAttribute *big_char_attr = new CatalogAttribute(relation_.get(),
"big_char_attr",
TypeFactory::GetType(kChar, 80, false));
ASSERT_EQ(5, relation_->addAttribute(big_char_attr));
CatalogAttribute *varchar_attr = new CatalogAttribute(relation_.get(),
"varchar_attr",
TypeFactory::GetType(kVarChar, 8, false));
ASSERT_EQ(6, relation_->addAttribute(varchar_attr));
// Create a MockTupleStorageSubBlock to hold tuples for testing.
tuple_store_.reset(new MockTupleStorageSubBlock(*relation_));
index_memory_.reset();
index_description_.reset();
index_.reset();
}
void createIndex(const vector<attribute_id> &indexed_attrs, const size_t index_memory_size) {
// Make the IndexSubBlockDescription.
index_description_.reset(new IndexSubBlockDescription());
index_description_->set_sub_block_type(IndexSubBlockDescription::BLOOM_FILTER);
for (std::size_t i = 0; i < indexed_attrs.size(); ++i) {
index_description_->add_indexed_attribute_ids(indexed_attrs[i]);
}
index_memory_.reset(index_memory_size);
index_.reset(new BloomFilterIndexSubBlock(*tuple_store_,
*index_description_,
true,
index_memory_.get(),
index_memory_size));
}
// Insert a tuple with the specified attribute values into tuple_store_.
tuple_id insertTupleInTupleStore(const int64_t long_val,
const int64_t nullable_long_val,
const float float_val,
const string &char_val,
const string &nullable_char_val,
const string &big_char_val,
const string &varchar_val) {
std::vector<TypedValue> attrs;
attrs.emplace_back(LongType::InstanceNonNullable().makeValue(&long_val));
if (nullable_long_val == kLongAttrNullValue) {
attrs.emplace_back(LongType::InstanceNullable().makeNullValue());
} else {
attrs.emplace_back(LongType::InstanceNullable().makeValue(&nullable_long_val));
}
attrs.emplace_back(FloatType::InstanceNonNullable().makeValue(&float_val));
attrs.emplace_back(CharType::InstanceNonNullable(4).makeValue(
char_val.c_str(),
char_val.size() >= 4 ? 4 : char_val.size() + 1).ensureNotReference());
if (nullable_char_val == kCharAttrNullValue) {
attrs.emplace_back(CharType::InstanceNullable(4).makeNullValue());
} else {
attrs.emplace_back(CharType::InstanceNonNullable(4).makeValue(
nullable_char_val.c_str(),
nullable_char_val.size() >= 4 ? 4 : nullable_char_val.size() + 1).ensureNotReference());
}
attrs.emplace_back(CharType::InstanceNonNullable(80).makeValue(
big_char_val.c_str(),
big_char_val.size() >= 80 ? 80 : big_char_val.size() + 1).ensureNotReference());
TypedValue varchar_typed_value
= VarCharType::InstanceNonNullable(varchar_val.size()).makeValue(
varchar_val.c_str(),
varchar_val.size() + 1);
// Test strings are sometimes longer than 8 characters, so truncate if
// needed.
varchar_typed_value = VarCharType::InstanceNonNullable(8).coerceValue(
varchar_typed_value,
VarCharType::InstanceNonNullable(varchar_val.size()));
varchar_typed_value.ensureNotReference();
attrs.emplace_back(std::move(varchar_typed_value));
// MockTupleStorageSubBlock takes ownership of new tuple passed in with
// addTupleMock() method.
return tuple_store_->addTupleMock(new Tuple(std::move(attrs)));
}
// Generate a sample tuple based on 'base_value' and insert in into
// tuple_store_. The sample tuple will have long_attr equal to 'base_value',
// float_attr equal to 0.25 * base_value, and each of char_attr,
// big_char_attr, and varchar_attr equal to the string representation of
// 'base_value' with 'string_suffix' appended on to it. If 'generate_nulls'
// is true, then both nullable_long_attr and nullable_char_attr will be NULL,
// otherwise nullable_long_attr will be equal to 'base_value' and
// nullable_char_attr will be equal to the other string values. Returns the
// tuple_id of the inserted tuple.
tuple_id generateAndInsertTuple(const int64_t base_value,
const bool generate_nulls,
const string &string_suffix) {
ostringstream string_value_buffer;
string_value_buffer << base_value << string_suffix;
if (generate_nulls) {
return insertTupleInTupleStore(base_value,
kLongAttrNullValue,
0.25 * base_value,
string_value_buffer.str(),
kCharAttrNullValue,
string_value_buffer.str(),
string_value_buffer.str());
} else {
return insertTupleInTupleStore(base_value,
base_value,
0.25 * base_value,
string_value_buffer.str(),
string_value_buffer.str(),
string_value_buffer.str(),
string_value_buffer.str());
}
}
// Create a ComparisonPredicate of the form "attribute comp literal".
template <typename AttributeType>
ComparisonPredicate* generateNumericComparisonPredicate(
const ComparisonID comp,
const attribute_id attribute,
const typename AttributeType::cpptype literal) {
ScalarAttribute *scalar_attribute = new ScalarAttribute(*relation_->getAttributeById(attribute));
ScalarLiteral *scalar_literal
= new ScalarLiteral(AttributeType::InstanceNonNullable().makeValue(&literal),
AttributeType::InstanceNonNullable());
return new ComparisonPredicate(ComparisonFactory::GetComparison(comp), scalar_attribute, scalar_literal);
}
ComparisonPredicate* generateStringComparisonPredicate(
const ComparisonID comp,
const attribute_id attribute,
const string &literal) {
ScalarAttribute *scalar_attribute = new ScalarAttribute(*relation_->getAttributeById(attribute));
ScalarLiteral *scalar_literal = new ScalarLiteral(
VarCharType::InstanceNonNullable(literal.size()).makeValue(
literal.c_str(),
literal.size() + 1).ensureNotReference(),
VarCharType::InstanceNonNullable(literal.size()));
return new ComparisonPredicate(ComparisonFactory::GetComparison(comp), scalar_attribute, scalar_literal);
}
bool requiresRebuild() const {
CHECK(index_) << "Bloom Filter index not yet built!";
return !index_->is_consistent_;
}
std::unique_ptr<CatalogRelation> relation_;
std::unique_ptr<MockTupleStorageSubBlock> tuple_store_;
ScopedBuffer index_memory_;
std::unique_ptr<IndexSubBlockDescription> index_description_;
std::unique_ptr<BloomFilterIndexSubBlock> index_;
};
const char BloomFilterIndexSubBlockTest::kCharAttrNullValue[] = "_NULLSTRING";
TEST_F(BloomFilterIndexSubBlockTest, DescriptionIsValidTest) {
std::unique_ptr<IndexSubBlockDescription> index_description_;
// valid_attrs contains attribute ids which should register as
// a valid DescriptionIsValid.
vector<attribute_id> valid_attrs({ 0, 1, 2, 3, 4, 5, 6 });
// Try to create an index on each of the attributes. Make sure that each
// of the attributes which can be indexed on is marked valid.
for (const CatalogAttribute &attr : *relation_) {
index_description_.reset(new IndexSubBlockDescription());
index_description_->set_sub_block_type(IndexSubBlockDescription::BLOOM_FILTER);
index_description_->add_indexed_attribute_ids(attr.getID());
if (std::find(valid_attrs.begin(), valid_attrs.end(), attr.getID()) != valid_attrs.end()) {
EXPECT_TRUE(BloomFilterIndexSubBlock::DescriptionIsValid(*relation_, *index_description_))
<< "Expected attribute " << attr.getID() << " to be valid.";
} else {
EXPECT_FALSE(BloomFilterIndexSubBlock::DescriptionIsValid(*relation_, *index_description_))
<< "Expected attribute " << attr.getID() << " to be invalid.";
}
}
}
TEST_F(BloomFilterIndexSubBlockTest, TestConstructor) {
// This test checks the correctness of the index's constructor and destructor.
createIndex({0, 1, 2}, kIndexSubBlockSize);
EXPECT_FALSE(requiresRebuild());
// Reset will invoke the destructor for the index.
index_.reset(nullptr);
// Creating a block on the same memory should cause the index to retrieve
// the previously written values.
index_.reset(new BloomFilterIndexSubBlock(*tuple_store_,
*index_description_,
false,
index_memory_.get(),
kIndexSubBlockSize));
EXPECT_FALSE(requiresRebuild());
}
TEST_F(BloomFilterIndexSubBlockTest, TestRebuild) {
// This test checks whether the index is rebuilt correctly
// after the index is marked inconsistent.
const attribute_id indexed_attr = 0;
createIndex({ indexed_attr }, kIndexSubBlockSize);
EXPECT_FALSE(requiresRebuild());
// Insert 1000 tuples in the index.
const std::size_t num_tuples = 1000;
for (std::size_t i = 0; i < num_tuples; ++i) {
generateAndInsertTuple(i, false, "suffix");
}
// Deleting a tuple should mark the index as inconsistent.
const tuple_id tuple_to_remove = 100;
index_->removeEntry(tuple_to_remove);
EXPECT_TRUE(requiresRebuild());
// Rebuilding the index should mark it consistent.
index_->rebuild();
EXPECT_FALSE(requiresRebuild());
}
TEST_F(BloomFilterIndexSubBlockTest, TestGetSelectivity) {
// This test checks the following conditions on selectivity:
// 1. Selectivity for an inconsistent index should be kSelectivityUnknown,
// 2. Selectivity for a false positive should be kSelectivityAll,
// 3. Selectivity for a true negative should be kSelectivityNone.
const attribute_id indexed_attr = 0;
createIndex({ indexed_attr }, kIndexSubBlockSize);
EXPECT_FALSE(requiresRebuild());
const uint64_t indexed_value = 999;
const uint64_t non_indexed_value = 9999;
std::unique_ptr<ComparisonPredicate> indexed_predicate(
generateNumericComparisonPredicate<LongType>(
ComparisonID::kEqual, indexed_attr, indexed_value));
std::unique_ptr<ComparisonPredicate> non_indexed_predicate(
generateNumericComparisonPredicate<LongType>(
ComparisonID::kEqual, indexed_attr, non_indexed_value));
generateAndInsertTuple(indexed_value, false, "suffix");
// Until the tuple is inserted into index, expect the selectivity to be none.
EXPECT_EQ(BloomFilterIndexSubBlock::BloomFilterSelectivity::kSelectivityNone,
index_->getSelectivityForPredicate(*indexed_predicate));
index_->addEntry(0);
// Once the tuple is inserted into index, expect the selectivity to be all.
EXPECT_EQ(BloomFilterIndexSubBlock::BloomFilterSelectivity::kSelectivityAll,
index_->getSelectivityForPredicate(*indexed_predicate));
// A non-indexed value should not be present in the index.
EXPECT_EQ(BloomFilterIndexSubBlock::BloomFilterSelectivity::kSelectivityNone,
index_->getSelectivityForPredicate(*non_indexed_predicate));
index_->removeEntry(0);
// Removing an entry makes the index inconsistent and selectivity unknown.
EXPECT_EQ(BloomFilterIndexSubBlock::BloomFilterSelectivity::kSelectivityUnknown,
index_->getSelectivityForPredicate(*indexed_predicate));
index_->rebuild();
// Once rebuilt, the index is consistent again.
EXPECT_EQ(BloomFilterIndexSubBlock::BloomFilterSelectivity::kSelectivityNone,
index_->getSelectivityForPredicate(*indexed_predicate));
}
TEST_F(BloomFilterIndexSubBlockTest, TestEvaluatePredicateCost) {
// This test checks the following conditions:
// 1. Predicate cost for an inconsistent index should be infinite (maximum),
// 2. Predicate cost for false positives should be infinite (maximum),
// 3. Predicate cost for true negatives should be constant (minimum).
const attribute_id indexed_attr = 0;
createIndex({ indexed_attr }, kIndexSubBlockSize);
EXPECT_FALSE(requiresRebuild());
const uint64_t indexed_value = 999;
const uint64_t non_indexed_value = 9999;
std::unique_ptr<ComparisonPredicate> indexed_predicate(
generateNumericComparisonPredicate<LongType>(
ComparisonID::kEqual, indexed_attr, indexed_value));
std::unique_ptr<ComparisonPredicate> non_indexed_predicate(
generateNumericComparisonPredicate<LongType>(
ComparisonID::kEqual, indexed_attr, non_indexed_value));
generateAndInsertTuple(indexed_value, false, "suffix");
// Until the tuple is inserted into index, expect the predicate cost to be constant.
EXPECT_EQ(predicate_cost::kConstantTime,
index_->estimatePredicateEvaluationCost(*indexed_predicate));
index_->addEntry(0);
// Once the tuple is inserted into index, expect the predicate cost to be infinite.
EXPECT_EQ(predicate_cost::kInfinite,
index_->estimatePredicateEvaluationCost(*indexed_predicate));
// A non-indexed value should not be present in the index.
EXPECT_EQ(predicate_cost::kConstantTime,
index_->estimatePredicateEvaluationCost(*non_indexed_predicate));
index_->removeEntry(0);
// Removing an entry makes the index inconsistent and predicate cost infinite.
EXPECT_EQ(predicate_cost::kInfinite,
index_->estimatePredicateEvaluationCost(*indexed_predicate));
index_->rebuild();
// Once rebuilt, the index is consistent again.
EXPECT_EQ(predicate_cost::kConstantTime,
index_->estimatePredicateEvaluationCost(*indexed_predicate));
}
TEST_F(BloomFilterIndexSubBlockTest, TestGetMatchesForPredicate) {
// This test checks the following conditions:
// 1. All tuples are returned for a predicate with false positive match,
// 2. No tuples are returned for a predicate with true negative match.
const attribute_id indexed_attr = 0;
createIndex({ indexed_attr }, kIndexSubBlockSize);
EXPECT_FALSE(requiresRebuild());
const uint64_t indexed_value = 999;
const uint64_t non_indexed_value = 9999;
std::unique_ptr<ComparisonPredicate> indexed_predicate(
generateNumericComparisonPredicate<LongType>(
ComparisonID::kEqual, indexed_attr, indexed_value));
std::unique_ptr<ComparisonPredicate> non_indexed_predicate(
generateNumericComparisonPredicate<LongType>(
ComparisonID::kEqual, indexed_attr, non_indexed_value));
generateAndInsertTuple(indexed_value, false, "suffix");
index_->addEntry(0);
std::unique_ptr<TupleIdSequence> result;
// Expect all tuples to be returned.
result.reset(index_->getMatchesForPredicate(*indexed_predicate, nullptr));
EXPECT_EQ(static_cast<std::size_t>(tuple_store_->getMaxTupleID() + 1), result->length());
EXPECT_EQ(static_cast<std::size_t>(tuple_store_->getMaxTupleID() + 1), result->numTuples());
// Expect no tuples to be returned.
result.reset(index_->getMatchesForPredicate(*non_indexed_predicate, nullptr));
EXPECT_EQ(static_cast<std::size_t>(tuple_store_->getMaxTupleID() + 1), result->length());
EXPECT_EQ(static_cast<std::size_t>(0), result->numTuples());
}
} // namespace quickstep