// blob: 92bd361872a648e3e94a670ebff6bd75a520d9d2 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
**/
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "compression/CompressionDictionary.hpp"
#include "compression/CompressionDictionaryBuilder.hpp"
#include "types/CharType.hpp"
#include "types/DoubleType.hpp"
#include "types/IntType.hpp"
#include "types/Type.hpp"
#include "types/TypeFactory.hpp"
#include "types/TypeID.hpp"
#include "types/TypedValue.hpp"
#include "types/VarCharType.hpp"
#include "types/operations/comparisons/Comparison.hpp"
#include "types/operations/comparisons/ComparisonFactory.hpp"
#include "types/operations/comparisons/ComparisonID.hpp"
#include "utility/ScopedBuffer.hpp"
#include "gtest/gtest.h"
using std::numeric_limits;
using std::pair;
using std::size_t;
using std::sort;
using std::string;
using std::strlen;
using std::uint8_t;
using std::uint32_t;
using std::vector;
namespace quickstep {
/**
 * @brief Value-parameterized fixture shared by the CompressionDictionary and
 *        CompressionDictionaryBuilder tests.
 *
 * The bool test parameter is forwarded to IntType::Instance() and
 * VarCharType::Instance(); the tests treat it as "the type is nullable".
 * When it is true, the sample dictionaries additionally receive one NULL
 * entry.
 **/
class CompressionDictionaryTest : public ::testing::TestWithParam<bool> {
 protected:
  static const uint32_t kNumSampleValues = 24;
  static const int kSampleInts[kNumSampleValues];
  static const char *kSampleStrings[kNumSampleValues];
  static const size_t kVarCharSizeParameter = 32;

  /**
   * @brief Compute the code width the tests expect for a dictionary holding
   *        num_codes codes.
   *
   * @param num_codes The number of codes in the dictionary.
   * @return 0 if num_codes is 0, otherwise the smallest num_bits >= 1 such
   *         that 2^num_bits >= num_codes.
   **/
  static uint8_t bitsNeededForCodes(int num_codes) {
    if (num_codes == 0) {
      return 0;
    }
    // Shift in 64 bits: "1 << 31" does not fit in a 32-bit int, which made
    // the comparison wrong for num_codes above 2^30.
    const std::int64_t wanted_codes = num_codes;
    for (uint8_t num_bits = 1; num_bits < 32; ++num_bits) {
      if ((static_cast<std::int64_t>(1) << num_bits) >= wanted_codes) {
        return num_bits;
      }
    }
    // Only reachable when int is wider than 32 bits.
    return 32;
  }

  // Remembers 'type' in dictionary_type_ and replaces builder_ with a fresh
  // CompressionDictionaryBuilder for it.
  void setupBuilderForType(const Type &type) {
    dictionary_type_ = &type;
    builder_.reset(new CompressionDictionaryBuilder(type));
  }

  // Fills builder_ with every value of kSampleInts (plus a NULL when the test
  // parameter is true) and leaves the finished dictionary in dictionary_.
  void setupSampleIntDictionary() {
    const IntType &int_type = IntType::Instance(GetParam());
    setupBuilderForType(int_type);

    for (uint32_t sample_idx = 0; sample_idx < kNumSampleValues; ++sample_idx) {
      TypedValue current_value = int_type.makeValue(&(kSampleInts[sample_idx]));
      ASSERT_TRUE(builder_->insertEntry(current_value));
    }

    // Also throw in a null value if we are testing a nullable type.
    if (GetParam()) {
      ASSERT_TRUE(builder_->insertEntry(TypedValue(kInt)));
    }

    finishSampleDictionary(int_type);
  }

  // Fills builder_ with every string of kSampleStrings (plus a NULL when the
  // test parameter is true) and leaves the finished dictionary in
  // dictionary_.
  void setupSampleVarCharDictionary() {
    const VarCharType &varchar_type = VarCharType::Instance(kVarCharSizeParameter, GetParam());
    setupBuilderForType(varchar_type);

    for (uint32_t sample_idx = 0; sample_idx < kNumSampleValues; ++sample_idx) {
      // The length passed to makeValue() includes the terminating '\0'.
      TypedValue current_value = varchar_type.makeValue(kSampleStrings[sample_idx],
                                                        strlen(kSampleStrings[sample_idx]) + 1);
      ASSERT_TRUE(builder_->insertEntry(current_value));
    }

    // Also throw in a null value if we are testing a nullable type.
    if (GetParam()) {
      ASSERT_TRUE(builder_->insertEntry(TypedValue(kVarChar)));
    }

    finishSampleDictionary(varchar_type);
  }

  // Common tail of the setupSample*Dictionary() methods: checks the entry
  // count, builds the physical dictionary into dictionary_memory_, wraps it
  // in a CompressionDictionary and discards the builder.
  void finishSampleDictionary(const Type &type) {
    ASSERT_EQ(kNumSampleValues + GetParam(), builder_->numberOfEntries());

    // Build the physical dictionary.
    dictionary_memory_.reset(builder_->dictionarySizeBytes());
    builder_->buildDictionary(dictionary_memory_.get());

    // Construct the dictionary object.
    dictionary_.reset(new CompressionDictionary(type,
                                                dictionary_memory_.get(),
                                                builder_->dictionarySizeBytes()));

    // Delete the builder, we don't need it anymore.
    builder_.reset();
  }

  // Type most recently passed to setupBuilderForType(); nullptr until then.
  const Type *dictionary_type_ = nullptr;
  std::unique_ptr<CompressionDictionaryBuilder> builder_;
  std::unique_ptr<CompressionDictionary> dictionary_;
  // Backing storage for the physical dictionary read by dictionary_.
  ScopedBuffer dictionary_memory_;
};
// Out-of-class definitions of the fixture's static data members (their
// initializers / array extents are given inside CompressionDictionaryTest).
const uint32_t CompressionDictionaryTest::kNumSampleValues;
// Sample int keys: kNumSampleValues distinct values, not in sorted order.
// Tests sort a copy of this array to predict the order of the built
// dictionary.
const int CompressionDictionaryTest::kSampleInts[]
= { 0, 5, -5, 1, 2, 3, -1000, 1000, -1, -2, -3, 500,
42, 55, -12, 85, -1234, 92, 43, 41, -6464, 50, 9999, -10};
// Sample string keys: kNumSampleValues distinct strings, each shorter than
// kVarCharSizeParameter characters.
const char *CompressionDictionaryTest::kSampleStrings[] = {
"Twilight Sparkle",
"Rarity",
"Pinkie Pie",
"Applejack",
"Rainbow Dash",
"Fluttershy",
"Celestia",
"Luna",
"Cadence",
"Applebloom",
"Scootaloo",
"Sweetie Belle",
"Zecora",
"Big Macintosh",
"Granny Smith",
"Shining Armor",
"Cheerilee",
"Diamond Tiara",
"Silver Spoon",
"Photo Finish",
"DJ Pon-3",
"Snips",
"Snails",
"Derpy Hooves"
};
// Size argument passed to VarCharType::Instance() by the fixture.
const size_t CompressionDictionaryTest::kVarCharSizeParameter;
// Same fixture under a "DeathTest" name, used by the EXPECT_DEATH tests.
using CompressionDictionaryDeathTest = CompressionDictionaryTest;
// Requesting limit codes for ComparisonID::kNotEqual is expected to terminate
// the process.
TEST_P(CompressionDictionaryDeathTest, LimitCodesNotEqualTest) {
  setupSampleIntDictionary();

  int literal = 0;
  EXPECT_DEATH(
      dictionary_->getLimitCodesForComparisonUntyped(ComparisonID::kNotEqual, &literal),
      "");
}
// Some tests are only run for debug builds, which have assertions turned on.
#ifdef QUICKSTEP_DEBUG
// undoLastInsert() on a builder that has no entries is expected to die.
TEST_P(CompressionDictionaryDeathTest, UndoInsertOnEmptyBuilderTest) {
  const Type &int_type = TypeFactory::GetType(kInt);
  setupBuilderForType(int_type);

  EXPECT_DEATH(builder_->undoLastInsert(), "");
}
// Inserting a double value into a builder set up for int is expected to die.
TEST_P(CompressionDictionaryDeathTest, BuilderInsertWrongTypeTest) {
  setupBuilderForType(TypeFactory::GetType(kInt));

  const double mismatched_literal = 12.34;
  const Type &double_type = TypeFactory::GetType(kDouble);
  const TypedValue mismatched_value = double_type.makeValue(&mismatched_literal);

  EXPECT_DEATH(builder_->insertEntry(mismatched_value), "");
}
// Same mismatch as the by-value insert test, but through
// insertEntryByReference().
TEST_P(CompressionDictionaryDeathTest, BuilderInsertWrongTypeByReferenceTest) {
  setupBuilderForType(TypeFactory::GetType(kInt));

  const double mismatched_literal = 12.34;
  const Type &double_type = TypeFactory::GetType(kDouble);
  const TypedValue mismatched_value = double_type.makeValue(&mismatched_literal);

  EXPECT_DEATH(builder_->insertEntryByReference(mismatched_value), "");
}
// Looking up a double value in a builder set up for int is expected to die.
TEST_P(CompressionDictionaryDeathTest, BuilderContainsWrongTypeTest) {
  setupBuilderForType(TypeFactory::GetType(kInt));

  const double mismatched_literal = 12.34;
  const Type &double_type = TypeFactory::GetType(kDouble);
  const TypedValue mismatched_value = double_type.makeValue(&mismatched_literal);

  EXPECT_DEATH(builder_->containsValue(mismatched_value), "");
}
// Asking for the value of the first code past the end of the sample
// dictionary is expected to die.
TEST_P(CompressionDictionaryDeathTest, GetValueForNonexistentCodeTest) {
  setupSampleIntDictionary();

  const uint32_t first_invalid_code = kNumSampleValues + GetParam();
  EXPECT_DEATH(dictionary_->getUntypedValueForCode(first_invalid_code), "");
}
#endif // QUICKSTEP_DEBUG
// Walks a CompressionDictionaryBuilder for an int type through inserts,
// undos and duplicate inserts, checking its reported state after every step,
// then builds the physical dictionary and inspects its raw contents.
TEST_P(CompressionDictionaryTest, IntKeyBuildTest) {
  const bool nullable = GetParam();
  const uint32_t null_entries = nullable ? 1 : 0;
  const IntType &int_type = IntType::Instance(nullable);
  // Every expected size below starts with two uint32_t words.
  const size_t header_bytes = 2 * sizeof(uint32_t);

  // Checks the builder's entry count, code width and projected size.
  // 'stored_values' is the number of non-NULL entries: the expected size does
  // not grow when a NULL is inserted.
  const auto expect_builder_state = [&](const uint32_t num_entries,
                                        const size_t stored_values) {
    EXPECT_EQ(num_entries, builder_->numberOfEntries());
    EXPECT_EQ(bitsNeededForCodes(static_cast<int>(num_entries)), builder_->codeLengthBits());
    EXPECT_EQ(header_bytes + stored_values * int_type.maximumByteLength(),
              builder_->dictionarySizeBytes());
  };

  // A freshly constructed builder is empty.
  setupBuilderForType(int_type);
  expect_builder_state(0, 0);

  // For a nullable type, a NULL can be inserted into (and rolled back from)
  // an empty builder.
  if (nullable) {
    const TypedValue null_value(kInt);
    EXPECT_TRUE(builder_->insertEntry(null_value));
    EXPECT_TRUE(builder_->containsValue(null_value));
    expect_builder_state(1, 0);

    builder_->undoLastInsert();
    EXPECT_FALSE(builder_->containsValue(null_value));
    expect_builder_state(0, 0);
  }

  // First real value: insert, roll back to empty, insert again.
  const TypedValue first_value(kSampleInts[0]);
  EXPECT_TRUE(builder_->insertEntry(first_value));
  EXPECT_TRUE(builder_->containsValue(first_value));
  expect_builder_state(1, 1);

  builder_->undoLastInsert();
  EXPECT_FALSE(builder_->containsValue(first_value));
  expect_builder_state(0, 0);

  EXPECT_TRUE(builder_->insertEntry(first_value));
  EXPECT_TRUE(builder_->containsValue(first_value));
  expect_builder_state(1, 1);

  // For a nullable type, repeat the NULL insert / undo / re-insert cycle with
  // a real value already present.
  if (nullable) {
    const TypedValue null_value(kInt);
    EXPECT_TRUE(builder_->insertEntry(null_value));
    EXPECT_TRUE(builder_->containsValue(null_value));
    expect_builder_state(2, 1);

    builder_->undoLastInsert();
    EXPECT_FALSE(builder_->containsValue(null_value));
    expect_builder_state(1, 1);

    EXPECT_TRUE(builder_->insertEntry(null_value));
    EXPECT_TRUE(builder_->containsValue(null_value));
    expect_builder_state(2, 1);
  }

  // Second value.
  const TypedValue second_value(kSampleInts[1]);
  EXPECT_TRUE(builder_->insertEntry(second_value));
  EXPECT_TRUE(builder_->containsValue(second_value));
  expect_builder_state(2 + null_entries, 2);

  // Third value, then roll it back. This is the last undo check; it covers
  // the remaining path where the code length may shrink.
  const TypedValue third_value(kSampleInts[2]);
  EXPECT_TRUE(builder_->insertEntry(third_value));
  EXPECT_TRUE(builder_->containsValue(third_value));
  expect_builder_state(3 + null_entries, 3);

  builder_->undoLastInsert();
  EXPECT_FALSE(builder_->containsValue(third_value));
  expect_builder_state(2 + null_entries, 2);

  // Insert the remaining samples (index 2 again, since it was rolled back).
  for (uint32_t sample_idx = 2; sample_idx < kNumSampleValues; ++sample_idx) {
    const TypedValue sample_value(kSampleInts[sample_idx]);
    EXPECT_TRUE(builder_->insertEntry(sample_value));
    EXPECT_TRUE(builder_->containsValue(sample_value));
    expect_builder_state(sample_idx + 1 + null_entries, sample_idx + 1);
  }

  // A duplicate of an existing value is rejected and changes nothing.
  const TypedValue duplicate_value(kSampleInts[kNumSampleValues / 2]);
  EXPECT_FALSE(builder_->insertEntry(duplicate_value));
  EXPECT_TRUE(builder_->containsValue(duplicate_value));
  expect_builder_state(kNumSampleValues + null_entries, kNumSampleValues);

  // Likewise for a duplicate NULL.
  if (nullable) {
    const TypedValue null_value(kInt);
    EXPECT_FALSE(builder_->insertEntry(null_value));
    EXPECT_TRUE(builder_->containsValue(null_value));
    expect_builder_state(kNumSampleValues + null_entries, kNumSampleValues);
  }

  // A key that was never inserted is not reported as present.
  const TypedValue absent_value(7777);
  EXPECT_FALSE(builder_->containsValue(absent_value));

  // Build the physical dictionary.
  dictionary_memory_.reset(builder_->dictionarySizeBytes());
  builder_->buildDictionary(dictionary_memory_.get());

  // Expected raw contents: word 0 holds kNumSampleValues, word 1 holds
  // kNumSampleValues when a NULL was inserted and the uint32_t maximum
  // otherwise, followed by the sample ints in ascending order.
  vector<int> expected_values(kSampleInts, kSampleInts + kNumSampleValues);
  sort(expected_values.begin(), expected_values.end());

  const uint32_t *header = static_cast<const uint32_t*>(dictionary_memory_.get());
  const int *stored_values = reinterpret_cast<const int*>(header + 2);

  EXPECT_EQ(kNumSampleValues, header[0]);
  if (nullable) {
    EXPECT_EQ(kNumSampleValues, header[1]);
  } else {
    EXPECT_EQ(numeric_limits<uint32_t>::max(), header[1]);
  }
  for (uint32_t value_idx = 0; value_idx < kNumSampleValues; ++value_idx) {
    EXPECT_EQ(expected_values[value_idx], stored_values[value_idx]);
  }
}
// Same test as above, but insert values via insertEntryByReference().
TEST_P(CompressionDictionaryTest, IntKeyBuildInsertByReferenceTest) {
  // Walks the same insert / undo / duplicate sequence as IntKeyBuildTest, but
  // every value handed to insertEntryByReference() is kept alive in 'values'
  // (or in 'null_value') until the dictionary has been built.
  const bool nullable = GetParam();
  const uint32_t null_entries = nullable ? 1 : 0;
  const IntType &int_type = IntType::Instance(nullable);
  // Every expected size below starts with two uint32_t words.
  const size_t header_bytes = 2 * sizeof(uint32_t);

  std::vector<TypedValue> values;
  // NOTE(review): insertEntryByReference() presumably expects its argument
  // to stay put until buildDictionary() -- confirm against the builder's
  // contract. Reserving room for every push_back below (kNumSampleValues
  // samples plus the one that is inserted twice) guarantees that 'values'
  // never reallocates and relocates elements already given to the builder.
  values.reserve(kNumSampleValues + 1);
  TypedValue other_value;
  TypedValue null_value(kInt);

  // Checks the builder's entry count, code width and projected size.
  // 'stored_values' is the number of non-NULL entries: the expected size does
  // not grow when a NULL is inserted.
  const auto expect_builder_state = [&](const uint32_t num_entries,
                                        const size_t stored_values) {
    EXPECT_EQ(num_entries, builder_->numberOfEntries());
    EXPECT_EQ(bitsNeededForCodes(static_cast<int>(num_entries)), builder_->codeLengthBits());
    EXPECT_EQ(header_bytes + stored_values * int_type.maximumByteLength(),
              builder_->dictionarySizeBytes());
  };

  // A freshly constructed builder is empty.
  setupBuilderForType(int_type);
  expect_builder_state(0, 0);

  // For a nullable type, a NULL can be inserted into (and rolled back from)
  // an empty builder.
  if (nullable) {
    EXPECT_TRUE(builder_->insertEntryByReference(null_value));
    EXPECT_TRUE(builder_->containsValue(null_value));
    expect_builder_state(1, 0);

    builder_->undoLastInsert();
    EXPECT_FALSE(builder_->containsValue(null_value));
    expect_builder_state(0, 0);
  }

  // First real value: insert, roll back to empty, insert again.
  values.push_back(TypedValue(kSampleInts[0]));
  EXPECT_TRUE(builder_->insertEntryByReference(values.back()));
  EXPECT_TRUE(builder_->containsValue(values.back()));
  expect_builder_state(1, 1);

  builder_->undoLastInsert();
  EXPECT_FALSE(builder_->containsValue(values.back()));
  expect_builder_state(0, 0);

  EXPECT_TRUE(builder_->insertEntryByReference(values.back()));
  EXPECT_TRUE(builder_->containsValue(values.back()));
  expect_builder_state(1, 1);

  // For a nullable type, repeat the NULL insert / undo / re-insert cycle with
  // a real value already present.
  if (nullable) {
    EXPECT_TRUE(builder_->insertEntryByReference(null_value));
    EXPECT_TRUE(builder_->containsValue(null_value));
    expect_builder_state(2, 1);

    builder_->undoLastInsert();
    EXPECT_FALSE(builder_->containsValue(null_value));
    expect_builder_state(1, 1);

    EXPECT_TRUE(builder_->insertEntryByReference(null_value));
    EXPECT_TRUE(builder_->containsValue(null_value));
    expect_builder_state(2, 1);
  }

  // Second value; look it up through a different but equal TypedValue.
  values.push_back(TypedValue(kSampleInts[1]));
  EXPECT_TRUE(builder_->insertEntryByReference(values.back()));
  other_value = TypedValue(kSampleInts[1]);
  EXPECT_TRUE(builder_->containsValue(other_value));
  expect_builder_state(2 + null_entries, 2);

  // Third value, then roll it back. This is the last undo check; it covers
  // the remaining path where the code length may shrink.
  values.push_back(TypedValue(kSampleInts[2]));
  EXPECT_TRUE(builder_->insertEntryByReference(values.back()));
  EXPECT_TRUE(builder_->containsValue(values.back()));
  expect_builder_state(3 + null_entries, 3);

  builder_->undoLastInsert();
  EXPECT_FALSE(builder_->containsValue(values.back()));
  expect_builder_state(2 + null_entries, 2);

  // Insert the remaining samples (index 2 again, since it was rolled back).
  for (uint32_t sample_idx = 2; sample_idx < kNumSampleValues; ++sample_idx) {
    values.push_back(TypedValue(kSampleInts[sample_idx]));
    EXPECT_TRUE(builder_->insertEntryByReference(values.back()));
    EXPECT_TRUE(builder_->containsValue(values.back()));
    expect_builder_state(sample_idx + 1 + null_entries, sample_idx + 1);
  }

  // A duplicate of an existing value is rejected and changes nothing.
  other_value = TypedValue(kSampleInts[kNumSampleValues >> 1]);
  EXPECT_FALSE(builder_->insertEntryByReference(other_value));
  EXPECT_TRUE(builder_->containsValue(other_value));
  expect_builder_state(kNumSampleValues + null_entries, kNumSampleValues);

  // Likewise for a duplicate NULL.
  if (nullable) {
    EXPECT_FALSE(builder_->insertEntryByReference(null_value));
    EXPECT_TRUE(builder_->containsValue(null_value));
    expect_builder_state(kNumSampleValues + null_entries, kNumSampleValues);
  }

  // A key that was never inserted is not reported as present.
  other_value = TypedValue(7777);
  EXPECT_FALSE(builder_->containsValue(other_value));

  // Build the physical dictionary.
  dictionary_memory_.reset(builder_->dictionarySizeBytes());
  builder_->buildDictionary(dictionary_memory_.get());

  // Expected raw contents: word 0 holds kNumSampleValues, word 1 holds
  // kNumSampleValues when a NULL was inserted and the uint32_t maximum
  // otherwise, followed by the sample ints in ascending order.
  vector<int> sorted_sample_ints(kSampleInts, kSampleInts + kNumSampleValues);
  sort(sorted_sample_ints.begin(), sorted_sample_ints.end());

  const uint32_t *header = static_cast<const uint32_t*>(dictionary_memory_.get());
  const int *value_array = reinterpret_cast<const int*>(
      static_cast<const char*>(dictionary_memory_.get()) + 2 * sizeof(uint32_t));

  EXPECT_EQ(kNumSampleValues, header[0]);
  if (nullable) {
    EXPECT_EQ(kNumSampleValues, header[1]);
  } else {
    EXPECT_EQ(numeric_limits<uint32_t>::max(), header[1]);
  }
  for (uint32_t sample_idx = 0; sample_idx < kNumSampleValues; ++sample_idx) {
    EXPECT_EQ(sorted_sample_ints[sample_idx], value_array[sample_idx]);
  }
}
// The built sample dictionary reports one code per sample (plus one for the
// NULL when the test parameter is true) and the matching code width.
TEST_P(CompressionDictionaryTest, IntKeyInfoTest) {
  setupSampleIntDictionary();

  const uint32_t expected_codes = kNumSampleValues + GetParam();
  EXPECT_EQ(expected_codes, dictionary_->numberOfCodes());
  EXPECT_EQ(bitsNeededForCodes(expected_codes), dictionary_->codeLengthBits());
}
// getCodeForUntypedValue() is expected to return each sample's position in
// ascending order, kNumSampleValues for NULL, and the total number of codes
// for a value that is not in the dictionary.
TEST_P(CompressionDictionaryTest, IntKeyGetCodeUntypedTest) {
  setupSampleIntDictionary();

  vector<int> ordered_ints(kSampleInts, kSampleInts + kNumSampleValues);
  sort(ordered_ints.begin(), ordered_ints.end());

  // Every value that is in the dictionary.
  for (uint32_t code = 0; code < kNumSampleValues; ++code) {
    int *present = ordered_ints.data() + code;
    EXPECT_EQ(code, dictionary_->getCodeForUntypedValue(present));
  }

  // NULL is passed as a null pointer.
  if (GetParam()) {
    EXPECT_EQ(kNumSampleValues, dictionary_->getCodeForUntypedValue(nullptr));
  }

  // A value that is not in the dictionary.
  int absent = 7777;
  EXPECT_EQ(kNumSampleValues + GetParam(), dictionary_->getCodeForUntypedValue(&absent));
}
// Typed counterpart of the untyped code lookup: literals are TypedValues of
// the dictionary's own type.
TEST_P(CompressionDictionaryTest, IntKeyGetCodeTypedTest) {
  setupSampleIntDictionary();
  const Type &literal_type = *dictionary_type_;

  vector<int> ordered_ints(kSampleInts, kSampleInts + kNumSampleValues);
  sort(ordered_ints.begin(), ordered_ints.end());

  // Every value that is in the dictionary.
  for (uint32_t code = 0; code < kNumSampleValues; ++code) {
    const TypedValue literal(ordered_ints[code]);
    EXPECT_EQ(code, dictionary_->getCodeForTypedValue(literal, literal_type));
  }

  // NULL.
  if (GetParam()) {
    const TypedValue null_literal(kInt);
    EXPECT_EQ(kNumSampleValues,
              dictionary_->getCodeForTypedValue(null_literal, literal_type));
  }

  // A value that is not in the dictionary.
  const TypedValue absent_literal(7777);
  EXPECT_EQ(kNumSampleValues + GetParam(),
            dictionary_->getCodeForTypedValue(absent_literal, literal_type));
}
// Same as above, but use DoubleType literals.
TEST_P(CompressionDictionaryTest, IntKeyGetCodeDifferentTypeTest) {
  // Looks up codes in the int dictionary using double literals.
  setupSampleIntDictionary();
  const Type &non_nullable_double = DoubleType::Instance(false);

  vector<int> ordered_ints(kSampleInts, kSampleInts + kNumSampleValues);
  sort(ordered_ints.begin(), ordered_ints.end());

  // Every value that is in the dictionary, converted to double.
  for (uint32_t code = 0; code < kNumSampleValues; ++code) {
    const TypedValue literal(static_cast<double>(ordered_ints[code]));
    EXPECT_EQ(code, dictionary_->getCodeForTypedValue(literal, non_nullable_double));
  }

  // A NULL double maps to the NULL code.
  if (GetParam()) {
    const TypedValue null_literal(kDouble);
    EXPECT_EQ(kNumSampleValues,
              dictionary_->getCodeForTypedValue(null_literal, DoubleType::Instance(true)));
  }

  // A fractional value cannot be in the dictionary.
  const TypedValue absent_literal(static_cast<double>(5.5));
  EXPECT_EQ(kNumSampleValues + GetParam(),
            dictionary_->getCodeForTypedValue(absent_literal, non_nullable_double));
}
// Decodes every code through the byte-code, short-code and generic untyped
// accessors and checks that all three agree with the sorted samples; the NULL
// code is expected to decode to a null pointer.
TEST_P(CompressionDictionaryTest, IntKeyGetValueUntypedTest) {
  setupSampleIntDictionary();

  vector<int> ordered_ints(kSampleInts, kSampleInts + kNumSampleValues);
  sort(ordered_ints.begin(), ordered_ints.end());

  for (uint8_t code = 0; code < kNumSampleValues; ++code) {
    const int expected = ordered_ints[code];

    const int *via_byte_code =
        static_cast<const int*>(dictionary_->getUntypedValueForByteCode(code));
    EXPECT_EQ(expected, *via_byte_code);

    const int *via_short_code =
        static_cast<const int*>(dictionary_->getUntypedValueForShortCode(code));
    EXPECT_EQ(expected, *via_short_code);

    const int *via_code =
        static_cast<const int*>(dictionary_->getUntypedValueForCode(code));
    EXPECT_EQ(expected, *via_code);
  }

  if (GetParam()) {
    const void *null_via_byte_code = dictionary_->getUntypedValueForByteCode(kNumSampleValues);
    EXPECT_EQ(nullptr, null_via_byte_code);

    const void *null_via_short_code = dictionary_->getUntypedValueForShortCode(kNumSampleValues);
    EXPECT_EQ(nullptr, null_via_short_code);

    const void *null_via_code = dictionary_->getUntypedValueForCode(kNumSampleValues);
    EXPECT_EQ(nullptr, null_via_code);
  }
}
// Typed counterpart of the untyped value lookup: decoded TypedValues are
// compared with the kEqual comparison, and the NULL code is expected to
// decode to a null TypedValue of type kInt.
TEST_P(CompressionDictionaryTest, IntKeyGetValueTypedTest) {
  setupSampleIntDictionary();

  vector<int> ordered_ints(kSampleInts, kSampleInts + kNumSampleValues);
  sort(ordered_ints.begin(), ordered_ints.end());

  const IntType &int_type = IntType::Instance(GetParam());
  const Comparison &equal_comp = ComparisonFactory::GetComparison(ComparisonID::kEqual);

  for (uint8_t code = 0; code < kNumSampleValues; ++code) {
    const TypedValue expected(ordered_ints[code]);
    const auto equals_expected = [&](const TypedValue &decoded) {
      return equal_comp.compareTypedValuesChecked(expected, int_type,
                                                  decoded, int_type);
    };

    EXPECT_TRUE(equals_expected(dictionary_->getTypedValueForByteCode(code)));
    EXPECT_TRUE(equals_expected(dictionary_->getTypedValueForShortCode(code)));
    EXPECT_TRUE(equals_expected(dictionary_->getTypedValueForCode(code)));
  }

  if (GetParam()) {
    const auto expect_null_int = [](const TypedValue &decoded) {
      EXPECT_TRUE(decoded.isNull());
      EXPECT_EQ(kInt, decoded.getTypeID());
    };

    expect_null_int(dictionary_->getTypedValueForByteCode(kNumSampleValues));
    expect_null_int(dictionary_->getTypedValueForShortCode(kNumSampleValues));
    expect_null_int(dictionary_->getTypedValueForCode(kNumSampleValues));
  }
}
// Checks the lower/upper bound codes for values in the dictionary, below its
// minimum, above its maximum, and between its two largest values.
TEST_P(CompressionDictionaryTest, IntKeyBoundCodesUntypedTest) {
  setupSampleIntDictionary();

  vector<int> ordered_ints(kSampleInts, kSampleInts + kNumSampleValues);
  sort(ordered_ints.begin(), ordered_ints.end());

  // Values that are actually in the dictionary.
  for (uint32_t code = 0; code < kNumSampleValues; ++code) {
    int *present = &(ordered_ints[code]);
    EXPECT_EQ(code, dictionary_->getLowerBoundCodeForUntypedValue(present));
    EXPECT_EQ(code + 1, dictionary_->getUpperBoundCodeForUntypedValue(present));
  }

  // Below the least value in the dictionary.
  int below_all = ordered_ints.front() - 10;
  EXPECT_EQ(0u, dictionary_->getLowerBoundCodeForUntypedValue(&below_all));
  EXPECT_EQ(0u, dictionary_->getUpperBoundCodeForUntypedValue(&below_all));

  // Above the greatest value in the dictionary.
  int above_all = ordered_ints.back() + 10;
  EXPECT_EQ(kNumSampleValues + GetParam(),
            dictionary_->getLowerBoundCodeForUntypedValue(&above_all));
  // Note: with nulls, the upper bound can actually be less than the lower
  // bound due to an implementation quirk.
  EXPECT_EQ(kNumSampleValues, dictionary_->getUpperBoundCodeForUntypedValue(&above_all));

  // Midway between the two greatest values.
  int between_last_two =
      (ordered_ints[ordered_ints.size() - 1] + ordered_ints[ordered_ints.size() - 2]) / 2;
  EXPECT_EQ(kNumSampleValues - 1,
            dictionary_->getLowerBoundCodeForUntypedValue(&between_last_two));
  EXPECT_EQ(kNumSampleValues - 1,
            dictionary_->getUpperBoundCodeForUntypedValue(&between_last_two));
}
// Typed counterpart of the untyped bound-code test: literals are TypedValues
// of the dictionary's own type.
TEST_P(CompressionDictionaryTest, IntKeyBoundCodesTypedTest) {
  setupSampleIntDictionary();
  const Type &literal_type = *dictionary_type_;

  vector<int> ordered_ints(kSampleInts, kSampleInts + kNumSampleValues);
  sort(ordered_ints.begin(), ordered_ints.end());

  // Values that are actually in the dictionary.
  for (uint32_t code = 0; code < kNumSampleValues; ++code) {
    const TypedValue present(ordered_ints[code]);
    EXPECT_EQ(code,
              dictionary_->getLowerBoundCodeForTypedValue(present, literal_type));
    EXPECT_EQ(code + 1,
              dictionary_->getUpperBoundCodeForTypedValue(present, literal_type));
  }

  // Below the least value in the dictionary.
  const TypedValue below_all(ordered_ints.front() - 10);
  EXPECT_EQ(0u, dictionary_->getLowerBoundCodeForTypedValue(below_all, literal_type));
  EXPECT_EQ(0u, dictionary_->getUpperBoundCodeForTypedValue(below_all, literal_type));

  // Above the greatest value in the dictionary.
  const TypedValue above_all(ordered_ints.back() + 10);
  EXPECT_EQ(kNumSampleValues + GetParam(),
            dictionary_->getLowerBoundCodeForTypedValue(above_all, literal_type));
  // Note: with nulls, the upper bound can actually be less than the lower
  // bound due to an implementation quirk.
  EXPECT_EQ(kNumSampleValues,
            dictionary_->getUpperBoundCodeForTypedValue(above_all, literal_type));

  // Midway between the two greatest values.
  const int last_two_sum =
      ordered_ints[ordered_ints.size() - 1] + ordered_ints[ordered_ints.size() - 2];
  const TypedValue between_last_two(last_two_sum / 2);
  EXPECT_EQ(kNumSampleValues - 1,
            dictionary_->getLowerBoundCodeForTypedValue(between_last_two, literal_type));
  EXPECT_EQ(kNumSampleValues - 1,
            dictionary_->getUpperBoundCodeForTypedValue(between_last_two, literal_type));
}
// Same as above, but use DoubleType literals.
TEST_P(CompressionDictionaryTest, IntKeyBoundCodesDifferentTypeTest) {
  // Bound codes of the int dictionary are probed with double literals, which
  // allows probing strictly between adjacent ints.
  setupSampleIntDictionary();
  const Type &literal_type = DoubleType::Instance(GetParam());

  vector<int> ordered_ints(kSampleInts, kSampleInts + kNumSampleValues);
  sort(ordered_ints.begin(), ordered_ints.end());

  // Values that are actually in the dictionary.
  for (uint32_t code = 0; code < kNumSampleValues; ++code) {
    const TypedValue present(static_cast<double>(ordered_ints[code]));
    EXPECT_EQ(code,
              dictionary_->getLowerBoundCodeForTypedValue(present, literal_type));
    EXPECT_EQ(code + 1,
              dictionary_->getUpperBoundCodeForTypedValue(present, literal_type));
  }

  // Just below the least value in the dictionary.
  const TypedValue below_all(static_cast<double>(ordered_ints.front()) - 0.5);
  EXPECT_EQ(0u, dictionary_->getLowerBoundCodeForTypedValue(below_all, literal_type));
  EXPECT_EQ(0u, dictionary_->getUpperBoundCodeForTypedValue(below_all, literal_type));

  // Just above the greatest value in the dictionary.
  const TypedValue above_all(static_cast<double>(ordered_ints.back()) + 0.5);
  EXPECT_EQ(kNumSampleValues + GetParam(),
            dictionary_->getLowerBoundCodeForTypedValue(above_all, literal_type));
  // Note: with nulls, the upper bound can actually be less than the lower
  // bound due to an implementation quirk.
  EXPECT_EQ(kNumSampleValues,
            dictionary_->getUpperBoundCodeForTypedValue(above_all, literal_type));

  // Midway between the two greatest values.
  const int last_two_sum =
      ordered_ints[ordered_ints.size() - 1] + ordered_ints[ordered_ints.size() - 2];
  const TypedValue between_last_two(static_cast<double>(last_two_sum / 2.0));
  EXPECT_EQ(kNumSampleValues - 1,
            dictionary_->getLowerBoundCodeForTypedValue(between_last_two, literal_type));
  EXPECT_EQ(kNumSampleValues - 1,
            dictionary_->getUpperBoundCodeForTypedValue(between_last_two, literal_type));
}
// Checks the limit-code pairs reported by getLimitCodesForComparisonUntyped()
// on the sample int dictionary for every comparison kind: against each key
// that is present, against literals just outside the key range on either
// side, against NULL (nullable parameterization only), and against an absent
// key.
TEST_P(CompressionDictionaryTest, IntKeyLimitCodesForComparisonUntypedTest) {
  setupSampleIntDictionary();

  // The expectations below treat a key's rank in sorted order as its code.
  vector<int> ordered_keys(kSampleInts, kSampleInts + kNumSampleValues);
  sort(ordered_keys.begin(), ordered_keys.end());

  for (uint32_t code = 0; code < kNumSampleValues; ++code) {
    int *key = &(ordered_keys[code]);

    const pair<uint32_t, uint32_t> equal_limits =
        dictionary_->getLimitCodesForComparisonUntyped(ComparisonID::kEqual, key);
    EXPECT_EQ(code, equal_limits.first);
    EXPECT_EQ(code + 1, equal_limits.second);

    const pair<uint32_t, uint32_t> less_limits =
        dictionary_->getLimitCodesForComparisonUntyped(ComparisonID::kLess, key);
    EXPECT_EQ(0u, less_limits.first);
    EXPECT_EQ(code, less_limits.second);

    const pair<uint32_t, uint32_t> less_or_equal_limits =
        dictionary_->getLimitCodesForComparisonUntyped(ComparisonID::kLessOrEqual, key);
    EXPECT_EQ(0u, less_or_equal_limits.first);
    EXPECT_EQ(code + 1, less_or_equal_limits.second);

    const pair<uint32_t, uint32_t> greater_limits =
        dictionary_->getLimitCodesForComparisonUntyped(ComparisonID::kGreater, key);
    EXPECT_EQ(code + 1, greater_limits.first);
    EXPECT_EQ(kNumSampleValues, greater_limits.second);

    const pair<uint32_t, uint32_t> greater_or_equal_limits =
        dictionary_->getLimitCodesForComparisonUntyped(ComparisonID::kGreaterOrEqual, key);
    EXPECT_EQ(code, greater_or_equal_limits.first);
    EXPECT_EQ(kNumSampleValues, greater_or_equal_limits.second);
  }

  // A literal one below the smallest key: nothing is equal to or less than
  // it, and every code is greater.
  int below_all = ordered_keys.front() - 1;

  const pair<uint32_t, uint32_t> below_equal =
      dictionary_->getLimitCodesForComparisonUntyped(ComparisonID::kEqual, &below_all);
  EXPECT_GE(below_equal.first, below_equal.second);

  const pair<uint32_t, uint32_t> below_less =
      dictionary_->getLimitCodesForComparisonUntyped(ComparisonID::kLess, &below_all);
  EXPECT_EQ(0u, below_less.first);
  EXPECT_EQ(0u, below_less.second);

  const pair<uint32_t, uint32_t> below_less_or_equal =
      dictionary_->getLimitCodesForComparisonUntyped(ComparisonID::kLessOrEqual, &below_all);
  EXPECT_EQ(0u, below_less_or_equal.first);
  EXPECT_EQ(0u, below_less_or_equal.second);

  const pair<uint32_t, uint32_t> below_greater =
      dictionary_->getLimitCodesForComparisonUntyped(ComparisonID::kGreater, &below_all);
  EXPECT_EQ(0u, below_greater.first);
  EXPECT_EQ(kNumSampleValues, below_greater.second);

  const pair<uint32_t, uint32_t> below_greater_or_equal =
      dictionary_->getLimitCodesForComparisonUntyped(ComparisonID::kGreaterOrEqual, &below_all);
  EXPECT_EQ(0u, below_greater_or_equal.first);
  EXPECT_EQ(kNumSampleValues, below_greater_or_equal.second);

  // A literal one above the largest key: every code is less than it, and
  // nothing is equal to or greater.
  int above_all = ordered_keys.back() + 1;

  const pair<uint32_t, uint32_t> above_equal =
      dictionary_->getLimitCodesForComparisonUntyped(ComparisonID::kEqual, &above_all);
  EXPECT_GE(above_equal.first, above_equal.second);

  const pair<uint32_t, uint32_t> above_less =
      dictionary_->getLimitCodesForComparisonUntyped(ComparisonID::kLess, &above_all);
  EXPECT_EQ(0u, above_less.first);
  EXPECT_EQ(kNumSampleValues, above_less.second);

  const pair<uint32_t, uint32_t> above_less_or_equal =
      dictionary_->getLimitCodesForComparisonUntyped(ComparisonID::kLessOrEqual, &above_all);
  EXPECT_EQ(0u, above_less_or_equal.first);
  EXPECT_EQ(kNumSampleValues, above_less_or_equal.second);

  const pair<uint32_t, uint32_t> above_greater =
      dictionary_->getLimitCodesForComparisonUntyped(ComparisonID::kGreater, &above_all);
  EXPECT_GE(above_greater.first, above_greater.second);

  const pair<uint32_t, uint32_t> above_greater_or_equal =
      dictionary_->getLimitCodesForComparisonUntyped(ComparisonID::kGreaterOrEqual, &above_all);
  EXPECT_GE(above_greater_or_equal.first, above_greater_or_equal.second);

  if (GetParam()) {
    // Nullable dictionary: no comparison with NULL matches anything, and each
    // one is expected to report the same (kNumSampleValues + 1) pair.
    const ComparisonID all_comparisons[] = {
        ComparisonID::kEqual,
        ComparisonID::kLess,
        ComparisonID::kLessOrEqual,
        ComparisonID::kGreater,
        ComparisonID::kGreaterOrEqual};
    for (const ComparisonID comparison : all_comparisons) {
      const pair<uint32_t, uint32_t> null_limits =
          dictionary_->getLimitCodesForComparisonUntyped(comparison, nullptr);
      EXPECT_EQ(kNumSampleValues + 1, null_limits.first);
      EXPECT_EQ(kNumSampleValues + 1, null_limits.second);
    }
  }

  // Equality against a key that is not in the dictionary yields an empty
  // range.
  int absent_key = 777;
  const pair<uint32_t, uint32_t> absent_equal =
      dictionary_->getLimitCodesForComparisonUntyped(ComparisonID::kEqual, &absent_key);
  EXPECT_EQ(absent_equal.first, absent_equal.second);
}
// Checks the limit-code pairs reported by getLimitCodesForComparisonTyped()
// on the sample int dictionary when the literal is a TypedValue of the
// dictionary's own type. Covers each key that is present, literals just
// outside the key range on either side, NULL (nullable parameterization
// only), and an absent key.
TEST_P(CompressionDictionaryTest, IntKeyLimitCodesForComparisonTypedTest) {
  setupSampleIntDictionary();

  // The expectations below treat a key's rank in sorted order as its code.
  vector<int> ordered_keys(kSampleInts, kSampleInts + kNumSampleValues);
  sort(ordered_keys.begin(), ordered_keys.end());

  // Every literal in this test is typed exactly like the dictionary keys.
  const Type &key_type = *dictionary_type_;

  for (uint32_t code = 0; code < kNumSampleValues; ++code) {
    TypedValue key(ordered_keys[code]);

    const pair<uint32_t, uint32_t> equal_limits =
        dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kEqual, key, key_type);
    EXPECT_EQ(code, equal_limits.first);
    EXPECT_EQ(code + 1, equal_limits.second);

    const pair<uint32_t, uint32_t> less_limits =
        dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kLess, key, key_type);
    EXPECT_EQ(0u, less_limits.first);
    EXPECT_EQ(code, less_limits.second);

    const pair<uint32_t, uint32_t> less_or_equal_limits =
        dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kLessOrEqual, key, key_type);
    EXPECT_EQ(0u, less_or_equal_limits.first);
    EXPECT_EQ(code + 1, less_or_equal_limits.second);

    const pair<uint32_t, uint32_t> greater_limits =
        dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kGreater, key, key_type);
    EXPECT_EQ(code + 1, greater_limits.first);
    EXPECT_EQ(kNumSampleValues, greater_limits.second);

    const pair<uint32_t, uint32_t> greater_or_equal_limits =
        dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kGreaterOrEqual, key, key_type);
    EXPECT_EQ(code, greater_or_equal_limits.first);
    EXPECT_EQ(kNumSampleValues, greater_or_equal_limits.second);
  }

  // A literal one below the smallest key: nothing is equal to or less than
  // it, and every code is greater.
  TypedValue below_all(ordered_keys.front() - 1);

  const pair<uint32_t, uint32_t> below_equal =
      dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kEqual, below_all, key_type);
  EXPECT_GE(below_equal.first, below_equal.second);

  const pair<uint32_t, uint32_t> below_less =
      dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kLess, below_all, key_type);
  EXPECT_EQ(0u, below_less.first);
  EXPECT_EQ(0u, below_less.second);

  const pair<uint32_t, uint32_t> below_less_or_equal =
      dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kLessOrEqual, below_all, key_type);
  EXPECT_EQ(0u, below_less_or_equal.first);
  EXPECT_EQ(0u, below_less_or_equal.second);

  const pair<uint32_t, uint32_t> below_greater =
      dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kGreater, below_all, key_type);
  EXPECT_EQ(0u, below_greater.first);
  EXPECT_EQ(kNumSampleValues, below_greater.second);

  const pair<uint32_t, uint32_t> below_greater_or_equal =
      dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kGreaterOrEqual, below_all, key_type);
  EXPECT_EQ(0u, below_greater_or_equal.first);
  EXPECT_EQ(kNumSampleValues, below_greater_or_equal.second);

  // A literal one above the largest key: every code is less than it, and
  // nothing is equal to or greater.
  TypedValue above_all(ordered_keys.back() + 1);

  const pair<uint32_t, uint32_t> above_equal =
      dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kEqual, above_all, key_type);
  EXPECT_GE(above_equal.first, above_equal.second);

  const pair<uint32_t, uint32_t> above_less =
      dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kLess, above_all, key_type);
  EXPECT_EQ(0u, above_less.first);
  EXPECT_EQ(kNumSampleValues, above_less.second);

  const pair<uint32_t, uint32_t> above_less_or_equal =
      dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kLessOrEqual, above_all, key_type);
  EXPECT_EQ(0u, above_less_or_equal.first);
  EXPECT_EQ(kNumSampleValues, above_less_or_equal.second);

  const pair<uint32_t, uint32_t> above_greater =
      dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kGreater, above_all, key_type);
  EXPECT_GE(above_greater.first, above_greater.second);

  const pair<uint32_t, uint32_t> above_greater_or_equal =
      dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kGreaterOrEqual, above_all, key_type);
  EXPECT_GE(above_greater_or_equal.first, above_greater_or_equal.second);

  if (GetParam()) {
    // Nullable dictionary: no comparison with NULL matches anything, and each
    // one is expected to report the same (kNumSampleValues + 1) pair.
    TypedValue null_literal(kInt);
    const ComparisonID all_comparisons[] = {
        ComparisonID::kEqual,
        ComparisonID::kLess,
        ComparisonID::kLessOrEqual,
        ComparisonID::kGreater,
        ComparisonID::kGreaterOrEqual};
    for (const ComparisonID comparison : all_comparisons) {
      const pair<uint32_t, uint32_t> null_limits =
          dictionary_->getLimitCodesForComparisonTyped(comparison, null_literal, key_type);
      EXPECT_EQ(kNumSampleValues + 1, null_limits.first);
      EXPECT_EQ(kNumSampleValues + 1, null_limits.second);
    }
  }

  // Equality against a key that is not in the dictionary yields an empty
  // range.
  TypedValue absent_key(777);
  const pair<uint32_t, uint32_t> absent_equal =
      dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kEqual, absent_key, key_type);
  EXPECT_EQ(absent_equal.first, absent_equal.second);
}
// Same as above, but use DoubleType literals.
// The dictionary holds int keys while every literal here is a double typed
// as DoubleType (nullable iff GetParam()), so the lookups go through the
// cross-type path of getLimitCodesForComparisonTyped(). Out-of-range and
// absent literals use fractional values that no int key can equal.
TEST_P(CompressionDictionaryTest, IntKeyLimitCodesForComparisonDifferentTypeTest) {
  setupSampleIntDictionary();
  const Type &double_type = DoubleType::Instance(GetParam());

  // The expectations below treat a key's rank in sorted order as its code.
  vector<int> ordered_keys(kSampleInts, kSampleInts + kNumSampleValues);
  sort(ordered_keys.begin(), ordered_keys.end());

  for (uint32_t code = 0; code < kNumSampleValues; ++code) {
    TypedValue key(static_cast<double>(ordered_keys[code]));

    const pair<uint32_t, uint32_t> equal_limits =
        dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kEqual, key, double_type);
    EXPECT_EQ(code, equal_limits.first);
    EXPECT_EQ(code + 1, equal_limits.second);

    const pair<uint32_t, uint32_t> less_limits =
        dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kLess, key, double_type);
    EXPECT_EQ(0u, less_limits.first);
    EXPECT_EQ(code, less_limits.second);

    const pair<uint32_t, uint32_t> less_or_equal_limits =
        dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kLessOrEqual, key, double_type);
    EXPECT_EQ(0u, less_or_equal_limits.first);
    EXPECT_EQ(code + 1, less_or_equal_limits.second);

    const pair<uint32_t, uint32_t> greater_limits =
        dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kGreater, key, double_type);
    EXPECT_EQ(code + 1, greater_limits.first);
    EXPECT_EQ(kNumSampleValues, greater_limits.second);

    const pair<uint32_t, uint32_t> greater_or_equal_limits =
        dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kGreaterOrEqual, key, double_type);
    EXPECT_EQ(code, greater_or_equal_limits.first);
    EXPECT_EQ(kNumSampleValues, greater_or_equal_limits.second);
  }

  // A literal half a unit below the smallest key: nothing is equal to or less
  // than it, and every code is greater.
  TypedValue below_all(static_cast<double>(ordered_keys.front()) - 0.5);

  const pair<uint32_t, uint32_t> below_equal =
      dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kEqual, below_all, double_type);
  EXPECT_GE(below_equal.first, below_equal.second);

  const pair<uint32_t, uint32_t> below_less =
      dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kLess, below_all, double_type);
  EXPECT_EQ(0u, below_less.first);
  EXPECT_EQ(0u, below_less.second);

  const pair<uint32_t, uint32_t> below_less_or_equal =
      dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kLessOrEqual, below_all, double_type);
  EXPECT_EQ(0u, below_less_or_equal.first);
  EXPECT_EQ(0u, below_less_or_equal.second);

  const pair<uint32_t, uint32_t> below_greater =
      dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kGreater, below_all, double_type);
  EXPECT_EQ(0u, below_greater.first);
  EXPECT_EQ(kNumSampleValues, below_greater.second);

  const pair<uint32_t, uint32_t> below_greater_or_equal =
      dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kGreaterOrEqual, below_all, double_type);
  EXPECT_EQ(0u, below_greater_or_equal.first);
  EXPECT_EQ(kNumSampleValues, below_greater_or_equal.second);

  // A literal half a unit above the largest key: every code is less than it,
  // and nothing is equal to or greater.
  TypedValue above_all(static_cast<double>(ordered_keys.back()) + 0.5);

  const pair<uint32_t, uint32_t> above_equal =
      dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kEqual, above_all, double_type);
  EXPECT_GE(above_equal.first, above_equal.second);

  const pair<uint32_t, uint32_t> above_less =
      dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kLess, above_all, double_type);
  EXPECT_EQ(0u, above_less.first);
  EXPECT_EQ(kNumSampleValues, above_less.second);

  const pair<uint32_t, uint32_t> above_less_or_equal =
      dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kLessOrEqual, above_all, double_type);
  EXPECT_EQ(0u, above_less_or_equal.first);
  EXPECT_EQ(kNumSampleValues, above_less_or_equal.second);

  const pair<uint32_t, uint32_t> above_greater =
      dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kGreater, above_all, double_type);
  EXPECT_GE(above_greater.first, above_greater.second);

  const pair<uint32_t, uint32_t> above_greater_or_equal =
      dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kGreaterOrEqual, above_all, double_type);
  EXPECT_GE(above_greater_or_equal.first, above_greater_or_equal.second);

  if (GetParam()) {
    // Nullable dictionary: no comparison with NULL matches anything, and each
    // one is expected to report the same (kNumSampleValues + 1) pair.
    TypedValue null_literal(kDouble);
    const ComparisonID all_comparisons[] = {
        ComparisonID::kEqual,
        ComparisonID::kLess,
        ComparisonID::kLessOrEqual,
        ComparisonID::kGreater,
        ComparisonID::kGreaterOrEqual};
    for (const ComparisonID comparison : all_comparisons) {
      const pair<uint32_t, uint32_t> null_limits =
          dictionary_->getLimitCodesForComparisonTyped(comparison, null_literal, double_type);
      EXPECT_EQ(kNumSampleValues + 1, null_limits.first);
      EXPECT_EQ(kNumSampleValues + 1, null_limits.second);
    }
  }

  // Equality against a fractional literal, which cannot match an int key,
  // yields an empty range.
  TypedValue absent_key(5.5);
  const pair<uint32_t, uint32_t> absent_equal =
      dictionary_->getLimitCodesForComparisonTyped(ComparisonID::kEqual, absent_key, double_type);
  EXPECT_EQ(absent_equal.first, absent_equal.second);
}
// Drives a CompressionDictionaryBuilder for a VarChar type through inserts,
// undoLastInsert() rollbacks and duplicate inserts, checking
// numberOfEntries(), codeLengthBits() and dictionarySizeBytes() after every
// step, then builds the physical dictionary and inspects its memory.
// GetParam() selects a nullable type; when true, NULL insertion is exercised
// as well and NULL counts as one extra entry.
//
// Size expectations throughout follow one formula:
//   sizeof(uint32_t) * (2 + number of non-null entries) + total_key_length
// A NULL entry is expected to leave dictionarySizeBytes() unchanged.
TEST_P(CompressionDictionaryTest, VarCharKeyBuildTest) {
const VarCharType &varchar_type = VarCharType::Instance(kVarCharSizeParameter, GetParam());
TypedValue current_value;
// Running sum of getDataSize() over the non-null values expected to be in the
// builder at this point; adjusted by hand around every insert and rollback.
size_t total_key_length = 0;
// Construct a CompressionDictionaryBuilder and check its initial state.
setupBuilderForType(varchar_type);
EXPECT_EQ(0u, builder_->numberOfEntries());
EXPECT_EQ(bitsNeededForCodes(0), builder_->codeLengthBits());
EXPECT_EQ(sizeof(uint32_t) * 2 + total_key_length, builder_->dictionarySizeBytes());
// If the type is nullable, try inserting a null first.
if (GetParam()) {
current_value = TypedValue(kVarChar);
EXPECT_TRUE(builder_->insertEntry(current_value));
EXPECT_TRUE(builder_->containsValue(current_value));
EXPECT_EQ(1u, builder_->numberOfEntries());
EXPECT_EQ(bitsNeededForCodes(1), builder_->codeLengthBits());
// The null is counted as an entry but adds nothing to the size.
EXPECT_EQ(2 * sizeof(uint32_t) + total_key_length, builder_->dictionarySizeBytes());
// Roll back to zero entries.
builder_->undoLastInsert();
EXPECT_FALSE(builder_->containsValue(current_value));
EXPECT_EQ(0u, builder_->numberOfEntries());
EXPECT_EQ(bitsNeededForCodes(0), builder_->codeLengthBits());
EXPECT_EQ(2 * sizeof(uint32_t) + total_key_length, builder_->dictionarySizeBytes());
}
// Add a value.
// The length passed to makeValue() includes the terminating NUL.
current_value = varchar_type.makeValue(kSampleStrings[0], strlen(kSampleStrings[0]) + 1);
current_value.ensureNotReference();
total_key_length += current_value.getDataSize();
EXPECT_TRUE(builder_->insertEntry(current_value));
EXPECT_TRUE(builder_->containsValue(current_value));
EXPECT_EQ(1u, builder_->numberOfEntries());
EXPECT_EQ(bitsNeededForCodes(1), builder_->codeLengthBits());
EXPECT_EQ(sizeof(uint32_t) * 3 + total_key_length, builder_->dictionarySizeBytes());
// Roll back to zero entries.
total_key_length -= current_value.getDataSize();
builder_->undoLastInsert();
EXPECT_FALSE(builder_->containsValue(current_value));
EXPECT_EQ(0u, builder_->numberOfEntries());
EXPECT_EQ(bitsNeededForCodes(0), builder_->codeLengthBits());
EXPECT_EQ(sizeof(uint32_t) * 2 + total_key_length, builder_->dictionarySizeBytes());
// Re-add the value.
total_key_length += current_value.getDataSize();
EXPECT_TRUE(builder_->insertEntry(current_value));
EXPECT_TRUE(builder_->containsValue(current_value));
EXPECT_EQ(1u, builder_->numberOfEntries());
EXPECT_EQ(bitsNeededForCodes(1), builder_->codeLengthBits());
EXPECT_EQ(sizeof(uint32_t) * 3 + total_key_length, builder_->dictionarySizeBytes());
// If the type is nullable, also try inserting a null with a real value
// present.
if (GetParam()) {
current_value = TypedValue(kVarChar);
EXPECT_TRUE(builder_->insertEntry(current_value));
EXPECT_TRUE(builder_->containsValue(current_value));
EXPECT_EQ(2u, builder_->numberOfEntries());
EXPECT_EQ(bitsNeededForCodes(2), builder_->codeLengthBits());
EXPECT_EQ(3 * sizeof(uint32_t) + total_key_length, builder_->dictionarySizeBytes());
// Roll back insert of null.
builder_->undoLastInsert();
EXPECT_FALSE(builder_->containsValue(current_value));
EXPECT_EQ(1u, builder_->numberOfEntries());
EXPECT_EQ(bitsNeededForCodes(1), builder_->codeLengthBits());
EXPECT_EQ(3 * sizeof(uint32_t) + total_key_length, builder_->dictionarySizeBytes());
// Finally, re-insert the null.
// From here on a nullable run carries one more entry than a non-nullable
// run, which is why later entry counts add GetParam().
EXPECT_TRUE(builder_->insertEntry(current_value));
EXPECT_TRUE(builder_->containsValue(current_value));
EXPECT_EQ(2u, builder_->numberOfEntries());
EXPECT_EQ(bitsNeededForCodes(2), builder_->codeLengthBits());
EXPECT_EQ(3 * sizeof(uint32_t) + total_key_length, builder_->dictionarySizeBytes());
}
// Add a reference value.
// No ensureNotReference() call here, so the value is expected not to own its
// out-of-line data.
current_value = varchar_type.makeValue(kSampleStrings[1], strlen(kSampleStrings[1]) + 1);
EXPECT_FALSE(current_value.ownsOutOfLineData());
total_key_length += current_value.getDataSize();
EXPECT_TRUE(builder_->insertEntry(current_value));
// Check against a different but equal value.
current_value = varchar_type.makeValue(kSampleStrings[1], strlen(kSampleStrings[1]) + 1);
current_value.ensureNotReference();
EXPECT_TRUE(current_value.ownsOutOfLineData());
EXPECT_TRUE(builder_->containsValue(current_value));
EXPECT_EQ(2u + GetParam(), builder_->numberOfEntries());
EXPECT_EQ(bitsNeededForCodes(2 + GetParam()), builder_->codeLengthBits());
EXPECT_EQ(sizeof(uint32_t) * 4 + total_key_length, builder_->dictionarySizeBytes());
// Roll it back.
total_key_length -= current_value.getDataSize();
builder_->undoLastInsert();
EXPECT_FALSE(builder_->containsValue(current_value));
EXPECT_EQ(1u + GetParam(), builder_->numberOfEntries());
EXPECT_EQ(bitsNeededForCodes(1 + GetParam()), builder_->codeLengthBits());
EXPECT_EQ(sizeof(uint32_t) * 3 + total_key_length, builder_->dictionarySizeBytes());
// Re-add it.
current_value = varchar_type.makeValue(kSampleStrings[1], strlen(kSampleStrings[1]) + 1);
EXPECT_FALSE(current_value.ownsOutOfLineData());
total_key_length += current_value.getDataSize();
EXPECT_TRUE(builder_->insertEntry(current_value));
EXPECT_TRUE(builder_->containsValue(current_value));
EXPECT_EQ(2u + GetParam(), builder_->numberOfEntries());
EXPECT_EQ(bitsNeededForCodes(2 + GetParam()), builder_->codeLengthBits());
EXPECT_EQ(sizeof(uint32_t) * 4 + total_key_length, builder_->dictionarySizeBytes());
// Add a third value.
current_value = varchar_type.makeValue(kSampleStrings[2], strlen(kSampleStrings[2]) + 1);
current_value.ensureNotReference();
total_key_length += current_value.getDataSize();
EXPECT_TRUE(builder_->insertEntry(current_value));
EXPECT_TRUE(builder_->containsValue(current_value));
EXPECT_EQ(3u + GetParam(), builder_->numberOfEntries());
EXPECT_EQ(bitsNeededForCodes(3 + GetParam()), builder_->codeLengthBits());
EXPECT_EQ(sizeof(uint32_t) * 5 + total_key_length, builder_->dictionarySizeBytes());
// Roll it back (this is the last time we will check this, we just need to
// cover all the possible paths where the code length may shrink).
builder_->undoLastInsert();
total_key_length -= current_value.getDataSize();
EXPECT_FALSE(builder_->containsValue(current_value));
EXPECT_EQ(2u + GetParam(), builder_->numberOfEntries());
EXPECT_EQ(bitsNeededForCodes(2 + GetParam()), builder_->codeLengthBits());
EXPECT_EQ(sizeof(uint32_t) * 4 + total_key_length, builder_->dictionarySizeBytes());
// Insert the rest of the sample values.
// Starts at index 2 again because kSampleStrings[2] was rolled back just
// above. After each insert there are (sample_idx + 1) non-null entries, hence
// (sample_idx + 3) uint32_t words in the expected size.
for (uint32_t sample_idx = 2; sample_idx < kNumSampleValues; ++sample_idx) {
current_value = varchar_type.makeValue(kSampleStrings[sample_idx],
strlen(kSampleStrings[sample_idx]) + 1);
total_key_length += current_value.getDataSize();
EXPECT_TRUE(builder_->insertEntry(current_value));
EXPECT_TRUE(builder_->containsValue(current_value));
EXPECT_EQ(sample_idx + 1 + GetParam(), builder_->numberOfEntries());
EXPECT_EQ(bitsNeededForCodes(sample_idx + 1 + GetParam()), builder_->codeLengthBits());
EXPECT_EQ(sizeof(uint32_t) * (sample_idx + 3) + total_key_length,
builder_->dictionarySizeBytes());
}
// Attempt to insert a duplicate entry.
// Uses the sample at index kNumSampleValues / 2; the insert must be rejected
// and leave every counter unchanged.
current_value = varchar_type.makeValue(kSampleStrings[kNumSampleValues >> 1],
strlen(kSampleStrings[kNumSampleValues >> 1]) + 1);
EXPECT_FALSE(builder_->insertEntry(current_value));
EXPECT_TRUE(builder_->containsValue(current_value));
EXPECT_EQ(kNumSampleValues + GetParam(), builder_->numberOfEntries());
EXPECT_EQ(bitsNeededForCodes(kNumSampleValues + GetParam()), builder_->codeLengthBits());
EXPECT_EQ(sizeof(uint32_t) * (kNumSampleValues + 2) + total_key_length,
builder_->dictionarySizeBytes());
// If type is nullable, attempt to insert a duplicate null.
if (GetParam()) {
current_value = TypedValue(kVarChar);
EXPECT_FALSE(builder_->insertEntry(current_value));
EXPECT_TRUE(builder_->containsValue(current_value));
EXPECT_EQ(kNumSampleValues + GetParam(), builder_->numberOfEntries());
EXPECT_EQ(bitsNeededForCodes(kNumSampleValues + GetParam()), builder_->codeLengthBits());
EXPECT_EQ(sizeof(uint32_t) * (kNumSampleValues + 2) + total_key_length,
builder_->dictionarySizeBytes());
}
// Check for the presence of a nonexistent key.
current_value = varchar_type.makeValue("foo", 4);
EXPECT_FALSE(builder_->containsValue(current_value));
// Actually build the physical dictionary.
dictionary_memory_.reset(builder_->dictionarySizeBytes());
builder_->buildDictionary(dictionary_memory_.get());
// Check the contents of the physical dictionary.
// Layout this test expects, in uint32_t words from the start of the buffer:
//   word 0      - number of non-null values
//   word 1      - kNumSampleValues when nullable (presumably the code reserved
//                 for NULL; confirm against CompressionDictionary), otherwise
//                 the uint32_t maximum
//   words 2...  - one offset per value, in sorted value order
// followed by the value bytes, to which the offsets are relative.
vector<string> sorted_sample_strings(kSampleStrings, kSampleStrings + kNumSampleValues);
sort(sorted_sample_strings.begin(), sorted_sample_strings.end());
const uint32_t *offset_array = static_cast<const uint32_t*>(dictionary_memory_.get()) + 2;
// Value bytes begin right after the two header words and the offset array.
const char *value_memory =
static_cast<const char*>(dictionary_memory_.get())
+ (*static_cast<const uint32_t*>(dictionary_memory_.get()) + 2) * sizeof(uint32_t);
EXPECT_EQ(kNumSampleValues, *static_cast<const uint32_t*>(dictionary_memory_.get()));
if (GetParam()) {
EXPECT_EQ(kNumSampleValues,
*(static_cast<const uint32_t*>(dictionary_memory_.get()) + 1));
} else {
EXPECT_EQ(numeric_limits<uint32_t>::max(),
*(static_cast<const uint32_t*>(dictionary_memory_.get()) + 1));
}
// Each stored value must be a NUL-terminated string equal to the sample at the
// same sorted position.
for (uint32_t sample_idx = 0; sample_idx < kNumSampleValues; ++sample_idx) {
EXPECT_STREQ(sorted_sample_strings[sample_idx].c_str(),
value_memory + offset_array[sample_idx]);
}
}
// Same test as above, but insert values via insertEntryByReference().
// Drives a CompressionDictionaryBuilder for a VarChar type through inserts,
// undoLastInsert() rollbacks and duplicate inserts using
// insertEntryByReference(), checking numberOfEntries(), codeLengthBits() and
// dictionarySizeBytes() after every step, then builds the physical dictionary
// and inspects its memory. GetParam() selects a nullable type; when true,
// NULL insertion is exercised as well and NULL counts as one extra entry.
//
// Size expectations throughout follow one formula:
//   sizeof(uint32_t) * (2 + number of non-null entries) + total_key_length
// A NULL entry is expected to leave dictionarySizeBytes() unchanged.
TEST_P(CompressionDictionaryTest, VarCharKeyBuildInsertByReferenceTest) {
  const VarCharType &varchar_type = VarCharType::Instance(kVarCharSizeParameter, GetParam());
  // Keeps every value handed to insertEntryByReference() alive until the
  // dictionary has been built.
  std::vector<TypedValue> values;
  // Reserve room for every push_back below (samples 0 and 1 once each, sample
  // 2 twice because its first insert is rolled back, then samples 3 onward:
  // kNumSampleValues + 1 in total) so the vector never reallocates. A
  // reallocation would relocate, or copy and destroy, TypedValues the builder
  // was given by reference.
  // NOTE(review): whether the builder keeps pointers into these objects or
  // into their out-of-line data is not visible from this test; reserving up
  // front is safe in either case.
  values.reserve(kNumSampleValues + 1);
  TypedValue other_value;
  // Running sum of getDataSize() over the non-null values expected to be in
  // the builder at this point; adjusted by hand around every insert and
  // rollback.
  size_t total_key_length = 0;

  // Construct a CompressionDictionaryBuilder and check its initial state.
  setupBuilderForType(varchar_type);
  EXPECT_EQ(0u, builder_->numberOfEntries());
  EXPECT_EQ(bitsNeededForCodes(0), builder_->codeLengthBits());
  EXPECT_EQ(sizeof(uint32_t) * 2 + total_key_length, builder_->dictionarySizeBytes());

  // If the type is nullable, try inserting a null first.
  if (GetParam()) {
    TypedValue null_value = TypedValue(kVarChar);
    EXPECT_TRUE(builder_->insertEntryByReference(null_value));
    EXPECT_TRUE(builder_->containsValue(null_value));
    EXPECT_EQ(1u, builder_->numberOfEntries());
    EXPECT_EQ(bitsNeededForCodes(1), builder_->codeLengthBits());
    // The null is counted as an entry but adds nothing to the size.
    EXPECT_EQ(2 * sizeof(uint32_t) + total_key_length, builder_->dictionarySizeBytes());

    // Roll back to zero entries.
    builder_->undoLastInsert();
    EXPECT_FALSE(builder_->containsValue(null_value));
    EXPECT_EQ(0u, builder_->numberOfEntries());
    EXPECT_EQ(bitsNeededForCodes(0), builder_->codeLengthBits());
    EXPECT_EQ(2 * sizeof(uint32_t) + total_key_length, builder_->dictionarySizeBytes());
  }

  // Add a value. The length passed to makeValue() includes the terminating
  // NUL.
  values.push_back(varchar_type.makeValue(kSampleStrings[0], strlen(kSampleStrings[0]) + 1));
  values.back().ensureNotReference();
  total_key_length += values.back().getDataSize();
  EXPECT_TRUE(builder_->insertEntryByReference(values.back()));
  EXPECT_TRUE(builder_->containsValue(values.back()));
  EXPECT_EQ(1u, builder_->numberOfEntries());
  EXPECT_EQ(bitsNeededForCodes(1), builder_->codeLengthBits());
  EXPECT_EQ(sizeof(uint32_t) * 3 + total_key_length, builder_->dictionarySizeBytes());

  // Roll back to zero entries.
  total_key_length -= values.back().getDataSize();
  builder_->undoLastInsert();
  EXPECT_FALSE(builder_->containsValue(values.back()));
  EXPECT_EQ(0u, builder_->numberOfEntries());
  EXPECT_EQ(bitsNeededForCodes(0), builder_->codeLengthBits());
  EXPECT_EQ(sizeof(uint32_t) * 2 + total_key_length, builder_->dictionarySizeBytes());

  // Re-add the value.
  total_key_length += values.back().getDataSize();
  EXPECT_TRUE(builder_->insertEntryByReference(values.back()));
  EXPECT_TRUE(builder_->containsValue(values.back()));
  EXPECT_EQ(1u, builder_->numberOfEntries());
  EXPECT_EQ(bitsNeededForCodes(1), builder_->codeLengthBits());
  EXPECT_EQ(sizeof(uint32_t) * 3 + total_key_length, builder_->dictionarySizeBytes());

  // If the type is nullable, also try inserting a null with a real value
  // present.
  if (GetParam()) {
    TypedValue null_value = TypedValue(kVarChar);
    EXPECT_TRUE(builder_->insertEntryByReference(null_value));
    EXPECT_TRUE(builder_->containsValue(null_value));
    EXPECT_EQ(2u, builder_->numberOfEntries());
    EXPECT_EQ(bitsNeededForCodes(2), builder_->codeLengthBits());
    EXPECT_EQ(3 * sizeof(uint32_t) + total_key_length, builder_->dictionarySizeBytes());

    // Roll back insert of null.
    builder_->undoLastInsert();
    EXPECT_FALSE(builder_->containsValue(null_value));
    EXPECT_EQ(1u, builder_->numberOfEntries());
    EXPECT_EQ(bitsNeededForCodes(1), builder_->codeLengthBits());
    EXPECT_EQ(3 * sizeof(uint32_t) + total_key_length, builder_->dictionarySizeBytes());

    // Finally, re-insert the null. From here on a nullable run carries one
    // more entry than a non-nullable run, which is why later entry counts add
    // GetParam().
    EXPECT_TRUE(builder_->insertEntryByReference(null_value));
    EXPECT_TRUE(builder_->containsValue(null_value));
    EXPECT_EQ(2u, builder_->numberOfEntries());
    EXPECT_EQ(bitsNeededForCodes(2), builder_->codeLengthBits());
    EXPECT_EQ(3 * sizeof(uint32_t) + total_key_length, builder_->dictionarySizeBytes());
  }

  // Add a reference value (no ensureNotReference() call this time).
  values.push_back(varchar_type.makeValue(kSampleStrings[1], strlen(kSampleStrings[1]) + 1));
  total_key_length += values.back().getDataSize();
  EXPECT_TRUE(builder_->insertEntryByReference(values.back()));

  // Check against a different but equal value.
  other_value = varchar_type.makeValue(kSampleStrings[1], strlen(kSampleStrings[1]) + 1);
  other_value.ensureNotReference();
  EXPECT_TRUE(builder_->containsValue(other_value));
  EXPECT_EQ(2u + GetParam(), builder_->numberOfEntries());
  EXPECT_EQ(bitsNeededForCodes(2 + GetParam()), builder_->codeLengthBits());
  EXPECT_EQ(sizeof(uint32_t) * 4 + total_key_length, builder_->dictionarySizeBytes());

  // Roll it back.
  total_key_length -= values.back().getDataSize();
  builder_->undoLastInsert();
  EXPECT_FALSE(builder_->containsValue(values.back()));
  EXPECT_EQ(1u + GetParam(), builder_->numberOfEntries());
  EXPECT_EQ(bitsNeededForCodes(1 + GetParam()), builder_->codeLengthBits());
  EXPECT_EQ(sizeof(uint32_t) * 3 + total_key_length, builder_->dictionarySizeBytes());

  // Re-add it.
  total_key_length += values.back().getDataSize();
  EXPECT_TRUE(builder_->insertEntryByReference(values.back()));
  EXPECT_TRUE(builder_->containsValue(values.back()));
  EXPECT_EQ(2u + GetParam(), builder_->numberOfEntries());
  EXPECT_EQ(bitsNeededForCodes(2 + GetParam()), builder_->codeLengthBits());
  EXPECT_EQ(sizeof(uint32_t) * 4 + total_key_length, builder_->dictionarySizeBytes());

  // Add a third value.
  values.push_back(varchar_type.makeValue(kSampleStrings[2], strlen(kSampleStrings[2]) + 1));
  total_key_length += values.back().getDataSize();
  EXPECT_TRUE(builder_->insertEntryByReference(values.back()));
  EXPECT_TRUE(builder_->containsValue(values.back()));
  EXPECT_EQ(3u + GetParam(), builder_->numberOfEntries());
  EXPECT_EQ(bitsNeededForCodes(3 + GetParam()), builder_->codeLengthBits());
  EXPECT_EQ(sizeof(uint32_t) * 5 + total_key_length, builder_->dictionarySizeBytes());

  // Roll it back (this is the last time we will check this, we just need to
  // cover all the possible paths where the code length may shrink).
  builder_->undoLastInsert();
  total_key_length -= values.back().getDataSize();
  EXPECT_FALSE(builder_->containsValue(values.back()));
  EXPECT_EQ(2u + GetParam(), builder_->numberOfEntries());
  EXPECT_EQ(bitsNeededForCodes(2 + GetParam()), builder_->codeLengthBits());
  EXPECT_EQ(sizeof(uint32_t) * 4 + total_key_length, builder_->dictionarySizeBytes());

  // Insert the rest of the sample values. Starts at index 2 again because
  // kSampleStrings[2] was rolled back just above. After each insert there are
  // (sample_idx + 1) non-null entries, hence (sample_idx + 3) uint32_t words
  // in the expected size.
  for (uint32_t sample_idx = 2; sample_idx < kNumSampleValues; ++sample_idx) {
    values.push_back(varchar_type.makeValue(kSampleStrings[sample_idx],
                                            strlen(kSampleStrings[sample_idx]) + 1));
    total_key_length += values.back().getDataSize();
    EXPECT_TRUE(builder_->insertEntryByReference(values.back()));
    EXPECT_TRUE(builder_->containsValue(values.back()));
    EXPECT_EQ(sample_idx + 1 + GetParam(), builder_->numberOfEntries());
    EXPECT_EQ(bitsNeededForCodes(sample_idx + 1 + GetParam()), builder_->codeLengthBits());
    EXPECT_EQ(sizeof(uint32_t) * (sample_idx + 3) + total_key_length,
              builder_->dictionarySizeBytes());
  }

  // Attempt to insert a duplicate entry (the sample at index
  // kNumSampleValues / 2); it must be rejected and leave every counter
  // unchanged.
  other_value = varchar_type.makeValue(kSampleStrings[kNumSampleValues >> 1],
                                       strlen(kSampleStrings[kNumSampleValues >> 1]) + 1);
  EXPECT_FALSE(builder_->insertEntryByReference(other_value));
  EXPECT_TRUE(builder_->containsValue(other_value));
  EXPECT_EQ(kNumSampleValues + GetParam(), builder_->numberOfEntries());
  EXPECT_EQ(bitsNeededForCodes(kNumSampleValues + GetParam()), builder_->codeLengthBits());
  EXPECT_EQ(sizeof(uint32_t) * (kNumSampleValues + 2) + total_key_length,
            builder_->dictionarySizeBytes());

  // If type is nullable, attempt to insert a duplicate null.
  if (GetParam()) {
    TypedValue null_value = TypedValue(kVarChar);
    EXPECT_FALSE(builder_->insertEntryByReference(null_value));
    EXPECT_TRUE(builder_->containsValue(null_value));
    EXPECT_EQ(kNumSampleValues + GetParam(), builder_->numberOfEntries());
    EXPECT_EQ(bitsNeededForCodes(kNumSampleValues + GetParam()), builder_->codeLengthBits());
    EXPECT_EQ(sizeof(uint32_t) * (kNumSampleValues + 2) + total_key_length,
              builder_->dictionarySizeBytes());
  }

  // Check for the presence of a nonexistent key.
  other_value = varchar_type.makeValue("foo", 4);
  EXPECT_FALSE(builder_->containsValue(other_value));

  // Actually build the physical dictionary.
  dictionary_memory_.reset(builder_->dictionarySizeBytes());
  builder_->buildDictionary(dictionary_memory_.get());

  // Check the contents of the physical dictionary. Layout this test expects,
  // in uint32_t words from the start of the buffer:
  //   word 0      - number of non-null values
  //   word 1      - kNumSampleValues when nullable (presumably the code
  //                 reserved for NULL; confirm against CompressionDictionary),
  //                 otherwise the uint32_t maximum
  //   words 2...  - one offset per value, in sorted value order
  // followed by the value bytes, to which the offsets are relative.
  vector<string> sorted_sample_strings(kSampleStrings, kSampleStrings + kNumSampleValues);
  sort(sorted_sample_strings.begin(), sorted_sample_strings.end());
  const uint32_t *offset_array = static_cast<const uint32_t*>(dictionary_memory_.get()) + 2;
  // Value bytes begin right after the two header words and the offset array.
  const char *value_memory =
      static_cast<const char*>(dictionary_memory_.get())
          + (*static_cast<const uint32_t*>(dictionary_memory_.get()) + 2) * sizeof(uint32_t);
  EXPECT_EQ(kNumSampleValues, *static_cast<const uint32_t*>(dictionary_memory_.get()));
  if (GetParam()) {
    EXPECT_EQ(kNumSampleValues,
              *(static_cast<const uint32_t*>(dictionary_memory_.get()) + 1));
  } else {
    EXPECT_EQ(numeric_limits<uint32_t>::max(),
              *(static_cast<const uint32_t*>(dictionary_memory_.get()) + 1));
  }
  // Each stored value must be a NUL-terminated string equal to the sample at
  // the same sorted position.
  for (uint32_t sample_idx = 0; sample_idx < kNumSampleValues; ++sample_idx) {
    EXPECT_STREQ(sorted_sample_strings[sample_idx].c_str(),
                 value_memory + offset_array[sample_idx]);
  }
}
// The sample VarChar dictionary must report one code per sample value, plus
// one more when the parameterization is nullable, and a code length sized for
// exactly that many codes.
TEST_P(CompressionDictionaryTest, VarCharKeyInfoTest) {
  setupSampleVarCharDictionary();

  // GetParam() contributes 1 for the nullable parameterization.
  const uint32_t expected_code_count = kNumSampleValues + GetParam();
  EXPECT_EQ(expected_code_count, dictionary_->numberOfCodes());
  EXPECT_EQ(bitsNeededForCodes(expected_code_count), dictionary_->codeLengthBits());
}
// Looks up every sample string through getCodeForUntypedValue() and expects
// its code to equal its rank in sorted order. Also checks the code returned
// for NULL (nullable parameterization only) and for a string that is absent.
TEST_P(CompressionDictionaryTest, VarCharKeyGetCodeUntypedTest) {
  setupSampleVarCharDictionary();

  vector<string> ordered_keys(kSampleStrings, kSampleStrings + kNumSampleValues);
  sort(ordered_keys.begin(), ordered_keys.end());

  // Walk the keys in sorted order; the running counter is the expected code.
  uint32_t expected_code = 0;
  for (const string &key : ordered_keys) {
    EXPECT_EQ(expected_code, dictionary_->getCodeForUntypedValue(key.c_str()));
    ++expected_code;
  }

  // NULL is expected to take the code right after the last real value.
  if (GetParam()) {
    EXPECT_EQ(kNumSampleValues, dictionary_->getCodeForUntypedValue(nullptr));
  }

  // An absent key is expected to map to the total number of codes.
  EXPECT_EQ(kNumSampleValues + GetParam(), dictionary_->getCodeForUntypedValue("foo"));
}
// Looks up every sample string through getCodeForTypedValue() using VarChar
// literals and expects its code to equal its rank in sorted order. Also
// checks the code returned for NULL (nullable parameterization only) and for
// a string that is absent.
TEST_P(CompressionDictionaryTest, VarCharKeyGetCodeTypedTest) {
  setupSampleVarCharDictionary();

  vector<string> ordered_keys(kSampleStrings, kSampleStrings + kNumSampleValues);
  sort(ordered_keys.begin(), ordered_keys.end());

  // Non-null literals are typed as a non-nullable VarChar of the same size
  // parameter.
  const VarCharType &literal_type = VarCharType::InstanceNonNullable(kVarCharSizeParameter);

  // Walk the keys in sorted order; the running counter is the expected code.
  uint32_t expected_code = 0;
  for (const string &key : ordered_keys) {
    // The length handed to makeValue() includes the terminating NUL.
    TypedValue literal = literal_type.makeValue(key.c_str(), key.size() + 1);
    EXPECT_EQ(expected_code, dictionary_->getCodeForTypedValue(literal, literal_type));
    ++expected_code;
  }

  // NULL is expected to take the code right after the last real value.
  if (GetParam()) {
    EXPECT_EQ(kNumSampleValues,
              dictionary_->getCodeForTypedValue(TypedValue(kVarChar), *dictionary_type_));
  }

  // An absent key is expected to map to the total number of codes.
  TypedValue absent_literal = literal_type.makeValue("foo", 4);
  EXPECT_EQ(kNumSampleValues + GetParam(),
            dictionary_->getCodeForTypedValue(absent_literal, literal_type));
}
// Counterpart of VarCharKeyGetCodeTypedTest in which the looked-up literals
// are CharType values rather than VarCharType values.
TEST_P(CompressionDictionaryTest, VarCharKeyGetCodeDifferentTypeTest) {
  setupSampleVarCharDictionary();

  vector<string> expected_order(kSampleStrings, kSampleStrings + kNumSampleValues);
  sort(expected_order.begin(), expected_order.end());

  // The Char literals are declared 10 wider than kVarCharSizeParameter.
  const CharType &literal_type = CharType::InstanceNonNullable(kVarCharSizeParameter + 10);

  // Each string present in the dictionary maps to its sorted position.
  uint32_t expected_code = 0;
  for (const string &sample : expected_order) {
    // The length handed to makeValue() counts the terminating '\0'.
    const TypedValue literal = literal_type.makeValue(sample.c_str(), sample.size() + 1);
    EXPECT_EQ(expected_code, dictionary_->getCodeForTypedValue(literal, literal_type));
    ++expected_code;
  }

  // When the test parameter is true, a TypedValue built from only the kChar
  // type ID maps to code kNumSampleValues.
  if (GetParam()) {
    const TypedValue null_literal(kChar);
    EXPECT_EQ(kNumSampleValues,
              dictionary_->getCodeForTypedValue(null_literal,
                                                CharType::InstanceNullable(kVarCharSizeParameter)));
  }

  // "foo" is not one of the dictionary's values.
  const TypedValue missing_literal = literal_type.makeValue("foo", 4);
  const uint32_t code_for_missing = kNumSampleValues + GetParam();
  EXPECT_EQ(code_for_missing,
            dictionary_->getCodeForTypedValue(missing_literal, literal_type));
}
// Decodes every code through the byte-, short- and general-code untyped
// accessors and expects the sample string with the matching sorted rank.
TEST_P(CompressionDictionaryTest, VarCharKeyGetValueUntypedTest) {
  setupSampleVarCharDictionary();

  vector<string> expected_order(kSampleStrings, kSampleStrings + kNumSampleValues);
  sort(expected_order.begin(), expected_order.end());

  for (uint8_t code = 0; code < kNumSampleValues; ++code) {
    const char *expected = expected_order[code].c_str();

    const char *decoded = static_cast<const char*>(dictionary_->getUntypedValueForByteCode(code));
    EXPECT_STREQ(expected, decoded);

    decoded = static_cast<const char*>(dictionary_->getUntypedValueForShortCode(code));
    EXPECT_STREQ(expected, decoded);

    decoded = static_cast<const char*>(dictionary_->getUntypedValueForCode(code));
    EXPECT_STREQ(expected, decoded);
  }

  // The remaining checks only apply when the test parameter is true.
  if (!GetParam()) {
    return;
  }

  // Code kNumSampleValues decodes to a null pointer through every accessor.
  const void *decoded_null = dictionary_->getUntypedValueForByteCode(kNumSampleValues);
  EXPECT_EQ(nullptr, decoded_null);

  decoded_null = dictionary_->getUntypedValueForShortCode(kNumSampleValues);
  EXPECT_EQ(nullptr, decoded_null);

  decoded_null = dictionary_->getUntypedValueForCode(kNumSampleValues);
  EXPECT_EQ(nullptr, decoded_null);
}
// Decodes every code through the byte-, short- and general-code typed
// accessors and expects a TypedValue equal to the sample string with the
// matching sorted rank. When the test parameter is true, code
// kNumSampleValues must decode to a null value of type kVarChar.
TEST_P(CompressionDictionaryTest, VarCharKeyGetValueTypedTest) {
  setupSampleVarCharDictionary();

  vector<string> sorted_sample_strings(kSampleStrings, kSampleStrings + kNumSampleValues);
  sort(sorted_sample_strings.begin(), sorted_sample_strings.end());

  // The type's nullability follows the test parameter.
  const VarCharType &varchar_type = VarCharType::Instance(kVarCharSizeParameter, GetParam());
  const Comparison &equal_comp = ComparisonFactory::GetComparison(ComparisonID::kEqual);

  for (uint8_t code = 0; code < kNumSampleValues; ++code) {
    const string &expected_string = sorted_sample_strings[code];
    // Pass size() + 1 so the terminating '\0' is counted, as every other
    // makeValue() call on a sample string in this file does
    // (std::string::size() excludes the terminator).
    const TypedValue expected_value = varchar_type.makeValue(expected_string.c_str(),
                                                             expected_string.size() + 1);

    TypedValue value = dictionary_->getTypedValueForByteCode(code);
    EXPECT_TRUE(equal_comp.compareTypedValuesChecked(expected_value, varchar_type,
                                                     value, varchar_type));

    value = dictionary_->getTypedValueForShortCode(code);
    EXPECT_TRUE(equal_comp.compareTypedValuesChecked(expected_value, varchar_type,
                                                     value, varchar_type));

    value = dictionary_->getTypedValueForCode(code);
    EXPECT_TRUE(equal_comp.compareTypedValuesChecked(expected_value, varchar_type,
                                                     value, varchar_type));
  }

  if (GetParam()) {
    // Code kNumSampleValues decodes to a null kVarChar value through every
    // accessor.
    TypedValue value = dictionary_->getTypedValueForByteCode(kNumSampleValues);
    EXPECT_TRUE(value.isNull());
    EXPECT_EQ(kVarChar, value.getTypeID());

    value = dictionary_->getTypedValueForShortCode(kNumSampleValues);
    EXPECT_TRUE(value.isNull());
    EXPECT_EQ(kVarChar, value.getTypeID());

    value = dictionary_->getTypedValueForCode(kNumSampleValues);
    EXPECT_TRUE(value.isNull());
    EXPECT_EQ(kVarChar, value.getTypeID());
  }
}
// Exercises the untyped lower/upper bound code lookups with strings that are
// in the dictionary, below all of its values, above all of its values, and
// between its last two values.
TEST_P(CompressionDictionaryTest, VarCharKeyBoundCodesUntypedTest) {
  setupSampleVarCharDictionary();

  vector<string> expected_order(kSampleStrings, kSampleStrings + kNumSampleValues);
  sort(expected_order.begin(), expected_order.end());

  // A string that is in the dictionary: lower bound is its own rank, upper
  // bound is the rank after it.
  uint32_t rank = 0;
  for (const string &sample : expected_order) {
    const char *literal = sample.c_str();
    EXPECT_EQ(rank, dictionary_->getLowerBoundCodeForUntypedValue(literal));
    EXPECT_EQ(rank + 1, dictionary_->getUpperBoundCodeForUntypedValue(literal));
    ++rank;
  }

  // Dropping the last character of the smallest string gives a string that
  // sorts before everything in the dictionary.
  string below_all(expected_order.front());
  below_all.resize(below_all.size() - 1);
  EXPECT_EQ(0u, dictionary_->getLowerBoundCodeForUntypedValue(below_all.c_str()));
  EXPECT_EQ(0u, dictionary_->getUpperBoundCodeForUntypedValue(below_all.c_str()));

  // Appending 'z' to the largest string gives a string that sorts after
  // everything in the dictionary.
  const string above_all = expected_order.back() + 'z';
  const uint32_t lower_bound_above_all = kNumSampleValues + GetParam();
  EXPECT_EQ(lower_bound_above_all,
            dictionary_->getLowerBoundCodeForUntypedValue(above_all.c_str()));
  // Because of an implementation quirk, with nulls the upper bound can end up
  // smaller than the lower bound.
  EXPECT_EQ(kNumSampleValues,
            dictionary_->getUpperBoundCodeForUntypedValue(above_all.c_str()));

  // Appending 'z' to the second-largest string gives a string that falls
  // between the last two values.
  const string between_last_two = expected_order[expected_order.size() - 2] + 'z';
  EXPECT_EQ(kNumSampleValues - 1,
            dictionary_->getLowerBoundCodeForUntypedValue(between_last_two.c_str()));
  EXPECT_EQ(kNumSampleValues - 1,
            dictionary_->getUpperBoundCodeForUntypedValue(between_last_two.c_str()));
}
// Exercises the typed lower/upper bound code lookups with non-nullable VarChar
// literals that are in the dictionary, below all of its values, above all of
// its values, and between its last two values.
TEST_P(CompressionDictionaryTest, VarCharKeyBoundCodesTypedTest) {
  setupSampleVarCharDictionary();

  vector<string> expected_order(kSampleStrings, kSampleStrings + kNumSampleValues);
  sort(expected_order.begin(), expected_order.end());

  const VarCharType &literal_type = VarCharType::InstanceNonNullable(kVarCharSizeParameter);

  // A string that is in the dictionary: lower bound is its own rank, upper
  // bound is the rank after it.
  uint32_t rank = 0;
  for (const string &sample : expected_order) {
    const TypedValue literal = literal_type.makeValue(sample.c_str(), sample.size() + 1);
    EXPECT_EQ(rank, dictionary_->getLowerBoundCodeForTypedValue(literal, literal_type));
    EXPECT_EQ(rank + 1, dictionary_->getUpperBoundCodeForTypedValue(literal, literal_type));
    ++rank;
  }

  // Dropping the last character of the smallest string gives a string that
  // sorts before everything in the dictionary.
  string below_all(expected_order.front());
  below_all.resize(below_all.size() - 1);
  const TypedValue below_literal = literal_type.makeValue(below_all.c_str(),
                                                          below_all.size() + 1);
  EXPECT_EQ(0u, dictionary_->getLowerBoundCodeForTypedValue(below_literal, literal_type));
  EXPECT_EQ(0u, dictionary_->getUpperBoundCodeForTypedValue(below_literal, literal_type));

  // Appending 'z' to the largest string gives a string that sorts after
  // everything in the dictionary.
  const string above_all = expected_order.back() + 'z';
  const TypedValue above_literal = literal_type.makeValue(above_all.c_str(),
                                                          above_all.size() + 1);
  const uint32_t lower_bound_above_all = kNumSampleValues + GetParam();
  EXPECT_EQ(lower_bound_above_all,
            dictionary_->getLowerBoundCodeForTypedValue(above_literal, literal_type));
  // Because of an implementation quirk, with nulls the upper bound can end up
  // smaller than the lower bound.
  EXPECT_EQ(kNumSampleValues,
            dictionary_->getUpperBoundCodeForTypedValue(above_literal, literal_type));

  // Appending 'z' to the second-largest string gives a string that falls
  // between the last two values.
  const string between_last_two = expected_order[expected_order.size() - 2] + 'z';
  const TypedValue between_literal = literal_type.makeValue(between_last_two.c_str(),
                                                            between_last_two.size() + 1);
  EXPECT_EQ(kNumSampleValues - 1,
            dictionary_->getLowerBoundCodeForTypedValue(between_literal, literal_type));
  EXPECT_EQ(kNumSampleValues - 1,
            dictionary_->getUpperBoundCodeForTypedValue(between_literal, literal_type));
}
// Counterpart of VarCharKeyBoundCodesTypedTest in which the probe literals are
// CharType values rather than VarCharType values.
TEST_P(CompressionDictionaryTest, VarCharKeyBoundCodesDifferentTypeTest) {
  setupSampleVarCharDictionary();

  vector<string> expected_order(kSampleStrings, kSampleStrings + kNumSampleValues);
  sort(expected_order.begin(), expected_order.end());

  // The Char literals are declared 10 wider than kVarCharSizeParameter.
  const CharType &literal_type = CharType::InstanceNonNullable(kVarCharSizeParameter + 10);

  // A string that is in the dictionary: lower bound is its own rank, upper
  // bound is the rank after it.
  uint32_t rank = 0;
  for (const string &sample : expected_order) {
    const TypedValue literal = literal_type.makeValue(sample.c_str(), sample.size() + 1);
    EXPECT_EQ(rank, dictionary_->getLowerBoundCodeForTypedValue(literal, literal_type));
    EXPECT_EQ(rank + 1, dictionary_->getUpperBoundCodeForTypedValue(literal, literal_type));
    ++rank;
  }

  // Dropping the last character of the smallest string gives a string that
  // sorts before everything in the dictionary.
  string below_all(expected_order.front());
  below_all.resize(below_all.size() - 1);
  const TypedValue below_literal = literal_type.makeValue(below_all.c_str(),
                                                          below_all.size() + 1);
  EXPECT_EQ(0u, dictionary_->getLowerBoundCodeForTypedValue(below_literal, literal_type));
  EXPECT_EQ(0u, dictionary_->getUpperBoundCodeForTypedValue(below_literal, literal_type));

  // Appending 'z' to the largest string gives a string that sorts after
  // everything in the dictionary.
  const string above_all = expected_order.back() + 'z';
  const TypedValue above_literal = literal_type.makeValue(above_all.c_str(),
                                                          above_all.size() + 1);
  const uint32_t lower_bound_above_all = kNumSampleValues + GetParam();
  EXPECT_EQ(lower_bound_above_all,
            dictionary_->getLowerBoundCodeForTypedValue(above_literal, literal_type));
  // Because of an implementation quirk, with nulls the upper bound can end up
  // smaller than the lower bound.
  EXPECT_EQ(kNumSampleValues,
            dictionary_->getUpperBoundCodeForTypedValue(above_literal, literal_type));

  // Appending 'z' to the second-largest string gives a string that falls
  // between the last two values.
  const string between_last_two = expected_order[expected_order.size() - 2] + 'z';
  const TypedValue between_literal = literal_type.makeValue(between_last_two.c_str(),
                                                            between_last_two.size() + 1);
  EXPECT_EQ(kNumSampleValues - 1,
            dictionary_->getLowerBoundCodeForTypedValue(between_literal, literal_type));
  EXPECT_EQ(kNumSampleValues - 1,
            dictionary_->getUpperBoundCodeForTypedValue(between_literal, literal_type));
}
// Checks the pair of limit codes that the untyped interface returns for each
// comparison against strings inside, before and after the dictionary's range
// of values, against a null pointer, and against a missing string.
TEST_P(CompressionDictionaryTest, VarCharKeyLimitCodesForComparisonUntypedTest) {
  setupSampleVarCharDictionary();

  vector<string> expected_order(kSampleStrings, kSampleStrings + kNumSampleValues);
  sort(expected_order.begin(), expected_order.end());

  // Shorthand for the dictionary query exercised throughout this test.
  const auto limits_for = [this](const ComparisonID comp, const char *literal) {
    return dictionary_->getLimitCodesForComparisonUntyped(comp, literal);
  };

  pair<uint32_t, uint32_t> limits;

  // Strings that are present in the dictionary.
  uint32_t rank = 0;
  for (const string &sample : expected_order) {
    const char *literal = sample.c_str();

    limits = limits_for(ComparisonID::kEqual, literal);
    EXPECT_EQ(rank, limits.first);
    EXPECT_EQ(rank + 1, limits.second);

    limits = limits_for(ComparisonID::kLess, literal);
    EXPECT_EQ(0u, limits.first);
    EXPECT_EQ(rank, limits.second);

    limits = limits_for(ComparisonID::kLessOrEqual, literal);
    EXPECT_EQ(0u, limits.first);
    EXPECT_EQ(rank + 1, limits.second);

    limits = limits_for(ComparisonID::kGreater, literal);
    EXPECT_EQ(rank + 1, limits.first);
    EXPECT_EQ(kNumSampleValues, limits.second);

    limits = limits_for(ComparisonID::kGreaterOrEqual, literal);
    EXPECT_EQ(rank, limits.first);
    EXPECT_EQ(kNumSampleValues, limits.second);

    ++rank;
  }

  // A string ordered before everything in the dictionary: the smallest sample
  // with its last character removed.
  std::string below_all(expected_order.front());
  below_all.pop_back();

  limits = limits_for(ComparisonID::kEqual, below_all.c_str());
  EXPECT_GE(limits.first, limits.second);

  limits = limits_for(ComparisonID::kLess, below_all.c_str());
  EXPECT_EQ(0u, limits.first);
  EXPECT_EQ(0u, limits.second);

  limits = limits_for(ComparisonID::kLessOrEqual, below_all.c_str());
  EXPECT_EQ(0u, limits.first);
  EXPECT_EQ(0u, limits.second);

  limits = limits_for(ComparisonID::kGreater, below_all.c_str());
  EXPECT_EQ(0u, limits.first);
  EXPECT_EQ(kNumSampleValues, limits.second);

  limits = limits_for(ComparisonID::kGreaterOrEqual, below_all.c_str());
  EXPECT_EQ(0u, limits.first);
  EXPECT_EQ(kNumSampleValues, limits.second);

  // A string ordered after everything in the dictionary: the largest sample
  // with 'z' appended.
  std::string above_all(expected_order.back());
  above_all.push_back('z');

  limits = limits_for(ComparisonID::kEqual, above_all.c_str());
  EXPECT_GE(limits.first, limits.second);

  limits = limits_for(ComparisonID::kLess, above_all.c_str());
  EXPECT_EQ(0u, limits.first);
  EXPECT_EQ(kNumSampleValues, limits.second);

  limits = limits_for(ComparisonID::kLessOrEqual, above_all.c_str());
  EXPECT_EQ(0u, limits.first);
  EXPECT_EQ(kNumSampleValues, limits.second);

  limits = limits_for(ComparisonID::kGreater, above_all.c_str());
  EXPECT_GE(limits.first, limits.second);

  limits = limits_for(ComparisonID::kGreaterOrEqual, above_all.c_str());
  EXPECT_GE(limits.first, limits.second);

  if (GetParam()) {
    // No comparison against null matches anything: both limits are
    // kNumSampleValues + 1 for every comparison.
    const ComparisonID all_comparisons[] = {
        ComparisonID::kEqual,
        ComparisonID::kLess,
        ComparisonID::kLessOrEqual,
        ComparisonID::kGreater,
        ComparisonID::kGreaterOrEqual};
    for (const ComparisonID comp : all_comparisons) {
      limits = limits_for(comp, nullptr);
      EXPECT_EQ(kNumSampleValues + 1, limits.first);
      EXPECT_EQ(kNumSampleValues + 1, limits.second);
    }
  }

  // kEqual against a string that is not in the dictionary yields an empty
  // range.
  limits = limits_for(ComparisonID::kEqual, "foo");
  EXPECT_EQ(limits.first, limits.second);
}
// Checks the pair of limit codes that the typed interface returns for each
// comparison against VarChar literals inside, before and after the
// dictionary's range of values, against a null literal, and against a missing
// string.
TEST_P(CompressionDictionaryTest, VarCharKeyLimitCodesForComparisonTypedTest) {
  setupSampleVarCharDictionary();

  vector<string> expected_order(kSampleStrings, kSampleStrings + kNumSampleValues);
  sort(expected_order.begin(), expected_order.end());

  const VarCharType &literal_type = VarCharType::InstanceNonNullable(kVarCharSizeParameter);
  TypedValue literal;
  pair<uint32_t, uint32_t> limits;

  // Queries the limit codes for 'comp' against whatever 'literal' currently
  // holds.
  const auto limits_for = [this, &literal, &literal_type](const ComparisonID comp) {
    return dictionary_->getLimitCodesForComparisonTyped(comp, literal, literal_type);
  };

  // Strings that are present in the dictionary.
  uint32_t rank = 0;
  for (const string &sample : expected_order) {
    literal = literal_type.makeValue(sample.c_str(), sample.size() + 1);

    limits = limits_for(ComparisonID::kEqual);
    EXPECT_EQ(rank, limits.first);
    EXPECT_EQ(rank + 1, limits.second);

    limits = limits_for(ComparisonID::kLess);
    EXPECT_EQ(0u, limits.first);
    EXPECT_EQ(rank, limits.second);

    limits = limits_for(ComparisonID::kLessOrEqual);
    EXPECT_EQ(0u, limits.first);
    EXPECT_EQ(rank + 1, limits.second);

    limits = limits_for(ComparisonID::kGreater);
    EXPECT_EQ(rank + 1, limits.first);
    EXPECT_EQ(kNumSampleValues, limits.second);

    limits = limits_for(ComparisonID::kGreaterOrEqual);
    EXPECT_EQ(rank, limits.first);
    EXPECT_EQ(kNumSampleValues, limits.second);

    ++rank;
  }

  // A string ordered before everything in the dictionary: the smallest sample
  // with its last character removed.
  std::string below_all(expected_order.front());
  below_all.pop_back();
  literal = literal_type.makeValue(below_all.c_str(), below_all.size() + 1);

  limits = limits_for(ComparisonID::kEqual);
  EXPECT_GE(limits.first, limits.second);

  limits = limits_for(ComparisonID::kLess);
  EXPECT_EQ(0u, limits.first);
  EXPECT_EQ(0u, limits.second);

  limits = limits_for(ComparisonID::kLessOrEqual);
  EXPECT_EQ(0u, limits.first);
  EXPECT_EQ(0u, limits.second);

  limits = limits_for(ComparisonID::kGreater);
  EXPECT_EQ(0u, limits.first);
  EXPECT_EQ(kNumSampleValues, limits.second);

  limits = limits_for(ComparisonID::kGreaterOrEqual);
  EXPECT_EQ(0u, limits.first);
  EXPECT_EQ(kNumSampleValues, limits.second);

  // A string ordered after everything in the dictionary: the largest sample
  // with 'z' appended.
  std::string above_all(expected_order.back());
  above_all.push_back('z');
  literal = literal_type.makeValue(above_all.c_str(), above_all.size() + 1);

  limits = limits_for(ComparisonID::kEqual);
  EXPECT_GE(limits.first, limits.second);

  limits = limits_for(ComparisonID::kLess);
  EXPECT_EQ(0u, limits.first);
  EXPECT_EQ(kNumSampleValues, limits.second);

  limits = limits_for(ComparisonID::kLessOrEqual);
  EXPECT_EQ(0u, limits.first);
  EXPECT_EQ(kNumSampleValues, limits.second);

  limits = limits_for(ComparisonID::kGreater);
  EXPECT_GE(limits.first, limits.second);

  limits = limits_for(ComparisonID::kGreaterOrEqual);
  EXPECT_GE(limits.first, limits.second);

  if (GetParam()) {
    // No comparison against null matches anything: both limits are
    // kNumSampleValues + 1 for every comparison.
    const TypedValue null_literal(kVarChar);
    const VarCharType &nullable_type = VarCharType::InstanceNullable(kVarCharSizeParameter);
    const ComparisonID all_comparisons[] = {
        ComparisonID::kEqual,
        ComparisonID::kLess,
        ComparisonID::kLessOrEqual,
        ComparisonID::kGreater,
        ComparisonID::kGreaterOrEqual};
    for (const ComparisonID comp : all_comparisons) {
      limits = dictionary_->getLimitCodesForComparisonTyped(comp, null_literal, nullable_type);
      EXPECT_EQ(kNumSampleValues + 1, limits.first);
      EXPECT_EQ(kNumSampleValues + 1, limits.second);
    }
  }

  // kEqual against a string that is not in the dictionary yields an empty
  // range.
  literal = literal_type.makeValue("foo", 4);
  limits = limits_for(ComparisonID::kEqual);
  EXPECT_EQ(limits.first, limits.second);
}
// Counterpart of VarCharKeyLimitCodesForComparisonTypedTest in which the probe
// literals are CharType values rather than VarCharType values.
TEST_P(CompressionDictionaryTest, VarCharKeyLimitCodesForComparisonDifferentTypeTest) {
  setupSampleVarCharDictionary();

  vector<string> expected_order(kSampleStrings, kSampleStrings + kNumSampleValues);
  sort(expected_order.begin(), expected_order.end());

  // The Char literals are declared 10 wider than kVarCharSizeParameter.
  const CharType &literal_type = CharType::InstanceNonNullable(kVarCharSizeParameter + 10);
  TypedValue literal;
  pair<uint32_t, uint32_t> limits;

  // Queries the limit codes for 'comp' against whatever 'literal' currently
  // holds.
  const auto limits_for = [this, &literal, &literal_type](const ComparisonID comp) {
    return dictionary_->getLimitCodesForComparisonTyped(comp, literal, literal_type);
  };

  // Strings that are present in the dictionary.
  uint32_t rank = 0;
  for (const string &sample : expected_order) {
    literal = literal_type.makeValue(sample.c_str(), sample.size() + 1);

    limits = limits_for(ComparisonID::kEqual);
    EXPECT_EQ(rank, limits.first);
    EXPECT_EQ(rank + 1, limits.second);

    limits = limits_for(ComparisonID::kLess);
    EXPECT_EQ(0u, limits.first);
    EXPECT_EQ(rank, limits.second);

    limits = limits_for(ComparisonID::kLessOrEqual);
    EXPECT_EQ(0u, limits.first);
    EXPECT_EQ(rank + 1, limits.second);

    limits = limits_for(ComparisonID::kGreater);
    EXPECT_EQ(rank + 1, limits.first);
    EXPECT_EQ(kNumSampleValues, limits.second);

    limits = limits_for(ComparisonID::kGreaterOrEqual);
    EXPECT_EQ(rank, limits.first);
    EXPECT_EQ(kNumSampleValues, limits.second);

    ++rank;
  }

  // A string ordered before everything in the dictionary: the smallest sample
  // with its last character removed.
  std::string below_all(expected_order.front());
  below_all.pop_back();
  literal = literal_type.makeValue(below_all.c_str(), below_all.size() + 1);

  limits = limits_for(ComparisonID::kEqual);
  EXPECT_GE(limits.first, limits.second);

  limits = limits_for(ComparisonID::kLess);
  EXPECT_EQ(0u, limits.first);
  EXPECT_EQ(0u, limits.second);

  limits = limits_for(ComparisonID::kLessOrEqual);
  EXPECT_EQ(0u, limits.first);
  EXPECT_EQ(0u, limits.second);

  limits = limits_for(ComparisonID::kGreater);
  EXPECT_EQ(0u, limits.first);
  EXPECT_EQ(kNumSampleValues, limits.second);

  limits = limits_for(ComparisonID::kGreaterOrEqual);
  EXPECT_EQ(0u, limits.first);
  EXPECT_EQ(kNumSampleValues, limits.second);

  // A string ordered after everything in the dictionary: the largest sample
  // with 'z' appended.
  std::string above_all(expected_order.back());
  above_all.push_back('z');
  literal = literal_type.makeValue(above_all.c_str(), above_all.size() + 1);

  limits = limits_for(ComparisonID::kEqual);
  EXPECT_GE(limits.first, limits.second);

  limits = limits_for(ComparisonID::kLess);
  EXPECT_EQ(0u, limits.first);
  EXPECT_EQ(kNumSampleValues, limits.second);

  limits = limits_for(ComparisonID::kLessOrEqual);
  EXPECT_EQ(0u, limits.first);
  EXPECT_EQ(kNumSampleValues, limits.second);

  limits = limits_for(ComparisonID::kGreater);
  EXPECT_GE(limits.first, limits.second);

  limits = limits_for(ComparisonID::kGreaterOrEqual);
  EXPECT_GE(limits.first, limits.second);

  if (GetParam()) {
    // No comparison against null matches anything: both limits are
    // kNumSampleValues + 1 for every comparison.
    const TypedValue null_literal(kChar);
    const CharType &nullable_type = CharType::InstanceNullable(kVarCharSizeParameter);
    const ComparisonID all_comparisons[] = {
        ComparisonID::kEqual,
        ComparisonID::kLess,
        ComparisonID::kLessOrEqual,
        ComparisonID::kGreater,
        ComparisonID::kGreaterOrEqual};
    for (const ComparisonID comp : all_comparisons) {
      limits = dictionary_->getLimitCodesForComparisonTyped(comp, null_literal, nullable_type);
      EXPECT_EQ(kNumSampleValues + 1, limits.first);
      EXPECT_EQ(kNumSampleValues + 1, limits.second);
    }
  }

  // kEqual against a string that is not in the dictionary yields an empty
  // range.
  literal = literal_type.makeValue("foo", 4);
  limits = limits_for(ComparisonID::kEqual);
  EXPECT_EQ(limits.first, limits.second);
}
// Note: INSTANTIATE_TEST_CASE_P takes a variadic argument list. If the variadic part
// is empty, the C++11 standard says the compiler should produce a warning. That warning
// is converted to an error because we use -Werror as a compiler parameter, which causes
// the Travis build to fail. This is the reason we must give an empty argument as the
// last parameter (the trailing comma below): it suppresses the warning that clang gives.
//
// Both instantiations run every TEST_P of the named fixture once for each value produced
// by ::testing::Bool(), i.e. with GetParam() == false and GetParam() == true.
INSTANTIATE_TEST_CASE_P(WithAndWithoutNullValue,
CompressionDictionaryTest,
::testing::Bool(),); // NOLINT(whitespace/comma)
INSTANTIATE_TEST_CASE_P(WithAndWithoutNullValue,
CompressionDictionaryDeathTest,
::testing::Bool(),); // NOLINT(whitespace/comma)
} // namespace quickstep