blob: 315ac470102ca5ba356441bea51fb3737c285a00 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "orc/OrcFile.hh"
#include "wrap/gmock.h"
#include "wrap/gtest-wrapper.h"
#include "Statistics.hh"
#include <cmath>
namespace orc {
// Walks IntegerColumnStatisticsImpl through its lifecycle: the freshly
// constructed state, value counting, min/max/sum tracking, merging with a
// second instance, and loss of the sum once it overflows in either direction.
TEST(ColumnStatistics, intColumnStatistics) {
  IntegerColumnStatisticsImpl stats;

  // A new instance reports no values, no nulls, no bounds, and a zero sum.
  EXPECT_EQ(0, stats.getNumberOfValues());
  EXPECT_FALSE(stats.hasNull());
  EXPECT_FALSE(stats.hasMinimum());
  EXPECT_FALSE(stats.hasMaximum());
  EXPECT_TRUE(stats.hasSum());
  EXPECT_EQ(0, stats.getSum());

  // Each increase() adds to the running value count; adding zero is a no-op.
  stats.increase(1);
  EXPECT_EQ(1, stats.getNumberOfValues());
  stats.increase(0);
  EXPECT_EQ(1, stats.getNumberOfValues());
  stats.increase(100);
  EXPECT_EQ(101, stats.getNumberOfValues());
  stats.increase(9999999999999899L);
  EXPECT_EQ(10000000000000000L, stats.getNumberOfValues());

  // The first update establishes both bounds.
  stats.update(0, 1);
  EXPECT_TRUE(stats.hasMinimum());
  EXPECT_TRUE(stats.hasMaximum());
  EXPECT_EQ(0, stats.getMaximum());
  EXPECT_EQ(0, stats.getMinimum());
  EXPECT_EQ(0, stats.getSum());

  // Values on either side of zero widen the range: -100 + 101 == 1.
  stats.update(-100, 1);
  stats.update(101, 1);
  EXPECT_EQ(101, stats.getMaximum());
  EXPECT_EQ(-100, stats.getMinimum());
  EXPECT_EQ(1, stats.getSum());

  // Values inside the range leave the bounds alone; the expected sum grows by
  // -50 * 2 + 50 * 3 == 50.
  stats.update(-50, 2);
  stats.update(50, 3);
  EXPECT_EQ(101, stats.getMaximum());
  EXPECT_EQ(-100, stats.getMinimum());
  EXPECT_EQ(51, stats.getSum());

  // Build a second instance through the setters and confirm it reads back.
  IntegerColumnStatisticsImpl mergeSource;
  mergeSource.setHasNull(true);
  mergeSource.increase(100);
  mergeSource.setMaximum(9999);
  mergeSource.setMinimum(-9999);
  mergeSource.setSum(100000);
  EXPECT_EQ(100, mergeSource.getNumberOfValues());
  EXPECT_TRUE(mergeSource.hasNull());
  EXPECT_EQ(9999, mergeSource.getMaximum());
  EXPECT_EQ(-9999, mergeSource.getMinimum());
  EXPECT_TRUE(mergeSource.hasSum());
  EXPECT_EQ(100000, mergeSource.getSum());

  // Merging adds counts and sums, ORs the null flag, and widens the bounds.
  stats.merge(mergeSource);
  EXPECT_EQ(10000000000000100L, stats.getNumberOfValues());
  EXPECT_TRUE(stats.hasNull());
  EXPECT_EQ(100051, stats.getSum());
  EXPECT_EQ(9999, stats.getMaximum());
  EXPECT_EQ(-9999, stats.getMinimum());

  // Positive overflow: the source loses its sum, and so does the merge target.
  const int64_t largestValue = std::numeric_limits<int64_t>::max();
  mergeSource.update(largestValue, 1);
  EXPECT_FALSE(mergeSource.hasSum());
  stats.merge(mergeSource);
  EXPECT_FALSE(stats.hasSum());

  // Negative overflow: both sums are valid on their own, but adding them
  // drops below the int64 minimum.
  const int64_t nearSmallestValue = std::numeric_limits<int64_t>::min() + 500;
  stats.setSum(-1000);
  mergeSource.setSum(nearSmallestValue);
  EXPECT_EQ(-1000, stats.getSum());
  EXPECT_EQ(nearSmallestValue, mergeSource.getSum());
  stats.merge(mergeSource);
  EXPECT_FALSE(stats.hasSum());
}
// Exercises DoubleColumnStatisticsImpl: initial state, value counting,
// min/max/sum tracking across several updates, and merging.
//
// Floating-point results are compared with EXPECT_NEAR so that a failure
// prints the expected value, the actual value and their difference, instead
// of the bare "false" that EXPECT_TRUE(std::abs(...) < eps) would report.
TEST(ColumnStatistics, doubleColumnStatistics) {
  // absolute tolerance used for every floating-point comparison below
  const double kTolerance = 0.00001;

  std::unique_ptr<DoubleColumnStatisticsImpl> dblStats(
      new DoubleColumnStatisticsImpl());

  // initial state
  EXPECT_EQ(0, dblStats->getNumberOfValues());
  EXPECT_FALSE(dblStats->hasNull());
  EXPECT_FALSE(dblStats->hasMinimum());
  EXPECT_FALSE(dblStats->hasMaximum());
  EXPECT_TRUE(dblStats->hasSum());
  EXPECT_NEAR(0.0, dblStats->getSum(), kTolerance);

  // normal operations
  dblStats->increase(1);
  EXPECT_EQ(1, dblStats->getNumberOfValues());

  dblStats->increase(0);
  EXPECT_EQ(1, dblStats->getNumberOfValues());

  dblStats->increase(100);
  EXPECT_EQ(101, dblStats->getNumberOfValues());

  dblStats->increase(899);
  EXPECT_EQ(1000, dblStats->getNumberOfValues());

  // the first update establishes minimum, maximum and sum
  dblStats->update(5.5);
  EXPECT_TRUE(dblStats->hasMinimum());
  EXPECT_TRUE(dblStats->hasMaximum());
  EXPECT_NEAR(5.5, dblStats->getMaximum(), kTolerance);
  EXPECT_NEAR(5.5, dblStats->getMinimum(), kTolerance);
  EXPECT_NEAR(5.5, dblStats->getSum(), kTolerance);

  dblStats->update(13.25);
  dblStats->update(0.11117);
  dblStats->update(1000232.535);
  dblStats->update(-324.43);
  dblStats->update(-95454.5343);
  dblStats->update(63433.54543);
  // 967905.9773 is the sum of the seven values passed to update() above
  EXPECT_NEAR(967905.9773, dblStats->getSum(), kTolerance);
  EXPECT_NEAR(1000232.535, dblStats->getMaximum(), kTolerance);
  EXPECT_NEAR(-95454.5343, dblStats->getMinimum(), kTolerance);

  // test merge
  std::unique_ptr<DoubleColumnStatisticsImpl> other(
      new DoubleColumnStatisticsImpl());

  other->setHasNull(true);
  other->increase(987);
  other->setMaximum(1000232.5355);
  other->setMinimum(-9999.0);
  other->setSum(3435.343);
  EXPECT_EQ(987, other->getNumberOfValues());
  EXPECT_TRUE(other->hasNull());
  EXPECT_NEAR(1000232.5355, other->getMaximum(), kTolerance);
  EXPECT_NEAR(-9999.0, other->getMinimum(), kTolerance);
  EXPECT_NEAR(3435.343, other->getSum(), kTolerance);

  // the merged maximum comes from `other`, the minimum stays with dblStats,
  // and counts and sums are added together
  dblStats->merge(*other);
  EXPECT_EQ(1987, dblStats->getNumberOfValues());
  EXPECT_TRUE(dblStats->hasNull());
  EXPECT_NEAR(1000232.5355, dblStats->getMaximum(), kTolerance);
  EXPECT_NEAR(-95454.5343, dblStats->getMinimum(), kTolerance);
  EXPECT_NEAR(971341.3203, dblStats->getSum(), kTolerance);
}
// Checks how StringColumnStatisticsImpl tracks minimum, maximum and total
// length as strings of various lengths (including a null pointer and a
// zero-length view) are fed to update().
TEST(ColumnStatistics, stringColumnStatistics) {
  StringColumnStatisticsImpl stats;

  // Nothing recorded yet: no bounds, no values, and a zero total length.
  EXPECT_FALSE(stats.hasMinimum());
  EXPECT_FALSE(stats.hasMaximum());
  EXPECT_EQ(0, stats.getNumberOfValues());
  EXPECT_TRUE(stats.hasTotalLength());
  EXPECT_EQ(0, stats.getTotalLength());

  // The first string becomes both the minimum and the maximum.
  stats.update("abc", 3);
  EXPECT_TRUE(stats.hasMinimum());
  EXPECT_TRUE(stats.hasMaximum());
  EXPECT_TRUE(stats.hasTotalLength());
  EXPECT_EQ(3, stats.getTotalLength());
  EXPECT_EQ("abc", stats.getMaximum());
  EXPECT_EQ("abc", stats.getMinimum());

  // A prefix of the current minimum sorts lower and replaces it.
  stats.update("ab", 2);
  EXPECT_EQ(5, stats.getTotalLength());
  EXPECT_EQ("abc", stats.getMaximum());
  EXPECT_EQ("ab", stats.getMinimum());

  // A null pointer with length zero is expected to leave everything as is.
  stats.update(nullptr, 0);
  EXPECT_EQ(5, stats.getTotalLength());
  EXPECT_EQ("abc", stats.getMaximum());
  EXPECT_EQ("ab", stats.getMinimum());

  // A longer string that sorts higher replaces the maximum.
  stats.update("abcd", 4);
  EXPECT_EQ(9, stats.getTotalLength());
  EXPECT_EQ("abcd", stats.getMaximum());
  EXPECT_EQ("ab", stats.getMinimum());

  // Only the stated length counts: "xyz" with length 0 is expected to be
  // recorded as the empty string, which becomes the new minimum.
  stats.update("xyz", 0);
  EXPECT_EQ(9, stats.getTotalLength());
  EXPECT_EQ("abcd", stats.getMaximum());
  EXPECT_EQ("", stats.getMinimum());
}
// Checks the true/false counters of BooleanColumnStatisticsImpl, both when
// updated directly and after merging a second instance.
TEST(ColumnStatistics, boolColumnStatistics) {
  BooleanColumnStatisticsImpl stats;

  // A new instance has no values, no nulls, and both counters at zero.
  EXPECT_EQ(0, stats.getNumberOfValues());
  EXPECT_FALSE(stats.hasNull());
  EXPECT_EQ(0, stats.getTrueCount());
  EXPECT_EQ(0, stats.getFalseCount());

  // Record five values: three true and two false.
  stats.increase(5);
  stats.update(true, 3);
  stats.update(false, 2);
  EXPECT_EQ(5, stats.getNumberOfValues());
  EXPECT_EQ(2, stats.getFalseCount());
  EXPECT_EQ(3, stats.getTrueCount());

  // Second instance: one hundred values split evenly, with the null flag set.
  BooleanColumnStatisticsImpl mergeSource;
  mergeSource.setHasNull(true);
  mergeSource.increase(100);
  mergeSource.update(true, 50);
  mergeSource.update(false, 50);

  // After the merge the counters are summed and the null flag carries over.
  stats.merge(mergeSource);
  EXPECT_EQ(105, stats.getNumberOfValues());
  EXPECT_TRUE(stats.hasNull());
  EXPECT_EQ(53, stats.getTrueCount());
  EXPECT_EQ(52, stats.getFalseCount());
}
// Exercises TimestampColumnStatisticsImpl: initial state, min/max tracking
// through update(), and widening of the range through merge().
TEST(ColumnStatistics, timestampColumnStatistics) {
  std::unique_ptr<TimestampColumnStatisticsImpl> tsStats(
      new TimestampColumnStatisticsImpl());

  // Neither bound is known before the first update. The original checked
  // hasMaximum() twice and never looked at hasMinimum().
  EXPECT_FALSE(tsStats->hasMinimum() || tsStats->hasMaximum());

  // normal operations
  tsStats->update(100);
  EXPECT_EQ(100, tsStats->getMaximum());
  EXPECT_EQ(100, tsStats->getMinimum());

  tsStats->update(150);
  EXPECT_EQ(150, tsStats->getMaximum());
  EXPECT_EQ(100, tsStats->getMinimum());

  // test merge
  std::unique_ptr<TimestampColumnStatisticsImpl> other(
      new TimestampColumnStatisticsImpl());

  other->setMaximum(160);
  other->setMinimum(90);

  tsStats->merge(*other);
  // The merge target [100, 150] must widen to cover the source [90, 160].
  // The original asserted only on `other`, so it never verified the merge.
  EXPECT_EQ(160, tsStats->getMaximum());
  EXPECT_EQ(90, tsStats->getMinimum());
  // The merge source must be left unchanged.
  EXPECT_EQ(160, other->getMaximum());
  EXPECT_EQ(90, other->getMinimum());
}
// Exercises DateColumnStatisticsImpl: initial state, min/max tracking through
// update(), and widening of the range through merge().
TEST(ColumnStatistics, dateColumnStatistics) {
  std::unique_ptr<DateColumnStatisticsImpl> dateStats(
      new DateColumnStatisticsImpl());

  // Neither bound is known before the first update. The original checked
  // hasMaximum() twice and never looked at hasMinimum().
  EXPECT_FALSE(dateStats->hasMinimum() || dateStats->hasMaximum());

  // normal operations
  dateStats->update(100);
  EXPECT_EQ(100, dateStats->getMaximum());
  EXPECT_EQ(100, dateStats->getMinimum());

  dateStats->update(150);
  EXPECT_EQ(150, dateStats->getMaximum());
  EXPECT_EQ(100, dateStats->getMinimum());

  // test merge
  std::unique_ptr<DateColumnStatisticsImpl> other(
      new DateColumnStatisticsImpl());

  other->setMaximum(160);
  other->setMinimum(90);

  dateStats->merge(*other);
  // The merge target [100, 150] must widen to cover the source [90, 160].
  // The original asserted only on `other`, so it never verified the merge.
  EXPECT_EQ(160, dateStats->getMaximum());
  EXPECT_EQ(90, dateStats->getMinimum());
  // The merge source must be left unchanged.
  EXPECT_EQ(160, other->getMaximum());
  EXPECT_EQ(90, other->getMinimum());
}
// Covers the generic ColumnStatisticsImpl, which in this test exposes only a
// value count and a null flag.
TEST(ColumnStatistics, otherColumnStatistics) {
  ColumnStatisticsImpl genericStats;

  // Starts out empty and without nulls.
  EXPECT_EQ(0, genericStats.getNumberOfValues());
  EXPECT_FALSE(genericStats.hasNull());

  // Successive increase() calls accumulate: 5, then 5 + 10.
  genericStats.increase(5);
  EXPECT_EQ(5, genericStats.getNumberOfValues());
  genericStats.increase(10);
  EXPECT_EQ(15, genericStats.getNumberOfValues());

  // The null flag reads back what was set.
  genericStats.setHasNull(true);
  EXPECT_TRUE(genericStats.hasNull());
}
// Exercises DecimalColumnStatisticsImpl: initial state, min/max/sum tracking
// for decimals of equal and differing scales, large and negative values, and
// loss of the sum on overflow.
TEST(ColumnStatistics, decimalColumnStatistics) {
  DecimalColumnStatisticsImpl stats;

  // A new instance has no values, no nulls, no bounds, and a sum of 0 at
  // scale 0.
  EXPECT_EQ(0, stats.getNumberOfValues());
  EXPECT_FALSE(stats.hasNull());
  EXPECT_FALSE(stats.hasMinimum());
  EXPECT_FALSE(stats.hasMaximum());
  EXPECT_TRUE(stats.hasSum());
  EXPECT_EQ(Int128(0), stats.getSum().value);
  EXPECT_EQ(0, stats.getSum().scale);

  // The first value (100 at scale 1) becomes minimum, maximum and sum.
  stats.update(Decimal(100, 1));
  EXPECT_TRUE(stats.hasMinimum());
  EXPECT_TRUE(stats.hasMaximum());
  EXPECT_TRUE(stats.hasSum());
  EXPECT_EQ(Int128(100), stats.getMaximum().value);
  EXPECT_EQ(1, stats.getMaximum().scale);
  EXPECT_EQ(Int128(100), stats.getMinimum().value);
  EXPECT_EQ(1, stats.getMinimum().scale);
  EXPECT_EQ(Int128(100), stats.getSum().value);
  EXPECT_EQ(1, stats.getSum().scale);

  // More values at the same scale: 100 + 90 + 110 == 300.
  stats.update(Decimal(90, 1));
  stats.update(Decimal(110, 1));
  EXPECT_EQ(Int128(110), stats.getMaximum().value);
  EXPECT_EQ(1, stats.getMaximum().scale);
  EXPECT_EQ(Int128(90), stats.getMinimum().value);
  EXPECT_EQ(1, stats.getMinimum().scale);
  EXPECT_EQ(Int128(300), stats.getSum().value);
  EXPECT_EQ(1, stats.getSum().scale);

  // Values at other scales: 555 at scale 3 is the new minimum, and the sum
  // is expected at the largest scale seen (3).
  stats.update(Decimal(100, 2));
  stats.update(Decimal(Int128(555), 3));
  stats.update(Decimal(200, 2));
  EXPECT_EQ(Int128(110), stats.getMaximum().value);
  EXPECT_EQ(1, stats.getMaximum().scale);
  EXPECT_EQ(Int128(555), stats.getMinimum().value);
  EXPECT_EQ(3, stats.getMinimum().scale);
  EXPECT_EQ(Int128(33555), stats.getSum().value);
  EXPECT_EQ(3, stats.getSum().scale);

  // Larger values and scales: the sum's scale follows the incoming scale.
  stats.update(Decimal(Int128(1000000000000L), 10));
  EXPECT_EQ(Int128(1335550000000L), stats.getSum().value);
  EXPECT_EQ(10, stats.getSum().scale);

  stats.update(Decimal(Int128("100000000000000000000000"), 22));
  EXPECT_EQ(Int128("1435550000000000000000000"), stats.getSum().value);
  EXPECT_EQ(22, stats.getSum().scale);

  // A negative value becomes the minimum and reduces the sum.
  stats.update(Decimal(-1000, 2));
  EXPECT_EQ(Int128(-1000), stats.getMinimum().value);
  EXPECT_EQ(2, stats.getMinimum().scale);
  EXPECT_EQ(Int128("1335550000000000000000000"), stats.getSum().value);
  EXPECT_EQ(22, stats.getSum().scale);

  // Adding a value too large for the sum to represent drops the sum.
  stats.update(Decimal(Int128("123456789012345678901234567890"), 10));
  EXPECT_FALSE(stats.hasSum());
}
}