// blob: 642a8019ded0faaeff136624b1d748fc76c2f3eb [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "Statistics.hh"
#include "TestUtil.hh"
#include "orc/OrcFile.hh"
#include "wrap/gmock.h"
#include "wrap/gtest-wrapper.h"
#include <cmath>
namespace orc {
// Walks IntegerColumnStatisticsImpl through value counting, min/max/sum
// updates, merging, and the cases where the sum is expected to become
// unavailable.
TEST(ColumnStatistics, intColumnStatistics) {
  constexpr int64_t kInt64Max = std::numeric_limits<int64_t>::max();
  constexpr int64_t kInt64Min = std::numeric_limits<int64_t>::min();

  IntegerColumnStatisticsImpl stats{};

  // Freshly constructed: nothing counted, no bounds, sum present and zero.
  EXPECT_EQ(0, stats.getNumberOfValues());
  EXPECT_FALSE(stats.hasNull());
  EXPECT_FALSE(stats.hasMinimum());
  EXPECT_FALSE(stats.hasMaximum());
  EXPECT_TRUE(stats.hasSum());
  EXPECT_EQ(0, stats.getSum());

  // increase() accumulates into the number of values.
  stats.increase(1);
  EXPECT_EQ(1, stats.getNumberOfValues());
  stats.increase(0);
  EXPECT_EQ(1, stats.getNumberOfValues());
  stats.increase(100);
  EXPECT_EQ(101, stats.getNumberOfValues());
  stats.increase(9999999999999899l);
  EXPECT_EQ(10000000000000000l, stats.getNumberOfValues());

  // The first update establishes both bounds.
  stats.update(0, 1);
  EXPECT_TRUE(stats.hasMinimum());
  EXPECT_TRUE(stats.hasMaximum());
  EXPECT_EQ(0, stats.getMaximum());
  EXPECT_EQ(0, stats.getMinimum());
  EXPECT_EQ(0, stats.getSum());

  stats.update(-100, 1);
  stats.update(101, 1);
  EXPECT_EQ(101, stats.getMaximum());
  EXPECT_EQ(-100, stats.getMinimum());
  EXPECT_EQ(1, stats.getSum());

  // The second argument weights the value in the sum: -50*2 + 50*3 == +50.
  stats.update(-50, 2);
  stats.update(50, 3);
  EXPECT_EQ(101, stats.getMaximum());
  EXPECT_EQ(-100, stats.getMinimum());
  EXPECT_EQ(51, stats.getSum());

  // Build a second statistics object through the setters and merge it in.
  IntegerColumnStatisticsImpl rhs{};
  rhs.setHasNull(true);
  rhs.increase(100);
  rhs.setMaximum(9999);
  rhs.setMinimum(-9999);
  rhs.setSum(100000);
  EXPECT_EQ(100, rhs.getNumberOfValues());
  EXPECT_TRUE(rhs.hasNull());
  EXPECT_EQ(9999, rhs.getMaximum());
  EXPECT_EQ(-9999, rhs.getMinimum());
  EXPECT_TRUE(rhs.hasSum());
  EXPECT_EQ(100000, rhs.getSum());

  stats.merge(rhs);
  EXPECT_EQ(10000000000000100l, stats.getNumberOfValues());
  EXPECT_TRUE(stats.hasNull());
  EXPECT_EQ(100051, stats.getSum());
  EXPECT_EQ(9999, stats.getMaximum());
  EXPECT_EQ(-9999, stats.getMinimum());

  // Positive overflow: once the source loses its sum, so does the merge target.
  rhs.update(kInt64Max, 1);
  EXPECT_FALSE(rhs.hasSum());
  stats.merge(rhs);
  EXPECT_FALSE(stats.hasSum());

  // Negative overflow: two individually valid sums whose total underflows.
  stats.setSum(-1000);
  rhs.setSum(kInt64Min + 500);
  EXPECT_EQ(-1000, stats.getSum());
  EXPECT_EQ(kInt64Min + 500, rhs.getSum());
  stats.merge(rhs);
  EXPECT_FALSE(stats.hasSum());

  // Overflow caused by a repeated value in a single update call.
  IntegerColumnStatisticsImpl repeated{};
  repeated.update(1, 1);
  EXPECT_TRUE(repeated.hasSum());
  repeated.update(kInt64Max, 3);
  EXPECT_FALSE(repeated.hasSum());
}
// Exercises DoubleColumnStatisticsImpl: value counting, min/max/sum tracking
// through update(), and merging with a second statistics object.
//
// Floating-point results are compared with EXPECT_NEAR so that a failure
// prints the expected value, the actual value and the difference.
TEST(ColumnStatistics, doubleColumnStatistics) {
  // Absolute tolerance applied to every floating-point comparison below.
  constexpr double kTolerance = 0.00001;

  auto dblStats = std::make_unique<DoubleColumnStatisticsImpl>();

  // initial state
  EXPECT_EQ(0, dblStats->getNumberOfValues());
  EXPECT_FALSE(dblStats->hasNull());
  EXPECT_FALSE(dblStats->hasMinimum());
  EXPECT_FALSE(dblStats->hasMaximum());
  EXPECT_TRUE(dblStats->hasSum());
  EXPECT_NEAR(0.0, dblStats->getSum(), kTolerance);

  // normal operations: increase() accumulates into the number of values
  dblStats->increase(1);
  EXPECT_EQ(1, dblStats->getNumberOfValues());
  dblStats->increase(0);
  EXPECT_EQ(1, dblStats->getNumberOfValues());
  dblStats->increase(100);
  EXPECT_EQ(101, dblStats->getNumberOfValues());
  dblStats->increase(899);
  EXPECT_EQ(1000, dblStats->getNumberOfValues());

  // the first update establishes minimum, maximum and sum
  dblStats->update(5.5);
  EXPECT_TRUE(dblStats->hasMinimum());
  EXPECT_TRUE(dblStats->hasMaximum());
  EXPECT_NEAR(5.5, dblStats->getMaximum(), kTolerance);
  EXPECT_NEAR(5.5, dblStats->getMinimum(), kTolerance);
  EXPECT_NEAR(5.5, dblStats->getSum(), kTolerance);

  dblStats->update(13.25);
  dblStats->update(0.11117);
  dblStats->update(1000232.535);
  dblStats->update(-324.43);
  dblStats->update(-95454.5343);
  dblStats->update(63433.54543);
  EXPECT_NEAR(967905.9773, dblStats->getSum(), kTolerance);
  EXPECT_NEAR(1000232.535, dblStats->getMaximum(), kTolerance);
  EXPECT_NEAR(-95454.5343, dblStats->getMinimum(), kTolerance);

  // test merge: the source is populated through the setters
  auto other = std::make_unique<DoubleColumnStatisticsImpl>();
  other->setHasNull(true);
  other->increase(987);
  other->setMaximum(1000232.5355);
  other->setMinimum(-9999.0);
  other->setSum(3435.343);
  EXPECT_EQ(987, other->getNumberOfValues());
  EXPECT_TRUE(other->hasNull());
  EXPECT_NEAR(1000232.5355, other->getMaximum(), kTolerance);
  EXPECT_NEAR(-9999.0, other->getMinimum(), kTolerance);
  EXPECT_NEAR(3435.343, other->getSum(), kTolerance);

  dblStats->merge(*other);
  EXPECT_EQ(1987, dblStats->getNumberOfValues());
  EXPECT_TRUE(dblStats->hasNull());
  EXPECT_NEAR(1000232.5355, dblStats->getMaximum(), kTolerance);
  EXPECT_NEAR(-95454.5343, dblStats->getMinimum(), kTolerance);
  EXPECT_NEAR(971341.3203, dblStats->getSum(), kTolerance);
}
// Checks how StringColumnStatisticsImpl tracks minimum, maximum and total
// length as values (including a null pointer and a zero-length value) arrive.
TEST(ColumnStatistics, stringColumnStatistics) {
  StringColumnStatisticsImpl stats{};

  // Nothing recorded yet: no bounds, total length present and zero.
  EXPECT_FALSE(stats.hasMinimum());
  EXPECT_FALSE(stats.hasMaximum());
  EXPECT_EQ(0, stats.getNumberOfValues());
  EXPECT_TRUE(stats.hasTotalLength());
  EXPECT_EQ(0, stats.getTotalLength());

  // The first value becomes both bounds.
  stats.update("abc", 3);
  EXPECT_TRUE(stats.hasMinimum());
  EXPECT_TRUE(stats.hasMaximum());
  EXPECT_TRUE(stats.hasTotalLength());
  EXPECT_EQ(3, stats.getTotalLength());
  EXPECT_EQ("abc", stats.getMaximum());
  EXPECT_EQ("abc", stats.getMinimum());

  // A prefix of the current minimum replaces it.
  stats.update("ab", 2);
  EXPECT_EQ(5, stats.getTotalLength());
  EXPECT_EQ("abc", stats.getMaximum());
  EXPECT_EQ("ab", stats.getMinimum());

  // A null pointer leaves total length and both bounds unchanged.
  stats.update(nullptr, 0);
  EXPECT_EQ(5, stats.getTotalLength());
  EXPECT_EQ("abc", stats.getMaximum());
  EXPECT_EQ("ab", stats.getMinimum());

  // A longer value with the same prefix replaces the maximum.
  stats.update("abcd", 4);
  EXPECT_EQ(9, stats.getTotalLength());
  EXPECT_EQ("abcd", stats.getMaximum());
  EXPECT_EQ("ab", stats.getMinimum());

  // With length 0 the value is recorded as the empty string, which becomes
  // the new minimum and adds nothing to the total length.
  stats.update("xyz", 0);
  EXPECT_EQ(9, stats.getTotalLength());
  EXPECT_EQ("abcd", stats.getMaximum());
  EXPECT_EQ("", stats.getMinimum());
}
// Checks the true/false counters of BooleanColumnStatisticsImpl, both for
// direct updates and after merging a second statistics object.
TEST(ColumnStatistics, boolColumnStatistics) {
  BooleanColumnStatisticsImpl stats{};

  // Freshly constructed: all counters are zero and no null has been seen.
  EXPECT_EQ(0, stats.getNumberOfValues());
  EXPECT_FALSE(stats.hasNull());
  EXPECT_EQ(0, stats.getTrueCount());
  EXPECT_EQ(0, stats.getFalseCount());

  // Five values: three true, two false.
  stats.increase(5);
  stats.update(true, 3);
  stats.update(false, 2);
  EXPECT_EQ(5, stats.getNumberOfValues());
  EXPECT_EQ(2, stats.getFalseCount());
  EXPECT_EQ(3, stats.getTrueCount());

  // Merge in one hundred more values, evenly split, plus a null flag.
  BooleanColumnStatisticsImpl rhs{};
  rhs.setHasNull(true);
  rhs.increase(100);
  rhs.update(true, 50);
  rhs.update(false, 50);

  stats.merge(rhs);
  EXPECT_EQ(105, stats.getNumberOfValues());
  EXPECT_TRUE(stats.hasNull());
  EXPECT_EQ(53, stats.getTrueCount());
  EXPECT_EQ(52, stats.getFalseCount());
}
// Exercises TimestampColumnStatisticsImpl when only millisecond values are
// supplied: min/max tracking through update() and through merge().
//
// Throughout the test the nanosecond getters are expected to report 0 for the
// minimum and 999999 for the maximum, since no nanosecond values are passed.
TEST(ColumnStatistics, timestampColumnStatistics) {
  auto tsStats = std::make_unique<TimestampColumnStatisticsImpl>();

  // initial state: neither bound is set (check BOTH minimum and maximum)
  EXPECT_FALSE(tsStats->hasMinimum() || tsStats->hasMaximum());

  // normal operations
  tsStats->update(100);
  EXPECT_EQ(100, tsStats->getMaximum());
  EXPECT_EQ(100, tsStats->getMinimum());
  EXPECT_EQ(0, tsStats->getMinimumNanos());
  EXPECT_EQ(999999, tsStats->getMaximumNanos());

  tsStats->update(150);
  EXPECT_EQ(150, tsStats->getMaximum());
  EXPECT_EQ(100, tsStats->getMinimum());
  EXPECT_EQ(0, tsStats->getMinimumNanos());
  EXPECT_EQ(999999, tsStats->getMaximumNanos());

  // test merge: the source widens the range on both ends
  auto other = std::make_unique<TimestampColumnStatisticsImpl>();
  other->setMaximum(160);
  other->setMinimum(90);
  tsStats->merge(*other);
  EXPECT_EQ(160, tsStats->getMaximum());
  EXPECT_EQ(90, tsStats->getMinimum());
  EXPECT_EQ(0, tsStats->getMinimumNanos());
  EXPECT_EQ(999999, tsStats->getMaximumNanos());
}
// Exercises DateColumnStatisticsImpl: min/max tracking through update() and
// through merge().
TEST(ColumnStatistics, dateColumnStatistics) {
  auto dateStats = std::make_unique<DateColumnStatisticsImpl>();

  // initial state: neither bound is set (check BOTH minimum and maximum)
  EXPECT_FALSE(dateStats->hasMinimum() || dateStats->hasMaximum());

  // normal operations
  dateStats->update(100);
  EXPECT_EQ(100, dateStats->getMaximum());
  EXPECT_EQ(100, dateStats->getMinimum());

  dateStats->update(150);
  EXPECT_EQ(150, dateStats->getMaximum());
  EXPECT_EQ(100, dateStats->getMinimum());

  // test merge: the source widens the range on both ends
  auto other = std::make_unique<DateColumnStatisticsImpl>();
  other->setMaximum(160);
  other->setMinimum(90);
  dateStats->merge(*other);

  // The merge target must have adopted the wider bounds ...
  EXPECT_EQ(160, dateStats->getMaximum());
  EXPECT_EQ(90, dateStats->getMinimum());
  // ... and the merge source must be left untouched.
  EXPECT_EQ(160, other->getMaximum());
  EXPECT_EQ(90, other->getMinimum());
}
// Checks the generic ColumnStatisticsImpl: value counting and the null flag.
TEST(ColumnStatistics, otherColumnStatistics) {
  ColumnStatisticsImpl genericStats{};

  // Freshly constructed: no values, no nulls.
  EXPECT_EQ(0, genericStats.getNumberOfValues());
  EXPECT_FALSE(genericStats.hasNull());

  // increase() accumulates into the number of values.
  genericStats.increase(5);
  EXPECT_EQ(5, genericStats.getNumberOfValues());
  genericStats.increase(10);
  EXPECT_EQ(15, genericStats.getNumberOfValues());

  // The null flag is reported back once set.
  genericStats.setHasNull(true);
  EXPECT_TRUE(genericStats.hasNull());
}
// Checks DecimalColumnStatisticsImpl: min/max/sum tracking across values of
// equal and differing scales, negative values, and sum overflow.
TEST(ColumnStatistics, decimalColumnStatistics) {
  DecimalColumnStatisticsImpl stats{};

  // Freshly constructed: no bounds, sum present with value 0 and scale 0.
  EXPECT_EQ(0, stats.getNumberOfValues());
  EXPECT_FALSE(stats.hasNull());
  EXPECT_FALSE(stats.hasMinimum());
  EXPECT_FALSE(stats.hasMaximum());
  EXPECT_TRUE(stats.hasSum());
  EXPECT_EQ(Int128(0), stats.getSum().value);
  EXPECT_EQ(0, stats.getSum().scale);

  // The first value becomes minimum, maximum and sum.
  stats.update(Decimal(100, 1));
  EXPECT_TRUE(stats.hasMinimum());
  EXPECT_TRUE(stats.hasMaximum());
  EXPECT_TRUE(stats.hasSum());
  EXPECT_EQ(Int128(100), stats.getMaximum().value);
  EXPECT_EQ(1, stats.getMaximum().scale);
  EXPECT_EQ(Int128(100), stats.getMinimum().value);
  EXPECT_EQ(1, stats.getMinimum().scale);
  EXPECT_EQ(Int128(100), stats.getSum().value);
  EXPECT_EQ(1, stats.getSum().scale);

  // Further values that share the existing scale.
  stats.update(Decimal(90, 1));
  stats.update(Decimal(110, 1));
  EXPECT_EQ(Int128(110), stats.getMaximum().value);
  EXPECT_EQ(1, stats.getMaximum().scale);
  EXPECT_EQ(Int128(90), stats.getMinimum().value);
  EXPECT_EQ(1, stats.getMinimum().scale);
  EXPECT_EQ(Int128(300), stats.getSum().value);
  EXPECT_EQ(1, stats.getSum().scale);

  // Values with other scales: the sum is reported at the largest scale seen,
  // while each bound keeps the scale of the value that set it.
  stats.update(Decimal(100, 2));
  stats.update(Decimal(Int128(555), 3));
  stats.update(Decimal(200, 2));
  EXPECT_EQ(Int128(110), stats.getMaximum().value);
  EXPECT_EQ(1, stats.getMaximum().scale);
  EXPECT_EQ(Int128(555), stats.getMinimum().value);
  EXPECT_EQ(3, stats.getMinimum().scale);
  EXPECT_EQ(Int128(33555), stats.getSum().value);
  EXPECT_EQ(3, stats.getSum().scale);

  // Large values and large scales.
  stats.update(Decimal(Int128(1000000000000l), 10));
  EXPECT_EQ(Int128(1335550000000l), stats.getSum().value);
  EXPECT_EQ(10, stats.getSum().scale);
  stats.update(Decimal(Int128("100000000000000000000000"), 22));
  EXPECT_EQ(Int128("1435550000000000000000000"), stats.getSum().value);
  EXPECT_EQ(22, stats.getSum().scale);

  // A negative value lowers both the minimum and the sum.
  stats.update(Decimal(-1000, 2));
  EXPECT_EQ(Int128(-1000), stats.getMinimum().value);
  EXPECT_EQ(2, stats.getMinimum().scale);
  EXPECT_EQ(Int128("1335550000000000000000000"), stats.getSum().value);
  EXPECT_EQ(22, stats.getSum().scale);

  // After this value the sum is expected to be unavailable (overflow).
  stats.update(Decimal(Int128("123456789012345678901234567890"), 10));
  EXPECT_FALSE(stats.hasSum());
}
// Checks TimestampColumnStatisticsImpl when nanosecond values accompany the
// milliseconds: bounds are ordered by millis first, then by nanos, for both
// update() and merge().
TEST(ColumnStatistics, timestampColumnStatisticsWithNanos) {
  // Feeds (100, 1000), (200, 2000), ..., (102400, 1024000) into `target`,
  // bumping its value count once per entry.
  auto feedSequence = [](TimestampColumnStatisticsImpl& target) {
    for (int32_t step = 1; step <= 1024; ++step) {
      target.update(step * 100, step * 1000);
      target.increase(1);
    }
  };

  TimestampColumnStatisticsImpl stats{};
  feedSequence(stats);
  EXPECT_EQ(102400, stats.getMaximum());
  EXPECT_EQ(1024000, stats.getMaximumNanos());
  EXPECT_EQ(100, stats.getMinimum());
  EXPECT_EQ(1000, stats.getMinimumNanos());

  // Same millis as the current bounds; only the nanos differ.
  stats.update(102400, 1024001);
  stats.update(102400, 1023999);
  stats.update(100, 1001);
  stats.update(100, 999);
  EXPECT_EQ(102400, stats.getMaximum());
  EXPECT_EQ(1024001, stats.getMaximumNanos());
  EXPECT_EQ(100, stats.getMinimum());
  EXPECT_EQ(999, stats.getMinimumNanos());

  // Merging a range that lies inside the current bounds changes nothing.
  TimestampColumnStatisticsImpl insideRange{};
  feedSequence(insideRange);
  stats.merge(insideRange);
  EXPECT_EQ(102400, stats.getMaximum());
  EXPECT_EQ(1024001, stats.getMaximumNanos());
  EXPECT_EQ(100, stats.getMinimum());
  EXPECT_EQ(999, stats.getMinimumNanos());

  // Merging bounds that differ from the current ones only in the nanos.
  TimestampColumnStatisticsImpl nanosOnly{};
  nanosOnly.update(102400, 1024002);
  nanosOnly.update(100, 998);
  stats.merge(nanosOnly);
  EXPECT_EQ(102400, stats.getMaximum());
  EXPECT_EQ(1024002, stats.getMaximumNanos());
  EXPECT_EQ(100, stats.getMinimum());
  EXPECT_EQ(998, stats.getMinimumNanos());

  // Merging bounds that differ in the millis: their nanos replace the old ones.
  TimestampColumnStatisticsImpl widerMillis{};
  widerMillis.update(102401, 1);
  widerMillis.update(99, 1);
  stats.merge(widerMillis);
  EXPECT_EQ(102401, stats.getMaximum());
  EXPECT_EQ(1, stats.getMaximumNanos());
  EXPECT_EQ(99, stats.getMinimum());
  EXPECT_EQ(1, stats.getMinimumNanos());
}
// Round-trips TimestampColumnStatisticsImpl through proto::ColumnStatistics,
// first without and then with nanosecond information.
TEST(ColumnStatistics, timestampColumnStatisticsProbubuf) {
  TimestampColumnStatisticsImpl source{};
  source.increase(2);
  source.update(100);
  source.update(200);

  // Millisecond-only statistics: the serialized form carries no nanos fields.
  proto::ColumnStatistics serialized;
  source.toProtoBuf(serialized);
  const auto& millisOnly = serialized.timestamp_statistics();
  EXPECT_EQ(100, millisOnly.minimum_utc());
  EXPECT_EQ(200, millisOnly.maximum_utc());
  EXPECT_FALSE(millisOnly.has_minimum_nanos());
  EXPECT_FALSE(millisOnly.has_maximum_nanos());

  // Reading that back reports nanos of 0 (minimum) and 999999 (maximum).
  StatContext ctx(true, nullptr);
  TimestampColumnStatisticsImpl restoredMillisOnly(serialized, ctx);
  EXPECT_EQ(100, restoredMillisOnly.getMinimum());
  EXPECT_EQ(200, restoredMillisOnly.getMaximum());
  EXPECT_EQ(0, restoredMillisOnly.getMinimumNanos());
  EXPECT_EQ(999999, restoredMillisOnly.getMaximumNanos());

  // New bounds that carry nanos.
  source.update(50, 5555);
  source.update(500, 9999);
  serialized.Clear();
  source.toProtoBuf(serialized);
  EXPECT_EQ(50, serialized.timestamp_statistics().minimum_utc());
  EXPECT_EQ(500, serialized.timestamp_statistics().maximum_utc());
  EXPECT_TRUE(serialized.timestamp_statistics().has_minimum_nanos());
  EXPECT_TRUE(serialized.timestamp_statistics().has_maximum_nanos());
  // The serialized nanos are expected to be the in-memory value plus one.
  EXPECT_EQ(5555 + 1, serialized.timestamp_statistics().minimum_nanos());
  EXPECT_EQ(9999 + 1, serialized.timestamp_statistics().maximum_nanos());

  // Reading back yields the original nanos again.
  TimestampColumnStatisticsImpl restoredWithNanos(serialized, ctx);
  EXPECT_EQ(50, restoredWithNanos.getMinimum());
  EXPECT_EQ(500, restoredWithNanos.getMaximum());
  EXPECT_EQ(5555, restoredWithNanos.getMinimumNanos());
  EXPECT_EQ(9999, restoredWithNanos.getMaximumNanos());
}
// Checks CollectionColumnStatisticsImpl: value counting, min/max/total
// children, merging, and loss of the total on overflow.
TEST(ColumnStatistics, collectionColumnStatistics) {
  constexpr uint64_t kUint64Max = std::numeric_limits<uint64_t>::max();

  CollectionColumnStatisticsImpl stats{};

  // Freshly constructed: no bounds, total children present and zero.
  EXPECT_EQ(0, stats.getNumberOfValues());
  EXPECT_FALSE(stats.hasNull());
  EXPECT_FALSE(stats.hasMinimumChildren());
  EXPECT_FALSE(stats.hasMaximumChildren());
  EXPECT_TRUE(stats.hasTotalChildren());
  EXPECT_EQ(0, stats.getTotalChildren());

  // increase() accumulates into the number of values.
  stats.increase(1);
  EXPECT_EQ(1, stats.getNumberOfValues());
  stats.increase(0);
  EXPECT_EQ(1, stats.getNumberOfValues());
  stats.increase(9999999999999999l);
  EXPECT_EQ(10000000000000000l, stats.getNumberOfValues());

  // update() feeds child counts into min, max and total.
  stats.update(10);
  EXPECT_EQ(10, stats.getMaximumChildren());
  EXPECT_EQ(10, stats.getMinimumChildren());
  stats.update(20);
  EXPECT_EQ(20, stats.getMaximumChildren());
  EXPECT_EQ(10, stats.getMinimumChildren());
  EXPECT_EQ(30, stats.getTotalChildren());

  // Merge: the source keeps its own bounds, the target takes the union and
  // adds the totals (30 + 40 + 30).
  CollectionColumnStatisticsImpl rhs{};
  rhs.update(40);
  rhs.update(30);
  stats.merge(rhs);
  EXPECT_EQ(40, rhs.getMaximumChildren());
  EXPECT_EQ(30, rhs.getMinimumChildren());
  EXPECT_EQ(40, stats.getMaximumChildren());
  EXPECT_EQ(10, stats.getMinimumChildren());
  EXPECT_EQ(100, stats.getTotalChildren());

  // Overflow through update(): the total becomes unavailable.
  rhs.update(kUint64Max);
  EXPECT_FALSE(rhs.hasTotalChildren());

  // Overflow through merge(): two valid totals whose sum does not fit.
  rhs.setTotalChildren(kUint64Max - 50);
  EXPECT_EQ(kUint64Max - 50, rhs.getTotalChildren());
  stats.merge(rhs);
  EXPECT_FALSE(stats.hasTotalChildren());
}
// A default-constructed GeospatialColumnStatisticsImpl has no geometry types
// and a bounding box whose every dimension is empty but valid.
TEST(ColumnStatistics, TestGeospatialDefaults) {
  auto stats = std::make_unique<GeospatialColumnStatisticsImpl>();
  EXPECT_TRUE(stats->getGeospatialTypes().empty());

  const auto& box = stats->getBoundingBox();
  for (int dim = 0; dim < geospatial::MAX_DIMENSIONS; dim++) {
    EXPECT_TRUE(box.boundEmpty(dim));
    EXPECT_TRUE(box.boundValid(dim));
  }

  // The textual form spells out each empty dimension and an empty type list.
  EXPECT_EQ("<GeoStatistics> x: empty y: empty z: empty m: empty geometry_types: []",
            stats->toString());
}
// Feeds a series of WKB points into GeospatialColumnStatisticsImpl and, after
// each one, compares the bounding box and the geometry type list against
// expectations that are maintained alongside.
TEST(ColumnStatistics, TestGeospatialUpdate) {
  auto stats = std::make_unique<GeospatialColumnStatisticsImpl>();

  // Starting point: no types, every dimension empty and valid.
  EXPECT_TRUE(stats->getGeospatialTypes().empty());
  const auto& initialBox = stats->getBoundingBox();
  for (int dim = 0; dim < geospatial::MAX_DIMENSIONS; dim++) {
    EXPECT_TRUE(initialBox.boundEmpty(dim));
    EXPECT_TRUE(initialBox.boundValid(dim));
  }
  EXPECT_EQ(stats->getGeospatialTypes().size(), 0);

  // Expected state, updated by hand before each verification.
  geospatial::BoundingBox::XYZM wantMin;
  geospatial::BoundingBox::XYZM wantMax;
  std::array<bool, geospatial::MAX_DIMENSIONS> wantEmpty;
  std::array<bool, geospatial::MAX_DIMENSIONS> wantValid;
  std::vector<int32_t> wantTypes;
  wantEmpty.fill(true);
  wantValid.fill(true);
  for (int dim = 0; dim < geospatial::MAX_DIMENSIONS; dim++) {
    wantMin[dim] = geospatial::INF;
    wantMax[dim] = -geospatial::INF;
  }

  // Compares the statistics with the expected state. For a dimension that is
  // no longer valid, both bounds are expected to be NaN.
  auto checkAgainstExpected = [&]() {
    const auto& box = stats->getBoundingBox();
    EXPECT_EQ(wantEmpty, box.dimensionEmpty());
    EXPECT_EQ(wantValid, box.dimensionValid());
    EXPECT_EQ(wantTypes, stats->getGeospatialTypes());
    for (int dim = 0; dim < geospatial::MAX_DIMENSIONS; dim++) {
      if (!box.boundValid(dim)) {
        EXPECT_TRUE(std::isnan(box.lowerBound()[dim]));
        EXPECT_TRUE(std::isnan(box.upperBound()[dim]));
        continue;
      }
      EXPECT_EQ(wantMin[dim], box.lowerBound()[dim]);
      EXPECT_EQ(wantMax[dim], box.upperBound()[dim]);
    }
  };

  // Hands one WKB buffer to the statistics object.
  auto feed = [&](const std::string& wkb) { stats->update(wkb.c_str(), wkb.size()); };

  // XY point: populates x and y, adds type code 1.
  const std::string pointXY = MakeWKBPoint({10, 11}, false, false);
  feed(pointXY);
  wantMin[0] = wantMax[0] = 10;
  wantMin[1] = wantMax[1] = 11;
  wantEmpty[0] = wantEmpty[1] = false;
  wantTypes.push_back(1);
  checkAgainstExpected();

  // XYZ point: raises the x/y maxima, populates z, adds type code 1001.
  const std::string pointXYZ = MakeWKBPoint({11, 12, 13}, true, false);
  feed(pointXYZ);
  wantMax[0] = 11;
  wantMax[1] = 12;
  wantMin[2] = wantMax[2] = 13;
  wantEmpty[2] = false;
  wantTypes.push_back(1001);
  checkAgainstExpected();

  // XYM point: lowers the x/y minima, populates m, adds type code 2001.
  const std::string pointXYM = MakeWKBPoint({9, 10, 0, 11}, false, true);
  feed(pointXYM);
  wantMin[0] = 9;
  wantMin[1] = 10;
  wantMin[3] = wantMax[3] = 11;
  wantEmpty[3] = false;
  wantTypes.push_back(2001);
  checkAgainstExpected();

  // XYZM point: lowers the x/y/z minima, raises the m maximum, adds 3001.
  const std::string pointXYZM = MakeWKBPoint({8, 9, 10, 12}, true, true);
  feed(pointXYZM);
  wantMin[0] = 8;
  wantMin[1] = 9;
  wantMin[2] = 10;
  wantMax[3] = 12;
  wantTypes.push_back(3001);
  checkAgainstExpected();

  // All-NaN coordinates: the expected state is unchanged.
  // NOTE(review): four coordinates are supplied but only the Z flag is set;
  // confirm whether the M flag was meant to be enabled here as well.
  const double nan = std::numeric_limits<double>::quiet_NaN();
  const std::string pointNaN = MakeWKBPoint({nan, nan, nan, nan}, true, false);
  feed(pointNaN);
  checkAgainstExpected();

  // An empty buffer is not valid WKB: every dimension becomes invalid and the
  // type list is expected to be empty.
  const std::string invalidWKB;
  feed(invalidWKB);
  wantValid[0] = wantValid[1] = wantValid[2] = wantValid[3] = false;
  wantTypes.clear();
  checkAgainstExpected();

  // A valid point afterwards does not bring the statistics back.
  const std::string pointXYAgain = MakeWKBPoint({10, 11}, false, false);
  feed(pointXYAgain);
  checkAgainstExpected();
}
// Serializes GeospatialColumnStatisticsImpl to proto::ColumnStatistics in
// three states: empty, populated, and invalidated by a bad WKB buffer.
TEST(ColumnStatistics, TestGeospatialToProto) {
  auto stats = std::make_unique<GeospatialColumnStatisticsImpl>();
  proto::ColumnStatistics serialized;

  // Empty statistics: the geospatial message exists but has no types or bbox.
  stats->toProtoBuf(serialized);
  EXPECT_TRUE(serialized.has_geospatial_statistics());
  EXPECT_EQ(0, serialized.geospatial_statistics().geospatial_types().size());
  EXPECT_FALSE(serialized.geospatial_statistics().has_bbox());

  // One XY point: only the x and y bounds are written.
  const std::string pointXY = MakeWKBPoint({10, 11}, false, false);
  stats->update(pointXY.c_str(), pointXY.size());
  serialized.Clear();
  stats->toProtoBuf(serialized);
  EXPECT_TRUE(serialized.has_geospatial_statistics());
  EXPECT_EQ(1, serialized.geospatial_statistics().geospatial_types().size());
  EXPECT_EQ(1, serialized.geospatial_statistics().geospatial_types(0));
  EXPECT_TRUE(serialized.geospatial_statistics().has_bbox());
  const auto& boxAfterXY = serialized.geospatial_statistics().bbox();
  EXPECT_TRUE(boxAfterXY.has_xmin());
  EXPECT_TRUE(boxAfterXY.has_xmax());
  EXPECT_TRUE(boxAfterXY.has_ymin());
  EXPECT_TRUE(boxAfterXY.has_ymax());
  EXPECT_FALSE(boxAfterXY.has_zmin());
  EXPECT_FALSE(boxAfterXY.has_zmax());
  EXPECT_FALSE(boxAfterXY.has_mmin());
  EXPECT_FALSE(boxAfterXY.has_mmax());
  EXPECT_EQ(10, boxAfterXY.xmin());
  EXPECT_EQ(10, boxAfterXY.xmax());
  EXPECT_EQ(11, boxAfterXY.ymin());
  EXPECT_EQ(11, boxAfterXY.ymax());

  // Adding an XYZM point: all eight bounds are written and a second type
  // code (3001) is listed.
  const std::string pointXYZM = MakeWKBPoint({-10, -11, -12, -13}, true, true);
  stats->update(pointXYZM.c_str(), pointXYZM.size());
  serialized.Clear();
  stats->toProtoBuf(serialized);
  EXPECT_TRUE(serialized.has_geospatial_statistics());
  EXPECT_EQ(2, serialized.geospatial_statistics().geospatial_types().size());
  EXPECT_EQ(1, serialized.geospatial_statistics().geospatial_types(0));
  EXPECT_EQ(3001, serialized.geospatial_statistics().geospatial_types(1));
  EXPECT_TRUE(serialized.geospatial_statistics().has_bbox());
  const auto& boxAfterXYZM = serialized.geospatial_statistics().bbox();
  EXPECT_TRUE(boxAfterXYZM.has_xmin());
  EXPECT_TRUE(boxAfterXYZM.has_xmax());
  EXPECT_TRUE(boxAfterXYZM.has_ymin());
  EXPECT_TRUE(boxAfterXYZM.has_ymax());
  EXPECT_TRUE(boxAfterXYZM.has_zmin());
  EXPECT_TRUE(boxAfterXYZM.has_zmax());
  EXPECT_TRUE(boxAfterXYZM.has_mmin());
  EXPECT_TRUE(boxAfterXYZM.has_mmax());
  EXPECT_EQ(-10, boxAfterXYZM.xmin());
  EXPECT_EQ(10, boxAfterXYZM.xmax());
  EXPECT_EQ(-11, boxAfterXYZM.ymin());
  EXPECT_EQ(11, boxAfterXYZM.ymax());
  EXPECT_EQ(-12, boxAfterXYZM.zmin());
  EXPECT_EQ(-12, boxAfterXYZM.zmax());
  EXPECT_EQ(-13, boxAfterXYZM.mmin());
  EXPECT_EQ(-13, boxAfterXYZM.mmax());

  // An empty (invalid) WKB buffer: the serialized form goes back to having
  // neither types nor a bbox.
  const std::string invalidWKB;
  stats->update(invalidWKB.c_str(), invalidWKB.size());
  serialized.Clear();
  stats->toProtoBuf(serialized);
  EXPECT_TRUE(serialized.has_geospatial_statistics());
  EXPECT_EQ(0, serialized.geospatial_statistics().geospatial_types().size());
  EXPECT_FALSE(serialized.geospatial_statistics().has_bbox());
}
// Merges GeospatialColumnStatisticsImpl objects in several combinations:
// invalid into invalid, empty into empty, then XY, XYZ, XYZM and finally an
// invalid object into an accumulator that started out empty.
TEST(ColumnStatistics, TestGeospatialMerge) {
  const std::array<bool, 4> kAllTrue = {true, true, true, true};
  const std::array<bool, 4> kAllFalse = {false, false, false, false};

  // Invalid statistics are produced by a zero-length update.
  auto invalid = std::make_unique<GeospatialColumnStatisticsImpl>();
  invalid->update("0", 0);

  // The accumulator that every later merge goes into.
  auto merged = std::make_unique<GeospatialColumnStatisticsImpl>();

  auto onlyXY = std::make_unique<GeospatialColumnStatisticsImpl>();
  const std::string pointXY = MakeWKBPoint({10, 11}, false, false);
  onlyXY->update(pointXY.c_str(), pointXY.size());

  auto onlyXYZ = std::make_unique<GeospatialColumnStatisticsImpl>();
  const std::string pointXYZ = MakeWKBPoint({12, 13, 14}, true, false);
  onlyXYZ->update(pointXYZ.c_str(), pointXYZ.size());

  auto onlyXYZM = std::make_unique<GeospatialColumnStatisticsImpl>();
  const std::string pointXYZM = MakeWKBPoint({-10, -11, -12, -13}, true, true);
  onlyXYZM->update(pointXYZM.c_str(), pointXYZM.size());

  // Invalid merged with itself stays invalid and has no types.
  invalid->merge(*invalid);
  EXPECT_EQ(invalid->getBoundingBox().dimensionValid(), kAllFalse);
  EXPECT_EQ(invalid->getGeospatialTypes().size(), 0);

  // Empty merged with itself stays empty and valid.
  merged->merge(*merged);
  std::array<bool, 4> wantEmpty = kAllTrue;
  EXPECT_EQ(merged->getBoundingBox().dimensionValid(), kAllTrue);
  EXPECT_EQ(merged->getBoundingBox().dimensionEmpty(), wantEmpty);
  EXPECT_EQ(merged->getGeospatialTypes().size(), 0);

  // + XY: x and y are populated.
  merged->merge(*onlyXY);
  wantEmpty = {false, false, true, true};
  EXPECT_EQ(merged->getBoundingBox().dimensionValid(), kAllTrue);
  EXPECT_EQ(merged->getBoundingBox().dimensionEmpty(), wantEmpty);
  EXPECT_EQ(10, merged->getBoundingBox().lowerBound()[0]);
  EXPECT_EQ(10, merged->getBoundingBox().upperBound()[0]);
  EXPECT_EQ(11, merged->getBoundingBox().lowerBound()[1]);
  EXPECT_EQ(11, merged->getBoundingBox().upperBound()[1]);
  EXPECT_EQ(merged->getGeospatialTypes().size(), 1);
  EXPECT_EQ(merged->getGeospatialTypes()[0], 1);

  // + XYZ: the x/y maxima grow and z is populated.
  merged->merge(*onlyXYZ);
  wantEmpty = {false, false, false, true};
  EXPECT_EQ(merged->getBoundingBox().dimensionValid(), kAllTrue);
  EXPECT_EQ(merged->getBoundingBox().dimensionEmpty(), wantEmpty);
  EXPECT_EQ(10, merged->getBoundingBox().lowerBound()[0]);
  EXPECT_EQ(12, merged->getBoundingBox().upperBound()[0]);
  EXPECT_EQ(11, merged->getBoundingBox().lowerBound()[1]);
  EXPECT_EQ(13, merged->getBoundingBox().upperBound()[1]);
  EXPECT_EQ(14, merged->getBoundingBox().lowerBound()[2]);
  EXPECT_EQ(14, merged->getBoundingBox().upperBound()[2]);
  EXPECT_EQ(merged->getGeospatialTypes().size(), 2);
  EXPECT_EQ(merged->getGeospatialTypes()[0], 1);
  EXPECT_EQ(merged->getGeospatialTypes()[1], 1001);

  // + XYZM: the x/y/z minima shrink and m is populated.
  merged->merge(*onlyXYZM);
  wantEmpty = {false, false, false, false};
  EXPECT_EQ(merged->getBoundingBox().dimensionValid(), kAllTrue);
  EXPECT_EQ(merged->getBoundingBox().dimensionEmpty(), wantEmpty);
  EXPECT_EQ(-10, merged->getBoundingBox().lowerBound()[0]);
  EXPECT_EQ(12, merged->getBoundingBox().upperBound()[0]);
  EXPECT_EQ(-11, merged->getBoundingBox().lowerBound()[1]);
  EXPECT_EQ(13, merged->getBoundingBox().upperBound()[1]);
  EXPECT_EQ(-12, merged->getBoundingBox().lowerBound()[2]);
  EXPECT_EQ(14, merged->getBoundingBox().upperBound()[2]);
  EXPECT_EQ(-13, merged->getBoundingBox().lowerBound()[3]);
  EXPECT_EQ(-13, merged->getBoundingBox().upperBound()[3]);
  EXPECT_EQ(merged->getGeospatialTypes().size(), 3);
  EXPECT_EQ(merged->getGeospatialTypes()[0], 1);
  EXPECT_EQ(merged->getGeospatialTypes()[1], 1001);
  EXPECT_EQ(merged->getGeospatialTypes()[2], 3001);

  // + invalid: the accumulator becomes invalid and loses its types.
  merged->merge(*invalid);
  EXPECT_EQ(merged->getBoundingBox().dimensionValid(), kAllFalse);
  EXPECT_EQ(merged->getGeospatialTypes().size(), 0);
}
// Builds GeospatialColumnStatisticsImpl from proto::ColumnStatistics while the
// message is filled in step by step: no geostats, empty geostats, then xy,
// xyz and xyzm bounds.
TEST(ColumnStatistics, TestGeospatialFromProto) {
  proto::ColumnStatistics message;

  // No geospatial statistics in the message: no types, nothing valid.
  auto fromBare = std::make_unique<GeospatialColumnStatisticsImpl>(message);
  std::array<bool, 4> wantValid = {false, false, false, false};
  EXPECT_TRUE(fromBare->getGeospatialTypes().empty());
  EXPECT_EQ(fromBare->getBoundingBox().dimensionValid(), wantValid);

  // An empty geospatial statistics sub-message gives the same result.
  message.mutable_geospatial_statistics();
  auto fromEmptyGeo = std::make_unique<GeospatialColumnStatisticsImpl>(message);
  EXPECT_TRUE(fromEmptyGeo->getGeospatialTypes().empty());
  EXPECT_EQ(fromEmptyGeo->getBoundingBox().dimensionValid(), wantValid);

  // x and y bounds plus one type code.
  auto* geoProto = message.mutable_geospatial_statistics();
  auto* boxProto = geoProto->mutable_bbox();
  boxProto->set_xmin(0);
  boxProto->set_xmax(1);
  boxProto->set_ymin(0);
  boxProto->set_ymax(1);
  geoProto->mutable_geospatial_types()->Add(2);
  auto fromXY = std::make_unique<GeospatialColumnStatisticsImpl>(message);
  wantValid = {true, true, false, false};
  EXPECT_EQ(fromXY->getGeospatialTypes().size(), 1);
  EXPECT_EQ(fromXY->getGeospatialTypes()[0], 2);
  EXPECT_EQ(fromXY->getBoundingBox().dimensionValid(), wantValid);
  EXPECT_EQ(0, fromXY->getBoundingBox().lowerBound()[0]);
  EXPECT_EQ(1, fromXY->getBoundingBox().upperBound()[0]);
  EXPECT_EQ(0, fromXY->getBoundingBox().lowerBound()[1]);
  EXPECT_EQ(1, fromXY->getBoundingBox().upperBound()[1]);

  // Add z bounds and a second type code.
  boxProto->set_zmin(0);
  boxProto->set_zmax(1);
  geoProto->mutable_geospatial_types()->Add(1003);
  auto fromXYZ = std::make_unique<GeospatialColumnStatisticsImpl>(message);
  wantValid = {true, true, true, false};
  EXPECT_EQ(fromXYZ->getGeospatialTypes().size(), 2);
  EXPECT_EQ(fromXYZ->getGeospatialTypes()[0], 2);
  EXPECT_EQ(fromXYZ->getGeospatialTypes()[1], 1003);
  EXPECT_EQ(fromXYZ->getBoundingBox().dimensionValid(), wantValid);
  EXPECT_EQ(0, fromXYZ->getBoundingBox().lowerBound()[0]);
  EXPECT_EQ(1, fromXYZ->getBoundingBox().upperBound()[0]);
  EXPECT_EQ(0, fromXYZ->getBoundingBox().lowerBound()[1]);
  EXPECT_EQ(1, fromXYZ->getBoundingBox().upperBound()[1]);
  EXPECT_EQ(0, fromXYZ->getBoundingBox().lowerBound()[2]);
  EXPECT_EQ(1, fromXYZ->getBoundingBox().upperBound()[2]);

  // Add m bounds and a third type code.
  boxProto->set_mmin(0);
  boxProto->set_mmax(1);
  geoProto->mutable_geospatial_types()->Add(3003);
  auto fromXYZM = std::make_unique<GeospatialColumnStatisticsImpl>(message);
  wantValid = {true, true, true, true};
  EXPECT_EQ(fromXYZM->getGeospatialTypes().size(), 3);
  EXPECT_EQ(fromXYZM->getGeospatialTypes()[0], 2);
  EXPECT_EQ(fromXYZM->getGeospatialTypes()[1], 1003);
  EXPECT_EQ(fromXYZM->getGeospatialTypes()[2], 3003);
  EXPECT_EQ(fromXYZM->getBoundingBox().dimensionValid(), wantValid);
  EXPECT_EQ(0, fromXYZM->getBoundingBox().lowerBound()[0]);
  EXPECT_EQ(1, fromXYZM->getBoundingBox().upperBound()[0]);
  EXPECT_EQ(0, fromXYZM->getBoundingBox().lowerBound()[1]);
  EXPECT_EQ(1, fromXYZM->getBoundingBox().upperBound()[1]);
  EXPECT_EQ(0, fromXYZM->getBoundingBox().lowerBound()[2]);
  EXPECT_EQ(1, fromXYZM->getBoundingBox().upperBound()[2]);
  EXPECT_EQ(0, fromXYZM->getBoundingBox().lowerBound()[3]);
  EXPECT_EQ(1, fromXYZM->getBoundingBox().upperBound()[3]);
}
} // namespace orc