blob: ac9744363dc15c41078c498deae22f7761d2c986 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "orc/OrcFile.hh"
#include "orc/Reader.hh"
#include "Adaptor.hh"
#include "MemoryInputStream.hh"
#include "MemoryOutputStream.hh"
#include "wrap/gmock.h"
#include "wrap/gtest-wrapper.h"
namespace orc {
static const int DEFAULT_MEM_STREAM_SIZE = 1024 * 1024; // 1M
TEST(TestTimestampStatistics, testOldFile) {
std::stringstream ss;
if(const char* example_dir = std::getenv("ORC_EXAMPLE_DIR")) {
ss << example_dir;
} else {
ss << "../../../examples";
}
ss << "/TestOrcFile.testTimestamp.orc";
std::unique_ptr<orc::Reader> reader =
createReader(readLocalFile(ss.str().c_str()), ReaderOptions());
std::unique_ptr<orc::ColumnStatistics> footerStats = reader->getColumnStatistics(0);
const orc::TimestampColumnStatistics* footerColStats =
reinterpret_cast<const orc::TimestampColumnStatistics*>(footerStats.get());
std::unique_ptr<orc::StripeStatistics> stripeStats = reader->getStripeStatistics(0);
const orc::TimestampColumnStatistics* stripeColStats =
reinterpret_cast<const orc::TimestampColumnStatistics*>(stripeStats->getColumnStatistics(0));
EXPECT_FALSE(footerColStats->hasMinimum());
EXPECT_FALSE(footerColStats->hasMaximum());
EXPECT_TRUE(footerColStats->hasLowerBound());
EXPECT_TRUE(footerColStats->hasUpperBound());
EXPECT_EQ("Data type: Timestamp\nValues: 12\nHas null: no\nMinimum is not defined\nLowerBound: 1994-12-31 07:00:00.688\nMaximum is not defined\nUpperBound: 2037-01-02 09:00:00.1\n", footerColStats->toString());
EXPECT_TRUE(stripeColStats->hasMinimum());
EXPECT_TRUE(stripeColStats->hasMaximum());
EXPECT_EQ("Data type: Timestamp\nValues: 12\nHas null: no\nMinimum: 1995-01-01 00:00:00.688\nLowerBound: 1995-01-01 00:00:00.688\nMaximum: 2037-01-01 00:00:00.0\nUpperBound: 2037-01-01 00:00:00.1\n", stripeColStats->toString());
}
TEST(TestTimestampStatistics, testTimezoneUTC) {
MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE);
MemoryPool *pool = getDefaultPool();
std::unique_ptr<Type> type(Type::buildTypeFromString("struct<col:timestamp>"));
WriterOptions wOptions;
wOptions.setMemoryPool(pool);
std::unique_ptr<Writer> writer = createWriter(*type, &memStream, wOptions);
std::unique_ptr<ColumnVectorBatch> batch = writer->createRowBatch(1024);
StructVectorBatch *root = dynamic_cast<StructVectorBatch *>(batch.get());
TimestampVectorBatch *col = dynamic_cast<orc::TimestampVectorBatch *>(root->fields[0]);
int64_t expectedMinMillis = 1650133963321; // 2022-04-16T18:32:43.321+00:00
int64_t expectedMaxMillis = 1650133964321; // 2022-04-16T18:32:44.321+00:00
col->data[0] = expectedMinMillis / 1000;
col->nanoseconds[0] = expectedMinMillis % 1000 * 1000000;
col->data[1] = expectedMaxMillis / 1000;
col->nanoseconds[1] = expectedMaxMillis % 1000 * 1000000;
col->numElements = 2;
root->numElements = 2;
writer->add(*batch);
writer->close();
std::unique_ptr<InputStream> inStream(new MemoryInputStream(
memStream.getData(), memStream.getLength()));
ReaderOptions rOptions;
rOptions.setMemoryPool(*pool);
std::unique_ptr<Reader> reader = createReader(std::move(inStream), rOptions);
std::unique_ptr<StripeStatistics> stripeStats = reader->getStripeStatistics(0);
const TimestampColumnStatistics* stripeColStats =
reinterpret_cast<const TimestampColumnStatistics*>(stripeStats->getColumnStatistics(1));
EXPECT_TRUE(stripeColStats->hasLowerBound());
EXPECT_TRUE(stripeColStats->hasUpperBound());
EXPECT_TRUE(stripeColStats->hasMinimum());
EXPECT_TRUE(stripeColStats->hasMaximum());
EXPECT_EQ(stripeColStats->getMinimum(), expectedMinMillis);
EXPECT_EQ(stripeColStats->getMaximum(), expectedMaxMillis);
EXPECT_EQ(stripeColStats->getLowerBound(), expectedMinMillis);
EXPECT_EQ(stripeColStats->getUpperBound(), expectedMaxMillis + 1);
}
TEST(TestTimestampStatistics, testTimezoneNonUTC) {
MemoryOutputStream memStream(DEFAULT_MEM_STREAM_SIZE);
MemoryPool *pool = getDefaultPool();
std::unique_ptr<Type> type(Type::buildTypeFromString("struct<col:timestamp>"));
WriterOptions wOptions;
wOptions.setMemoryPool(pool);
wOptions.setTimezoneName("America/Los_Angeles");
std::unique_ptr<Writer> writer = createWriter(*type, &memStream, wOptions);
std::unique_ptr<ColumnVectorBatch> batch = writer->createRowBatch(1024);
StructVectorBatch *root = dynamic_cast<StructVectorBatch *>(batch.get());
TimestampVectorBatch *col = dynamic_cast<orc::TimestampVectorBatch *>(root->fields[0]);
int64_t minMillis = 1650133963321; // 2022-04-16T18:32:43.321+00:00
int64_t maxMillis = 1650133964321; // 2022-04-16T18:32:44.321+00:00
col->data[0] = minMillis / 1000;
col->nanoseconds[0] = minMillis % 1000 * 1000000;
col->data[1] = maxMillis / 1000;
col->nanoseconds[1] = maxMillis % 1000 * 1000000;
col->numElements = 2;
root->numElements = 2;
writer->add(*batch);
writer->close();
std::unique_ptr<InputStream> inStream(new MemoryInputStream(
memStream.getData(), memStream.getLength()));
ReaderOptions rOptions;
rOptions.setMemoryPool(*pool);
std::unique_ptr<Reader> reader = createReader(std::move(inStream), rOptions);
std::unique_ptr<StripeStatistics> stripeStats = reader->getStripeStatistics(0);
const TimestampColumnStatistics* stripeColStats =
reinterpret_cast<const TimestampColumnStatistics*>(stripeStats->getColumnStatistics(1));
int64_t expectedMaxMillis = 1650108764321; // 2022-04-16T11:32:44.321+00:00
int64_t expectedMinMillis = 1650108763321; // 2022-04-16T11:32:43.321+00:00
EXPECT_TRUE(stripeColStats->hasLowerBound());
EXPECT_TRUE(stripeColStats->hasUpperBound());
EXPECT_TRUE(stripeColStats->hasMinimum());
EXPECT_TRUE(stripeColStats->hasMaximum());
EXPECT_EQ(stripeColStats->getMinimum(), expectedMinMillis);
EXPECT_EQ(stripeColStats->getMaximum(), expectedMaxMillis);
EXPECT_EQ(stripeColStats->getLowerBound(), expectedMinMillis);
EXPECT_EQ(stripeColStats->getUpperBound(), expectedMaxMillis + 1);
}
} // namespace