blob: 9a8fcf677554ab9e0dca59cbbde8237d22c51c22 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <gtest/gtest.h>
#include <fcntl.h>
#include <cstdlib>
#include <cstdint>
#include <iostream>
#include <memory>
#include <string>
#include "parquet/file/reader.h"
#include "parquet/file/reader-internal.h"
#include "parquet/column/reader.h"
#include "parquet/column/scanner.h"
#include "parquet/util/input.h"
#include "parquet/util/mem-allocator.h"
using std::string;
namespace parquet {
const char* data_dir = std::getenv("PARQUET_TEST_DATA");
class TestAllTypesPlain : public ::testing::Test {
public:
void SetUp() {
std::string dir_string(data_dir);
std::stringstream ss;
ss << dir_string << "/"
<< "alltypes_plain.parquet";
reader_ = ParquetFileReader::OpenFile(ss.str());
}
void TearDown() {}
protected:
std::unique_ptr<ParquetFileReader> reader_;
};
TEST_F(TestAllTypesPlain, NoopConstructDestruct) {}
TEST_F(TestAllTypesPlain, TestBatchRead) {
std::shared_ptr<RowGroupReader> group = reader_->RowGroup(0);
// column 0, id
std::shared_ptr<Int32Reader> col =
std::dynamic_pointer_cast<Int32Reader>(group->Column(0));
int16_t def_levels[4];
int16_t rep_levels[4];
int32_t values[4];
// This file only has 8 rows
ASSERT_EQ(8, reader_->num_rows());
// This file only has 1 row group
ASSERT_EQ(1, reader_->num_row_groups());
// This row group must have 8 rows
ASSERT_EQ(8, group->num_rows());
ASSERT_TRUE(col->HasNext());
int64_t values_read;
int levels_read = col->ReadBatch(4, def_levels, rep_levels, values, &values_read);
ASSERT_EQ(4, levels_read);
ASSERT_EQ(4, values_read);
// Now read past the end of the file
ASSERT_TRUE(col->HasNext());
levels_read = col->ReadBatch(5, def_levels, rep_levels, values, &values_read);
ASSERT_EQ(4, levels_read);
ASSERT_EQ(4, values_read);
ASSERT_FALSE(col->HasNext());
}
TEST_F(TestAllTypesPlain, TestFlatScannerInt32) {
std::shared_ptr<RowGroupReader> group = reader_->RowGroup(0);
// column 0, id
std::shared_ptr<Int32Scanner> scanner(new Int32Scanner(group->Column(0)));
int32_t val;
bool is_null;
for (int i = 0; i < 8; ++i) {
ASSERT_TRUE(scanner->HasNext());
ASSERT_TRUE(scanner->NextValue(&val, &is_null));
ASSERT_FALSE(is_null);
}
ASSERT_FALSE(scanner->HasNext());
ASSERT_FALSE(scanner->NextValue(&val, &is_null));
}
TEST_F(TestAllTypesPlain, TestSetScannerBatchSize) {
std::shared_ptr<RowGroupReader> group = reader_->RowGroup(0);
// column 0, id
std::shared_ptr<Int32Scanner> scanner(new Int32Scanner(group->Column(0)));
ASSERT_EQ(128, scanner->batch_size());
scanner->SetBatchSize(1024);
ASSERT_EQ(1024, scanner->batch_size());
}
TEST_F(TestAllTypesPlain, DebugPrintWorks) {
std::stringstream ss;
std::list<int> columns;
reader_->DebugPrint(ss, columns);
std::string result = ss.str();
ASSERT_GT(result.size(), 0);
}
TEST_F(TestAllTypesPlain, ColumnSelection) {
std::stringstream ss;
std::list<int> columns;
columns.push_back(5);
columns.push_back(0);
columns.push_back(10);
reader_->DebugPrint(ss, columns);
std::string result = ss.str();
ASSERT_GT(result.size(), 0);
}
TEST_F(TestAllTypesPlain, ColumnSelectionOutOfRange) {
std::stringstream ss;
std::list<int> columns;
columns.push_back(100);
ASSERT_THROW(reader_->DebugPrint(ss, columns), ParquetException);
columns.clear();
columns.push_back(-1);
ASSERT_THROW(reader_->DebugPrint(ss, columns), ParquetException);
}
class TestLocalFileSource : public ::testing::Test {
public:
void SetUp() {
std::string dir_string(data_dir);
std::stringstream ss;
ss << dir_string << "/"
<< "alltypes_plain.parquet";
file.reset(new LocalFileSource());
file->Open(ss.str());
}
void TearDown() {}
protected:
std::unique_ptr<LocalFileSource> file;
};
TEST_F(TestLocalFileSource, FileClosedOnDestruction) {
int file_desc = file->file_descriptor();
{
auto contents = SerializedFile::Open(std::move(file));
std::unique_ptr<ParquetFileReader> result(new ParquetFileReader());
result->Open(std::move(contents));
}
ASSERT_EQ(-1, fcntl(file_desc, F_GETFD));
ASSERT_EQ(EBADF, errno);
}
} // namespace parquet