// blob: eb2a840f043dc8e43c43625617460db55e6b1533 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <ostream>
#include <string>
#include <vector>
// IWYU pragma: no_include "testing/base/public/gunit.h"
#include <gflags/gflags_declare.h>
#include <glog/logging.h>
#include <glog/stl_logging.h> // IWYU pragma: keep
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include "kudu/codegen/code_generator.h"
#include "kudu/codegen/compilation_manager.h"
#include "kudu/codegen/row_projector.h"
#include "kudu/common/common.pb.h"
#include "kudu/common/row.h"
#include "kudu/common/rowblock.h"
#include "kudu/common/rowblock_memory.h"
#include "kudu/common/schema.h"
#include "kudu/gutil/ref_counted.h"
#include "kudu/gutil/singleton.h"
#include "kudu/util/logging_test_util.h"
#include "kudu/util/memory/arena.h"
#include "kudu/util/random.h"
#include "kudu/util/random_util.h"
#include "kudu/util/slice.h"
#include "kudu/util/status.h"
#include "kudu/util/test_macros.h"
#include "kudu/util/test_util.h"
using std::string;
using std::unique_ptr;
using std::vector;
DECLARE_bool(codegen_dump_mc);
DECLARE_int32(codegen_cache_capacity);
namespace kudu {
// Short names for the two row projector implementations compared by these
// tests: kudu::RowProjector and its counterpart in the codegen namespace.
using NoCodegenRP = RowProjector;
using CodegenRP = codegen::RowProjector;
using codegen::CompilationManager;
class CodegenTest : public KuduTest {
public:
CodegenTest()
: random_(SeedRandom()),
// Set the initial Arena size as small as possible to catch errors during relocation.
projections_mem_(16) {
// Create the base schema.
vector<ColumnSchema> cols = { ColumnSchema("key ", UINT64, false),
ColumnSchema("int32 ", INT32, false),
ColumnSchema("int32-null-val", INT32, true),
ColumnSchema("int32-null ", INT32, true),
ColumnSchema("str32 ", STRING, false),
ColumnSchema("str32-null-val", STRING, true),
ColumnSchema("str32-null ", STRING, true) };
base_.Reset(cols, 1);
base_ = SchemaBuilder(base_).Build(); // add IDs
// Create an extended default schema
cols.emplace_back("int32-R ", INT32, false, false, kI32R, nullptr);
cols.emplace_back("int32-RW", INT32, false, false, kI32R, kI32W);
cols.emplace_back("str32-R ", STRING, false, false, kStrR, nullptr);
cols.emplace_back("str32-RW", STRING, false, false, kStrR, kStrW);
defaults_.Reset(cols, 1);
defaults_ = SchemaBuilder(defaults_).Build(); // add IDs
test_rows_arena_.reset(new Arena(2 * 1024));
RowBuilder rb(&base_);
for (int i = 0; i < kNumTestRows; ++i) {
rb.AddUint64(i);
rb.AddInt32(random_.Next32());
rb.AddInt32(random_.Next32());
rb.AddNull();
AddRandomString(&rb);
AddRandomString(&rb);
rb.AddNull();
void* arena_data = test_rows_arena_->AllocateBytes(
ContiguousRowHelper::row_size(base_));
ContiguousRow dst(&base_, static_cast<uint8_t*>(arena_data));
CHECK_OK(CopyRow(rb.row(), &dst, test_rows_arena_.get()));
test_rows_[i].reset(new ConstContiguousRow(dst));
rb.Reset();
}
}
protected:
Schema base_;
Schema defaults_;
// Compares the projection-for-read and projection-for-write results
// of the codegen projection and the non-codegen projection
template<bool READ>
void TestProjection(const Schema* proj);
// Generates a new row projector for the given projection schema.
Status Generate(const Schema* proj, unique_ptr<CodegenRP>* out);
enum {
// Base schema column indices
kKeyCol,
kI32Col,
kI32NullValCol,
kI32NullCol,
kStrCol,
kStrNullValCol,
kStrNullCol,
// Extended default projection schema column indices
kI32RCol,
kI32RWCol,
kStrRCol,
kStrRWCol
};
Status CreatePartialSchema(const vector<size_t>& col_indexes,
Schema* out);
private:
// Projects the test rows into parameter rowblock using projector and
// member projections_mem_ (should be Reset() manually).
template<bool READ, class RowProjectorType>
void ProjectTestRows(RowProjectorType* rp, RowBlock* rb);
void AddRandomString(RowBuilder* rb);
static const int kRandomStringMaxLength = 32;
static const int kNumTestRows = 10;
static const size_t kIndirectPerRow = 4 * kRandomStringMaxLength;
static const size_t kIndirectPerProjection = kIndirectPerRow * kNumTestRows;
typedef const void* DefaultValueType;
static const DefaultValueType kI32R, kI32W, kStrR, kStrW;
codegen::CodeGenerator generator_;
Random random_;
unique_ptr<ConstContiguousRow> test_rows_[kNumTestRows];
RowBlockMemory projections_mem_;
unique_ptr<Arena> test_rows_arena_;
};
namespace {
// Integer values pointed to by the int32 default-value columns. The first
// has only its upper 16 bits set (-65536 as an int32_t); the second has only
// its lower 16 bits set.
constexpr int32_t kI32RValue = static_cast<int32_t>(0xFFFF0000U);
constexpr int32_t kI32WValue = 0x0000FFFF;
// String values pointed to by the string default-value columns.
const Slice kStrRValue("RRRRR STRING DEFAULT READ");
const Slice kStrWValue("WWWWW STRING DEFAULT WRITE");
// Assumes all rows are selected
// Also assumes schemas are the same.
void CheckRowBlocksEqual(const RowBlock* rb1, const RowBlock* rb2,
const string& name1, const string& name2) {
CHECK_EQ(rb1->nrows(), rb2->nrows());
const Schema* schema = rb1->schema();
for (int i = 0; i < rb1->nrows(); ++i) {
RowBlockRow row1 = rb1->row(i);
RowBlockRow row2 = rb2->row(i);
CHECK_EQ(schema->Compare(row1, row2), 0)
<< "Rows unequal (failed at row " << i << "):\n"
<< "\t(" << name1 << ") = " << schema->DebugRow(row1) << "\n"
<< "\t(" << name2 << ") = " << schema->DebugRow(row2);
}
}
} // anonymous namespace
// Out-of-class definitions of the fixture's default-value pointers. Each one
// holds the address of the corresponding k*Value constant.
const CodegenTest::DefaultValueType CodegenTest::kI32R{&kI32RValue};
const CodegenTest::DefaultValueType CodegenTest::kI32W{&kI32WValue};
const CodegenTest::DefaultValueType CodegenTest::kStrR{&kStrRValue};
const CodegenTest::DefaultValueType CodegenTest::kStrW{&kStrWValue};
// Adds a random string to 'rb'. The length is drawn from
// random_.Uniform(kRandomStringMaxLength) and the bytes from RandomString().
void CodegenTest::AddRandomString(RowBuilder* rb) {
  // Scratch buffer reused across calls; only the first 'len' bytes are
  // handed to the row builder.
  static char scratch[kRandomStringMaxLength];
  int len = random_.Uniform(kRandomStringMaxLength);
  RandomString(scratch, len, &random_);
  const Slice value(scratch, len);
  rb->AddString(value);
}
template<bool READ, class RowProjectorType>
void CodegenTest::ProjectTestRows(RowProjectorType* rp, RowBlock* rb) {
// Even though we can test two rows at a time, without using up the
// extra memory for keeping an entire row block around, this tests
// what the actual use case will be.
for (int i = 0; i < kNumTestRows; ++i) {
ConstContiguousRow src = *test_rows_[i];
RowBlockRow dst = rb->row(i);
if (READ) {
CHECK_OK(rp->ProjectRowForRead(src, &dst, rb->arena()));
} else {
CHECK_OK(rp->ProjectRowForWrite(src, &dst, rb->arena()));
}
}
}
template<bool READ>
void CodegenTest::TestProjection(const Schema* proj) {
unique_ptr<CodegenRP> with;
ASSERT_OK(Generate(proj, &with));
NoCodegenRP without(&base_, proj);
ASSERT_OK(without.Init());
CHECK_EQ(with->base_schema(), &base_);
CHECK_EQ(with->projection(), proj);
RowBlock rb_with(proj, kNumTestRows, &projections_mem_);
RowBlock rb_without(proj, kNumTestRows, &projections_mem_);
projections_mem_.Reset();
ProjectTestRows<READ>(with.get(), &rb_with);
ProjectTestRows<READ>(&without, &rb_without);
CheckRowBlocksEqual(&rb_with, &rb_without, "Codegen", "Expected");
}
// Compiles row projector functions from base_ to 'proj' with generator_ and
// wraps them in a new CodegenRP stored in 'out'. Returns the compilation
// error, leaving 'out' untouched, if compilation fails.
Status CodegenTest::Generate(const Schema* proj, unique_ptr<CodegenRP>* out) {
  scoped_refptr<codegen::RowProjectorFunctions> compiled;
  RETURN_NOT_OK(generator_.CompileRowProjector(base_, *proj, &compiled));
  *out = unique_ptr<CodegenRP>(new CodegenRP(&base_, proj, compiled));
  return Status::OK();
}
// Translates each index in 'col_indexes' into the ID of that column in
// defaults_, then asks defaults_ to create the projection for those IDs in
// 'out'. Returns the status of the projection creation.
Status CodegenTest::CreatePartialSchema(const vector<size_t>& col_indexes,
                                        Schema* out) {
  vector<ColumnId> projected_ids;
  projected_ids.reserve(col_indexes.size());
  for (const size_t index : col_indexes) {
    projected_ids.emplace_back(defaults_.column_id(index));
  }
  return defaults_.CreateProjectionByIdsIgnoreMissing(projected_ids, out);
}
// Checks that the codegen projector reports the same observable properties
// (base schema, projection, identity flag) as the non-codegen projector, for
// both a non-identity and an identity projection.
TEST_F(CodegenTest, ObservablesTest) {
  // Test when not identity
  Schema proj = base_.CreateKeyProjection();
  unique_ptr<CodegenRP> with;
  // ASSERT_OK rather than CHECK_OK: a compilation error should fail this
  // test instead of aborting the whole test binary.
  ASSERT_OK(Generate(&proj, &with));
  NoCodegenRP without(&base_, &proj);
  ASSERT_OK(without.Init());
  ASSERT_EQ(with->base_schema(), without.base_schema());
  ASSERT_EQ(with->projection(), without.projection());
  ASSERT_EQ(with->is_identity(), without.is_identity());
  ASSERT_FALSE(with->is_identity());

  // Test when identity
  Schema iproj = *&base_;
  unique_ptr<CodegenRP> iwith;
  ASSERT_OK(Generate(&iproj, &iwith));
  NoCodegenRP iwithout(&base_, &iproj);
  ASSERT_OK(iwithout.Init());
  ASSERT_EQ(iwith->base_schema(), iwithout.base_schema());
  ASSERT_EQ(iwith->projection(), iwithout.projection());
  ASSERT_EQ(iwith->is_identity(), iwithout.is_identity());
  ASSERT_TRUE(iwith->is_identity());
}
// Test empty projection
TEST_F(CodegenTest, TestEmpty) {
  Schema empty;
  // TestProjection uses fatal assertions, which only return from the helper;
  // ASSERT_NO_FATAL_FAILURE stops the test body as well.
  ASSERT_NO_FATAL_FAILURE(TestProjection<true>(&empty));
  ASSERT_NO_FATAL_FAILURE(TestProjection<false>(&empty));
}
// Test key projection
TEST_F(CodegenTest, TestKey) {
  Schema key = base_.CreateKeyProjection();
  // TestProjection uses fatal assertions, which only return from the helper;
  // ASSERT_NO_FATAL_FAILURE stops the test body as well.
  ASSERT_NO_FATAL_FAILURE(TestProjection<true>(&key));
  ASSERT_NO_FATAL_FAILURE(TestProjection<false>(&key));
}
// Test int projection
TEST_F(CodegenTest, TestInts) {
  Schema ints;
  const vector<size_t> part_cols = { kI32Col, kI32NullValCol, kI32NullCol };
  ASSERT_OK(CreatePartialSchema(part_cols, &ints));
  // TestProjection uses fatal assertions, which only return from the helper;
  // ASSERT_NO_FATAL_FAILURE stops the test body as well.
  ASSERT_NO_FATAL_FAILURE(TestProjection<true>(&ints));
  ASSERT_NO_FATAL_FAILURE(TestProjection<false>(&ints));
}
// Test string projection
TEST_F(CodegenTest, TestStrings) {
  Schema strs;
  const vector<size_t> part_cols = { kStrCol, kStrNullValCol, kStrNullCol };
  ASSERT_OK(CreatePartialSchema(part_cols, &strs));
  // TestProjection uses fatal assertions, which only return from the helper;
  // ASSERT_NO_FATAL_FAILURE stops the test body as well.
  ASSERT_NO_FATAL_FAILURE(TestProjection<true>(&strs));
  ASSERT_NO_FATAL_FAILURE(TestProjection<false>(&strs));
}
// Tests the projection of every non-nullable column
TEST_F(CodegenTest, TestNonNullables) {
  Schema non_null;
  const vector<size_t> part_cols = { kKeyCol, kI32Col, kStrCol };
  ASSERT_OK(CreatePartialSchema(part_cols, &non_null));
  // TestProjection uses fatal assertions, which only return from the helper;
  // ASSERT_NO_FATAL_FAILURE stops the test body as well.
  ASSERT_NO_FATAL_FAILURE(TestProjection<true>(&non_null));
  ASSERT_NO_FATAL_FAILURE(TestProjection<false>(&non_null));
}
// Tests the projection of every nullable column
TEST_F(CodegenTest, TestNullables) {
  Schema nullables;
  const vector<size_t> part_cols = { kI32NullValCol, kI32NullCol, kStrNullValCol, kStrNullCol };
  ASSERT_OK(CreatePartialSchema(part_cols, &nullables));
  // TestProjection uses fatal assertions, which only return from the helper;
  // ASSERT_NO_FATAL_FAILURE stops the test body as well.
  ASSERT_NO_FATAL_FAILURE(TestProjection<true>(&nullables));
  ASSERT_NO_FATAL_FAILURE(TestProjection<false>(&nullables));
}
// Test full schema projection
TEST_F(CodegenTest, TestFullSchema) {
  // TestProjection uses fatal assertions, which only return from the helper;
  // ASSERT_NO_FATAL_FAILURE stops the test body as well.
  ASSERT_NO_FATAL_FAILURE(TestProjection<true>(&base_));
  ASSERT_NO_FATAL_FAILURE(TestProjection<false>(&base_));
}
// Tests just the default projection
TEST_F(CodegenTest, TestDefaultsOnly) {
  Schema pure_defaults;

  // Default read projections
  vector<size_t> part_cols = { kI32RCol, kI32RWCol, kStrRCol, kStrRWCol };
  ASSERT_OK(CreatePartialSchema(part_cols, &pure_defaults));
  // TestProjection uses fatal assertions, which only return from the helper;
  // ASSERT_NO_FATAL_FAILURE stops the test body as well.
  ASSERT_NO_FATAL_FAILURE(TestProjection<true>(&pure_defaults));

  // Default write projections
  part_cols = { kI32RWCol, kStrRWCol };
  ASSERT_OK(CreatePartialSchema(part_cols, &pure_defaults));
  ASSERT_NO_FATAL_FAILURE(TestProjection<false>(&pure_defaults));
}
// Test full defaults projection
TEST_F(CodegenTest, TestFullSchemaWithDefaults) {
  // TestProjection uses fatal assertions, which only return from the helper;
  // ASSERT_NO_FATAL_FAILURE stops the test body as well.
  ASSERT_NO_FATAL_FAILURE(TestProjection<true>(&defaults_));

  // Default write projection
  Schema full_write;
  const vector<size_t> part_cols = { kKeyCol,
                                     kI32Col,
                                     kI32NullValCol,
                                     kI32NullCol,
                                     kStrCol,
                                     kStrNullValCol,
                                     kStrNullCol,
                                     kI32RWCol,
                                     kStrRWCol };
  ASSERT_OK(CreatePartialSchema(part_cols, &full_write));
  ASSERT_NO_FATAL_FAILURE(TestProjection<false>(&full_write));
}
// Test the codegen_dump_mc flag works properly.
TEST_F(CodegenTest, TestDumpMC) {
  FLAGS_codegen_dump_mc = true;

  // Capture the messages logged while the projection below runs.
  StringVectorSink sink;
  ScopedRegisterSink srs(&sink);

  Schema ints;
  const vector<size_t> part_cols = { kI32Col, kI32NullValCol, kI32NullCol, kStrCol };
  ASSERT_OK(CreatePartialSchema(part_cols, &ints));
  // TestProjection uses fatal assertions, which only return from the helper.
  // Stop here if it failed so the log checks below do not report misleading
  // secondary failures.
  ASSERT_NO_FATAL_FAILURE(TestProjection<true>(&ints));

  // Exactly one message is expected, and it should contain the return
  // instruction mnemonic for the target architecture.
  const vector<string>& msgs = sink.logged_msgs();
  ASSERT_EQ(msgs.size(), 1);
#ifndef __aarch64__
  EXPECT_THAT(msgs[0], testing::ContainsRegex("retq"));
#else
  EXPECT_THAT(msgs[0], testing::ContainsRegex("ret"));
#endif //__aarch64__
}
// Basic test for the CompilationManager code cache.
// Requests code generation for the same set of projections in two passes and
// verifies that the first pass never hits the cache while the second pass
// hits it for some, but not all, of the projections.
TEST_F(CodegenTest, TestCodeCache) {
  // Start from a fresh CompilationManager whose cache capacity is 10.
  Singleton<CompilationManager>::UnsafeReset();
  FLAGS_codegen_cache_capacity = 10;
  CompilationManager* manager = CompilationManager::GetSingleton();

  for (int pass = 0; pass < 2; pass++) {
    int num_hits = 0;

    // Walk through every ordering of the first four columns (24 in total),
    // requesting a projector for the projection each ordering describes.
    vector<size_t> column_order = { 0, 1, 2, 3 };
    do {
      SCOPED_TRACE(column_order);
      Schema projection;
      ASSERT_OK(CreatePartialSchema(column_order, &projection));
      unique_ptr<CodegenRP> projector;
      if (manager->RequestRowProjector(&base_, &projection, &projector)) {
        ++num_hits;
      }
      manager->Wait();
    } while (std::next_permutation(column_order.begin(), column_order.end()));

    if (pass == 0) {
      // The cache starts out empty, so the first pass must not hit at all.
      ASSERT_EQ(0, num_hits);
      continue;
    }

    // On the second pass some hits are expected. A perfect LRU cache would
    // yield 0 hits here too, because the entries are requested in exactly the
    // order they were inserted and each would be evicted before being looked
    // up again. The LRU cache is sharded, though, so some lookups do hit.
    ASSERT_GT(num_hits, 0);
    ASSERT_LT(num_hits, 24);
  }
}
} // namespace kudu