blob: df9d3d2ff37b073f5e84eb384cd41ef61f11c766 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <cstdlib>
#include <cstdio>
#include <vector>
#include <boost/scoped_ptr.hpp>
#include "common/init.h"
#include "runtime/descriptors.h"
#include "runtime/mem-pool.h"
#include "runtime/mem-tracker.h"
#include "runtime/tuple-row.h"
#include "service/fe-support.h"
#include "service/frontend.h"
#include "scratch-tuple-batch.h"
#include "testutil/desc-tbl-builder.h"
#include "testutil/gtest-util.h"
#include "common/names.h"
using namespace impala;
namespace impala {
// Frontend shared by every test in this binary. It is created in main() before the
// tests run and is handed to DescriptorTblBuilder by the fixture's SetUp().
scoped_ptr<Frontend> fe;
class ScratchTupleBatchTest : public testing::Test {
public:
ScratchTupleBatchTest() {}
static void VerifyMicroBatches(const boost::scoped_array<bool>& selected_rows,
ScratchMicroBatch* micro_batches, int num_batches, int gap, int batch_size) {
EXPECT_TRUE(num_batches > 0);
// All elements upto first micro batch should be False.
for (int idx = 0; idx < micro_batches[0].start; idx++) {
EXPECT_FALSE(selected_rows[idx]);
}
// All elements after last micro batch should be False
for (int idx = micro_batches[num_batches - 1].end + 1; idx < batch_size; idx++) {
EXPECT_FALSE(selected_rows[idx]);
}
// Verify every batch
for (int i = 0; i < num_batches; i++) {
const ScratchMicroBatch& batch = micro_batches[i];
EXPECT_TRUE(batch.start <= batch.end);
EXPECT_TRUE(batch.length == batch.end - batch.start + 1);
EXPECT_TRUE(selected_rows[batch.start]);
EXPECT_TRUE(selected_rows[batch.end]);
int last_true_idx = batch.start;
for (int j = batch.start + 1; j < batch.end; j++) {
if (selected_rows[j]) {
EXPECT_LE(j - last_true_idx, gap);
last_true_idx = j;
}
}
}
// Verify any two consecutive batches i and i+1
for (int i = 0; i < num_batches - 1; i++) {
const ScratchMicroBatch& batch = micro_batches[i];
const ScratchMicroBatch& nbatch = micro_batches[i + 1];
EXPECT_TRUE(batch.end < nbatch.start);
EXPECT_TRUE(nbatch.start - batch.end >= gap);
// Any row in betweeen the two batches should not be selected
for (int j = batch.end + 1; j < nbatch.start; j++) {
EXPECT_FALSE(selected_rows[j]);
}
}
}
protected:
MemTracker tracker_;
ObjectPool pool_;
RowDescriptor* desc_;
virtual void SetUp() {
DescriptorTblBuilder builder(fe.get(), &pool_);
builder.DeclareTuple() << TYPE_INT;
DescriptorTbl* desc_tbl = builder.Build();
vector<bool> nullable_tuples(1, false);
vector<TTupleId> tuple_id(1, (TTupleId) 0);
desc_ = pool_.Add(new RowDescriptor(*desc_tbl, tuple_id, nullable_tuples));
}
};
// Checks the conversion of 'selected_rows' to 'ScratchMicroBatch' entries when the
// 'true' values are interleaved, i.e. every n-th row is selected, for several n.
TEST_F(ScratchTupleBatchTest, TestInterleavedMicroBatches) {
  const int BATCH_SIZE = 1024;
  const int SKIP_LENGTH = 10;
  scoped_ptr<ScratchTupleBatch> scratch_batch(
      new ScratchTupleBatch(*desc_, BATCH_SIZE, &tracker_));
  scratch_batch->num_tuples = BATCH_SIZE;

  // Distance between two consecutive selected rows.
  const vector<int> strides = {2, 4, 8, 16, 32};
  for (const int stride : strides) {
    // Select rows stride-1, 2*stride-1, ... and clear every other row.
    for (int row = 0; row < BATCH_SIZE; ++row) {
      scratch_batch->selected_rows[row] = ((row + 1) % stride == 0);
    }
    ScratchMicroBatch micro_batches[BATCH_SIZE];
    const int num_batches = scratch_batch->GetMicroBatches(SKIP_LENGTH, micro_batches);
    ScratchTupleBatchTest::VerifyMicroBatches(scratch_batch->selected_rows,
        micro_batches, num_batches, SKIP_LENGTH, BATCH_SIZE);
  }
}
// Checks the conversion of 'selected_rows' to 'ScratchMicroBatch' entries when the
// 'true' values are clustered: runs of n unselected rows alternate with runs of n
// selected rows, for several n.
TEST_F(ScratchTupleBatchTest, TestClusteredMicroBatches) {
  const int BATCH_SIZE = 1024;
  const int SKIP_LENGTH = 10;
  scoped_ptr<ScratchTupleBatch> scratch_batch(
      new ScratchTupleBatch(*desc_, BATCH_SIZE, &tracker_));
  scratch_batch->num_tuples = BATCH_SIZE;

  // Length of each run of equal values.
  const vector<int> cluster_sizes = {32, 64, 128, 256};
  for (const int n : cluster_sizes) {
    int batch_idx = 0;
    bool selected = false;
    // Fill the batch with alternating runs, starting with an unselected run.
    while (batch_idx < BATCH_SIZE) {
      const int last_row = batch_idx + n;
      while (batch_idx < last_row && batch_idx < BATCH_SIZE) {
        scratch_batch->selected_rows[batch_idx++] = selected;
      }
      selected = !selected;
    }

    ScratchMicroBatch micro_batches[BATCH_SIZE];
    const int num_batches = scratch_batch->GetMicroBatches(SKIP_LENGTH, micro_batches);
    // Half of the BATCH_SIZE / n runs are selected and the test expects one micro batch
    // per selected run. Fatal on mismatch so that the verification below never walks
    // entries of 'micro_batches' that GetMicroBatches() did not report.
    ASSERT_EQ(num_batches, BATCH_SIZE / (n * 2));
    ScratchTupleBatchTest::VerifyMicroBatches(scratch_batch->selected_rows,
        micro_batches, num_batches, SKIP_LENGTH, BATCH_SIZE);
  }
}
}
// Checks the conversion of randomly generated 'selected_rows' to 'ScratchMicroBatch'
// entries. Each entry of 'gaps' is paired with the entry of 'selected_ratios' at the
// same index: the ratio is the approximate fraction of rows that get selected and the
// gap is the skip length passed to GetMicroBatches().
TEST_F(ScratchTupleBatchTest, TestRandomGeneratedMicroBatches) {
  const int BATCH_SIZE = 1024;
  scoped_ptr<ScratchTupleBatch> scratch_batch(
      new ScratchTupleBatch(*desc_, BATCH_SIZE, &tracker_));
  scratch_batch->num_tuples = BATCH_SIZE;

  // Gaps to try.
  const vector<int> gaps = {5, 16, 29, 37, 1025};
  const vector<float> selected_ratios = {0.5, 0.75, 0.1, 1.0, 0.44};
  ASSERT_EQ(gaps.size(), selected_ratios.size());

  // Seed once for the whole test. Re-seeding with the wall-clock second inside the loop
  // would replay the same rand() sequence for every gap. The seed is attached to any
  // failure reported below so that a failing run can be reproduced.
  const unsigned int seed = static_cast<unsigned int>(time(nullptr));
  SCOPED_TRACE(::testing::Message() << "srand() seed: " << seed);
  srand(seed);

  for (size_t g = 0; g < gaps.size(); ++g) {
    const int n = gaps[g];
    // Set random locations as selected.
    bool atleast_one_true = false;
    for (int batch_idx = 0; batch_idx < BATCH_SIZE; ++batch_idx) {
      const bool is_selected =
          (rand() % BATCH_SIZE) < (BATCH_SIZE * selected_ratios[g]);
      scratch_batch->selected_rows[batch_idx] = is_selected;
      atleast_one_true = atleast_one_true || is_selected;
    }
    // Ensure at least one value is true when invoking 'GetMicroBatches'.
    if (!atleast_one_true) {
      // Set one of the values randomly as true.
      scratch_batch->selected_rows[rand() % BATCH_SIZE] = true;
    }
    ScratchMicroBatch micro_batches[BATCH_SIZE];
    const int num_batches = scratch_batch->GetMicroBatches(n, micro_batches);
    ScratchTupleBatchTest::VerifyMicroBatches(
        scratch_batch->selected_rows, micro_batches, num_batches, n, BATCH_SIZE);
  }
}
// Entry point of the test binary: initializes gtest, the common runtime (in backend
// test mode) and frontend support, creates the shared Frontend, then runs all tests.
int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  InitCommonRuntime(argc, argv, true, impala::TestInfo::BE_TEST);
  InitFeSupport();
  // The fixture's SetUp() passes this Frontend to DescriptorTblBuilder, so it has to
  // exist before any test starts.
  fe.reset(new Frontend());
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}