// blob: 5574a7aa99d0c2c14971f8c409a02258940ff3b7 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <glog/logging.h>
#include <gtest/gtest-message.h>
#include <gtest/gtest-test-part.h>
#include <stddef.h>
#include <memory>
#include <ostream>
#include "gtest/gtest_pred_impl.h"
#include "vec/aggregate_functions/aggregate_function.h"
#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
#include "vec/columns/column_string.h"
#include "vec/columns/column_vector.h"
#include "vec/common/string_buffer.hpp"
#include "vec/data_types/data_type_date_or_datetime_v2.h"
#include "vec/data_types/data_type_number.h"
#include "vec/data_types/data_type_string.h"
#include "vec/runtime/vdatetime_value.h"
// Forward declaration only: the tests below handle IColumn through pointers.
namespace doris::vectorized {
class IColumn;
} // namespace doris::vectorized
namespace doris::vectorized {
// Declaration of the window_funnel registration hook; its definition is not part of this
// file. NOTE(review): nothing in the visible tests calls it -- the fixture looks the function
// up through AggregateFunctionSimpleFactory::instance() instead -- confirm it is still needed.
void register_aggregate_function_window_funnel(AggregateFunctionSimpleFactory& factory);
class VWindowFunnelTest : public testing::Test {
public:
AggregateFunctionPtr agg_function;
VWindowFunnelTest() {}
void SetUp() {
AggregateFunctionSimpleFactory factory = AggregateFunctionSimpleFactory::instance();
DataTypes data_types = {
std::make_shared<DataTypeInt64>(), std::make_shared<DataTypeString>(),
std::make_shared<DataTypeDateTimeV2>(), std::make_shared<DataTypeUInt8>(),
std::make_shared<DataTypeUInt8>(), std::make_shared<DataTypeUInt8>(),
std::make_shared<DataTypeUInt8>()};
agg_function = factory.get("window_funnel", data_types, nullptr, false,
BeExecVersionManager::get_newest_version());
EXPECT_NE(agg_function, nullptr);
}
void TearDown() {}
Arena arena;
};
// A state that never received a row is serialized, read back into itself and merged with a
// second untouched state; both states are then expected to report 0.
TEST_F(VWindowFunnelTest, testEmpty) {
    // First state: created, never fed.
    std::unique_ptr<char[]> first_storage {new char[agg_function->size_of_data()]};
    AggregateDataPtr first_state = first_storage.get();
    agg_function->create(first_state);

    // Round-trip the empty state through its serialized form.
    ColumnString serialized;
    VectorBufferWriter writer(serialized);
    agg_function->serialize(first_state, writer);
    writer.commit();
    LOG(INFO) << "buf size : " << serialized.size();

    VectorBufferReader reader(serialized.get_data_at(0));
    agg_function->deserialize(first_state, reader, arena);

    // Second state: also empty, merged into the first one.
    std::unique_ptr<char[]> second_storage {new char[agg_function->size_of_data()]};
    AggregateDataPtr second_state = second_storage.get();
    agg_function->create(second_state);
    agg_function->merge(first_state, second_state, arena);

    ColumnInt32 first_result;
    agg_function->insert_result_into(first_state, first_result);
    EXPECT_EQ(first_result.get_data()[0], 0);

    ColumnInt32 second_result;
    agg_function->insert_result_into(second_state, second_result);
    EXPECT_EQ(second_result.get_data()[0], 0);

    agg_function->destroy(first_state);
    agg_function->destroy(second_state);
}
// Feeds four rows with a window value of 2 into a state and expects a result of 3, then
// serializes that state and checks that a fresh state restored from the serialized bytes
// reports the same result.
TEST_F(VWindowFunnelTest, testSerialize) {
    constexpr int kNumConds = 4;

    // Builds a flag column with kNumConds rows whose only 1 sits at `hit_row`.
    const auto make_event_column = [](int hit_row) {
        auto flags = ColumnUInt8::create();
        for (int row = 0; row < kNumConds; ++row) {
            if (row == hit_row) {
                flags->insert(vectorized::Field::create_field<TYPE_BOOLEAN>(1));
            } else {
                flags->insert(vectorized::Field::create_field<TYPE_BOOLEAN>(0));
            }
        }
        return flags;
    };

    // Per-row inputs that do not depend on the event: mode string, timestamp, window.
    auto column_mode = ColumnString::create();
    auto column_timestamp = ColumnDateTimeV2::create();
    auto column_window = ColumnInt64::create();
    for (int row = 0; row < kNumConds; ++row) {
        column_mode->insert(vectorized::Field::create_field<TYPE_STRING>("mode"));

        // Row `row` differs from its neighbours only in the last time component.
        VecDateTimeValue stamp;
        stamp.unchecked_set_time(2022, 2, 28, 0, 0, row);
        auto stamp_v2 = stamp.to_datetime_v2();
        column_timestamp->insert_data((char*)&stamp_v2, 0);

        column_window->insert(vectorized::Field::create_field<TYPE_BIGINT>(2));
    }

    // Event k is satisfied by row k - 1 only.
    auto column_event1 = make_event_column(0);
    auto column_event2 = make_event_column(1);
    auto column_event3 = make_event_column(2);
    auto column_event4 = make_event_column(3);

    std::unique_ptr<char[]> source_storage {new char[agg_function->size_of_data()]};
    AggregateDataPtr source_state = source_storage.get();
    agg_function->create(source_state);

    const IColumn* column[7] = {column_window.get(), column_mode.get(),
                                column_timestamp.get(), column_event1.get(),
                                column_event2.get(), column_event3.get(),
                                column_event4.get()};
    for (int row = 0; row < kNumConds; ++row) {
        agg_function->add(source_state, column, row, arena);
    }

    ColumnInt32 source_result;
    agg_function->insert_result_into(source_state, source_result);
    EXPECT_EQ(source_result.get_data()[0], 3);

    // Serialize, drop the source state, then rebuild from the bytes alone.
    ColumnString serialized;
    VectorBufferWriter writer(serialized);
    agg_function->serialize(source_state, writer);
    writer.commit();
    agg_function->destroy(source_state);

    std::unique_ptr<char[]> restored_storage {new char[agg_function->size_of_data()]};
    AggregateDataPtr restored_state = restored_storage.get();
    agg_function->create(restored_state);

    VectorBufferReader reader(serialized.get_data_at(0));
    agg_function->deserialize(restored_state, reader, arena);

    ColumnInt32 restored_result;
    agg_function->insert_result_into(restored_state, restored_result);
    EXPECT_EQ(restored_result.get_data()[0], 3);

    agg_function->destroy(restored_state);
}
// Rows are added in ascending timestamp order and the result is read directly from the state
// that received them (no merge step).
//
// Row i carries timestamp 2022-02-28 00:00:00 plus i in the last time component and satisfies
// only event i + 1. For every window value win in [0, NUM_CONDS] the expected result is
// win + 1, capped at NUM_CONDS. Negative windows are not exercised.
TEST_F(VWindowFunnelTest, testMax4SortedNoMerge) {
    constexpr int NUM_CONDS = 4;

    // Builds a UInt8 flag column with NUM_CONDS rows whose only 1 sits at `hit_row`.
    const auto make_event_column = [](int hit_row) {
        auto flags = ColumnUInt8::create();
        for (int row = 0; row < NUM_CONDS; row++) {
            if (row == hit_row) {
                flags->insert(vectorized::Field::create_field<TYPE_BOOLEAN>(1));
            } else {
                flags->insert(vectorized::Field::create_field<TYPE_BOOLEAN>(0));
            }
        }
        return flags;
    };

    auto column_mode = ColumnString::create();
    auto column_timestamp = ColumnDateTimeV2::create();
    for (int i = 0; i < NUM_CONDS; i++) {
        column_mode->insert(vectorized::Field::create_field<TYPE_STRING>("mode"));

        VecDateTimeValue time_value;
        time_value.unchecked_set_time(2022, 2, 28, 0, 0, i);
        auto dtv2 = time_value.to_datetime_v2();
        // The column copies the raw bytes of the value; the length argument is passed as 0
        // exactly as before.
        column_timestamp->insert_data(reinterpret_cast<const char*>(&dtv2), 0);
    }

    // Event k is satisfied by row k - 1 only.
    auto column_event1 = make_event_column(0);
    auto column_event2 = make_event_column(1);
    auto column_event3 = make_event_column(2);
    auto column_event4 = make_event_column(3);

    for (int win = 0; win < NUM_CONDS + 1; win++) {
        auto column_window = ColumnInt64::create();
        for (int i = 0; i < NUM_CONDS; i++) {
            column_window->insert(vectorized::Field::create_field<TYPE_BIGINT>(win));
        }

        std::unique_ptr<char[]> memory(new char[agg_function->size_of_data()]);
        AggregateDataPtr place = memory.get();
        agg_function->create(place);

        const IColumn* column[7] = {column_window.get(), column_mode.get(),
                                    column_timestamp.get(), column_event1.get(),
                                    column_event2.get(), column_event3.get(),
                                    column_event4.get()};
        for (int i = 0; i < NUM_CONDS; i++) {
            agg_function->add(place, column, i, arena);
        }

        ColumnInt32 column_result;
        agg_function->insert_result_into(place, column_result);

        // win never goes below 0 in this loop, so the expectation needs no negative-window
        // case: it is win + 1 until every one of the NUM_CONDS events has been matched.
        const int expected_level = win < NUM_CONDS ? win + 1 : NUM_CONDS;
        EXPECT_EQ(column_result.get_data()[0], expected_level);

        agg_function->destroy(place);
    }
}
// Rows are added in ascending timestamp order to one state, which is then merged into a
// second, freshly created state; the result is read from the merge target.
//
// Row i carries timestamp 2022-02-28 00:00:00 plus i in the last time component and satisfies
// only event i + 1. For every window value win in [0, NUM_CONDS] the expected result is
// win + 1, capped at NUM_CONDS. Negative windows are not exercised.
TEST_F(VWindowFunnelTest, testMax4SortedMerge) {
    constexpr int NUM_CONDS = 4;

    // Builds a UInt8 flag column with NUM_CONDS rows whose only 1 sits at `hit_row`.
    const auto make_event_column = [](int hit_row) {
        auto flags = ColumnUInt8::create();
        for (int row = 0; row < NUM_CONDS; row++) {
            if (row == hit_row) {
                flags->insert(vectorized::Field::create_field<TYPE_BOOLEAN>(1));
            } else {
                flags->insert(vectorized::Field::create_field<TYPE_BOOLEAN>(0));
            }
        }
        return flags;
    };

    auto column_mode = ColumnString::create();
    auto column_timestamp = ColumnDateTimeV2::create();
    for (int i = 0; i < NUM_CONDS; i++) {
        column_mode->insert(vectorized::Field::create_field<TYPE_STRING>("mode"));

        VecDateTimeValue time_value;
        time_value.unchecked_set_time(2022, 2, 28, 0, 0, i);
        auto dtv2 = time_value.to_datetime_v2();
        // The column copies the raw bytes of the value; the length argument is passed as 0
        // exactly as before.
        column_timestamp->insert_data(reinterpret_cast<const char*>(&dtv2), 0);
    }

    // Event k is satisfied by row k - 1 only.
    auto column_event1 = make_event_column(0);
    auto column_event2 = make_event_column(1);
    auto column_event3 = make_event_column(2);
    auto column_event4 = make_event_column(3);

    for (int win = 0; win < NUM_CONDS + 1; win++) {
        auto column_window = ColumnInt64::create();
        for (int i = 0; i < NUM_CONDS; i++) {
            column_window->insert(vectorized::Field::create_field<TYPE_BIGINT>(win));
        }

        // Source state: receives all rows.
        std::unique_ptr<char[]> memory(new char[agg_function->size_of_data()]);
        AggregateDataPtr place = memory.get();
        agg_function->create(place);

        const IColumn* column[7] = {column_window.get(), column_mode.get(),
                                    column_timestamp.get(), column_event1.get(),
                                    column_event2.get(), column_event3.get(),
                                    column_event4.get()};
        for (int i = 0; i < NUM_CONDS; i++) {
            agg_function->add(place, column, i, arena);
        }

        // Merge target: starts empty and only ever sees the source state through merge().
        std::unique_ptr<char[]> memory2(new char[agg_function->size_of_data()]);
        AggregateDataPtr place2 = memory2.get();
        agg_function->create(place2);
        agg_function->merge(place2, place, arena);

        ColumnInt32 column_result;
        agg_function->insert_result_into(place2, column_result);

        // win never goes below 0 in this loop, so the expectation needs no negative-window
        // case: it is win + 1 until every one of the NUM_CONDS events has been matched.
        const int expected_level = win < NUM_CONDS ? win + 1 : NUM_CONDS;
        EXPECT_EQ(column_result.get_data()[0], expected_level);

        agg_function->destroy(place);
        agg_function->destroy(place2);
    }
}
// Rows are added in descending timestamp order and the result is read directly from the
// state that received them (no merge step).
//
// Row i carries timestamp 2022-02-28 00:00:00 plus (NUM_CONDS - i) in the last time component
// and satisfies only event NUM_CONDS - i, so the earliest timestamp belongs to event 1 even
// though it is added last. For every window value win in [0, NUM_CONDS] the expected result
// is win + 1, capped at NUM_CONDS -- the same as for rows added in ascending order. Negative
// windows are not exercised.
TEST_F(VWindowFunnelTest, testMax4ReverseSortedNoMerge) {
    constexpr int NUM_CONDS = 4;

    // Builds a UInt8 flag column with NUM_CONDS rows whose only 1 sits at `hit_row`.
    const auto make_event_column = [](int hit_row) {
        auto flags = ColumnUInt8::create();
        for (int row = 0; row < NUM_CONDS; row++) {
            if (row == hit_row) {
                flags->insert(vectorized::Field::create_field<TYPE_BOOLEAN>(1));
            } else {
                flags->insert(vectorized::Field::create_field<TYPE_BOOLEAN>(0));
            }
        }
        return flags;
    };

    auto column_mode = ColumnString::create();
    auto column_timestamp = ColumnDateTimeV2::create();
    for (int i = 0; i < NUM_CONDS; i++) {
        column_mode->insert(vectorized::Field::create_field<TYPE_STRING>("mode"));

        VecDateTimeValue time_value;
        time_value.unchecked_set_time(2022, 2, 28, 0, 0, NUM_CONDS - i);
        auto dtv2 = time_value.to_datetime_v2();
        // The column copies the raw bytes of the value; the length argument is passed as 0
        // exactly as before.
        column_timestamp->insert_data(reinterpret_cast<const char*>(&dtv2), 0);
    }

    // Event k is satisfied by row NUM_CONDS - k only (event 1 by the last row, and so on).
    auto column_event1 = make_event_column(3);
    auto column_event2 = make_event_column(2);
    auto column_event3 = make_event_column(1);
    auto column_event4 = make_event_column(0);

    for (int win = 0; win < NUM_CONDS + 1; win++) {
        auto column_window = ColumnInt64::create();
        for (int i = 0; i < NUM_CONDS; i++) {
            column_window->insert(vectorized::Field::create_field<TYPE_BIGINT>(win));
        }

        std::unique_ptr<char[]> memory(new char[agg_function->size_of_data()]);
        AggregateDataPtr place = memory.get();
        agg_function->create(place);

        const IColumn* column[7] = {column_window.get(), column_mode.get(),
                                    column_timestamp.get(), column_event1.get(),
                                    column_event2.get(), column_event3.get(),
                                    column_event4.get()};
        for (int i = 0; i < NUM_CONDS; i++) {
            agg_function->add(place, column, i, arena);
        }

        LOG(INFO) << "win " << win;

        ColumnInt32 column_result;
        agg_function->insert_result_into(place, column_result);

        // win never goes below 0 in this loop, so the expectation needs no negative-window
        // case: it is win + 1 until every one of the NUM_CONDS events has been matched.
        const int expected_level = win < NUM_CONDS ? win + 1 : NUM_CONDS;
        EXPECT_EQ(column_result.get_data()[0], expected_level);

        agg_function->destroy(place);
    }
}
// Rows are added in descending timestamp order to one state, which is then merged into a
// second, freshly created state; the result is read from the merge target.
//
// Row i carries timestamp 2022-02-28 00:00:00 plus (NUM_CONDS - i) in the last time component
// and satisfies only event NUM_CONDS - i, so the earliest timestamp belongs to event 1 even
// though it is added last. For every window value win in [0, NUM_CONDS] the expected result
// is win + 1, capped at NUM_CONDS -- the same as for rows added in ascending order. Negative
// windows are not exercised.
TEST_F(VWindowFunnelTest, testMax4ReverseSortedMerge) {
    constexpr int NUM_CONDS = 4;

    // Builds a UInt8 flag column with NUM_CONDS rows whose only 1 sits at `hit_row`.
    const auto make_event_column = [](int hit_row) {
        auto flags = ColumnUInt8::create();
        for (int row = 0; row < NUM_CONDS; row++) {
            if (row == hit_row) {
                flags->insert(vectorized::Field::create_field<TYPE_BOOLEAN>(1));
            } else {
                flags->insert(vectorized::Field::create_field<TYPE_BOOLEAN>(0));
            }
        }
        return flags;
    };

    auto column_mode = ColumnString::create();
    auto column_timestamp = ColumnDateTimeV2::create();
    for (int i = 0; i < NUM_CONDS; i++) {
        column_mode->insert(vectorized::Field::create_field<TYPE_STRING>("mode"));

        VecDateTimeValue time_value;
        time_value.unchecked_set_time(2022, 2, 28, 0, 0, NUM_CONDS - i);
        auto dtv2 = time_value.to_datetime_v2();
        // The column copies the raw bytes of the value; the length argument is passed as 0
        // exactly as before.
        column_timestamp->insert_data(reinterpret_cast<const char*>(&dtv2), 0);
    }

    // Event k is satisfied by row NUM_CONDS - k only (event 1 by the last row, and so on).
    auto column_event1 = make_event_column(3);
    auto column_event2 = make_event_column(2);
    auto column_event3 = make_event_column(1);
    auto column_event4 = make_event_column(0);

    for (int win = 0; win < NUM_CONDS + 1; win++) {
        auto column_window = ColumnInt64::create();
        for (int i = 0; i < NUM_CONDS; i++) {
            column_window->insert(vectorized::Field::create_field<TYPE_BIGINT>(win));
        }

        // Source state: receives all rows.
        std::unique_ptr<char[]> memory(new char[agg_function->size_of_data()]);
        AggregateDataPtr place = memory.get();
        agg_function->create(place);

        const IColumn* column[7] = {column_window.get(), column_mode.get(),
                                    column_timestamp.get(), column_event1.get(),
                                    column_event2.get(), column_event3.get(),
                                    column_event4.get()};
        for (int i = 0; i < NUM_CONDS; i++) {
            agg_function->add(place, column, i, arena);
        }

        // Merge target: starts empty and only ever sees the source state through merge().
        std::unique_ptr<char[]> memory2(new char[agg_function->size_of_data()]);
        AggregateDataPtr place2 = memory2.get();
        agg_function->create(place2);
        agg_function->merge(place2, place, arena);

        ColumnInt32 column_result;
        agg_function->insert_result_into(place2, column_result);

        // win never goes below 0 in this loop, so the expectation needs no negative-window
        // case: it is win + 1 until every one of the NUM_CONDS events has been matched.
        const int expected_level = win < NUM_CONDS ? win + 1 : NUM_CONDS;
        EXPECT_EQ(column_result.get_data()[0], expected_level);

        agg_function->destroy(place);
        agg_function->destroy(place2);
    }
}
} // namespace doris::vectorized