// blob: f4efdb40e7afc3d18158ab8039ba73a869cf61ce [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "olap/push_handler.h"
#include <gtest/gtest.h>
#include "exprs/cast_functions.h"
#include "gen_cpp/Descriptors_types.h"
#include "gen_cpp/PlanNodes_types.h"
#include "olap/row.h"
#include "runtime/descriptors.h"
#include "runtime/exec_env.h"
#include "runtime/user_function_cache.h"
namespace doris {
class PushHandlerTest : public testing::Test {
public:
PushHandlerTest() { init(); }
static void SetUpTestCase() {
UserFunctionCache::instance()->init(
"./be/test/runtime/test_data/user_function_cache/normal");
CastFunctions::init();
}
protected:
virtual void SetUp() {}
virtual void TearDown() {}
private:
void init();
Schema create_schema();
int create_src_tuple(TDescriptorTable& t_desc_table, int next_slot_id);
int create_dst_tuple(TDescriptorTable& t_desc_table, int next_slot_id);
void create_expr_info();
TDescriptorTable init_desc_table();
TDescriptorTable _t_desc_table;
TBrokerScanRangeParams _params;
};
// Builds the four-column schema used by the test: INT, SMALLINT and VARCHAR
// columns without aggregation, followed by a BIGINT column aggregated with SUM.
// NOTE(review): the trailing `true` presumably marks each column nullable and
// the `3` presumably is the number of key columns -- confirm against
// TabletColumn / Schema.
Schema PushHandlerTest::create_schema() {
    std::vector<TabletColumn> tablet_columns;
    // Column order must match the column order of the destination tuple.
    tablet_columns.emplace_back(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_INT, true);
    tablet_columns.emplace_back(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_SMALLINT, true);
    tablet_columns.emplace_back(OLAP_FIELD_AGGREGATION_NONE, OLAP_FIELD_TYPE_VARCHAR, true);
    tablet_columns.emplace_back(OLAP_FIELD_AGGREGATION_SUM, OLAP_FIELD_TYPE_BIGINT, true);
    return Schema(tablet_columns, 3);
}
// Ids of the destination and source tuple descriptors.
constexpr int TUPLE_ID_DST = 0;
constexpr int TUPLE_ID_SRC = 1;
// Number of columns (and therefore slots) in each tuple.
constexpr int COLUMN_NUMBERS = 4;
// First slot id of the destination tuple and of the source tuple.
constexpr int DST_TUPLE_SLOT_ID_START = 1;
constexpr int SRC_TUPLE_SLOT_ID_START = 5;
// The destination slots are numbered first and the source slots continue
// right after them, so the two ranges must be adjacent.
static_assert(SRC_TUPLE_SLOT_ID_START == DST_TUPLE_SLOT_ID_START + COLUMN_NUMBERS,
              "source slot ids must directly follow the destination slot ids");
// Appends the source tuple to t_desc_table: COLUMN_NUMBERS slots, all typed
// VARCHAR(65535), followed by the tuple descriptor with id TUPLE_ID_SRC.
//
// Slot ids are taken consecutively starting at next_slot_id; the return value
// is the first id that was not consumed.
int PushHandlerTest::create_src_tuple(TDescriptorTable& t_desc_table, int next_slot_id) {
    const char* src_column_names[] = {"k1_int", "k2_smallint", "k3_varchar", "v_bigint"};

    // Every source slot has the same type, so build it once up front.
    TScalarType varchar_scalar;
    varchar_scalar.__set_type(TPrimitiveType::VARCHAR);
    varchar_scalar.__set_len(65535);
    TTypeNode varchar_node;
    varchar_node.__set_type(TTypeNodeType::SCALAR);
    varchar_node.__set_scalar_type(varchar_scalar);
    TTypeDesc varchar_type;
    varchar_type.types.push_back(varchar_node);

    for (int col = 0; col < COLUMN_NUMBERS; ++col) {
        TSlotDescriptor src_slot;
        src_slot.id = next_slot_id++;
        src_slot.parent = TUPLE_ID_SRC;
        src_slot.slotType = varchar_type;
        src_slot.columnPos = col;
        // Slots are 16 bytes apart and start after a leading 8 bytes (for null).
        src_slot.byteOffset = col * 16 + 8;
        src_slot.nullIndicatorBit = col % 8;
        src_slot.colName = src_column_names[col];
        src_slot.slotIdx = col + 1;
        src_slot.isMaterialized = true;
        t_desc_table.slotDescriptors.push_back(src_slot);
    }

    // Tuple descriptor for the source tuple.
    TTupleDescriptor src_tuple;
    src_tuple.id = TUPLE_ID_SRC;
    src_tuple.byteSize = COLUMN_NUMBERS * 16 + 8; // 8 bytes for null
    src_tuple.numNullBytes = 1;
    src_tuple.tableId = 0;
    src_tuple.__isset.tableId = true;
    t_desc_table.tupleDescriptors.push_back(src_tuple);

    return next_slot_id;
}
// Appends the destination tuple to t_desc_table: four slots (INT, SMALLINT,
// VARCHAR(65535), BIGINT) followed by the tuple descriptor with id
// TUPLE_ID_DST, and marks the slot descriptor list as set.
//
// Slot ids are taken consecutively starting at next_slot_id; the return value
// is the first id that was not consumed.
int PushHandlerTest::create_dst_tuple(TDescriptorTable& t_desc_table, int next_slot_id) {
    // Appends one materialized destination slot. Only the scalar type, the
    // byte offset, the null bit, the slot index and the name differ per column.
    auto add_slot = [&t_desc_table, &next_slot_id](const TScalarType& scalar, int byte_offset,
                                                   int null_bit, int slot_index,
                                                   const char* column_name) {
        TTypeNode type_node;
        type_node.__set_type(TTypeNodeType::SCALAR);
        type_node.__set_scalar_type(scalar);
        TTypeDesc slot_type;
        slot_type.types.push_back(type_node);

        TSlotDescriptor dst_slot;
        dst_slot.id = next_slot_id++;
        dst_slot.parent = TUPLE_ID_DST;
        dst_slot.slotType = slot_type;
        dst_slot.columnPos = -1;
        dst_slot.byteOffset = byte_offset;
        dst_slot.nullIndicatorByte = 0;
        dst_slot.nullIndicatorBit = null_bit;
        dst_slot.colName = column_name;
        dst_slot.slotIdx = slot_index;
        dst_slot.isMaterialized = true;
        t_desc_table.slotDescriptors.push_back(dst_slot);
    };

    // Slot ids follow the column order below; byte offsets, null bits and
    // slot indexes are given explicitly and do not follow that order.
    TScalarType int_scalar;
    int_scalar.__set_type(TPrimitiveType::INT);
    add_slot(int_scalar, 4, 1, 1, "k1_int");

    TScalarType smallint_scalar;
    smallint_scalar.__set_type(TPrimitiveType::SMALLINT);
    add_slot(smallint_scalar, 2, 0, 0, "k2_smallint");

    TScalarType varchar_scalar;
    varchar_scalar.__set_type(TPrimitiveType::VARCHAR);
    varchar_scalar.__set_len(65535);
    add_slot(varchar_scalar, 16, 3, 3, "k3_varchar");

    TScalarType bigint_scalar;
    bigint_scalar.__set_type(TPrimitiveType::BIGINT);
    add_slot(bigint_scalar, 8, 2, 2, "v_bigint");

    t_desc_table.__isset.slotDescriptors = true;

    // Tuple descriptor for the destination tuple.
    TTupleDescriptor dst_tuple;
    dst_tuple.id = TUPLE_ID_DST;
    dst_tuple.byteSize = 32;
    dst_tuple.numNullBytes = 1;
    dst_tuple.tableId = 0;
    dst_tuple.__isset.tableId = true;
    t_desc_table.tupleDescriptors.push_back(dst_tuple);

    return next_slot_id;
}
// Builds the descriptor table used by the test: the destination tuple first
// (slot ids starting at 1), then the source tuple, whose slot ids continue
// where the destination slots stopped.
TDescriptorTable PushHandlerTest::init_desc_table() {
    TDescriptorTable desc_table;
    const int first_src_slot_id = create_dst_tuple(desc_table, 1);
    // The id returned after the source slots is not needed by anyone.
    create_src_tuple(desc_table, first_src_slot_id);
    return desc_table;
}
void PushHandlerTest::create_expr_info() {
TTypeDesc varchar_type;
{
TTypeNode node;
node.__set_type(TTypeNodeType::SCALAR);
TScalarType scalar_type;
scalar_type.__set_type(TPrimitiveType::VARCHAR);
scalar_type.__set_len(5000);
node.__set_scalar_type(scalar_type);
varchar_type.types.push_back(node);
}
// k1_int VARCHAR --> INT
{
TTypeDesc int_type;
{
TTypeNode node;
node.__set_type(TTypeNodeType::SCALAR);
TScalarType scalar_type;
scalar_type.__set_type(TPrimitiveType::INT);
node.__set_scalar_type(scalar_type);
int_type.types.push_back(node);
}
TExprNode cast_expr;
cast_expr.node_type = TExprNodeType::CAST_EXPR;
cast_expr.type = int_type;
cast_expr.__set_opcode(TExprOpcode::CAST);
cast_expr.__set_num_children(1);
cast_expr.__set_output_scale(-1);
cast_expr.__isset.fn = true;
cast_expr.fn.name.function_name = "casttoint";
cast_expr.fn.binary_type = TFunctionBinaryType::BUILTIN;
cast_expr.fn.arg_types.push_back(varchar_type);
cast_expr.fn.ret_type = int_type;
cast_expr.fn.has_var_args = false;
cast_expr.fn.__set_signature("casttoint(VARCHAR(*))");
cast_expr.fn.__isset.scalar_fn = true;
cast_expr.fn.scalar_fn.symbol = "doris::CastFunctions::cast_to_int_val";
TExprNode slot_ref;
slot_ref.node_type = TExprNodeType::SLOT_REF;
slot_ref.type = varchar_type;
slot_ref.num_children = 0;
slot_ref.__isset.slot_ref = true;
slot_ref.slot_ref.slot_id = SRC_TUPLE_SLOT_ID_START;
slot_ref.slot_ref.tuple_id = 1;
TExpr expr;
expr.nodes.push_back(cast_expr);
expr.nodes.push_back(slot_ref);
_params.expr_of_dest_slot.emplace(DST_TUPLE_SLOT_ID_START, expr);
_params.src_slot_ids.push_back(SRC_TUPLE_SLOT_ID_START);
}
// k2_smallint VARCHAR --> SMALLINT
{
TTypeDesc smallint_type;
{
TTypeNode node;
node.__set_type(TTypeNodeType::SCALAR);
TScalarType scalar_type;
scalar_type.__set_type(TPrimitiveType::SMALLINT);
node.__set_scalar_type(scalar_type);
smallint_type.types.push_back(node);
}
TExprNode cast_expr;
cast_expr.node_type = TExprNodeType::CAST_EXPR;
cast_expr.type = smallint_type;
cast_expr.__set_opcode(TExprOpcode::CAST);
cast_expr.__set_num_children(1);
cast_expr.__set_output_scale(-1);
cast_expr.__isset.fn = true;
cast_expr.fn.name.function_name = "casttosmallint";
cast_expr.fn.binary_type = TFunctionBinaryType::BUILTIN;
cast_expr.fn.arg_types.push_back(varchar_type);
cast_expr.fn.ret_type = smallint_type;
cast_expr.fn.has_var_args = false;
cast_expr.fn.__set_signature("casttosmallint(VARCHAR(*))");
cast_expr.fn.__isset.scalar_fn = true;
cast_expr.fn.scalar_fn.symbol = "doris::CastFunctions::cast_to_small_int_val";
TExprNode slot_ref;
slot_ref.node_type = TExprNodeType::SLOT_REF;
slot_ref.type = varchar_type;
slot_ref.num_children = 0;
slot_ref.__isset.slot_ref = true;
slot_ref.slot_ref.slot_id = SRC_TUPLE_SLOT_ID_START + 1;
slot_ref.slot_ref.tuple_id = 1;
TExpr expr;
expr.nodes.push_back(cast_expr);
expr.nodes.push_back(slot_ref);
_params.expr_of_dest_slot.emplace(DST_TUPLE_SLOT_ID_START + 1, expr);
_params.src_slot_ids.push_back(SRC_TUPLE_SLOT_ID_START + 1);
}
// k3_varchar VARCHAR --> VARCHAR
{
TExprNode slot_ref;
slot_ref.node_type = TExprNodeType::SLOT_REF;
slot_ref.type = varchar_type;
slot_ref.num_children = 0;
slot_ref.__isset.slot_ref = true;
slot_ref.slot_ref.slot_id = SRC_TUPLE_SLOT_ID_START + 2;
slot_ref.slot_ref.tuple_id = 1;
TExpr expr;
expr.nodes.push_back(slot_ref);
_params.expr_of_dest_slot.emplace(DST_TUPLE_SLOT_ID_START + 2, expr);
_params.src_slot_ids.push_back(SRC_TUPLE_SLOT_ID_START + 2);
}
// v_bigint VARCHAR --> BIGINT
{
TTypeDesc bigint_type;
{
TTypeNode node;
node.__set_type(TTypeNodeType::SCALAR);
TScalarType scalar_type;
scalar_type.__set_type(TPrimitiveType::BIGINT);
node.__set_scalar_type(scalar_type);
bigint_type.types.push_back(node);
}
TExprNode cast_expr;
cast_expr.node_type = TExprNodeType::CAST_EXPR;
cast_expr.type = bigint_type;
cast_expr.__set_opcode(TExprOpcode::CAST);
cast_expr.__set_num_children(1);
cast_expr.__set_output_scale(-1);
cast_expr.__isset.fn = true;
cast_expr.fn.name.function_name = "casttobigint";
cast_expr.fn.binary_type = TFunctionBinaryType::BUILTIN;
cast_expr.fn.arg_types.push_back(varchar_type);
cast_expr.fn.ret_type = bigint_type;
cast_expr.fn.has_var_args = false;
cast_expr.fn.__set_signature("casttobigint(VARCHAR(*))");
cast_expr.fn.__isset.scalar_fn = true;
cast_expr.fn.scalar_fn.symbol = "doris::CastFunctions::cast_to_big_int_val";
TExprNode slot_ref;
slot_ref.node_type = TExprNodeType::SLOT_REF;
slot_ref.type = varchar_type;
slot_ref.num_children = 0;
slot_ref.__isset.slot_ref = true;
slot_ref.slot_ref.slot_id = SRC_TUPLE_SLOT_ID_START + 3;
slot_ref.slot_ref.tuple_id = 1;
TExpr expr;
expr.nodes.push_back(cast_expr);
expr.nodes.push_back(slot_ref);
_params.expr_of_dest_slot.emplace(DST_TUPLE_SLOT_ID_START + 3, expr);
_params.src_slot_ids.push_back(SRC_TUPLE_SLOT_ID_START + 3);
}
_params.__set_dest_tuple_id(TUPLE_ID_DST);
_params.__set_src_tuple_id(TUPLE_ID_SRC);
}
void PushHandlerTest::init() {
create_expr_info();
_t_desc_table = init_desc_table();
}
// Reads a local parquet file through PushBrokerReader and checks that the
// three rows it contains come back with the expected values, followed by eof.
TEST_F(PushHandlerTest, PushBrokerReaderNormal) {
    // Describe a single, non-splittable local parquet file read from offset 0.
    TBrokerRangeDesc file_range;
    file_range.start_offset = 0;
    file_range.size = -1;
    file_range.format_type = TFileFormatType::FORMAT_PARQUET;
    file_range.splittable = false;
    file_range.path = "./be/test/olap/test_data/push_broker_reader.parquet";
    file_range.file_type = TFileType::FILE_LOCAL;

    TBrokerScanRange broker_scan_range;
    broker_scan_range.params = _params;
    broker_scan_range.ranges.push_back(file_range);

    // NOTE(review): this allocation is never released by the test, and the
    // results of reader.init()/reader.next() below are not checked -- confirm
    // whether that is intentional.
    ExecEnv::GetInstance()->_thread_mgr = new ThreadResourceMgr();

    Schema schema = create_schema();

    // Expected file content, in read order:
    // k1_int k2_smallint varchar bigint
    // 0 0 a0 0
    // 0 2 a1 3
    // 1 4 a2 6
    struct ExpectedRow {
        int32_t k1_int;
        int16_t k2_smallint;
        const char* k3_varchar;
        int64_t v_bigint;
    };
    const ExpectedRow expected_rows[] = {
            {0, 0, "a0", 0},
            {0, 2, "a1", 3},
            {1, 4, "a2", 6},
    };

    PushBrokerReader reader;
    reader.init(&schema, broker_scan_range, _t_desc_table);
    uint8_t* tuple_buf = reader.mem_pool()->allocate(schema.schema_size());
    ContiguousRow row(&schema, tuple_buf);

    for (const ExpectedRow& expected : expected_rows) {
        reader.next(&row);
        ASSERT_FALSE(reader.eof());
        ASSERT_EQ(expected.k1_int, *(int32_t*)row.cell(0).cell_ptr());
        ASSERT_EQ(expected.k2_smallint, *(int16_t*)row.cell(1).cell_ptr());
        ASSERT_EQ(expected.k3_varchar, ((Slice*)row.cell(2).cell_ptr())->to_string());
        ASSERT_EQ(expected.v_bigint, *(int64_t*)row.cell(3).cell_ptr());
    }

    // One more read past the last row must report eof.
    reader.next(&row);
    ASSERT_TRUE(reader.eof());
    reader.close();
}
} // namespace doris
// Test binary entry point: lets GoogleTest consume its command-line flags,
// initializes CpuInfo, then runs every registered test and returns the
// GoogleTest result as the process exit code.
int main(int argc, char** argv) {
    ::testing::InitGoogleTest(&argc, argv);
    // NOTE(review): CpuInfo is referenced unqualified although this function
    // sits outside namespace doris -- confirm how the name resolves.
    CpuInfo::init();
    const int exit_code = RUN_ALL_TESTS();
    return exit_code;
}