// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <gtest/gtest-message.h>
#include <gtest/gtest-test-part.h>
#include "gtest/gtest_pred_impl.h"
#include "util/runtime_profile.h"
#include "vec/columns/column_array.h"
#include "vec/columns/column_const.h"
#include "vec/core/field.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_date.h"
#include "vec/data_types/data_type_date_time.h"
#include "vec/data_types/data_type_decimal.h"
#include "vec/data_types/data_type_map.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
#include "vec/data_types/data_type_string.h"
#include "vec/data_types/data_type_struct.h"
namespace doris::vectorized {
// Returns the nullable scalar data types shared by the hash tests below, in this order:
// DateTime, Date, Decimal(10, 2), DecimalV2(27, 9), Int128, Int64, String.
DataTypes create_scala_data_types() {
    // Wraps a nested type into its Nullable counterpart.
    const auto nullable_of = [](const DataTypePtr& nested) -> DataTypePtr {
        return std::make_shared<DataTypeNullable>(nested);
    };

    DataTypes scalar_types;
    scalar_types.push_back(nullable_of(std::make_shared<DataTypeDateTime>()));
    scalar_types.push_back(nullable_of(std::make_shared<DataTypeDate>()));
    scalar_types.push_back(nullable_of(vectorized::create_decimal(10, 2, false)));
    scalar_types.push_back(nullable_of(std::make_shared<DataTypeDecimalV2>(27, 9)));
    scalar_types.push_back(nullable_of(std::make_shared<DataTypeInt128>()));
    scalar_types.push_back(nullable_of(std::make_shared<DataTypeInt64>()));
    scalar_types.push_back(nullable_of(std::make_shared<DataTypeString>()));
    return scalar_types;
}
// Smoke test: for every nullable scalar element type, hashing a one-row constant ARRAY
// column built from the type's default value must not raise a fatal failure.
// The resulting hash values are printed, not asserted.
TEST(HashFuncTest, ArrayTypeTest) {
    // One-slot output buffers; the same slot is handed to every hash call in the loop.
    std::vector<uint64_t> xx_hash_vals(1);
    std::vector<uint32_t> crc_hash_vals(1);
    auto* __restrict xx_out = xx_hash_vals.data();
    auto* __restrict crc_out = crc_hash_vals.data();

    const DataTypes element_types = create_scala_data_types();
    for (const DataTypePtr& element_type : element_types) {
        const DataTypePtr array_type = std::make_shared<DataTypeArray>(element_type);
        const ColumnPtr const_col = array_type->create_column_const_with_default_value(1);
        // The column that is actually hashed is whatever unpack_if_const() hands back.
        const auto& hashed_col = unpack_if_const(const_col).first;

        // xxHash
        EXPECT_NO_FATAL_FAILURE(hashed_col->update_hashes_with_value(xx_out));
        std::cout << xx_out[0] << std::endl;

        // crcHash
        EXPECT_NO_FATAL_FAILURE(
                hashed_col->update_crcs_with_value(crc_out, PrimitiveType::TYPE_ARRAY, 1));
        std::cout << crc_out[0] << std::endl;
    }
}
// Micro-benchmark: fills an ARRAY<BIGINT> column with kRowCount rows of kElementsPerRow
// elements each, times one CRC hash pass over it and prints the measured value followed
// by "ns". Only the absence of fatal failures is checked.
TEST(HashFuncTest, ArraySimpleBenchmarkTest) {
    constexpr int kRowCount = 50;
    constexpr int kElementsPerRow = 10000;

    DataTypePtr element_type = std::make_shared<DataTypeInt64>();
    DataTypePtr array_type = std::make_shared<DataTypeArray>(element_type);
    MutableColumnPtr array_mutable_col = array_type->create_column();

    // Every row holds the same values 0 .. kElementsPerRow - 1, so the row is built once
    // and inserted kRowCount times instead of being rebuilt for each row.
    Array row_values;
    for (int i = 0; i < kElementsPerRow; ++i) {
        row_values.push_back(vectorized::Field::create_field<TYPE_BIGINT>(Int64(i)));
    }
    const auto row_field = vectorized::Field::create_field<TYPE_ARRAY>(row_values);
    for (int r = 0; r < kRowCount; ++r) {
        array_mutable_col->insert(row_field);
    }

    std::vector<uint32_t> crc_hash_vals(kRowCount);
    // Not named `time_t`, to avoid shadowing the standard typedef of that name.
    int64_t elapsed_ns = 0;
    {
        // The timer covers only this scope, i.e. the hash computation itself.
        SCOPED_RAW_TIMER(&elapsed_ns);
        EXPECT_NO_FATAL_FAILURE(array_mutable_col->update_crcs_with_value(
                crc_hash_vals.data(), PrimitiveType::TYPE_ARRAY, kRowCount));
    }
    std::cout << elapsed_ns << "ns" << std::endl;
}
// Checks that hashing an ARRAY<ARRAY<BIGINT>> column is sensitive to the order of the
// inner arrays: rows holding the same inner arrays in a different order must hash
// differently, for both the xxHash and the CRC variants.
TEST(HashFuncTest, ArrayNestedArrayTest) {
    DataTypePtr element_type = std::make_shared<DataTypeInt64>();
    DataTypePtr nested_array_ptr = std::make_shared<DataTypeArray>(element_type);
    DataTypePtr array_ptr = std::make_shared<DataTypeArray>(nested_array_ptr);
    MutableColumnPtr array_mutable_col = array_ptr->create_column();

    Array a, a1, a2, a3, nested, nested1;
    nested.push_back(vectorized::Field::create_field<TYPE_BIGINT>(Int64(1)));
    nested1.push_back(vectorized::Field::create_field<TYPE_BIGINT>(Int64(2)));
    // a: [[1], [2]]
    a.push_back(vectorized::Field::create_field<TYPE_ARRAY>(nested));
    a.push_back(vectorized::Field::create_field<TYPE_ARRAY>(nested1));
    // a1: [[2], [1]]
    a1.push_back(vectorized::Field::create_field<TYPE_ARRAY>(nested1));
    a1.push_back(vectorized::Field::create_field<TYPE_ARRAY>(nested));
    // a2: [[], [1]]
    a2.push_back(vectorized::Field::create_field<TYPE_ARRAY>(Array()));
    a2.push_back(vectorized::Field::create_field<TYPE_ARRAY>(nested));
    // a3: [[1], []]
    a3.push_back(vectorized::Field::create_field<TYPE_ARRAY>(nested));
    a3.push_back(vectorized::Field::create_field<TYPE_ARRAY>(Array()));

    array_mutable_col->insert(vectorized::Field::create_field<TYPE_ARRAY>(a));
    array_mutable_col->insert(vectorized::Field::create_field<TYPE_ARRAY>(a1));
    array_mutable_col->insert(vectorized::Field::create_field<TYPE_ARRAY>(a2));
    array_mutable_col->insert(vectorized::Field::create_field<TYPE_ARRAY>(a3));

    // The column was created from an array type, so downcast with static_cast (a proper
    // base-to-derived conversion) rather than reinterpret_cast.
    auto nested_col =
            static_cast<vectorized::ColumnArray*>(array_mutable_col.get())->get_data_ptr();
    // 4 rows with 2 inner arrays each.
    EXPECT_EQ(nested_col->size(), 8);

    std::vector<uint64_t> xx_hash_vals(4);
    std::vector<uint32_t> crc_hash_vals(4);
    auto* __restrict xx_hashes = xx_hash_vals.data();
    auto* __restrict crc_hashes = crc_hash_vals.data();

    // xxHash
    EXPECT_NO_FATAL_FAILURE(array_mutable_col->update_hashes_with_value(xx_hashes));
    EXPECT_NE(xx_hashes[0], xx_hashes[1]);
    EXPECT_NE(xx_hashes[2], xx_hashes[3]);

    // crcHash
    EXPECT_NO_FATAL_FAILURE(
            array_mutable_col->update_crcs_with_value(crc_hashes, PrimitiveType::TYPE_ARRAY, 4));
    EXPECT_NE(crc_hashes[0], crc_hashes[1]);
    EXPECT_NE(crc_hashes[2], crc_hashes[3]);
}
// Checks that, in an ARRAY<BIGINT> column, rows with identical content hash to the same
// value and rows with different content hash to different values, for both the xxHash
// and the CRC variants.
TEST(HashFuncTest, ArrayCornerCaseTest) {
    DataTypePtr d = std::make_shared<DataTypeInt64>();
    DataTypePtr a = std::make_shared<DataTypeArray>(d);
    MutableColumnPtr array_mutable_col = a->create_column();

    // Rows 0 and 1: [1, 2, 3]
    Array a1, a2;
    a1.push_back(vectorized::Field::create_field<TYPE_BIGINT>(Int64(1)));
    a1.push_back(vectorized::Field::create_field<TYPE_BIGINT>(Int64(2)));
    a1.push_back(vectorized::Field::create_field<TYPE_BIGINT>(Int64(3)));
    array_mutable_col->insert(vectorized::Field::create_field<TYPE_ARRAY>(a1));
    array_mutable_col->insert(vectorized::Field::create_field<TYPE_ARRAY>(a1));
    // Row 2: [11, 12, 13]
    a2.push_back(vectorized::Field::create_field<TYPE_BIGINT>(Int64(11)));
    a2.push_back(vectorized::Field::create_field<TYPE_BIGINT>(Int64(12)));
    a2.push_back(vectorized::Field::create_field<TYPE_BIGINT>(Int64(13)));
    array_mutable_col->insert(vectorized::Field::create_field<TYPE_ARRAY>(a2));
    EXPECT_EQ(array_mutable_col->size(), 3);

    std::vector<uint64_t> xx_hash_vals(3);
    std::vector<uint32_t> crc_hash_vals(3);
    auto* __restrict xx_hashes = xx_hash_vals.data();
    auto* __restrict crc_hashes = crc_hash_vals.data();

    // xxHash
    EXPECT_NO_FATAL_FAILURE(array_mutable_col->update_hashes_with_value(xx_hashes));
    EXPECT_EQ(xx_hashes[0], xx_hashes[1]);
    EXPECT_NE(xx_hashes[0], xx_hashes[2]);

    // crcHash
    EXPECT_NO_FATAL_FAILURE(array_mutable_col->update_crcs_with_value(
            crc_hashes, PrimitiveType::TYPE_ARRAY, array_mutable_col->size()));
    EXPECT_EQ(crc_hashes[0], crc_hashes[1]);
    // Compare the CRC values here; re-checking the xxHash values in this section would
    // leave the CRC inequality of distinct rows untested.
    EXPECT_NE(crc_hashes[0], crc_hashes[2]);
}
// Smoke test: for every adjacent (key, value) pair of the nullable scalar types, hashing
// a one-row constant MAP column built from the type's default value must not raise a
// fatal failure. The resulting hash values are printed, not asserted.
TEST(HashFuncTest, MapTypeTest) {
    DataTypes dataTypes = create_scala_data_types();
    // One-slot output buffers; the same slot is handed to every hash call in the loop.
    std::vector<uint64_t> xx_hash_vals(1);
    std::vector<uint32_t> crc_hash_vals(1);
    auto* __restrict xx_hashes = xx_hash_vals.data();
    auto* __restrict crc_hashes = crc_hash_vals.data();
    // data_type_map
    // `i + 1 < size()` avoids both the signed/unsigned comparison and the unsigned
    // wrap-around that `size() - 1` would produce for an empty list.
    for (size_t i = 0; i + 1 < dataTypes.size(); ++i) {
        DataTypePtr a = std::make_shared<DataTypeMap>(dataTypes[i], dataTypes[i + 1]);
        ColumnPtr col_a = a->create_column_const_with_default_value(1);
        // xxHash
        EXPECT_NO_FATAL_FAILURE(unpack_if_const(col_a).first->update_hashes_with_value(xx_hashes));
        std::cout << xx_hashes[0] << std::endl;
        // crcHash
        EXPECT_NO_FATAL_FAILURE(unpack_if_const(col_a).first->update_crcs_with_value(
                crc_hashes, PrimitiveType::TYPE_MAP, 1));
        std::cout << crc_hashes[0] << std::endl;
    }
}
// Smoke test: hashing a one-row constant STRUCT column whose fields are all the nullable
// scalar types (default-valued) must not raise a fatal failure.
// The resulting hash values are printed, not asserted.
TEST(HashFuncTest, StructTypeTest) {
    const DataTypes field_types = create_scala_data_types();

    std::vector<uint64_t> xx_hash_vals(1);
    std::vector<uint32_t> crc_hash_vals(1);
    auto* __restrict xx_out = xx_hash_vals.data();
    auto* __restrict crc_out = crc_hash_vals.data();

    // data_type_struct
    const DataTypePtr struct_type = std::make_shared<DataTypeStruct>(field_types);
    const ColumnPtr const_col = struct_type->create_column_const_with_default_value(1);
    // The column that is actually hashed is whatever unpack_if_const() hands back.
    const auto& hashed_col = unpack_if_const(const_col).first;

    // xxHash
    EXPECT_NO_FATAL_FAILURE(hashed_col->update_hashes_with_value(xx_out));
    std::cout << xx_out[0] << std::endl;

    // crcHash
    EXPECT_NO_FATAL_FAILURE(
            hashed_col->update_crcs_with_value(crc_out, PrimitiveType::TYPE_STRUCT, 1));
    std::cout << crc_out[0] << std::endl;
}
// Checks that the CRC hash of a STRUCT<BIGINT, STRING> row holding (1, "hello") is
// repeatable: starting from a zero seed, 100 further computations must all match the
// first result.
TEST(HashFuncTest, StructTypeTestWithSepcificValueCrcHash) {
    DataTypes field_types {std::make_shared<DataTypeInt64>(),
                           std::make_shared<DataTypeString>()};

    Tuple row;
    row.push_back(vectorized::Field::create_field<TYPE_BIGINT>(Int64(1)));
    row.push_back(vectorized::Field::create_field<TYPE_STRING>("hello"));

    DataTypePtr struct_type = std::make_shared<DataTypeStruct>(field_types);
    std::cout << struct_type->get_name() << std::endl;

    MutableColumnPtr struct_mutable_col = struct_type->create_column();
    struct_mutable_col->insert(vectorized::Field::create_field<TYPE_STRUCT>(row));

    // Reference value, computed once over row range [0, 1).
    uint32_t reference_hash = 0;
    struct_mutable_col->update_crc_with_value(0, 1, reference_hash, nullptr);

    int remaining = 100;
    while (remaining-- > 0) {
        uint32_t repeated_hash = 0;
        struct_mutable_col->update_crc_with_value(0, 1, repeated_hash, nullptr);
        EXPECT_EQ(reference_hash, repeated_hash);
    }
}
} // namespace doris::vectorized