| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include "olap/block_column_predicate.h" |
| |
| #include <gen_cpp/olap_file.pb.h> |
| #include <gtest/gtest-message.h> |
| #include <gtest/gtest-test-part.h> |
| |
| #include <boost/iterator/iterator_facade.hpp> |
| #include <cmath> |
| #include <limits> |
| #include <memory> |
| #include <string> |
| #include <type_traits> |
| #include <utility> |
| #include <vector> |
| |
| #include "common/status.h" |
| #include "exprs/hybrid_set.h" |
| #include "gtest/gtest_pred_impl.h" |
| #include "olap/column_predicate.h" |
| #include "olap/comparison_predicate.h" |
| #include "olap/in_list_predicate.h" |
| #include "olap/null_predicate.h" |
| #include "olap/tablet_schema.h" |
| #include "runtime/define_primitive_type.h" |
| #include "runtime/type_limit.h" |
| #include "vec/columns/column.h" |
| #include "vec/columns/predicate_column.h" |
| #include "vec/core/field.h" |
| #include "vec/exec/format/parquet/parquet_block_split_bloom_filter.h" |
| #include "vec/exec/format/parquet/vparquet_reader.h" |
| #include "vec/runtime/timestamptz_value.h" |
| |
| namespace doris { |
| |
| class BlockColumnPredicateTest : public testing::Test { |
| public: |
| BlockColumnPredicateTest() = default; |
| |
| ~BlockColumnPredicateTest() = default; |
| |
| void SetTabletSchema(std::string name, const std::string& type, const std::string& aggregation, |
| uint32_t length, bool is_allow_null, bool is_key, |
| TabletSchemaSPtr tablet_schema) { |
| TabletSchemaPB tablet_schema_pb; |
| static int id = 0; |
| ColumnPB* column = tablet_schema_pb.add_column(); |
| column->set_unique_id(++id); |
| column->set_name(name); |
| column->set_type(type); |
| column->set_is_key(is_key); |
| column->set_is_nullable(is_allow_null); |
| column->set_length(length); |
| column->set_aggregation(aggregation); |
| column->set_precision(1000); |
| column->set_frac(1000); |
| column->set_is_bf_column(false); |
| tablet_schema->init_from_pb(tablet_schema_pb); |
| } |
| }; |
| |
| TEST_F(BlockColumnPredicateTest, SINGLE_COLUMN_VEC) { |
| vectorized::MutableColumns block; |
| block.push_back(vectorized::PredicateColumnType<TYPE_INT>::create()); |
| |
| int value = 5; |
| int rows = 10; |
| int col_idx = 0; |
| std::shared_ptr<ColumnPredicate> pred( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::EQ>(col_idx, value)); |
| SingleColumnBlockPredicate single_column_block_pred(pred); |
| |
| std::vector<uint16_t> sel_idx(rows); |
| uint16_t selected_size = rows; |
| block[col_idx]->reserve(rows); |
| for (int i = 0; i < rows; i++) { |
| int* int_ptr = &i; |
| block[col_idx]->insert_data((char*)int_ptr, 0); |
| sel_idx[i] = i; |
| } |
| |
| selected_size = single_column_block_pred.evaluate(block, sel_idx.data(), selected_size); |
| EXPECT_EQ(selected_size, 1); |
| auto* pred_col = |
| reinterpret_cast<vectorized::PredicateColumnType<TYPE_INT>*>(block[col_idx].get()); |
| EXPECT_EQ(pred_col->get_data()[sel_idx[0]], value); |
| } |
| |
| TEST_F(BlockColumnPredicateTest, AND_MUTI_COLUMN_VEC) { |
| vectorized::MutableColumns block; |
| block.push_back(vectorized::PredicateColumnType<TYPE_INT>::create()); |
| |
| int less_value = 5; |
| int great_value = 3; |
| int rows = 10; |
| int col_idx = 0; |
| std::shared_ptr<ColumnPredicate> less_pred( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::LT>(col_idx, less_value)); |
| std::shared_ptr<ColumnPredicate> great_pred( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::GT>(col_idx, great_value)); |
| auto single_less_pred = SingleColumnBlockPredicate::create_unique(less_pred); |
| auto single_great_pred = SingleColumnBlockPredicate::create_unique(great_pred); |
| |
| AndBlockColumnPredicate and_block_column_pred; |
| and_block_column_pred.add_column_predicate(std::move(single_less_pred)); |
| and_block_column_pred.add_column_predicate(std::move(single_great_pred)); |
| |
| std::vector<uint16_t> sel_idx(rows); |
| uint16_t selected_size = rows; |
| block[col_idx]->reserve(rows); |
| for (int i = 0; i < rows; i++) { |
| int* int_ptr = &i; |
| block[col_idx]->insert_data((char*)int_ptr, 0); |
| sel_idx[i] = i; |
| } |
| |
| selected_size = and_block_column_pred.evaluate(block, sel_idx.data(), selected_size); |
| EXPECT_EQ(selected_size, 1); |
| auto* pred_col = |
| reinterpret_cast<vectorized::PredicateColumnType<TYPE_INT>*>(block[col_idx].get()); |
| EXPECT_EQ(pred_col->get_data()[sel_idx[0]], 4); |
| } |
| |
| TEST_F(BlockColumnPredicateTest, OR_MUTI_COLUMN_VEC) { |
| vectorized::MutableColumns block; |
| block.push_back(vectorized::PredicateColumnType<TYPE_INT>::create()); |
| |
| int less_value = 5; |
| int great_value = 3; |
| int rows = 10; |
| int col_idx = 0; |
| std::shared_ptr<ColumnPredicate> less_pred( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::LT>(col_idx, less_value)); |
| std::shared_ptr<ColumnPredicate> great_pred( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::GT>(col_idx, great_value)); |
| auto single_less_pred = SingleColumnBlockPredicate::create_unique(less_pred); |
| auto single_great_pred = SingleColumnBlockPredicate::create_unique(great_pred); |
| |
| OrBlockColumnPredicate or_block_column_pred; |
| or_block_column_pred.add_column_predicate(std::move(single_less_pred)); |
| or_block_column_pred.add_column_predicate(std::move(single_great_pred)); |
| |
| std::vector<uint16_t> sel_idx(rows); |
| uint16_t selected_size = rows; |
| block[col_idx]->reserve(rows); |
| for (int i = 0; i < rows; i++) { |
| int* int_ptr = &i; |
| block[col_idx]->insert_data((char*)int_ptr, 0); |
| sel_idx[i] = i; |
| } |
| |
| selected_size = or_block_column_pred.evaluate(block, sel_idx.data(), selected_size); |
| EXPECT_EQ(selected_size, 10); |
| auto* pred_col = |
| reinterpret_cast<vectorized::PredicateColumnType<TYPE_INT>*>(block[col_idx].get()); |
| EXPECT_EQ(pred_col->get_data()[sel_idx[0]], 0); |
| } |
| |
| TEST_F(BlockColumnPredicateTest, OR_AND_MUTI_COLUMN_VEC) { |
| vectorized::MutableColumns block; |
| block.push_back(vectorized::PredicateColumnType<TYPE_INT>::create()); |
| |
| int less_value = 5; |
| int great_value = 3; |
| int rows = 10; |
| int col_idx = 0; |
| std::shared_ptr<ColumnPredicate> less_pred( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::LT>(0, less_value)); |
| std::shared_ptr<ColumnPredicate> great_pred( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::GT>(0, great_value)); |
| std::shared_ptr<ColumnPredicate> less_pred1( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::LT>(0, great_value)); |
| |
| // Test for and or single |
| // (column < 5 and column > 3) or column < 3 |
| auto and_block_column_pred = AndBlockColumnPredicate::create_unique(); |
| and_block_column_pred->add_column_predicate( |
| SingleColumnBlockPredicate::create_unique(less_pred)); |
| and_block_column_pred->add_column_predicate( |
| SingleColumnBlockPredicate::create_unique(great_pred)); |
| |
| OrBlockColumnPredicate or_block_column_pred; |
| or_block_column_pred.add_column_predicate(std::move(and_block_column_pred)); |
| or_block_column_pred.add_column_predicate( |
| SingleColumnBlockPredicate::create_unique(less_pred1)); |
| |
| std::vector<uint16_t> sel_idx(rows); |
| uint16_t selected_size = rows; |
| block[col_idx]->reserve(rows); |
| for (int i = 0; i < rows; i++) { |
| int* int_ptr = &i; |
| block[col_idx]->insert_data((char*)int_ptr, 0); |
| sel_idx[i] = i; |
| } |
| |
| selected_size = or_block_column_pred.evaluate(block, sel_idx.data(), selected_size); |
| EXPECT_EQ(selected_size, 4); |
| auto* pred_col = |
| reinterpret_cast<vectorized::PredicateColumnType<TYPE_INT>*>(block[col_idx].get()); |
| EXPECT_EQ(pred_col->get_data()[sel_idx[0]], 0); |
| EXPECT_EQ(pred_col->get_data()[sel_idx[1]], 1); |
| EXPECT_EQ(pred_col->get_data()[sel_idx[2]], 2); |
| EXPECT_EQ(pred_col->get_data()[sel_idx[3]], 4); |
| |
| // Test for single or and |
| // column < 3 or (column < 5 and column > 3) |
| auto and_block_column_pred1 = AndBlockColumnPredicate::create_unique(); |
| and_block_column_pred1->add_column_predicate( |
| SingleColumnBlockPredicate::create_unique(less_pred)); |
| and_block_column_pred1->add_column_predicate( |
| SingleColumnBlockPredicate::create_unique(great_pred)); |
| |
| OrBlockColumnPredicate or_block_column_pred1; |
| or_block_column_pred1.add_column_predicate( |
| SingleColumnBlockPredicate::create_unique(less_pred1)); |
| or_block_column_pred1.add_column_predicate(std::move(and_block_column_pred1)); |
| |
| selected_size = or_block_column_pred1.evaluate(block, sel_idx.data(), selected_size); |
| EXPECT_EQ(selected_size, 4); |
| EXPECT_EQ(pred_col->get_data()[sel_idx[0]], 0); |
| EXPECT_EQ(pred_col->get_data()[sel_idx[1]], 1); |
| EXPECT_EQ(pred_col->get_data()[sel_idx[2]], 2); |
| EXPECT_EQ(pred_col->get_data()[sel_idx[3]], 4); |
| } |
| |
| TEST_F(BlockColumnPredicateTest, AND_OR_MUTI_COLUMN_VEC) { |
| vectorized::MutableColumns block; |
| block.push_back(vectorized::PredicateColumnType<TYPE_INT>::create()); |
| |
| int less_value = 5; |
| int great_value = 3; |
| int rows = 10; |
| int col_idx = 0; |
| std::shared_ptr<ColumnPredicate> less_pred( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::LT>(0, less_value)); |
| std::shared_ptr<ColumnPredicate> great_pred( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::GT>(0, great_value)); |
| std::shared_ptr<ColumnPredicate> less_pred1( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::LT>(0, great_value)); |
| |
| // Test for and or single |
| // (column < 5 or column < 3) and column > 3 |
| auto or_block_column_pred = OrBlockColumnPredicate::create_unique(); |
| or_block_column_pred->add_column_predicate( |
| SingleColumnBlockPredicate::create_unique(less_pred)); |
| or_block_column_pred->add_column_predicate( |
| SingleColumnBlockPredicate::create_unique(less_pred1)); |
| |
| AndBlockColumnPredicate and_block_column_pred; |
| and_block_column_pred.add_column_predicate(std::move(or_block_column_pred)); |
| and_block_column_pred.add_column_predicate( |
| SingleColumnBlockPredicate::create_unique(great_pred)); |
| |
| std::vector<uint16_t> sel_idx(rows); |
| uint16_t selected_size = rows; |
| block[col_idx]->reserve(rows); |
| for (int i = 0; i < rows; i++) { |
| int* int_ptr = &i; |
| block[col_idx]->insert_data((char*)int_ptr, 0); |
| sel_idx[i] = i; |
| } |
| |
| selected_size = and_block_column_pred.evaluate(block, sel_idx.data(), selected_size); |
| |
| auto* pred_col = |
| reinterpret_cast<vectorized::PredicateColumnType<TYPE_INT>*>(block[col_idx].get()); |
| EXPECT_EQ(selected_size, 1); |
| EXPECT_EQ(pred_col->get_data()[sel_idx[0]], 4); |
| |
| // Test for single or and |
| // column > 3 and (column < 5 or column < 3) |
| auto or_block_column_pred1 = OrBlockColumnPredicate::create_unique(); |
| or_block_column_pred1->add_column_predicate( |
| SingleColumnBlockPredicate::create_unique(less_pred)); |
| or_block_column_pred1->add_column_predicate( |
| SingleColumnBlockPredicate::create_unique(less_pred1)); |
| |
| AndBlockColumnPredicate and_block_column_pred1; |
| and_block_column_pred1.add_column_predicate( |
| SingleColumnBlockPredicate::create_unique(great_pred)); |
| and_block_column_pred1.add_column_predicate(std::move(or_block_column_pred1)); |
| |
| EXPECT_EQ(selected_size, 1); |
| EXPECT_EQ(pred_col->get_data()[sel_idx[0]], 4); |
| } |
| |
| template <PrimitiveType T, PredicateType PT> |
| void single_column_predicate_test_func(const std::pair<WrapperField*, WrapperField*>& statistic, |
| typename PrimitiveTypeTraits<T>::CppType check_value, |
| bool expect_match) { |
| int col_idx = 0; |
| std::shared_ptr<ColumnPredicate> pred(new ComparisonPredicateBase<T, PT>(col_idx, check_value)); |
| SingleColumnBlockPredicate single_column_block_pred(pred); |
| |
| bool matched = single_column_block_pred.evaluate_and(statistic); |
| EXPECT_EQ(matched, expect_match); |
| } |
| |
| // test zonemap index |
| TEST_F(BlockColumnPredicateTest, test_double_single_column_predicate) { |
| FieldType type = FieldType::OLAP_FIELD_TYPE_DOUBLE; |
| std::unique_ptr<WrapperField> min_field(WrapperField::create_by_type(type, 0)); |
| std::unique_ptr<WrapperField> max_field(WrapperField::create_by_type(type, 0)); |
| static auto constexpr nan = std::numeric_limits<double>::quiet_NaN(); |
| static auto constexpr neg_inf = -std::numeric_limits<double>::infinity(); |
| static auto constexpr pos_inf = std::numeric_limits<double>::infinity(); |
| static auto constexpr min = std::numeric_limits<double>::lowest(); |
| static auto constexpr max = std::numeric_limits<double>::max(); |
| |
| // test normal value min max: |
| { |
| std::cout << "========test normal value min max\n"; |
| double zonemap_min_v = std::numeric_limits<float>::lowest(); |
| double zonemap_max_v = std::numeric_limits<float>::max(); |
| min_field->set_raw_value(&zonemap_min_v, sizeof(zonemap_min_v)); |
| max_field->set_raw_value(&zonemap_max_v, sizeof(zonemap_max_v)); |
| |
| // test NaN |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, nan, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, nan, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, nan, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, nan, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, nan, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, nan, true); |
| |
| // test +Infinity |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, pos_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, pos_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, pos_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| |
| // test -Infinity |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, neg_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, neg_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, neg_inf, false); |
| |
| std::vector<double> test_values_in_range = { |
| zonemap_min_v, zonemap_max_v, -123456.789012345, -0.0, 0.0, 123456.789012345, |
| }; |
| for (auto v : test_values_in_range) { |
| // test EQ |
| // std::cout << "test double EQ value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, v, true); |
| // test NE |
| // std::cout << "test double NE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // test LT |
| // std::cout << "test double LT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, v, v != zonemap_min_v); |
| |
| // test LE |
| // std::cout << "test double LE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // test GT |
| // std::cout << "test double GT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, v, v != zonemap_max_v); |
| |
| // test GE |
| // std::cout << "test double GE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, v, true); |
| } |
| |
| // test values out of zonemap range |
| { |
| double v = zonemap_min_v * 2; |
| // test EQ |
| // std::cout << "test double EQ value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, v, false); |
| // test NE |
| // std::cout << "test double NE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // test LT |
| // std::cout << "test double LT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, v, false); |
| |
| // test LE |
| // std::cout << "test double LE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, v, false); |
| |
| // test GT |
| // std::cout << "test double GT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // test GE |
| // std::cout << "test double GE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, v, true); |
| } |
| { |
| double v = zonemap_max_v * 2; |
| // test EQ |
| // std::cout << "test double EQ value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, v, false); |
| // test NE |
| // std::cout << "test double NE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // test LT |
| // std::cout << "test double LT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // test LE |
| // std::cout << "test double LE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // test GT |
| // std::cout << "test double GT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, v, false); |
| |
| // test GE |
| // std::cout << "test double GE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, v, false); |
| } |
| } |
| // test special range: [normal, +Infinity] |
| { |
| std::cout << "========test special range: [normal, +Infinity]\n"; |
| double zonemap_min_v = std::numeric_limits<float>::lowest(); |
| min_field->set_raw_value(&zonemap_min_v, sizeof(zonemap_min_v)); |
| max_field->set_raw_value(&pos_inf, sizeof(pos_inf)); |
| |
| // test NaN |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, nan, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, nan, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, nan, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, nan, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, nan, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, nan, true); |
| |
| // test +Infinity |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, pos_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| |
| // test -Infinity |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, neg_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, neg_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, neg_inf, false); |
| |
| std::vector<double> test_values_in_range = { |
| zonemap_min_v, max, pos_inf, -123456.789012345, -0.0, 0.0, 123456.789012345, |
| }; |
| for (auto v : test_values_in_range) { |
| // test EQ |
| // std::cout << "test double EQ value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, v, true); |
| // test NE |
| // std::cout << "test double NE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // test LT |
| // std::cout << "test double LT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, v, v != zonemap_min_v); |
| |
| // test LE |
| // std::cout << "test double LE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // test GT |
| // std::cout << "test double GT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, v, v != pos_inf); |
| |
| // test GE |
| // std::cout << "test double GE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, v, true); |
| } |
| |
| // test values out of zonemap range |
| { |
| double v = zonemap_min_v * 2; |
| // test EQ |
| // std::cout << "test double EQ value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, v, false); |
| // test NE |
| // std::cout << "test double NE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // test LT |
| // std::cout << "test double LT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, v, false); |
| |
| // test LE |
| // std::cout << "test double LE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, v, false); |
| |
| // test GT |
| // std::cout << "test double GT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // test GE |
| // std::cout << "test double GE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, v, true); |
| } |
| } |
| // test special range: [-Infinity, normal] |
| { |
| std::cout << "========test special range: [-Infinity, normal]\n"; |
| double zonemap_max_v = std::numeric_limits<float>::max(); |
| min_field->set_raw_value(&neg_inf, sizeof(neg_inf)); |
| max_field->set_raw_value(&zonemap_max_v, sizeof(zonemap_max_v)); |
| |
| // test NaN |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, nan, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, nan, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, nan, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, nan, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, nan, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, nan, true); |
| |
| // test +Infinity |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, pos_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, pos_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, pos_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| |
| // test -Infinity |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, neg_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| |
| std::vector<double> test_values_in_range = { |
| neg_inf, min, zonemap_max_v, -123456.789012345, -0.0, 0.0, 123456.789012345, |
| }; |
| for (auto v : test_values_in_range) { |
| // test EQ |
| // std::cout << "test double EQ value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, v, true); |
| // test NE |
| // std::cout << "test double NE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // test LT |
| // std::cout << "test double LT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, v, v != neg_inf); |
| |
| // test LE |
| // std::cout << "test double LE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // test GT |
| // std::cout << "test double GT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, v, v != zonemap_max_v); |
| |
| // test GE |
| // std::cout << "test double GE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, v, true); |
| } |
| // test values out of zonemap range |
| { |
| double v = zonemap_max_v * 2; |
| // test EQ |
| // std::cout << "test double EQ value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, v, false); |
| // test NE |
| // std::cout << "test double NE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // test LT |
| // std::cout << "test double LT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // test LE |
| // std::cout << "test double LE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // test GT |
| // std::cout << "test double GT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, v, false); |
| |
| // test GE |
| // std::cout << "test double GE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, v, false); |
| } |
| } |
| // test special range: [normal, NaN] |
| { |
| std::cout << "========test special range: [normal, NaN]\n"; |
| double zonemap_min_v = std::numeric_limits<float>::lowest(); |
| min_field->set_raw_value(&zonemap_min_v, sizeof(zonemap_min_v)); |
| max_field->set_raw_value(&nan, sizeof(nan)); |
| |
| // test NaN |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, nan, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, nan, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, nan, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, nan, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, nan, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, nan, true); |
| |
| // test +Infinity |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| |
| // test -Infinity |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, neg_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, neg_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, neg_inf, false); |
| |
| std::vector<double> test_values_in_range = { |
| zonemap_min_v, max, pos_inf, -123456.789012345, -0.0, 0.0, 123456.789012345, |
| }; |
| for (auto v : test_values_in_range) { |
| // test EQ |
| // std::cout << "test double EQ value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, v, true); |
| // test NE |
| // std::cout << "test double NE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // test LT |
| // std::cout << "test double LT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, v, v != zonemap_min_v); |
| |
| // test LE |
| // std::cout << "test double LE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // test GT |
| // std::cout << "test double GT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, v, !std::isnan(v)); |
| |
| // test GE |
| // std::cout << "test double GE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, v, true); |
| } |
| |
| // test values out of zonemap range |
| { |
| double v = zonemap_min_v * 2; |
| // test EQ |
| // std::cout << "test double EQ value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, v, false); |
| // test NE |
| // std::cout << "test double NE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // test LT |
| // std::cout << "test double LT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, v, false); |
| |
| // test LE |
| // std::cout << "test double LE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, v, false); |
| |
| // test GT |
| // std::cout << "test double GT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // test GE |
| // std::cout << "test double GE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, v, true); |
| } |
| } |
| // test special range: [-Infinity, +Infinity] |
| { |
| std::cout << "========test special range: [-Infinity, +Infinity]\n"; |
| min_field->set_raw_value(&neg_inf, sizeof(neg_inf)); |
| max_field->set_raw_value(&pos_inf, sizeof(pos_inf)); |
| |
| // test NaN |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, nan, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, nan, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, nan, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, nan, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, nan, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, nan, true); |
| |
| // test +Infinity |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, pos_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| |
| // test -Infinity |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, neg_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| |
| std::vector<double> test_values_in_range = { |
| min, max, -123456.789012345, -0.0, 0.0, 123456.789012345, |
| }; |
| for (auto v : test_values_in_range) { |
| // test EQ |
| // std::cout << "test double EQ value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, v, true); |
| // test NE |
| // std::cout << "test double NE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // test LT |
| // std::cout << "test double LT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, v, v != neg_inf); |
| |
| // test LE |
| // std::cout << "test double LE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // test GT |
| // std::cout << "test double GT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, v, v != pos_inf); |
| |
| // test GE |
| // std::cout << "test double GE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, v, true); |
| } |
| } |
| // test special range: [-Infinity, NaN] |
| { |
| std::cout << "========test special range: [-Infinity, NaN]\n"; |
| min_field->set_raw_value(&neg_inf, sizeof(neg_inf)); |
| max_field->set_raw_value(&nan, sizeof(nan)); |
| |
| // test NaN |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, nan, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, nan, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, nan, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, nan, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, nan, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, nan, true); |
| |
| // test +Infinity |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| |
| // test -Infinity |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, neg_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| |
| std::vector<double> test_values_in_range = { |
| min, max, -123456.789012345, -0.0, 0.0, 123456.789012345, |
| }; |
| for (auto v : test_values_in_range) { |
| // std::cout << "test double EQ value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, v, true); |
| // std::cout << "test double NE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // std::cout << "test double LT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // std::cout << "test double LE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // std::cout << "test double GT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // std::cout << "test double GE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, v, true); |
| } |
| } |
| // test special range: [-Infinity, -Infinity] |
| { |
| std::cout << "========test special range: [-Infinity, -Infinity]\n"; |
| min_field->set_raw_value(&neg_inf, sizeof(neg_inf)); |
| max_field->set_raw_value(&neg_inf, sizeof(neg_inf)); |
| |
| // test NaN |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, nan, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, nan, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, nan, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, nan, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, nan, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, nan, true); |
| |
| // test +Infinity |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, pos_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, pos_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, pos_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| |
| // test -Infinity |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, neg_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, neg_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, neg_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| |
| std::vector<double> test_values_not_in_range = { |
| min, max, -123456.789012345, -0.0, 0.0, 123456.789012345, |
| }; |
| for (auto v : test_values_not_in_range) { |
| // std::cout << "test double EQ value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, v, false); |
| // std::cout << "test double NE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // std::cout << "test double LT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // std::cout << "test double LE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // std::cout << "test double GT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, v, false); |
| |
| // std::cout << "test double GE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, v, false); |
| } |
| } |
| // test special range: [+Infinity, +Infinity] |
| { |
| std::cout << "========test special range: [+Infinity, +Infinity]\n"; |
| min_field->set_raw_value(&pos_inf, sizeof(pos_inf)); |
| max_field->set_raw_value(&pos_inf, sizeof(pos_inf)); |
| |
| // test NaN |
| std::cout << "========test NaN\n"; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, nan, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, nan, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, nan, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, nan, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, nan, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, nan, true); |
| |
| // test +Infinity |
| std::cout << "========test +Infinity\n"; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, pos_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, pos_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, pos_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| |
| // test -Infinity |
| std::cout << "========test -Infinity\n"; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, neg_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, neg_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, neg_inf, false); |
| |
| std::cout << "========test values not in range\n"; |
| std::vector<double> test_values_not_in_range = { |
| min, max, -123456.789012345, -0.0, 0.0, 123456.789012345, |
| }; |
| for (auto v : test_values_not_in_range) { |
| // std::cout << "test double EQ value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, v, false); |
| // std::cout << "test double NE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // std::cout << "test double LT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, v, false); |
| |
| // std::cout << "test double LE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, v, false); |
| |
| // std::cout << "test double GT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // std::cout << "test double GE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, v, true); |
| } |
| } |
| // test special range: [NaN, NaN] |
| { |
| std::cout << "========test special range: [NaN, NaN]\n"; |
| min_field->set_raw_value(&nan, sizeof(nan)); |
| max_field->set_raw_value(&nan, sizeof(nan)); |
| |
| // test NaN |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, nan, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, nan, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, nan, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, nan, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, nan, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, nan, true); |
| |
| // test +Infinity |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, pos_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, pos_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, pos_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, pos_inf, false); |
| |
| // test -Infinity |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, neg_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, neg_inf, true); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, neg_inf, false); |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, neg_inf, false); |
| |
| std::vector<double> test_values_not_in_range = { |
| min, max, -123456.789012345, -0.0, 0.0, 123456.789012345, |
| }; |
| for (auto v : test_values_not_in_range) { |
| // std::cout << "test double EQ value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, v, false); |
| // std::cout << "test double NE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // std::cout << "test double LT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, v, false); |
| |
| // std::cout << "test double LE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, v, false); |
| |
| // std::cout << "test double GT value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| // std::cout << "test double GE value: " << v << std::endl; |
| single_column_predicate_test_func<TYPE_DOUBLE, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, v, true); |
| } |
| } |
| } |
| |
| // test timestamptz zonemap index |
| TEST_F(BlockColumnPredicateTest, test_timestamptz_zonemap_index) { |
| FieldType type = FieldType::OLAP_FIELD_TYPE_TIMESTAMPTZ; |
| std::unique_ptr<WrapperField> min_field(WrapperField::create_by_type(type, 0)); |
| std::unique_ptr<WrapperField> max_field(WrapperField::create_by_type(type, 0)); |
| |
| cctz::time_zone time_zone = cctz::fixed_time_zone(std::chrono::hours(0)); |
| TimezoneUtils::load_offsets_to_cache(); |
| vectorized::CastParameters params; |
| params.is_strict = true; |
| |
| // test normal value min max: |
| { |
| std::cout << "========test normal value min max\n"; |
| // auto zonemap_min_v = type_limit<TimestampTzValue>::min(); |
| // auto zonemap_max_v = type_limit<TimestampTzValue>::max(); |
| TimestampTzValue zonemap_min_v; |
| TimestampTzValue zonemap_max_v; |
| EXPECT_TRUE(zonemap_min_v.from_string(StringRef {"0001-01-01 00:00:00"}, &time_zone, params, |
| 0)); |
| EXPECT_TRUE(zonemap_max_v.from_string(StringRef {"8999-12-31 23:59:59"}, &time_zone, params, |
| 0)); |
| min_field->set_raw_value(&zonemap_min_v, sizeof(zonemap_min_v)); |
| max_field->set_raw_value(&zonemap_max_v, sizeof(zonemap_max_v)); |
| |
| // test values within zonemap range |
| std::vector<std::string> values = {"0001-01-01 00:00:00", "2023-01-01 15:00:00", |
| "8999-12-31 23:59:59"}; |
| for (auto str : values) { |
| TimestampTzValue tz {}; |
| EXPECT_TRUE(tz.from_string(StringRef {str}, &time_zone, params, 0)); |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, tz, true); |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, tz, true); |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, tz, tz != zonemap_min_v); |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, tz, true); |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, tz, tz != zonemap_max_v); |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, tz, true); |
| } |
| // test values out of zonemap range |
| { |
| auto v = type_limit<TimestampTzValue>::min(); |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, v, false); |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, v, false); |
| |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, v, false); |
| |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, v, true); |
| } |
| // test values out of zonemap range |
| { |
| auto v = type_limit<TimestampTzValue>::max(); |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, v, false); |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, v, true); |
| |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, v, false); |
| |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, v, false); |
| } |
| } |
| |
| // test range [min, max]: |
| { |
| std::cout << "========test range [min, max]\n"; |
| auto zonemap_min_v = type_limit<TimestampTzValue>::min(); |
| auto zonemap_max_v = type_limit<TimestampTzValue>::max(); |
| min_field->set_raw_value(&zonemap_min_v, sizeof(zonemap_min_v)); |
| max_field->set_raw_value(&zonemap_max_v, sizeof(zonemap_max_v)); |
| |
| // test values within zonemap range |
| std::vector<std::string> values = {"0000-01-01 00:00:00", "2023-01-01 15:00:00", |
| "9999-12-31 23:59:59.999999"}; |
| for (auto str : values) { |
| TimestampTzValue tz {}; |
| EXPECT_TRUE(tz.from_string(StringRef {str}, &time_zone, params, 6)); |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::EQ>( |
| {min_field.get(), max_field.get()}, tz, true); |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::NE>( |
| {min_field.get(), max_field.get()}, tz, true); |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::LT>( |
| {min_field.get(), max_field.get()}, tz, tz != zonemap_min_v); |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::LE>( |
| {min_field.get(), max_field.get()}, tz, true); |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::GT>( |
| {min_field.get(), max_field.get()}, tz, tz != zonemap_max_v); |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::GE>( |
| {min_field.get(), max_field.get()}, tz, true); |
| } |
| } |
| } |
| |
| template <PrimitiveType T, PredicateType PT> |
| void single_column_predicate_test_func(const segment_v2::BloomFilter* bf, |
| typename PrimitiveTypeTraits<T>::CppType check_value, |
| bool expect_match) { |
| int col_idx = 0; |
| std::shared_ptr<ColumnPredicate> pred(new ComparisonPredicateBase<T, PT>(col_idx, check_value)); |
| SingleColumnBlockPredicate single_column_block_pred(pred); |
| |
| bool matched = single_column_block_pred.evaluate_and(bf); |
| EXPECT_EQ(matched, expect_match); |
| } |
| // test timestamptz bloom filter |
| TEST_F(BlockColumnPredicateTest, test_timestamptz_bloom_filter) { |
| cctz::time_zone time_zone = cctz::fixed_time_zone(std::chrono::hours(0)); |
| TimezoneUtils::load_offsets_to_cache(); |
| vectorized::CastParameters params; |
| params.is_strict = true; |
| |
| std::vector<std::string> str_values = {"0001-01-01 00:00:00", "2023-01-01 15:00:00", |
| "1111-01-01 01:01:01", "5555-05-05 05:05:05", |
| "6666-06-06 06:06:06", "7777-07-07 07:07:07", |
| "6666-12-01 23:00:00", "8999-12-31 23:59:59"}; |
| |
| std::unique_ptr<BloomFilter> bf; |
| auto st = BloomFilter::create(BLOCK_BLOOM_FILTER, &bf); |
| EXPECT_TRUE(st.ok()); |
| EXPECT_NE(nullptr, bf); |
| st = bf->init(1024, 0.05, HASH_MURMUR3_X64_64); |
| EXPECT_TRUE(st.ok()); |
| EXPECT_TRUE(bf->size() > 0); |
| |
| std::vector<TimestampTzValue> values; |
| for (const auto& str : str_values) { |
| TimestampTzValue tz {}; |
| EXPECT_TRUE(tz.from_string(StringRef {str}, &time_zone, params, 0)); |
| bf->add_bytes((char*)&tz, sizeof(TimestampTzValue)); |
| values.push_back(tz); |
| } |
| |
| for (const auto& v : values) { |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::EQ>(bf.get(), v, true); |
| } |
| { |
| auto str = "0000-01-01 00:00:00"; |
| TimestampTzValue tz {}; |
| EXPECT_TRUE(tz.from_string(StringRef {str}, &time_zone, params, 0)); |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::EQ>(bf.get(), tz, false); |
| } |
| { |
| auto str = "9999-12-31 23:59:59.999999"; |
| TimestampTzValue tz {}; |
| EXPECT_TRUE(tz.from_string(StringRef {str}, &time_zone, params, 6)); |
| single_column_predicate_test_func<TYPE_TIMESTAMPTZ, PredicateType::EQ>(bf.get(), tz, false); |
| } |
| } |
| |
| TEST_F(BlockColumnPredicateTest, PARQUET_COMPARISON_PREDICATE) { |
| { // INT |
| {// EQ |
| int value = 5; |
| int col_idx = 0; |
| std::shared_ptr<ColumnPredicate> pred( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::EQ>(col_idx, value)); |
| SingleColumnBlockPredicate single_column_block_pred(pred); |
| std::unique_ptr<vectorized::FieldSchema> parquet_field_col1 = |
| std::make_unique<vectorized::FieldSchema>(); |
| parquet_field_col1->name = "col1"; |
| parquet_field_col1->data_type = |
| vectorized::DataTypeFactory::instance().create_data_type(PrimitiveType::TYPE_INT, true); |
| parquet_field_col1->field_id = -1; |
| parquet_field_col1->parquet_schema.type = tparquet::Type::type::INT32; |
| |
| vectorized::ParquetPredicate::ColumnStat stat; |
| cctz::time_zone tmp_ctz; |
| stat.ctz = &tmp_ctz; |
| |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_stat_func; |
| { |
| // 5 belongs to [5, 5] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| auto tmp = std::string(reinterpret_cast<const char*>(&value), sizeof(value)); |
| stat->encoded_min_value = tmp; |
| stat->encoded_max_value = tmp; |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| { |
| // 5 not belongs to [6, 7] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| int lower = 6; |
| int upper = 7; |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| stat->encoded_min_value = |
| std::string(reinterpret_cast<const char*>(&lower), sizeof(lower)); |
| stat->encoded_max_value = |
| std::string(reinterpret_cast<const char*>(&upper), sizeof(upper)); |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_FALSE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| { |
| // 5 not belongs to [1, 4] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| int lower = 1; |
| int upper = 4; |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| stat->encoded_min_value = |
| std::string(reinterpret_cast<const char*>(&lower), sizeof(lower)); |
| stat->encoded_max_value = |
| std::string(reinterpret_cast<const char*>(&upper), sizeof(upper)); |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_FALSE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| { |
| // get stat failed |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| return false; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| } |
| { |
| // NE |
| int value = 5; |
| int col_idx = 0; |
| std::shared_ptr<ColumnPredicate> pred( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::NE>(col_idx, value)); |
| SingleColumnBlockPredicate single_column_block_pred(pred); |
| std::unique_ptr<vectorized::FieldSchema> parquet_field_col1 = |
| std::make_unique<vectorized::FieldSchema>(); |
| parquet_field_col1->name = "col1"; |
| parquet_field_col1->data_type = |
| vectorized::DataTypeFactory::instance().create_data_type(PrimitiveType::TYPE_INT, true); |
| parquet_field_col1->field_id = -1; |
| parquet_field_col1->parquet_schema.type = tparquet::Type::type::INT32; |
| |
| vectorized::ParquetPredicate::ColumnStat stat; |
| cctz::time_zone tmp_ctz; |
| stat.ctz = &tmp_ctz; |
| |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_stat_func; |
| { |
| // 5 belongs to [5, 5] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| auto tmp = std::string(reinterpret_cast<const char*>(&value), sizeof(value)); |
| stat->encoded_min_value = tmp; |
| stat->encoded_max_value = tmp; |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_FALSE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| { |
| // 5 not belongs to [6, 7] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| int lower = 6; |
| int upper = 7; |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| stat->encoded_min_value = |
| std::string(reinterpret_cast<const char*>(&lower), sizeof(lower)); |
| stat->encoded_max_value = |
| std::string(reinterpret_cast<const char*>(&upper), sizeof(upper)); |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| { |
| // 5 not belongs to [1, 4] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| int lower = 1; |
| int upper = 4; |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| stat->encoded_min_value = |
| std::string(reinterpret_cast<const char*>(&lower), sizeof(lower)); |
| stat->encoded_max_value = |
| std::string(reinterpret_cast<const char*>(&upper), sizeof(upper)); |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| } |
| { |
| // GE |
| int value = 5; |
| int col_idx = 0; |
| std::shared_ptr<ColumnPredicate> pred( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::GE>(col_idx, value)); |
| SingleColumnBlockPredicate single_column_block_pred(pred); |
| std::unique_ptr<vectorized::FieldSchema> parquet_field_col1 = |
| std::make_unique<vectorized::FieldSchema>(); |
| parquet_field_col1->name = "col1"; |
| parquet_field_col1->data_type = |
| vectorized::DataTypeFactory::instance().create_data_type(PrimitiveType::TYPE_INT, true); |
| parquet_field_col1->field_id = -1; |
| parquet_field_col1->parquet_schema.type = tparquet::Type::type::INT32; |
| |
| vectorized::ParquetPredicate::ColumnStat stat; |
| cctz::time_zone tmp_ctz; |
| stat.ctz = &tmp_ctz; |
| |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_stat_func; |
| { |
| // 5 belongs to [5, 5] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| auto tmp = std::string(reinterpret_cast<const char*>(&value), sizeof(value)); |
| stat->encoded_min_value = tmp; |
| stat->encoded_max_value = tmp; |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| { |
| // 5 not belongs to [6, 7] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| int lower = 6; |
| int upper = 7; |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| stat->encoded_min_value = |
| std::string(reinterpret_cast<const char*>(&lower), sizeof(lower)); |
| stat->encoded_max_value = |
| std::string(reinterpret_cast<const char*>(&upper), sizeof(upper)); |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| { |
| // 5 not belongs to [1, 4] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| int lower = 1; |
| int upper = 4; |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| stat->encoded_min_value = |
| std::string(reinterpret_cast<const char*>(&lower), sizeof(lower)); |
| stat->encoded_max_value = |
| std::string(reinterpret_cast<const char*>(&upper), sizeof(upper)); |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_FALSE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| } |
| { |
| // LE |
| int value = 5; |
| int col_idx = 0; |
| std::shared_ptr<ColumnPredicate> pred( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::LE>(col_idx, value)); |
| SingleColumnBlockPredicate single_column_block_pred(pred); |
| std::unique_ptr<vectorized::FieldSchema> parquet_field_col1 = |
| std::make_unique<vectorized::FieldSchema>(); |
| parquet_field_col1->name = "col1"; |
| parquet_field_col1->data_type = |
| vectorized::DataTypeFactory::instance().create_data_type(PrimitiveType::TYPE_INT, true); |
| parquet_field_col1->field_id = -1; |
| parquet_field_col1->parquet_schema.type = tparquet::Type::type::INT32; |
| |
| vectorized::ParquetPredicate::ColumnStat stat; |
| cctz::time_zone tmp_ctz; |
| stat.ctz = &tmp_ctz; |
| |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_stat_func; |
| { |
| // 5 belongs to [5, 5] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| auto tmp = std::string(reinterpret_cast<const char*>(&value), sizeof(value)); |
| stat->encoded_min_value = tmp; |
| stat->encoded_max_value = tmp; |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| { |
| // 5 not belongs to [6, 7] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| int lower = 6; |
| int upper = 7; |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| stat->encoded_min_value = |
| std::string(reinterpret_cast<const char*>(&lower), sizeof(lower)); |
| stat->encoded_max_value = |
| std::string(reinterpret_cast<const char*>(&upper), sizeof(upper)); |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_FALSE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| { |
| // 5 not belongs to [1, 4] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| int lower = 1; |
| int upper = 4; |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| stat->encoded_min_value = |
| std::string(reinterpret_cast<const char*>(&lower), sizeof(lower)); |
| stat->encoded_max_value = |
| std::string(reinterpret_cast<const char*>(&upper), sizeof(upper)); |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| } |
| } // namespace doris |
| { |
| // FLOAT |
| { |
| // EQ |
| float value = 5.0; |
| int col_idx = 0; |
| std::shared_ptr<ColumnPredicate> pred( |
| new ComparisonPredicateBase<TYPE_FLOAT, PredicateType::EQ>(col_idx, value)); |
| SingleColumnBlockPredicate single_column_block_pred(pred); |
| std::unique_ptr<vectorized::FieldSchema> parquet_field_col1 = |
| std::make_unique<vectorized::FieldSchema>(); |
| parquet_field_col1->name = "col1"; |
| parquet_field_col1->data_type = vectorized::DataTypeFactory::instance().create_data_type( |
| PrimitiveType::TYPE_FLOAT, true); |
| parquet_field_col1->field_id = -1; |
| parquet_field_col1->parquet_schema.type = tparquet::Type::type::FLOAT; |
| |
| vectorized::ParquetPredicate::ColumnStat stat; |
| cctz::time_zone tmp_ctz; |
| stat.ctz = &tmp_ctz; |
| |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_stat_func; |
| { |
| // 5 belongs to [5, 5] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| auto tmp = std::string(reinterpret_cast<const char*>(&value), sizeof(value)); |
| stat->encoded_min_value = tmp; |
| stat->encoded_max_value = tmp; |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| { |
| // 5 not belongs to [6, 7] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| float lower = 6.0; |
| float upper = 7.0; |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| stat->encoded_min_value = |
| std::string(reinterpret_cast<const char*>(&lower), sizeof(lower)); |
| stat->encoded_max_value = |
| std::string(reinterpret_cast<const char*>(&upper), sizeof(upper)); |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_FALSE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| { |
| // 5 not belongs to [1, 4] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| float lower = 1.0; |
| float upper = 4.0; |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| stat->encoded_min_value = |
| std::string(reinterpret_cast<const char*>(&lower), sizeof(lower)); |
| stat->encoded_max_value = |
| std::string(reinterpret_cast<const char*>(&upper), sizeof(upper)); |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_FALSE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| { |
| // get stat failed |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| return false; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| { |
| // get min max failed |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| float lower = nanf(""); |
| float upper = 4.0; |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| stat->encoded_min_value = |
| std::string(reinterpret_cast<const char*>(&lower), sizeof(lower)); |
| stat->encoded_max_value = |
| std::string(reinterpret_cast<const char*>(&upper), sizeof(upper)); |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| } |
| { |
| // NE |
| float value = 5; |
| int col_idx = 0; |
| std::shared_ptr<ColumnPredicate> pred( |
| new ComparisonPredicateBase<TYPE_FLOAT, PredicateType::NE>(col_idx, value)); |
| SingleColumnBlockPredicate single_column_block_pred(pred); |
| std::unique_ptr<vectorized::FieldSchema> parquet_field_col1 = |
| std::make_unique<vectorized::FieldSchema>(); |
| parquet_field_col1->name = "col1"; |
| parquet_field_col1->data_type = vectorized::DataTypeFactory::instance().create_data_type( |
| PrimitiveType::TYPE_FLOAT, true); |
| parquet_field_col1->field_id = -1; |
| parquet_field_col1->parquet_schema.type = tparquet::Type::type::FLOAT; |
| |
| vectorized::ParquetPredicate::ColumnStat stat; |
| cctz::time_zone tmp_ctz; |
| stat.ctz = &tmp_ctz; |
| |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_stat_func; |
| { |
| // 5 belongs to [5, 5] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| auto tmp = std::string(reinterpret_cast<const char*>(&value), sizeof(value)); |
| stat->encoded_min_value = tmp; |
| stat->encoded_max_value = tmp; |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_FALSE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| { |
| // 5 not belongs to [6, 7] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| float lower = 6.0; |
| float upper = 7.0; |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| stat->encoded_min_value = |
| std::string(reinterpret_cast<const char*>(&lower), sizeof(lower)); |
| stat->encoded_max_value = |
| std::string(reinterpret_cast<const char*>(&upper), sizeof(upper)); |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| { |
| // 5 not belongs to [1, 4] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| float lower = 1.0; |
| float upper = 4.0; |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| stat->encoded_min_value = |
| std::string(reinterpret_cast<const char*>(&lower), sizeof(lower)); |
| stat->encoded_max_value = |
| std::string(reinterpret_cast<const char*>(&upper), sizeof(upper)); |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| } |
| { |
| // GE |
| float value = 5.0; |
| int col_idx = 0; |
| std::shared_ptr<ColumnPredicate> pred( |
| new ComparisonPredicateBase<TYPE_FLOAT, PredicateType::GE>(col_idx, value)); |
| SingleColumnBlockPredicate single_column_block_pred(pred); |
| std::unique_ptr<vectorized::FieldSchema> parquet_field_col1 = |
| std::make_unique<vectorized::FieldSchema>(); |
| parquet_field_col1->name = "col1"; |
| parquet_field_col1->data_type = vectorized::DataTypeFactory::instance().create_data_type( |
| PrimitiveType::TYPE_INT, true); |
| parquet_field_col1->field_id = -1; |
| parquet_field_col1->parquet_schema.type = tparquet::Type::type::FLOAT; |
| |
| vectorized::ParquetPredicate::ColumnStat stat; |
| cctz::time_zone tmp_ctz; |
| stat.ctz = &tmp_ctz; |
| |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_stat_func; |
| { |
| // 5 belongs to [5, 5] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| auto tmp = std::string(reinterpret_cast<const char*>(&value), sizeof(value)); |
| stat->encoded_min_value = tmp; |
| stat->encoded_max_value = tmp; |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| { |
| // 5 not belongs to [6, 7] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| float lower = 6.0; |
| float upper = 7.0; |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| stat->encoded_min_value = |
| std::string(reinterpret_cast<const char*>(&lower), sizeof(lower)); |
| stat->encoded_max_value = |
| std::string(reinterpret_cast<const char*>(&upper), sizeof(upper)); |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| { |
| // 5 not belongs to [1, 4] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| float lower = 1.0; |
| float upper = 4.0; |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| stat->encoded_min_value = |
| std::string(reinterpret_cast<const char*>(&lower), sizeof(lower)); |
| stat->encoded_max_value = |
| std::string(reinterpret_cast<const char*>(&upper), sizeof(upper)); |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_FALSE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| } |
| { |
| // LE |
| float value = 5.0; |
| int col_idx = 0; |
| std::shared_ptr<ColumnPredicate> pred( |
| new ComparisonPredicateBase<TYPE_FLOAT, PredicateType::LE>(col_idx, value)); |
| SingleColumnBlockPredicate single_column_block_pred(pred); |
| std::unique_ptr<vectorized::FieldSchema> parquet_field_col1 = |
| std::make_unique<vectorized::FieldSchema>(); |
| parquet_field_col1->name = "col1"; |
| parquet_field_col1->data_type = vectorized::DataTypeFactory::instance().create_data_type( |
| PrimitiveType::TYPE_FLOAT, true); |
| parquet_field_col1->field_id = -1; |
| parquet_field_col1->parquet_schema.type = tparquet::Type::type::FLOAT; |
| |
| vectorized::ParquetPredicate::ColumnStat stat; |
| cctz::time_zone tmp_ctz; |
| stat.ctz = &tmp_ctz; |
| |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_stat_func; |
| { |
| // 5 belongs to [5, 5] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| auto tmp = std::string(reinterpret_cast<const char*>(&value), sizeof(value)); |
| stat->encoded_min_value = tmp; |
| stat->encoded_max_value = tmp; |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| { |
| // 5 not belongs to [6, 7] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| float lower = 6.0; |
| float upper = 7.0; |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| stat->encoded_min_value = |
| std::string(reinterpret_cast<const char*>(&lower), sizeof(lower)); |
| stat->encoded_max_value = |
| std::string(reinterpret_cast<const char*>(&upper), sizeof(upper)); |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_FALSE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| { |
| // 5 not belongs to [1, 4] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| float lower = 1.0; |
| float upper = 4.0; |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| stat->encoded_min_value = |
| std::string(reinterpret_cast<const char*>(&lower), sizeof(lower)); |
| stat->encoded_max_value = |
| std::string(reinterpret_cast<const char*>(&upper), sizeof(upper)); |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| } |
| } |
| } |
| |
| TEST_F(BlockColumnPredicateTest, PARQUET_IN_PREDICATE) { |
| { // INT |
| { |
| int value = 5; |
| int col_idx = 0; |
| auto hybrid_set = std::make_shared<HybridSet<PrimitiveType::TYPE_INT>>(false); |
| hybrid_set->insert(&value); |
| std::shared_ptr<ColumnPredicate> pred( |
| new InListPredicateBase<TYPE_INT, PredicateType::IN_LIST, 1>( |
| col_idx, hybrid_set, false)); |
| SingleColumnBlockPredicate single_column_block_pred(pred); |
| std::unique_ptr<vectorized::FieldSchema> parquet_field_col1 = |
| std::make_unique<vectorized::FieldSchema>(); |
| parquet_field_col1->name = "col1"; |
| parquet_field_col1->data_type = |
| vectorized::DataTypeFactory::instance().create_data_type( |
| PrimitiveType::TYPE_INT, true); |
| parquet_field_col1->field_id = -1; |
| parquet_field_col1->parquet_schema.type = tparquet::Type::type::INT32; |
| |
| vectorized::ParquetPredicate::ColumnStat stat; |
| cctz::time_zone tmp_ctz; |
| stat.ctz = &tmp_ctz; |
| |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_stat_func; |
| { |
| // 5 belongs to [5, 5] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| auto tmp = std::string(reinterpret_cast<const char*>(&value), sizeof(value)); |
| stat->encoded_min_value = tmp; |
| stat->encoded_max_value = tmp; |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| { |
| // get stat failed |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| return false; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| } |
| { |
| int value = 5; |
| int col_idx = 0; |
| auto hybrid_set = std::make_shared<HybridSet<PrimitiveType::TYPE_INT>>(false); |
| hybrid_set->insert(&value); |
| std::shared_ptr<ColumnPredicate> pred( |
| new InListPredicateBase<TYPE_INT, PredicateType::IN_LIST, 1>( |
| col_idx, hybrid_set, false)); |
| SingleColumnBlockPredicate single_column_block_pred(pred); |
| std::unique_ptr<vectorized::FieldSchema> parquet_field_col1 = |
| std::make_unique<vectorized::FieldSchema>(); |
| parquet_field_col1->name = "col1"; |
| parquet_field_col1->data_type = |
| vectorized::DataTypeFactory::instance().create_data_type( |
| PrimitiveType::TYPE_INT, true); |
| parquet_field_col1->field_id = -1; |
| parquet_field_col1->parquet_schema.type = tparquet::Type::type::INT32; |
| |
| vectorized::ParquetPredicate::ColumnStat stat; |
| cctz::time_zone tmp_ctz; |
| stat.ctz = &tmp_ctz; |
| |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_stat_func; |
| { |
| // 5 belongs to [5, 5] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| int tmp_v = 6; |
| auto tmp = std::string(reinterpret_cast<const char*>(&tmp_v), sizeof(tmp_v)); |
| stat->encoded_min_value = tmp; |
| stat->encoded_max_value = tmp; |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_FALSE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| { |
| // get stat failed |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| return false; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| } |
| } |
| } |
| |
| TEST_F(BlockColumnPredicateTest, PARQUET_COMPARISON_PREDICATE_BLOOM_FILTER) { |
| const int value = 42; |
| const int col_idx = 0; |
| std::shared_ptr<ColumnPredicate> pred( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::EQ>(col_idx, value)); |
| SingleColumnBlockPredicate single_column_block_pred(pred); |
| |
| auto parquet_field = std::make_unique<vectorized::FieldSchema>(); |
| parquet_field->name = "col1"; |
| parquet_field->data_type = |
| vectorized::DataTypeFactory::instance().create_data_type(PrimitiveType::TYPE_INT, true); |
| parquet_field->field_id = -1; |
| parquet_field->parquet_schema.type = tparquet::Type::type::INT32; |
| |
| auto encode_value = [](int v) { |
| return std::string(reinterpret_cast<const char*>(&v), sizeof(v)); |
| }; |
| |
| { |
| vectorized::ParquetPredicate::ColumnStat stat; |
| cctz::time_zone tmp_ctz; |
| stat.ctz = &tmp_ctz; |
| |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_stat_func = |
| [&](vectorized::ParquetPredicate::ColumnStat* current_stat, int cid) { |
| EXPECT_EQ(col_idx, cid); |
| current_stat->col_schema = parquet_field.get(); |
| current_stat->is_all_null = false; |
| current_stat->has_null = false; |
| current_stat->encoded_min_value = encode_value(value); |
| current_stat->encoded_max_value = encode_value(value); |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| |
| int loader_calls = 0; |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_bloom_filter_func = |
| [&](vectorized::ParquetPredicate::ColumnStat* current_stat, int cid) { |
| EXPECT_EQ(col_idx, cid); |
| loader_calls++; |
| if (!current_stat->bloom_filter) { |
| current_stat->bloom_filter = |
| std::make_unique<vectorized::ParquetBlockSplitBloomFilter>(); |
| auto* bloom = static_cast<vectorized::ParquetBlockSplitBloomFilter*>( |
| current_stat->bloom_filter.get()); |
| Status st = bloom->init(256, segment_v2::HashStrategyPB::XX_HASH_64); |
| EXPECT_TRUE(st.ok()); |
| bloom->add_bytes(reinterpret_cast<const char*>(&value), sizeof(value)); |
| } |
| return true; |
| }; |
| stat.get_bloom_filter_func = &get_bloom_filter_func; |
| |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| EXPECT_EQ(1, loader_calls); |
| } |
| |
| { |
| vectorized::ParquetPredicate::ColumnStat stat; |
| cctz::time_zone tmp_ctz; |
| stat.ctz = &tmp_ctz; |
| |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_stat_func = |
| [&](vectorized::ParquetPredicate::ColumnStat* current_stat, int cid) { |
| EXPECT_EQ(col_idx, cid); |
| current_stat->col_schema = parquet_field.get(); |
| current_stat->is_all_null = false; |
| current_stat->has_null = false; |
| current_stat->encoded_min_value = encode_value(value); |
| current_stat->encoded_max_value = encode_value(value); |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| |
| int loader_calls = 0; |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_bloom_filter_func = |
| [&](vectorized::ParquetPredicate::ColumnStat* current_stat, int cid) { |
| EXPECT_EQ(col_idx, cid); |
| loader_calls++; |
| if (!current_stat->bloom_filter) { |
| current_stat->bloom_filter = |
| std::make_unique<vectorized::ParquetBlockSplitBloomFilter>(); |
| auto* bloom = static_cast<vectorized::ParquetBlockSplitBloomFilter*>( |
| current_stat->bloom_filter.get()); |
| Status st = bloom->init(256, segment_v2::HashStrategyPB::XX_HASH_64); |
| EXPECT_TRUE(st.ok()); |
| int other_value = value + 10; |
| bloom->add_bytes(reinterpret_cast<const char*>(&other_value), |
| sizeof(other_value)); |
| } |
| return true; |
| }; |
| stat.get_bloom_filter_func = &get_bloom_filter_func; |
| |
| EXPECT_FALSE(single_column_block_pred.evaluate_and(&stat)); |
| EXPECT_EQ(1, loader_calls); |
| } |
| |
| { |
| vectorized::ParquetPredicate::ColumnStat stat; |
| cctz::time_zone tmp_ctz; |
| stat.ctz = &tmp_ctz; |
| |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_stat_func = |
| [&](vectorized::ParquetPredicate::ColumnStat* current_stat, int cid) { |
| EXPECT_EQ(col_idx, cid); |
| current_stat->col_schema = parquet_field.get(); |
| current_stat->is_all_null = false; |
| current_stat->has_null = false; |
| current_stat->encoded_min_value = encode_value(value); |
| current_stat->encoded_max_value = encode_value(value); |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| |
| bool loader_invoked = false; |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_bloom_filter_func = |
| [&](vectorized::ParquetPredicate::ColumnStat* current_stat, int cid) { |
| EXPECT_EQ(col_idx, cid); |
| loader_invoked = true; |
| return false; |
| }; |
| stat.get_bloom_filter_func = &get_bloom_filter_func; |
| |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| EXPECT_TRUE(loader_invoked); |
| } |
| |
| { |
| vectorized::ParquetPredicate::ColumnStat stat; |
| cctz::time_zone tmp_ctz; |
| stat.ctz = &tmp_ctz; |
| |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_stat_func = |
| [&](vectorized::ParquetPredicate::ColumnStat* current_stat, int cid) { |
| EXPECT_EQ(col_idx, cid); |
| current_stat->col_schema = parquet_field.get(); |
| current_stat->is_all_null = false; |
| current_stat->has_null = false; |
| int min_value = value + 5; |
| int max_value = value + 10; |
| current_stat->encoded_min_value = encode_value(min_value); |
| current_stat->encoded_max_value = encode_value(max_value); |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| |
| int loader_calls = 0; |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_bloom_filter_func = |
| [&](vectorized::ParquetPredicate::ColumnStat*, int) { |
| loader_calls++; |
| return true; |
| }; |
| stat.get_bloom_filter_func = &get_bloom_filter_func; |
| |
| EXPECT_FALSE(single_column_block_pred.evaluate_and(&stat)); |
| EXPECT_EQ(0, loader_calls); |
| } |
| } |
| |
| TEST_F(BlockColumnPredicateTest, PARQUET_IN_PREDICATE_BLOOM_FILTER) { |
| const int col_idx = 0; |
| auto hybrid_set = std::make_shared<HybridSet<PrimitiveType::TYPE_INT>>(false); |
| const int included_value = 7; |
| hybrid_set->insert(&included_value); |
| std::shared_ptr<ColumnPredicate> pred( |
| new InListPredicateBase<TYPE_INT, PredicateType::IN_LIST, 1>(col_idx, hybrid_set, |
| false)); |
| SingleColumnBlockPredicate single_column_block_pred(pred); |
| |
| auto parquet_field = std::make_unique<vectorized::FieldSchema>(); |
| parquet_field->name = "col1"; |
| parquet_field->data_type = |
| vectorized::DataTypeFactory::instance().create_data_type(PrimitiveType::TYPE_INT, true); |
| parquet_field->field_id = -1; |
| parquet_field->parquet_schema.type = tparquet::Type::type::INT32; |
| |
| auto encode_value = [](int v) { |
| return std::string(reinterpret_cast<const char*>(&v), sizeof(v)); |
| }; |
| |
| { |
| vectorized::ParquetPredicate::ColumnStat stat; |
| cctz::time_zone tmp_ctz; |
| stat.ctz = &tmp_ctz; |
| |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_stat_func = |
| [&](vectorized::ParquetPredicate::ColumnStat* current_stat, int cid) { |
| EXPECT_EQ(col_idx, cid); |
| current_stat->col_schema = parquet_field.get(); |
| current_stat->is_all_null = false; |
| current_stat->has_null = false; |
| current_stat->encoded_min_value = encode_value(included_value); |
| current_stat->encoded_max_value = encode_value(included_value); |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| |
| int loader_calls = 0; |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_bloom_filter_func = |
| [&](vectorized::ParquetPredicate::ColumnStat* current_stat, int cid) { |
| EXPECT_EQ(col_idx, cid); |
| loader_calls++; |
| if (!current_stat->bloom_filter) { |
| current_stat->bloom_filter = |
| std::make_unique<vectorized::ParquetBlockSplitBloomFilter>(); |
| auto* bloom = static_cast<vectorized::ParquetBlockSplitBloomFilter*>( |
| current_stat->bloom_filter.get()); |
| Status st = bloom->init(256, segment_v2::HashStrategyPB::XX_HASH_64); |
| EXPECT_TRUE(st.ok()); |
| bloom->add_bytes(reinterpret_cast<const char*>(&included_value), |
| sizeof(included_value)); |
| } |
| return true; |
| }; |
| stat.get_bloom_filter_func = &get_bloom_filter_func; |
| |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| EXPECT_EQ(1, loader_calls); |
| } |
| |
| { |
| vectorized::ParquetPredicate::ColumnStat stat; |
| cctz::time_zone tmp_ctz; |
| stat.ctz = &tmp_ctz; |
| |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_stat_func = |
| [&](vectorized::ParquetPredicate::ColumnStat* current_stat, int cid) { |
| EXPECT_EQ(col_idx, cid); |
| current_stat->col_schema = parquet_field.get(); |
| current_stat->is_all_null = false; |
| current_stat->has_null = false; |
| current_stat->encoded_min_value = encode_value(included_value); |
| current_stat->encoded_max_value = encode_value(included_value); |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| |
| int loader_calls = 0; |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_bloom_filter_func = |
| [&](vectorized::ParquetPredicate::ColumnStat* current_stat, int cid) { |
| EXPECT_EQ(col_idx, cid); |
| loader_calls++; |
| if (!current_stat->bloom_filter) { |
| current_stat->bloom_filter = |
| std::make_unique<vectorized::ParquetBlockSplitBloomFilter>(); |
| auto* bloom = static_cast<vectorized::ParquetBlockSplitBloomFilter*>( |
| current_stat->bloom_filter.get()); |
| Status st = bloom->init(256, segment_v2::HashStrategyPB::XX_HASH_64); |
| EXPECT_TRUE(st.ok()); |
| int excluded_value = included_value + 1; |
| bloom->add_bytes(reinterpret_cast<const char*>(&excluded_value), |
| sizeof(excluded_value)); |
| } |
| return true; |
| }; |
| stat.get_bloom_filter_func = &get_bloom_filter_func; |
| |
| EXPECT_FALSE(single_column_block_pred.evaluate_and(&stat)); |
| EXPECT_EQ(1, loader_calls); |
| } |
| |
| { |
| vectorized::ParquetPredicate::ColumnStat stat; |
| cctz::time_zone tmp_ctz; |
| stat.ctz = &tmp_ctz; |
| |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_stat_func = |
| [&](vectorized::ParquetPredicate::ColumnStat* current_stat, int cid) { |
| EXPECT_EQ(col_idx, cid); |
| current_stat->col_schema = parquet_field.get(); |
| current_stat->is_all_null = false; |
| current_stat->has_null = false; |
| int min_value = included_value + 5; |
| int max_value = included_value + 10; |
| current_stat->encoded_min_value = encode_value(min_value); |
| current_stat->encoded_max_value = encode_value(max_value); |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| |
| int loader_calls = 0; |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_bloom_filter_func = |
| [&](vectorized::ParquetPredicate::ColumnStat*, int) { |
| loader_calls++; |
| return true; |
| }; |
| stat.get_bloom_filter_func = &get_bloom_filter_func; |
| |
| EXPECT_FALSE(single_column_block_pred.evaluate_and(&stat)); |
| EXPECT_EQ(0, loader_calls); |
| } |
| } |
| |
| TEST_F(BlockColumnPredicateTest, NULL_PREDICATE) { |
| { |
| int col_idx = 0; |
| std::shared_ptr<ColumnPredicate> pred( |
| new NullPredicate(col_idx, true, PrimitiveType::TYPE_INT)); |
| SingleColumnBlockPredicate single_column_block_pred(pred); |
| std::unique_ptr<vectorized::FieldSchema> parquet_field_col1 = |
| std::make_unique<vectorized::FieldSchema>(); |
| parquet_field_col1->name = "col1"; |
| parquet_field_col1->data_type = vectorized::DataTypeFactory::instance().create_data_type( |
| PrimitiveType::TYPE_INT, true); |
| parquet_field_col1->field_id = -1; |
| parquet_field_col1->parquet_schema.type = tparquet::Type::type::INT32; |
| |
| vectorized::ParquetPredicate::ColumnStat stat; |
| cctz::time_zone tmp_ctz; |
| stat.ctz = &tmp_ctz; |
| |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_stat_func; |
| { |
| // 5 belongs to [5, 5] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| { |
| // get stat failed |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| return false; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| } |
| { |
| int col_idx = 0; |
| std::shared_ptr<ColumnPredicate> pred( |
| new NullPredicate(col_idx, false, PrimitiveType::TYPE_INT)); |
| SingleColumnBlockPredicate single_column_block_pred(pred); |
| std::unique_ptr<vectorized::FieldSchema> parquet_field_col1 = |
| std::make_unique<vectorized::FieldSchema>(); |
| parquet_field_col1->name = "col1"; |
| parquet_field_col1->data_type = vectorized::DataTypeFactory::instance().create_data_type( |
| PrimitiveType::TYPE_INT, true); |
| parquet_field_col1->field_id = -1; |
| parquet_field_col1->parquet_schema.type = tparquet::Type::type::INT32; |
| |
| vectorized::ParquetPredicate::ColumnStat stat; |
| cctz::time_zone tmp_ctz; |
| stat.ctz = &tmp_ctz; |
| |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_stat_func; |
| { |
| // 5 belongs to [5, 5] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| { |
| // 5 belongs to [5, 5] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = true; |
| stat->has_null = false; |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_FALSE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| { |
| // get stat failed |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| return false; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(single_column_block_pred.evaluate_and(&stat)); |
| } |
| } |
| } |
| |
| TEST_F(BlockColumnPredicateTest, COMBINED_PREDICATE) { |
| { |
| AndBlockColumnPredicate and_block_column_pred; |
| |
| std::unique_ptr<SingleColumnBlockPredicate> true_predicate; |
| int col_idx = 0; |
| int value = 5; |
| std::shared_ptr<ColumnPredicate> pred( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::EQ>(col_idx, value)); |
| true_predicate = std::make_unique<SingleColumnBlockPredicate>(pred); |
| |
| std::unique_ptr<SingleColumnBlockPredicate> false_predicate; |
| std::shared_ptr<ColumnPredicate> pred2( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::NE>(col_idx, value)); |
| false_predicate = std::make_unique<SingleColumnBlockPredicate>(pred2); |
| |
| std::unique_ptr<vectorized::FieldSchema> parquet_field_col1 = |
| std::make_unique<vectorized::FieldSchema>(); |
| parquet_field_col1->name = "col1"; |
| parquet_field_col1->data_type = vectorized::DataTypeFactory::instance().create_data_type( |
| PrimitiveType::TYPE_INT, true); |
| parquet_field_col1->field_id = -1; |
| parquet_field_col1->parquet_schema.type = tparquet::Type::type::INT32; |
| |
| vectorized::ParquetPredicate::ColumnStat stat; |
| cctz::time_zone tmp_ctz; |
| stat.ctz = &tmp_ctz; |
| |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_stat_func; |
| // 5 belongs to [5, 5] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| auto tmp = std::string(reinterpret_cast<const char*>(&value), sizeof(value)); |
| stat->encoded_min_value = tmp; |
| stat->encoded_max_value = tmp; |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(true_predicate->evaluate_and(&stat)); |
| EXPECT_FALSE(false_predicate->evaluate_and(&stat)); |
| and_block_column_pred.add_column_predicate(std::move(true_predicate)); |
| and_block_column_pred.add_column_predicate(std::move(false_predicate)); |
| EXPECT_FALSE(and_block_column_pred.evaluate_and(&stat)); |
| } |
| { |
| AndBlockColumnPredicate and_block_column_pred; |
| |
| std::unique_ptr<SingleColumnBlockPredicate> true_predicate; |
| int col_idx = 0; |
| int value = 5; |
| std::shared_ptr<ColumnPredicate> pred( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::EQ>(col_idx, value)); |
| true_predicate = std::make_unique<SingleColumnBlockPredicate>(pred); |
| |
| std::unique_ptr<SingleColumnBlockPredicate> true_predicate2; |
| std::shared_ptr<ColumnPredicate> pred2( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::EQ>(col_idx, value)); |
| true_predicate2 = std::make_unique<SingleColumnBlockPredicate>(pred2); |
| |
| std::unique_ptr<vectorized::FieldSchema> parquet_field_col1 = |
| std::make_unique<vectorized::FieldSchema>(); |
| parquet_field_col1->name = "col1"; |
| parquet_field_col1->data_type = vectorized::DataTypeFactory::instance().create_data_type( |
| PrimitiveType::TYPE_INT, true); |
| parquet_field_col1->field_id = -1; |
| parquet_field_col1->parquet_schema.type = tparquet::Type::type::INT32; |
| |
| vectorized::ParquetPredicate::ColumnStat stat; |
| cctz::time_zone tmp_ctz; |
| stat.ctz = &tmp_ctz; |
| |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_stat_func; |
| // 5 belongs to [5, 5] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| auto tmp = std::string(reinterpret_cast<const char*>(&value), sizeof(value)); |
| stat->encoded_min_value = tmp; |
| stat->encoded_max_value = tmp; |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(true_predicate->evaluate_and(&stat)); |
| EXPECT_TRUE(true_predicate2->evaluate_and(&stat)); |
| and_block_column_pred.add_column_predicate(std::move(true_predicate)); |
| and_block_column_pred.add_column_predicate(std::move(true_predicate2)); |
| EXPECT_TRUE(and_block_column_pred.evaluate_and(&stat)); |
| } |
| { |
| OrBlockColumnPredicate or_block_column_pred; |
| |
| std::unique_ptr<SingleColumnBlockPredicate> true_predicate; |
| int col_idx = 0; |
| int value = 5; |
| std::shared_ptr<ColumnPredicate> pred( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::EQ>(col_idx, value)); |
| true_predicate = std::make_unique<SingleColumnBlockPredicate>(pred); |
| |
| std::unique_ptr<SingleColumnBlockPredicate> false_predicate; |
| std::shared_ptr<ColumnPredicate> pred2( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::NE>(col_idx, value)); |
| false_predicate = std::make_unique<SingleColumnBlockPredicate>(pred2); |
| |
| std::unique_ptr<vectorized::FieldSchema> parquet_field_col1 = |
| std::make_unique<vectorized::FieldSchema>(); |
| parquet_field_col1->name = "col1"; |
| parquet_field_col1->data_type = vectorized::DataTypeFactory::instance().create_data_type( |
| PrimitiveType::TYPE_INT, true); |
| parquet_field_col1->field_id = -1; |
| parquet_field_col1->parquet_schema.type = tparquet::Type::type::INT32; |
| |
| vectorized::ParquetPredicate::ColumnStat stat; |
| cctz::time_zone tmp_ctz; |
| stat.ctz = &tmp_ctz; |
| |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_stat_func; |
| // 5 belongs to [5, 5] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| auto tmp = std::string(reinterpret_cast<const char*>(&value), sizeof(value)); |
| stat->encoded_min_value = tmp; |
| stat->encoded_max_value = tmp; |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_TRUE(true_predicate->evaluate_and(&stat)); |
| EXPECT_FALSE(false_predicate->evaluate_and(&stat)); |
| or_block_column_pred.add_column_predicate(std::move(true_predicate)); |
| or_block_column_pred.add_column_predicate(std::move(false_predicate)); |
| EXPECT_TRUE(or_block_column_pred.evaluate_and(&stat)); |
| } |
| { |
| OrBlockColumnPredicate or_block_column_pred; |
| |
| std::unique_ptr<SingleColumnBlockPredicate> false_predicate2; |
| int col_idx = 0; |
| int value = 5; |
| std::shared_ptr<ColumnPredicate> pred( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::NE>(col_idx, value)); |
| false_predicate2 = std::make_unique<SingleColumnBlockPredicate>(pred); |
| |
| std::unique_ptr<SingleColumnBlockPredicate> false_predicate; |
| std::shared_ptr<ColumnPredicate> pred2( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::NE>(col_idx, value)); |
| false_predicate = std::make_unique<SingleColumnBlockPredicate>(pred2); |
| |
| std::unique_ptr<vectorized::FieldSchema> parquet_field_col1 = |
| std::make_unique<vectorized::FieldSchema>(); |
| parquet_field_col1->name = "col1"; |
| parquet_field_col1->data_type = vectorized::DataTypeFactory::instance().create_data_type( |
| PrimitiveType::TYPE_INT, true); |
| parquet_field_col1->field_id = -1; |
| parquet_field_col1->parquet_schema.type = tparquet::Type::type::INT32; |
| |
| vectorized::ParquetPredicate::ColumnStat stat; |
| cctz::time_zone tmp_ctz; |
| stat.ctz = &tmp_ctz; |
| |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_stat_func; |
| // 5 belongs to [5, 5] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| auto tmp = std::string(reinterpret_cast<const char*>(&value), sizeof(value)); |
| stat->encoded_min_value = tmp; |
| stat->encoded_max_value = tmp; |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_FALSE(false_predicate2->evaluate_and(&stat)); |
| EXPECT_FALSE(false_predicate->evaluate_and(&stat)); |
| or_block_column_pred.add_column_predicate(std::move(false_predicate2)); |
| or_block_column_pred.add_column_predicate(std::move(false_predicate)); |
| EXPECT_FALSE(or_block_column_pred.evaluate_and(&stat)); |
| } |
| { |
| OrBlockColumnPredicate or_block_column_pred; |
| |
| int col_idx = 0; |
| int value = 5; |
| std::unique_ptr<SingleColumnBlockPredicate> false_predicate; |
| std::shared_ptr<ColumnPredicate> pred2( |
| new ComparisonPredicateBase<TYPE_INT, PredicateType::NE>(col_idx, value)); |
| false_predicate = std::make_unique<SingleColumnBlockPredicate>(pred2); |
| |
| std::unique_ptr<vectorized::FieldSchema> parquet_field_col1 = |
| std::make_unique<vectorized::FieldSchema>(); |
| parquet_field_col1->name = "col1"; |
| parquet_field_col1->data_type = vectorized::DataTypeFactory::instance().create_data_type( |
| PrimitiveType::TYPE_INT, true); |
| parquet_field_col1->field_id = -1; |
| parquet_field_col1->parquet_schema.type = tparquet::Type::type::INT32; |
| |
| vectorized::ParquetPredicate::ColumnStat stat; |
| cctz::time_zone tmp_ctz; |
| stat.ctz = &tmp_ctz; |
| |
| std::function<bool(vectorized::ParquetPredicate::ColumnStat*, int)> get_stat_func; |
| // 5 belongs to [5, 5] |
| get_stat_func = [&](vectorized::ParquetPredicate::ColumnStat* stat, const int cid) { |
| stat->col_schema = parquet_field_col1.get(); |
| stat->is_all_null = false; |
| stat->has_null = false; |
| auto tmp = std::string(reinterpret_cast<const char*>(&value), sizeof(value)); |
| stat->encoded_min_value = tmp; |
| stat->encoded_max_value = tmp; |
| return true; |
| }; |
| stat.get_stat_func = &get_stat_func; |
| EXPECT_FALSE(false_predicate->evaluate_and(&stat)); |
| or_block_column_pred.add_column_predicate(std::move(false_predicate)); |
| EXPECT_FALSE(or_block_column_pred.evaluate_and(&stat)); |
| } |
| } |
| |
| } // namespace doris |