| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include <gtest/gtest.h> |
| |
| #include "common/logging.h" |
| #include "env/env.h" |
| #include "olap/fs/block_manager.h" |
| #include "olap/fs/fs_util.h" |
| #include "olap/key_coder.h" |
| #include "olap/olap_common.h" |
| #include "olap/rowset/segment_v2/bloom_filter.h" |
| #include "olap/rowset/segment_v2/bloom_filter_index_reader.h" |
| #include "olap/rowset/segment_v2/bloom_filter_index_writer.h" |
| #include "olap/types.h" |
| #include "util/file_utils.h" |
| |
| namespace doris { |
| namespace segment_v2 { |
| |
// Scratch directory (relative to the working directory) in which the tests
// below create their bloom filter index files.
const std::string dname("./ut_dir/bloom_filter_index_reader_writer_test");
| |
| class BloomFilterIndexReaderWriterTest : public testing::Test { |
| public: |
| virtual void SetUp() { |
| if (FileUtils::is_dir(dname)) { |
| std::set<std::string> files; |
| ASSERT_TRUE(FileUtils::list_dirs_files(dname, nullptr, &files, Env::Default()).ok()); |
| for (const auto& file : files) { |
| Status s = Env::Default()->delete_file(dname + "/" + file); |
| ASSERT_TRUE(s.ok()) << s.to_string(); |
| } |
| ASSERT_TRUE(Env::Default()->delete_dir(dname).ok()); |
| } |
| } |
| }; |
| |
| template <FieldType type> |
| void write_bloom_filter_index_file(const std::string& file_name, const void* values, |
| size_t value_count, size_t null_count, |
| ColumnIndexMetaPB* index_meta) { |
| const TypeInfo* type_info = get_type_info(type); |
| using CppType = typename CppTypeTraits<type>::CppType; |
| FileUtils::create_dir(dname); |
| std::string fname = dname + "/" + file_name; |
| { |
| std::unique_ptr<fs::WritableBlock> wblock; |
| fs::CreateBlockOptions opts({fname}); |
| Status st = fs::fs_util::block_manager()->create_block(opts, &wblock); |
| ASSERT_TRUE(st.ok()) << st.to_string(); |
| |
| std::unique_ptr<BloomFilterIndexWriter> bloom_filter_index_writer; |
| BloomFilterOptions bf_options; |
| BloomFilterIndexWriter::create(bf_options, type_info, &bloom_filter_index_writer); |
| const CppType* vals = (const CppType*)values; |
| for (int i = 0; i < value_count;) { |
| size_t num = std::min(1024, (int)value_count - i); |
| bloom_filter_index_writer->add_values(vals + i, num); |
| if (i == 2048) { |
| // second page |
| bloom_filter_index_writer->add_nulls(null_count); |
| } |
| st = bloom_filter_index_writer->flush(); |
| ASSERT_TRUE(st.ok()); |
| i += 1024; |
| } |
| st = bloom_filter_index_writer->finish(wblock.get(), index_meta); |
| ASSERT_TRUE(st.ok()) << "writer finish status:" << st.to_string(); |
| ASSERT_TRUE(wblock->close().ok()); |
| ASSERT_EQ(BLOOM_FILTER_INDEX, index_meta->type()); |
| ASSERT_EQ(bf_options.strategy, index_meta->bloom_filter_index().hash_strategy()); |
| } |
| } |
| |
| void get_bloom_filter_reader_iter(const std::string& file_name, const ColumnIndexMetaPB& meta, |
| std::unique_ptr<RandomAccessFile>* rfile, |
| BloomFilterIndexReader** reader, |
| std::unique_ptr<BloomFilterIndexIterator>* iter) { |
| std::string fname = dname + "/" + file_name; |
| |
| *reader = new BloomFilterIndexReader(fname, &meta.bloom_filter_index()); |
| auto st = (*reader)->load(true, false); |
| ASSERT_TRUE(st.ok()); |
| |
| st = (*reader)->new_iterator(iter); |
| ASSERT_TRUE(st.ok()); |
| } |
| |
| template <FieldType Type> |
| void test_bloom_filter_index_reader_writer_template( |
| const std::string file_name, typename TypeTraits<Type>::CppType* val, size_t num, |
| size_t null_num, typename TypeTraits<Type>::CppType* not_exist_value, |
| bool is_slice_type = false) { |
| typedef typename TypeTraits<Type>::CppType CppType; |
| ColumnIndexMetaPB meta; |
| write_bloom_filter_index_file<Type>(file_name, val, num, null_num, &meta); |
| { |
| std::unique_ptr<RandomAccessFile> rfile; |
| BloomFilterIndexReader* reader = nullptr; |
| std::unique_ptr<BloomFilterIndexIterator> iter; |
| get_bloom_filter_reader_iter(file_name, meta, &rfile, &reader, &iter); |
| |
| // page 0 |
| std::unique_ptr<BloomFilter> bf; |
| auto st = iter->read_bloom_filter(0, &bf); |
| ASSERT_TRUE(st.ok()); |
| for (int i = 0; i < 1024; ++i) { |
| if (is_slice_type) { |
| Slice* value = (Slice*)(val + i); |
| ASSERT_TRUE(bf->test_bytes(value->data, value->size)); |
| } else { |
| ASSERT_TRUE(bf->test_bytes((char*)&val[i], sizeof(CppType))); |
| } |
| } |
| |
| // page 1 |
| st = iter->read_bloom_filter(1, &bf); |
| ASSERT_TRUE(st.ok()); |
| for (int i = 1024; i < 2048; ++i) { |
| if (is_slice_type) { |
| Slice* value = (Slice*)(val + i); |
| ASSERT_TRUE(bf->test_bytes(value->data, value->size)); |
| } else { |
| ASSERT_TRUE(bf->test_bytes((char*)&val[i], sizeof(CppType))); |
| } |
| } |
| |
| // page 2 |
| st = iter->read_bloom_filter(2, &bf); |
| ASSERT_TRUE(st.ok()); |
| for (int i = 2048; i < 3071; ++i) { |
| if (is_slice_type) { |
| Slice* value = (Slice*)(val + i); |
| ASSERT_TRUE(bf->test_bytes(value->data, value->size)); |
| } else { |
| ASSERT_TRUE(bf->test_bytes((char*)&val[i], sizeof(CppType))); |
| } |
| } |
| // test nullptr |
| ASSERT_TRUE(bf->test_bytes(nullptr, 1)); |
| |
| delete reader; |
| } |
| } |
| |
| TEST_F(BloomFilterIndexReaderWriterTest, test_int) { |
| size_t num = 1024 * 3 - 1; |
| int* val = new int[num]; |
| for (int i = 0; i < num; ++i) { |
| // there will be 3 bloom filter pages |
| val[i] = 10000 + i + 1; |
| } |
| |
| std::string file_name = "bloom_filter_int"; |
| int not_exist_value = 18888; |
| test_bloom_filter_index_reader_writer_template<OLAP_FIELD_TYPE_INT>(file_name, val, num, 1, |
| ¬_exist_value); |
| delete[] val; |
| } |
| |
| TEST_F(BloomFilterIndexReaderWriterTest, test_bigint) { |
| size_t num = 1024 * 3 - 1; |
| int64_t* val = new int64_t[num]; |
| for (int i = 0; i < num; ++i) { |
| // there will be 3 bloom filter pages |
| val[i] = 100000000 + i + 1; |
| } |
| |
| std::string file_name = "bloom_filter_bigint"; |
| int64_t not_exist_value = 18888; |
| test_bloom_filter_index_reader_writer_template<OLAP_FIELD_TYPE_BIGINT>(file_name, val, num, 1, |
| ¬_exist_value); |
| delete[] val; |
| } |
| |
| TEST_F(BloomFilterIndexReaderWriterTest, test_largeint) { |
| size_t num = 1024 * 3 - 1; |
| int128_t* val = new int128_t[num]; |
| for (int i = 0; i < num; ++i) { |
| // there will be 3 bloom filter pages |
| val[i] = 100000000 + i + 1; |
| } |
| |
| std::string file_name = "bloom_filter_largeint"; |
| int128_t not_exist_value = 18888; |
| test_bloom_filter_index_reader_writer_template<OLAP_FIELD_TYPE_LARGEINT>(file_name, val, num, 1, |
| ¬_exist_value); |
| delete[] val; |
| } |
| |
| TEST_F(BloomFilterIndexReaderWriterTest, test_varchar_type) { |
| size_t num = 1024 * 3 - 1; |
| std::string* val = new std::string[num]; |
| for (int i = 0; i < num; ++i) { |
| // there will be 3 bloom filter pages |
| val[i] = "prefix_" + std::to_string(i); |
| } |
| Slice* slices = new Slice[num]; |
| for (int i = 0; i < num; ++i) { |
| // there will be 3 bloom filter pages |
| slices[i] = Slice(val[i].c_str(), val[i].size()); |
| } |
| std::string file_name = "bloom_filter_varchar"; |
| Slice not_exist_value("value_not_exist"); |
| test_bloom_filter_index_reader_writer_template<OLAP_FIELD_TYPE_VARCHAR>( |
| file_name, slices, num, 1, ¬_exist_value, true); |
| delete[] val; |
| delete[] slices; |
| } |
| |
| TEST_F(BloomFilterIndexReaderWriterTest, test_char) { |
| size_t num = 1024 * 3 - 1; |
| std::string* val = new std::string[num]; |
| for (int i = 0; i < num; ++i) { |
| // there will be 3 bloom filter pages |
| val[i] = "prefix_" + std::to_string(10000 + i); |
| } |
| Slice* slices = new Slice[num]; |
| for (int i = 0; i < num; ++i) { |
| // there will be 3 bloom filter pages |
| slices[i] = Slice(val[i].c_str(), val[i].size()); |
| } |
| std::string file_name = "bloom_filter_char"; |
| Slice not_exist_value("char_value_not_exist"); |
| test_bloom_filter_index_reader_writer_template<OLAP_FIELD_TYPE_CHAR>(file_name, slices, num, 1, |
| ¬_exist_value, true); |
| delete[] val; |
| delete[] slices; |
| } |
| |
| TEST_F(BloomFilterIndexReaderWriterTest, test_date) { |
| size_t num = 1024 * 3 - 1; |
| uint24_t* val = new uint24_t[num]; |
| for (int i = 0; i < num; ++i) { |
| // there will be 3 bloom filter pages |
| val[i] = 10000 + i + 1; |
| } |
| |
| std::string file_name = "bloom_filter_date"; |
| uint24_t not_exist_value = 18888; |
| test_bloom_filter_index_reader_writer_template<OLAP_FIELD_TYPE_DATE>(file_name, val, num, 1, |
| ¬_exist_value); |
| delete[] val; |
| } |
| |
| TEST_F(BloomFilterIndexReaderWriterTest, test_datetime) { |
| size_t num = 1024 * 3 - 1; |
| int64_t* val = new int64_t[num]; |
| for (int i = 0; i < num; ++i) { |
| // there will be 3 bloom filter pages |
| val[i] = 10000 + i + 1; |
| } |
| |
| std::string file_name = "bloom_filter_datetime"; |
| int64_t not_exist_value = 18888; |
| test_bloom_filter_index_reader_writer_template<OLAP_FIELD_TYPE_DATETIME>(file_name, val, num, 1, |
| ¬_exist_value); |
| delete[] val; |
| } |
| |
| TEST_F(BloomFilterIndexReaderWriterTest, test_decimal) { |
| size_t num = 1024 * 3 - 1; |
| decimal12_t* val = new decimal12_t[num]; |
| for (int i = 0; i < num; ++i) { |
| // there will be 3 bloom filter pages |
| val[i] = decimal12_t(i + 1, i + 1); |
| } |
| |
| std::string file_name = "bloom_filter_decimal"; |
| decimal12_t not_exist_value = decimal12_t(666, 666); |
| test_bloom_filter_index_reader_writer_template<OLAP_FIELD_TYPE_DECIMAL>(file_name, val, num, 1, |
| ¬_exist_value); |
| delete[] val; |
| } |
| |
| } // namespace segment_v2 |
| } // namespace doris |
| |
| int main(int argc, char** argv) { |
| doris::StoragePageCache::create_global_cache(1 << 30, 0.1); |
| ::testing::InitGoogleTest(&argc, argv); |
| return RUN_ALL_TESTS(); |
| } |