// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <cstddef>
#include <cstdint>
#include <memory>

#include "kudu/cfile/cfile_reader.h"
#include "kudu/cfile/cfile_writer.h"
#include "kudu/gutil/macros.h"
#include "kudu/util/bloom_filter.h"
#include "kudu/util/faststring.h"
#include "kudu/util/mem_tracker.h"
#include "kudu/util/once.h"
#include "kudu/util/slice.h"
#include "kudu/util/status.h"

| namespace kudu { |
| |
| namespace fs { |
| class BlockCreationTransaction; |
| struct IOContext; |
| class ReadableBlock; |
| class WritableBlock; |
| } |
| |
| namespace cfile { |
| |
| class BloomBlockHeaderPB; |
| struct ReaderOptions; |
| |
| class BloomFileWriter { |
| public: |
| BloomFileWriter(std::unique_ptr<fs::WritableBlock> block, |
| const BloomFilterSizing &sizing); |
| |
| Status Start(); |
| Status AppendKeys(const Slice *keys, size_t n_keys); |
| |
| // Close the bloom's CFile, closing the underlying writable block. |
| Status Finish(); |
| |
| // Close the bloom's CFile, finalizing the underlying block and |
| // releasing it to 'transaction'. |
| Status FinishAndReleaseBlock(fs::BlockCreationTransaction* transaction); |
| |
| // Estimate the amount of data already written to this file. |
| size_t written_size() const; |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(BloomFileWriter); |
| |
| Status FinishCurrentBloomBlock(); |
| |
| std::unique_ptr<cfile::CFileWriter> writer_; |
| |
| BloomFilterBuilder bloom_builder_; |
| |
| // first key inserted in the current block. |
| faststring first_key_; |
| |
| // last key inserted in the previous block |
| faststring last_key_; |
| }; |
| |
| // Reader for a bloom file. |
| // NB: this is not currently thread-safe. |
| // When making it thread-safe, should make sure that the threads |
| // share a single CFileReader, or else the cache keys won't end up |
| // shared! |
| class BloomFileReader { |
| public: |
| |
| // Fully open a bloom file using a previously opened block. |
| // |
| // After this call, the bloom reader is safe for use. |
| static Status Open(std::unique_ptr<fs::ReadableBlock> block, |
| ReaderOptions options, |
| std::unique_ptr<BloomFileReader>* reader); |
| |
| // Lazily opens a bloom file using a previously opened block. A lazy open |
| // does not incur additional I/O, nor does it validate the contents of |
| // the bloom file. |
| // |
| // Init() must be called before using CheckKeyPresent(). |
| static Status OpenNoInit(std::unique_ptr<fs::ReadableBlock> block, |
| ReaderOptions options, |
| std::unique_ptr<BloomFileReader>* reader); |
| |
| // Fully opens a previously lazily opened bloom file, parsing and |
| // validating its contents. |
| // |
| // May be called multiple times; subsequent calls will no-op. |
| Status Init(const fs::IOContext* io_context); |
| |
| // Check if the given key may be present in the file. |
| // |
| // Sets *maybe_present to false if the key is definitely not |
| // present, otherwise sets it to true to indicate maybe present. |
| Status CheckKeyPresent(const BloomKeyProbe& probe, |
| const fs::IOContext* io_context, |
| bool* maybe_present); |
| |
| // Can be called before Init(). |
| uint64_t FileSize() const { |
| return reader_->file_size(); |
| } |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(BloomFileReader); |
| |
| BloomFileReader(std::unique_ptr<CFileReader> reader, ReaderOptions options); |
| |
| // Parse the header present in the given block. |
| // |
| // Returns the parsed header inside *hdr, and returns |
| // a Slice to the true bloom filter data inside |
| // *bloom_data. |
| Status ParseBlockHeader(const Slice &block, |
| BloomBlockHeaderPB* hdr, |
| Slice* bloom_data) const; |
| |
| // Callback used in 'init_once_' to initialize this bloom file. |
| Status InitOnce(const fs::IOContext* io_context); |
| |
| // Returns the memory usage of this object including the object itself but |
| // excluding the CFileReader, which is tracked independently. |
| size_t memory_footprint_excluding_reader() const; |
| |
| // Sequence number for the instance, generated from a global counter. |
| // Used for a ThreadLocalCache key. |
| // TODO(todd): if we want to conserve a bit of memory we could try to |
| // collapse this into the init_once_ object or some-such. |
| const uint64_t instance_nonce_; |
| |
| std::unique_ptr<CFileReader> reader_; |
| |
| KuduOnceLambda init_once_; |
| |
| ScopedTrackedConsumption mem_consumption_; |
| }; |
| |
} // namespace cfile
} // namespace kudu