| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #pragma once |
| |
| #include <stddef.h> |
| |
| #include <functional> |
| #include <memory> |
| |
| #include "common/config.h" |
| #include "common/status.h" |
| #include "storage/cache/page_cache.h" |
| #include "storage/segment/options.h" |
| #include "util/slice.h" |
| |
| namespace doris { |
| |
| enum class FieldType; |
| |
| namespace segment_v2 { |
| |
| class PageBuilder; |
| class PageDecoder; |
| struct PageBuilderOptions; |
| struct PageDecoderOptions; |
| enum EncodingTypePB : int; |
| |
| // For better performance, some encodings (like BitShuffle) need to be decoded before being added to the PageCache. |
| class DataPagePreDecoder { |
| public: |
| virtual Status decode(std::unique_ptr<DataPage>* page, Slice* page_slice, size_t size_of_tail, |
| bool _use_cache, segment_v2::PageTypePB page_type, |
| const std::string& file_path, size_t size_of_prefix = 0) = 0; |
| virtual ~DataPagePreDecoder() = default; |
| }; |
| |
| class EncodingInfo { |
| public: |
| // Get EncodingInfo for TypeInfo and EncodingTypePB |
| static Status get(FieldType type, EncodingTypePB encoding_type, |
| EncodingPreference encoding_preference, const EncodingInfo** encoding); |
| |
| // optimize_value_search: whether the encoding scheme should optimize for ordered data |
| // and support fast value seek operation |
| static EncodingTypePB get_default_encoding(FieldType type, |
| EncodingPreference encoding_preference, |
| bool optimize_value_seek); |
| |
| Status create_page_builder(const PageBuilderOptions& opts, PageBuilder** builder) const { |
| return _create_builder_func(opts, builder); |
| } |
| Status create_page_builder(const PageBuilderOptions& opts, |
| std::unique_ptr<PageBuilder>& builder) const; |
| Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts, |
| PageDecoder** decoder) const { |
| return _create_decoder_func(data, opts, decoder); |
| } |
| Status create_page_decoder(const Slice& data, const PageDecoderOptions& opts, |
| std::unique_ptr<PageDecoder>& decoder) const; |
| FieldType type() const { return _type; } |
| EncodingTypePB encoding() const { return _encoding; } |
| |
| DataPagePreDecoder* get_data_page_pre_decoder() const { return _data_page_pre_decoder.get(); } |
| |
| private: |
| friend class EncodingInfoResolver; |
| |
| template <typename TypeEncodingTraits> |
| explicit EncodingInfo(TypeEncodingTraits traits); |
| |
| using CreateBuilderFunc = std::function<Status(const PageBuilderOptions&, PageBuilder**)>; |
| CreateBuilderFunc _create_builder_func; |
| |
| using CreateDecoderFunc = |
| std::function<Status(const Slice&, const PageDecoderOptions& opts, PageDecoder**)>; |
| CreateDecoderFunc _create_decoder_func; |
| |
| FieldType _type; |
| EncodingTypePB _encoding; |
| std::unique_ptr<DataPagePreDecoder> _data_page_pre_decoder; |
| }; |
| |
| struct EncodingMapHash { |
| size_t operator()(const FieldType& type) const { return int(type); } |
| size_t operator()(const std::pair<FieldType, EncodingTypePB>& pair) const { |
| return (int(pair.first) << 6) ^ pair.second; |
| } |
| }; |
| |
| class EncodingInfoResolver { |
| public: |
| EncodingInfoResolver(); |
| ~EncodingInfoResolver(); |
| |
| EncodingTypePB get_default_encoding(FieldType type, EncodingPreference encoding_preference, |
| bool optimize_value_seek) const; |
| |
| Status get(FieldType data_type, EncodingTypePB encoding_type, |
| EncodingPreference encoding_preference, const EncodingInfo** out); |
| |
| private: |
| // Not thread-safe |
| template <FieldType type, EncodingTypePB encoding_type, bool optimize_value_seek = false> |
| void _add_map(); |
| |
| std::unordered_map<FieldType, EncodingTypePB, EncodingMapHash> _default_encoding_type_map; |
| |
| // default encoding for each type which optimizes value seek |
| std::unordered_map<FieldType, EncodingTypePB, EncodingMapHash> _value_seek_encoding_map; |
| |
| std::unordered_map<std::pair<FieldType, EncodingTypePB>, EncodingInfo*, EncodingMapHash> |
| _encoding_map; |
| }; |
| |
| template <FieldType type, EncodingTypePB encoding, typename CppType, typename Enabled = void> |
| struct TypeEncodingTraits {}; |
| |
| template <FieldType field_type, EncodingTypePB encoding_type> |
| struct EncodingTraits : TypeEncodingTraits<field_type, encoding_type, |
| typename CppTypeTraits<field_type>::CppType> { |
| using CppType = typename CppTypeTraits<field_type>::CppType; |
| static const FieldType type = field_type; |
| static const EncodingTypePB encoding = encoding_type; |
| }; |
| |
| template <FieldType type, EncodingTypePB encoding_type, bool optimize_value_seek> |
| void EncodingInfoResolver::_add_map() { |
| EncodingTraits<type, encoding_type> traits; |
| std::unique_ptr<EncodingInfo> encoding(new EncodingInfo(traits)); |
| if (_default_encoding_type_map.find(type) == std::end(_default_encoding_type_map)) { |
| _default_encoding_type_map[type] = encoding_type; |
| } |
| if (optimize_value_seek && |
| _value_seek_encoding_map.find(type) == _value_seek_encoding_map.end()) { |
| _value_seek_encoding_map[type] = encoding_type; |
| } |
| auto key = std::make_pair(type, encoding_type); |
| auto it = _encoding_map.find(key); |
| if (it != _encoding_map.end()) { |
| return; |
| } |
| _encoding_map.emplace(key, encoding.release()); |
| } |
| |
| } // namespace segment_v2 |
| } // namespace doris |