| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #pragma once |
| |
| #include <stddef.h> |
| #include <stdint.h> |
| |
| #include "common/compiler_util.h" // IWYU pragma: keep |
| #include "common/status.h" |
| #include "core/data_type/data_type.h" |
| #include "format/parquet/decoder.h" |
| #include "util/bit_stream_utils.h" |
| #include "util/bit_stream_utils.inline.h" |
| #include "util/slice.h" |
| |
| namespace doris { |
| class ColumnSelectVector; |
| } // namespace doris |
| |
| namespace doris { |
| #include "common/compile_check_begin.h" |
| /// Decoder bit-packed boolean-encoded values. |
| /// Implementation from https://github.com/apache/impala/blob/master/be/src/exec/parquet/parquet-bool-decoder.h |
| //bit-packed-run-len and rle-run-len must be in the range [1, 2^31 - 1]. |
| // This means that a Parquet implementation can always store the run length in a signed 32-bit integer |
| class BoolPlainDecoder final : public Decoder { |
| public: |
| BoolPlainDecoder() = default; |
| ~BoolPlainDecoder() override = default; |
| |
| // Set the data to be decoded |
| Status set_data(Slice* data) override { |
| bool_values_.Reset((const uint8_t*)data->data, data->size); |
| num_unpacked_values_ = 0; |
| unpacked_value_idx_ = 0; |
| _offset = 0; |
| return Status::OK(); |
| } |
| |
| Status decode_values(MutableColumnPtr& doris_column, DataTypePtr& data_type, |
| ColumnSelectVector& select_vector, bool is_dict_filter) override; |
| |
| template <bool has_filter> |
| Status _decode_values(MutableColumnPtr& doris_column, DataTypePtr& data_type, |
| ColumnSelectVector& select_vector, bool is_dict_filter); |
| |
| Status skip_values(size_t num_values) override; |
| |
| protected: |
| inline bool _decode_value(bool* value) { |
| if (LIKELY(unpacked_value_idx_ < num_unpacked_values_)) { |
| *value = unpacked_values_[unpacked_value_idx_++]; |
| } else { |
| num_unpacked_values_ = |
| bool_values_.UnpackBatch(1, UNPACKED_BUFFER_LEN, &unpacked_values_[0]); |
| if (UNLIKELY(num_unpacked_values_ == 0)) { |
| return false; |
| } |
| *value = unpacked_values_[0]; |
| unpacked_value_idx_ = 1; |
| } |
| return true; |
| } |
| |
| /// A buffer to store unpacked values. Must be a multiple of 32 size to use the |
| /// batch-oriented interface of BatchedBitReader. We use uint8_t instead of bool because |
| /// bit unpacking is only supported for unsigned integers. The values are converted to |
| /// bool when returned to the user. |
| static const int UNPACKED_BUFFER_LEN = 128; |
| uint8_t unpacked_values_[UNPACKED_BUFFER_LEN]; |
| |
| /// The number of valid values in 'unpacked_values_'. |
| int num_unpacked_values_ = 0; |
| |
| /// The next value to return from 'unpacked_values_'. |
| int unpacked_value_idx_ = 0; |
| |
| /// Bit packed decoder, used if 'encoding_' is PLAIN. |
| BatchedBitReader bool_values_; |
| }; |
| #include "common/compile_check_end.h" |
| |
| } // namespace doris |