// blob: d3ea66a1a8a9c566a1bd1faed324eff72f9acedc [file]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "storage/cache/page_cache.h"
#include "storage/segment/binary_dict_page.h"
#include "storage/segment/bitshuffle_page.h"
#include "storage/segment/encoding_info.h"
namespace doris {
namespace segment_v2 {
/**
* @brief Pre-decoder for BitShuffle encoded pages
*
* This decoder handles pure bitshuffle + lz4 compressed data without any dict page header.
* For bitshuffle data within BinaryDictPage, use BinaryDictPagePreDecoder instead.
*/
struct BitShufflePagePreDecoder : public DataPagePreDecoder {
    /**
     * @brief Decode bitshuffle data
     *
     * The input should be data encoded by bitshuffle + lz4.
     * This decoder does NOT handle BinaryDictPage headers - use BinaryDictPagePreDecoder for that.
     *
     * On success a new DataPage is allocated and laid out as:
     *
     *   [ prefix | bitshuffle header | decompressed elements | tail ]
     *
     * where the prefix is `size_of_prefix` bytes that this function reserves but does NOT
     * write (the caller is expected to fill them if it asked for a non-zero prefix), the header
     * is the first BITSHUFFLE_PAGE_HEADER_SIZE bytes of the input copied verbatim, the
     * decompressed region is `num_element_after_padding * size_of_element` bytes, and the tail
     * is the last `size_of_tail` bytes of the input copied verbatim.
     *
     * `*page` and `*page_slice` are only modified on success; on any error return they are left
     * exactly as passed in.
     *
     * @param page unique_ptr to hold page data, will be replaced by decoded data
     * @param page_slice data to decode, will be updated to point to decoded data
     * @param size_of_tail including size of footer and null map; must not exceed page_slice->size
     * @param _use_cache whether to use page cache
     * @param page_type the type of page
     * @param file_path file path for error reporting
     * @param size_of_prefix size of prefix space to reserve (for dict page header)
     * @return Status::OK() on success; an InternalError if the size information is inconsistent
     *         (tail larger than the page, or the header's compressed size not matching the
     *         payload size); a RuntimeError if bitshuffle decompression fails; or whatever
     *         error parse_bit_shuffle_header reports.
     */
    Status decode(std::unique_ptr<DataPage>* page, Slice* page_slice, size_t size_of_tail,
                  bool _use_cache, segment_v2::PageTypePB page_type, const std::string& file_path,
                  size_t size_of_prefix = 0) override {
        // The tail is carved off the end of the page, so it can never be larger than the page.
        // Without this guard the unsigned subtraction below would wrap around and the header
        // parser would be handed a slice length that does not reflect the bytes available.
        if (size_of_tail > page_slice->size) [[unlikely]] {
            return Status::InternalError(
                    "Size information unmatched in file: {}, size_of_tail:{}, page size:{}",
                    file_path, size_of_tail, page_slice->size);
        }

        // Filled in by parse_bit_shuffle_header through reference parameters.
        size_t num_elements = 0;
        size_t compressed_size = 0;
        size_t num_element_after_padding = 0;
        int size_of_element = 0;

        // Payload = page without its tail (bitshuffle header + compressed body).
        Slice data(page_slice->data, page_slice->size - size_of_tail);
        RETURN_IF_ERROR(parse_bit_shuffle_header(data, num_elements, compressed_size,
                                                 num_element_after_padding, size_of_element));

        // The size recorded in the header must cover exactly the payload we were given.
        if (compressed_size != data.size) {
            return Status::InternalError(
                    "Size information unmatched in file: {}, compressed_size:{}, "
                    "num_elements:{}, data size:{}",
                    file_path, compressed_size, num_elements, data.size);
        }

        Slice decoded_slice;
        decoded_slice.size = size_of_prefix + BITSHUFFLE_PAGE_HEADER_SIZE +
                             num_element_after_padding * size_of_element + size_of_tail;
        std::unique_ptr<DataPage> decoded_page =
                std::make_unique<DataPage>(decoded_slice.size, _use_cache, page_type);
        decoded_slice.data = decoded_page->data();

        // Copy bitshuffle header to the position after prefix
        memcpy(decoded_slice.data + size_of_prefix, data.data, BITSHUFFLE_PAGE_HEADER_SIZE);

        // Decompress data to the position after prefix and header
        auto bytes = bitshuffle::decompress_lz4(
                &data.data[BITSHUFFLE_PAGE_HEADER_SIZE],
                decoded_slice.data + size_of_prefix + BITSHUFFLE_PAGE_HEADER_SIZE,
                num_element_after_padding, size_of_element, 0);
        if (bytes < 0) [[unlikely]] {
            // Ideally, this should not happen.
            warn_with_bitshuffle_error(bytes);
            return Status::RuntimeError("Unshuffle Process failed in file: {}", file_path);
        }

        // Copy tail to the end
        memcpy(decoded_slice.data + decoded_slice.size - size_of_tail,
               page_slice->data + page_slice->size - size_of_tail, size_of_tail);

        // Publish the results only after every step succeeded.
        *page_slice = decoded_slice;
        *page = std::move(decoded_page);
        return Status::OK();
    }
};
} // namespace segment_v2
} // namespace doris