blob: 29ddf9db603a593faedcb06141d0e031befbc8ad [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "io/fs/packed_file_trailer.h"

#include <array>
#include <cstddef>
#include <cstdint>
#include <fstream>
#include <limits>
#include <string>
#include <string_view>

#include "common/status.h"
#include "util/coding.h"
namespace doris::io {
// Parses the trailer found at the end of `data`.
//
// Two layouts are recognised and tried in this order:
//   1. Preferred: [PackedFileFooterPB][length][version]
//   2. Legacy:    [PackedFileInfoPB][length]
// `length` and `version` are 32-bit values read with decode_fixed32_le.
//
// @param data     Bytes whose end holds the trailer; bytes in front of the trailer are not
//                 inspected.
// @param debug_pb Receives the footer. For the legacy layout it is cleared and only its
//                 packed_file_info field is populated.
// @param version  Receives the version stored in the trailer, or 0 for the legacy layout.
// @return OK on success; InvalidArgument when an output pointer is null; InternalError when
//         `data` is too short or neither layout can be parsed. The outputs are written only
//         on success.
Status parse_packed_file_trailer(std::string_view data, cloud::PackedFileFooterPB* debug_pb,
                                 uint32_t* version) {
    if (debug_pb == nullptr || version == nullptr) {
        return Status::InvalidArgument("Output parameters must not be null");
    }
    // The shortest trailer is the legacy one, which needs at least its length field.
    if (data.size() < sizeof(uint32_t)) {
        return Status::InternalError("Packed file too small to contain trailer");
    }

    // ParseFromArray takes the payload size as an int, so larger payloads are rejected.
    constexpr size_t max_parse_bytes = static_cast<size_t>(std::numeric_limits<int>::max());

    // Preferred format: [PackedFileFooterPB][length][version]
    // Only this attempt is gated on the full suffix size: data shorter than the suffix can
    // still carry a legacy trailer and must reach the fallback below.
    if (data.size() >= kPackedFileTrailerSuffixSize) {
        const size_t body_limit = data.size() - kPackedFileTrailerSuffixSize;
        const auto* suffix_ptr = reinterpret_cast<const uint8_t*>(data.data() + body_limit);
        const uint32_t trailer_size = decode_fixed32_le(suffix_ptr);
        const uint32_t trailer_version = decode_fixed32_le(suffix_ptr + sizeof(uint32_t));

        if (trailer_size > 0 && trailer_size <= body_limit) {
            const std::string_view payload(data.data() + (body_limit - trailer_size),
                                           trailer_size);
            if (payload.size() > max_parse_bytes) {
                return Status::InternalError("Packed file trailer payload too large");
            }
            // Parse into a local so a failed attempt leaves *debug_pb untouched.
            cloud::PackedFileFooterPB parsed_pb;
            if (parsed_pb.ParseFromArray(payload.data(), static_cast<int>(payload.size()))) {
                debug_pb->Swap(&parsed_pb);
                *version = trailer_version;
                return Status::OK();
            }
        }
    }

    // Legacy format fallback: [PackedFileInfoPB][length]
    const size_t legacy_body_limit = data.size() - sizeof(uint32_t);
    const auto* legacy_ptr = reinterpret_cast<const uint8_t*>(data.data() + legacy_body_limit);
    const uint32_t legacy_size = decode_fixed32_le(legacy_ptr);
    if (legacy_size == 0 || legacy_size > legacy_body_limit) {
        return Status::InternalError("Packed file trailer corrupted");
    }

    const std::string_view legacy_payload(data.data() + (legacy_body_limit - legacy_size),
                                          legacy_size);
    if (legacy_payload.size() > max_parse_bytes) {
        return Status::InternalError("Packed file legacy trailer payload too large");
    }
    cloud::PackedFileInfoPB packed_info;
    if (!packed_info.ParseFromArray(legacy_payload.data(),
                                    static_cast<int>(legacy_payload.size()))) {
        return Status::InternalError("Failed to parse packed file trailer");
    }

    // Wrap the legacy message in a footer so callers see a single output type.
    debug_pb->Clear();
    debug_pb->mutable_packed_file_info()->Swap(&packed_info);
    *version = 0;
    return Status::OK();
}
// Reads the trailer stored at the end of the file at `file_path` and parses it with
// parse_packed_file_trailer.
//
// The preferred layout ([PackedFileFooterPB][length][version]) is tried first. If the suffix
// cannot be read, the announced length does not fit in the file, parsing fails, or the parsed
// version differs from the one stored in the suffix, the legacy layout
// ([PackedFileInfoPB][length]) is tried instead.
//
// @param file_path Path of the packed file to inspect.
// @param debug_pb  Receives the parsed footer.
// @param version   Receives the trailer version reported by parse_packed_file_trailer.
// @return OK on success; InvalidArgument when an output pointer is null; IOError when the
//         file cannot be opened, sized or read; InternalError when the file is too small or
//         the trailer cannot be parsed.
Status read_packed_file_trailer(const std::string& file_path, cloud::PackedFileFooterPB* debug_pb,
                                uint32_t* version) {
    if (debug_pb == nullptr || version == nullptr) {
        return Status::InvalidArgument("Output parameters must not be null");
    }

    std::ifstream file(file_path, std::ios::binary);
    if (!file.is_open()) {
        return Status::IOError("Failed to open packed file {}", file_path);
    }

    file.seekg(0, std::ios::end);
    const std::streamoff file_size = file.tellg();
    // tellg() signals failure with -1; report that as an I/O problem rather than letting it
    // fall into the "too small" check below.
    if (!file || file_size < 0) {
        return Status::IOError("Failed to determine size of packed file {}", file_path);
    }
    if (file_size < static_cast<std::streamoff>(sizeof(uint32_t))) {
        return Status::InternalError("Packed file {} is too small", file_path);
    }

    // Loads the last `count` bytes of the file into `*out`.
    auto read_tail = [&](std::streamoff count, std::string* out) -> Status {
        out->assign(static_cast<size_t>(count), '\0');
        file.seekg(file_size - count);
        file.read(out->data(), count);
        if (!file) {
            return Status::IOError("Failed to read last {} bytes from {}", count, file_path);
        }
        return Status::OK();
    };

    // Try new format first.
    constexpr auto suffix_len = static_cast<std::streamoff>(kPackedFileTrailerSuffixSize);
    if (file_size >= suffix_len) {
        std::array<char, kPackedFileTrailerSuffixSize> suffix {};
        file.seekg(file_size - suffix_len);
        file.read(suffix.data(), suffix.size());
        if (file) {
            const auto* raw = reinterpret_cast<const uint8_t*>(suffix.data());
            const uint32_t trailer_size = decode_fixed32_le(raw);
            const uint32_t trailer_version = decode_fixed32_le(raw + sizeof(uint32_t));
            // Add in streamoff so a large trailer_size cannot wrap a narrower unsigned type.
            const std::streamoff required = suffix_len + static_cast<std::streamoff>(trailer_size);
            if (trailer_size > 0 && file_size >= required) {
                std::string tail;
                RETURN_IF_ERROR(read_tail(required, &tail));
                // Parse into locals: a result that is rejected below must not leak into the
                // caller's outputs before the legacy fallback runs.
                cloud::PackedFileFooterPB candidate_pb;
                uint32_t candidate_version = 0;
                Status st = parse_packed_file_trailer(tail, &candidate_pb, &candidate_version);
                if (st.ok() && candidate_version == trailer_version) {
                    debug_pb->Swap(&candidate_pb);
                    *version = candidate_version;
                    return st;
                }
            }
        }
        // Reset any error state left by the failed attempt so the fallback can seek and read.
        file.clear();
    }

    // Legacy fallback: PackedFileInfoPB + length.
    std::array<char, sizeof(uint32_t)> legacy_suffix {};
    file.seekg(file_size - static_cast<std::streamoff>(legacy_suffix.size()));
    file.read(legacy_suffix.data(), legacy_suffix.size());
    if (!file) {
        return Status::IOError("Failed to read legacy trailer length from {}", file_path);
    }

    const uint32_t legacy_size =
            decode_fixed32_le(reinterpret_cast<const uint8_t*>(legacy_suffix.data()));
    const std::streamoff required = static_cast<std::streamoff>(sizeof(uint32_t)) +
                                    static_cast<std::streamoff>(legacy_size);
    if (legacy_size == 0 || file_size < required) {
        return Status::InternalError("Packed file trailer corrupted for {}", file_path);
    }

    std::string tail;
    RETURN_IF_ERROR(read_tail(required, &tail));
    return parse_packed_file_trailer(tail, debug_pb, version);
}
} // namespace doris::io