blob: 42a316b774039f2e53f7284417f3120db7d179d4 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <crc32c/crc32c.h>
#include <gflags/gflags.h>
#include <fstream>
#include <iostream>
#include <sstream>
#include "common/status.h"
#include "gen_cpp/file_cache.pb.h"
#include "io/cache/cache_lru_dumper.h"
#include "io/cache/file_cache_common.h"
#include "io/cache/lru_queue_recorder.h"
#include "util/coding.h"
using namespace doris;
DEFINE_string(filename, "", "dump file name");
std::string get_usage(const std::string& progname) {
std::stringstream ss;
ss << progname << " is the Doris BE file cache lru tool for examing dumped content.\n";
ss << "Usage:\n";
ss << progname << " --filename [filename]\n";
ss << "\nExample:\n";
ss << progname << " --filename ./lru_dump_ttl.tail\n";
return ss.str();
}
Status check_ifstream_status(std::ifstream& in, std::string& filename) {
if (!in.good()) {
std::ios::iostate state = in.rdstate();
std::stringstream err_msg;
if (state & std::ios::eofbit) {
err_msg << "End of file reached.";
}
if (state & std::ios::failbit) {
err_msg << "Input/output operation failed, err_code: " << strerror(errno);
}
if (state & std::ios::badbit) {
err_msg << "Serious I/O error occurred, err_code: " << strerror(errno);
}
in.close();
std::string warn_msg = std::string(
fmt::format("dump lru reading failed, file={}, {}", filename, err_msg.str()));
std::cerr << warn_msg << std::endl;
return Status::InternalError<false>(warn_msg);
}
return Status::OK();
}
struct Footer {
size_t meta_offset;
uint32_t checksum;
uint8_t version;
char magic[3];
std::string serialize_as_string() const;
bool deserialize_from_string(const std::string& data) {
DCHECK(data.size() == sizeof(Footer));
const char* ptr = data.data();
// Deserialize meta_offset (convert from little-endian)
uint64_t meta_offset_le;
std::memcpy(&meta_offset_le, ptr, sizeof(meta_offset_le));
meta_offset = decode_fixed64_le(reinterpret_cast<uint8_t*>(&meta_offset_le));
ptr += sizeof(meta_offset_le);
// Deserialize checksum (convert from little-endian)
uint32_t checksum_le;
std::memcpy(&checksum_le, ptr, sizeof(checksum_le));
checksum = decode_fixed32_le(reinterpret_cast<uint8_t*>(&checksum_le));
ptr += sizeof(checksum_le);
version = *((uint8_t*)ptr);
ptr += sizeof(version);
// Deserialize magic
std::memcpy(magic, ptr, sizeof(magic));
return true;
}
} __attribute__((packed));
Status parse_dump_footer(std::ifstream& in, std::string& filename, size_t& entry_num,
doris::io::cache::LRUDumpMetaPb& parse_meta,
doris::io::cache::LRUDumpEntryGroupPb& current_parse_group) {
size_t file_size = std::filesystem::file_size(filename);
// Read footer
Footer footer;
size_t footer_size = sizeof(footer);
if (file_size < footer_size) {
std::string warn_msg = std::string(fmt::format(
"LRU dump file too small to contain footer, file={}, skip restore", filename));
std::cerr << warn_msg << std::endl;
return Status::InternalError<false>(warn_msg);
}
in.seekg(-footer_size, std::ios::end);
std::string footer_str(footer_size, '\0');
in.read(&footer_str[0], footer_size);
RETURN_IF_ERROR(check_ifstream_status(in, filename));
if (!footer.deserialize_from_string(footer_str)) {
std::string warn_msg = std::string(
fmt::format("Failed to deserialize footer, file={}, skip restore", filename));
std::cerr << warn_msg << std::endl;
return Status::InternalError<false>(warn_msg);
}
// Validate footer
if (footer.version != 1 || std::string(footer.magic, 3) != "DOR") {
std::string warn_msg = std::string(fmt::format(
"LRU dump file invalid footer format, file={}, skip restore", filename));
std::cerr << warn_msg << std::endl;
return Status::InternalError<false>(warn_msg);
}
// Read meta
in.seekg(footer.meta_offset, std::ios::beg);
size_t meta_size = file_size - footer.meta_offset - footer_size;
if (meta_size <= 0) {
std::string warn_msg = std::string(
fmt::format("LRU dump file invalid meta size, file={}, skip restore", filename));
std::cerr << warn_msg << std::endl;
return Status::InternalError<false>(warn_msg);
}
std::string meta_serialized(meta_size, '\0');
in.read(&meta_serialized[0], meta_serialized.size());
RETURN_IF_ERROR(check_ifstream_status(in, filename));
parse_meta.Clear();
current_parse_group.Clear();
if (!parse_meta.ParseFromString(meta_serialized)) {
std::string warn_msg = std::string(
fmt::format("LRU dump file meta parse failed, file={}, skip restore", filename));
std::cerr << warn_msg << std::endl;
return Status::InternalError<false>(warn_msg);
}
std::cout << "parse meta: " << parse_meta.DebugString() << std::endl;
entry_num = parse_meta.entry_num();
return Status::OK();
}
Status parse_one_lru_entry(std::ifstream& in, std::string& filename, io::UInt128Wrapper& hash,
size_t& offset, size_t& size,
doris::io::cache::LRUDumpMetaPb& parse_meta,
doris::io::cache::LRUDumpEntryGroupPb& current_parse_group) {
// Read next group if current is empty
if (current_parse_group.entries_size() == 0) {
if (parse_meta.group_offset_size_size() == 0) {
return Status::EndOfFile("No more entries");
}
auto group_info = parse_meta.group_offset_size(0);
in.seekg(group_info.offset(), std::ios::beg);
std::string group_serialized(group_info.size(), '\0');
in.read(&group_serialized[0], group_serialized.size());
RETURN_IF_ERROR(check_ifstream_status(in, filename));
uint32_t checksum = crc32c::Crc32c(group_serialized.data(), group_serialized.size());
if (checksum != group_info.checksum()) {
std::string warn_msg =
fmt::format("restore lru failed as checksum not match, file={}", filename);
std::cerr << warn_msg << std::endl;
return Status::InternalError(warn_msg);
}
if (!current_parse_group.ParseFromString(group_serialized)) {
std::string warn_msg =
fmt::format("restore lru failed to parse group, file={}", filename);
std::cerr << warn_msg << std::endl;
return Status::InternalError(warn_msg);
}
// Remove processed group info
parse_meta.mutable_group_offset_size()->erase(parse_meta.group_offset_size().begin());
}
// Get next entry from current group
auto entry = current_parse_group.entries(0);
hash = io::UInt128Wrapper((static_cast<uint128_t>(entry.hash().high()) << 64) |
entry.hash().low());
offset = entry.offset();
size = entry.size();
std::cout << hash.to_string() << " " << offset << " " << size << std::endl;
// Remove processed entry
current_parse_group.mutable_entries()->erase(current_parse_group.entries().begin());
return Status::OK();
}
int main(int argc, char** argv) {
std::string usage = get_usage(argv[0]);
gflags::SetUsageMessage(usage);
google::ParseCommandLineFlags(&argc, &argv, true);
std::ifstream in(FLAGS_filename, std::ios::binary);
size_t entry_num;
doris::io::cache::LRUDumpMetaPb parse_meta;
doris::io::cache::LRUDumpEntryGroupPb current_parse_group;
auto s = parse_dump_footer(in, FLAGS_filename, entry_num, parse_meta, current_parse_group);
in.seekg(0, std::ios::beg);
io::UInt128Wrapper hash;
size_t offset, size;
for (int i = 0; i < entry_num; ++i) {
EXIT_IF_ERROR(parse_one_lru_entry(in, FLAGS_filename, hash, offset, size, parse_meta,
current_parse_group));
}
return 0;
}