// blob: 62123705e4d84d65c576f619b6a98416f63bc199 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <gtest/gtest.h>
#include "gen_cpp/cloud.pb.h"
#include "io/fs/s3_file_system.h"
#include "olap/rowset/rowset_meta.h"
#include "olap/storage_policy.h"
namespace doris {
// Verifies the remote object paths that StorageResource builds for the v0 and v1 path
// layouts, and that constructing it with an unsupported path version terminates the process.
TEST(StorageResourceTest, RemotePath) {
    // Identifiers shared by every expectation below.
    constexpr int tablet_id = 10005;
    constexpr int seg_id = 5;
    constexpr std::string_view rowset_id_text = "0200000000001cc2224124562e7dfd4834d031b13c0210be";

    // Expected results for the v0 layout.
    const std::string v0_tablet_dir = "data/10005";
    const std::string v0_segment =
            "data/10005/0200000000001cc2224124562e7dfd4834d031b13c0210be_5.dat";
    // Expected results for the v1 layout (extra shard directory, rowset id as a directory).
    const std::string v1_tablet_dir = "data/611/10005";
    const std::string v1_segment =
            "data/611/10005/0200000000001cc2224124562e7dfd4834d031b13c0210be/5.dat";

    // Build an S3 file system handle to back the StorageResource under test.
    S3Conf conf {.bucket = "bucket",
                 .prefix = "prefix",
                 .client_conf = {
                         .endpoint = "endpoint",
                         .region = "region",
                         .ak = "ak",
                         .sk = "sk",
                         .token = "",
                         .bucket = "",
                         .role_arn = "",
                         .external_id = "",
                 }};
    auto fs_result = io::S3FileSystem::create(std::move(conf), io::FileSystem::TMP_FS_ID);
    ASSERT_TRUE(fs_result.has_value()) << fs_result.error();
    auto& fs = fs_result.value();

    // --- Constructed from the file system only: path v0 ---
    StorageResource resource(fs);
    EXPECT_EQ(resource.remote_tablet_path(tablet_id), v0_tablet_dir);
    EXPECT_EQ(resource.remote_segment_path(tablet_id, rowset_id_text, seg_id), v0_segment);

    // The RowsetMeta overload must agree with the (tablet id, rowset id, segment id) overload.
    RowsetMeta rowset_meta;
    rowset_meta.set_tablet_id(tablet_id);
    RowsetId rowset_id;
    rowset_id.init(rowset_id_text);
    rowset_meta.set_rowset_id(rowset_id);
    EXPECT_EQ(resource.remote_segment_path(rowset_meta, seg_id), v0_segment);
    EXPECT_EQ(resource.cooldown_tablet_meta_path(tablet_id, 10006, 13),
              "data/10005/10006.13.meta");

    // --- Constructed with an untouched path format from the vault PB: still path v0 ---
    cloud::StorageVaultPB vault_pb;
    resource = StorageResource(fs, vault_pb.path_format());
    EXPECT_EQ(resource.remote_tablet_path(tablet_id), v0_tablet_dir);
    EXPECT_EQ(resource.remote_segment_path(tablet_id, rowset_id_text, seg_id), v0_segment);
    EXPECT_EQ(resource.remote_segment_path(rowset_meta, seg_id), v0_segment);

    // --- Path version 1 with 1000 shards: path v1 ---
    auto* format = vault_pb.mutable_path_format();
    format->set_path_version(1);
    format->set_shard_num(1000);
    resource = StorageResource(fs, vault_pb.path_format());
    EXPECT_EQ(resource.remote_tablet_path(tablet_id), v1_tablet_dir);
    EXPECT_EQ(resource.remote_segment_path(tablet_id, rowset_id_text, seg_id), v1_segment);
    EXPECT_EQ(resource.remote_segment_path(rowset_meta, seg_id), v1_segment);
    EXPECT_EQ(resource.cooldown_tablet_meta_path(tablet_id, 10006, 13),
              "data/611/10005/10006.13.meta");

    // --- Path version 2 is not supported: construction is expected to die with a message
    //     matching "unknown" ---
    format->set_path_version(2);
    ASSERT_DEATH(StorageResource(fs, vault_pb.path_format()), "unknown");
}
// Verifies StorageResource::parse_tablet_id_from_path against segment (.dat) and index (.idx)
// object paths in both path layouts, plus a set of paths that must be rejected.
TEST(StorageResourceTest, ParseTabletIdFromPath) {
    // Shorthand for the function under test; every input in this test is a string literal.
    auto tablet_id_in = [](const char* path) {
        return StorageResource::parse_tablet_id_from_path(path);
    };

    // ---- Version 0 layout, segment files: data/{tablet_id}/{rowset_id}_{seg_id}.dat ----
    // Produced by StorageResource::remote_segment_path:
    // fmt::format("{}/{}/{}_{}.dat", DATA_PREFIX, tablet_id, rowset_id, seg_id);
    EXPECT_EQ(tablet_id_in("prefix_xxx/data/10005/"
                           "0200000000001cc2224124562e7dfd4834d031b13c0210be_5.dat"),
              10005);
    EXPECT_EQ(tablet_id_in("//data/12345/rowset_001_0.dat"), 12345);
    EXPECT_EQ(tablet_id_in("data/999999/rowset_abc_10.dat"), 999999);

    // ---- Version 0 layout, index files in the v1 index format ----
    // Produced by StorageResource::remote_idx_v1_path:
    // fmt::format("{}/{}/{}_{}_{}{}.idx", DATA_PREFIX, rowset.tablet_id(),
    //             rowset.rowset_id().to_string(), seg_id, index_id, suffix);
    EXPECT_EQ(tablet_id_in("//data/10005/0200000000001cc2224124562e7_6_6666_suffix.idx"), 10005);
    EXPECT_EQ(tablet_id_in("bucket_xxx/data/12345/rowsetid_1_666_suffix.idx"), 12345);
    EXPECT_EQ(tablet_id_in("data/999999/rowsetid_10_8888_suffix.idx"), 999999);

    // ---- Version 0 layout, index files in the v2 index format ----
    // Produced by StorageResource::remote_idx_v2_path:
    // fmt::format("{}/{}/{}_{}.idx", DATA_PREFIX, rowset.tablet_id(),
    //             rowset.rowset_id().to_string(), seg_id);
    EXPECT_EQ(tablet_id_in("s3://prefix_bucket/data/10005/0200000000001cc2224124562e7_5.idx"),
              10005);
    EXPECT_EQ(tablet_id_in("/data/12345/rowset001_0.idx"), 12345);
    EXPECT_EQ(tablet_id_in("data/999999/rowsetabc_10.idx"), 999999);

    // ---- Version 1 layout, segment files: data/{shard}/{tablet_id}/{rowset_id}/{seg_id}.dat ----
    // Produced by StorageResource::remote_segment_path:
    // fmt::format("{}/{}/{}/{}/{}.dat", DATA_PREFIX, shard_fn(rowset.tablet_id()),
    //             rowset.tablet_id(), rowset.rowset_id().to_string(), seg_id);
    EXPECT_EQ(tablet_id_in("prefix_xxxx/data/611/10005/"
                           "0200000000001cc2224124562e7dfd4834d031b13c0210be/"
                           "5.dat"),
              10005);
    EXPECT_EQ(tablet_id_in("data/0/12345/rowset_001/0.dat"), 12345);
    EXPECT_EQ(tablet_id_in("s3:///data/999/999999/rowset_abc/10.dat"), 999999);

    // ---- Version 1 layout, index files in the v1 index format ----
    // Produced by StorageResource::remote_idx_v1_path:
    // fmt::format("{}/{}/{}/{}/{}_{}{}.idx", DATA_PREFIX, shard_fn(rowset.tablet_id()),
    //             rowset.tablet_id(), rowset.rowset_id().to_string(), seg_id, index_id, suffix);
    EXPECT_EQ(tablet_id_in("s3:///data/611/10005/"
                           "0200000000001cc2224124562e7dfd4834d031b13c0210be/"
                           "5_6666_suffix.idx"),
              10005);
    EXPECT_EQ(tablet_id_in("prefix_bucket/data/0/12345/rowsetid/1_666_suffix.idx"), 12345);
    EXPECT_EQ(tablet_id_in("data/999/999999/rowsetid/10_8888_suffix.idx"), 999999);

    // ---- Version 1 layout, index files in the v2 index format ----
    // Produced by StorageResource::remote_idx_v2_path:
    // fmt::format("{}/{}/{}/{}/{}.idx", DATA_PREFIX, shard_fn(rowset.tablet_id()),
    //             rowset.tablet_id(), rowset.rowset_id().to_string(), seg_id);
    EXPECT_EQ(tablet_id_in("s3://prefix_bucket/data/611/10005/"
                           "0200000000001cc2224124562e7dfd4834d031b13c0210be/5.idx"),
              10005);
    EXPECT_EQ(tablet_id_in("/data/0/12345/rowset001/0.idx"), 12345);
    EXPECT_EQ(tablet_id_in("data/999/999999/rowsetabc/10.idx"), 999999);

    // ---- Edge case: a tablet id of 0 is a valid result, distinct from "no result" ----
    // Version 0 shape: fmt::format("{}/{}/{}_{}.dat", DATA_PREFIX, tablet_id, rowset_id, seg_id);
    EXPECT_EQ(tablet_id_in("prefix_bucket/data/0/rowset001_0.dat"), 0);
    // Version 1 shape: fmt::format("{}/{}/{}/{}/{}.dat", DATA_PREFIX,
    //                              shard_fn(rowset.tablet_id()), rowset.tablet_id(),
    //                              rowset.rowset_id().to_string(), seg_id);
    EXPECT_EQ(tablet_id_in("/data/0/0/rowset001/0.dat"), 0);

    // ---- Rejected: empty, malformed, non-numeric tablet id, wrong or missing extension ----
    EXPECT_EQ(tablet_id_in(""), std::nullopt);
    EXPECT_EQ(tablet_id_in("invalid_path"), std::nullopt);
    EXPECT_EQ(tablet_id_in("data/"), std::nullopt);
    EXPECT_EQ(tablet_id_in("/data/abc/rowset_001_0.dat"), std::nullopt);
    EXPECT_EQ(tablet_id_in("s3://prefix_bucket/data/0/abc/rowset_001/0.dat"), std::nullopt);
    EXPECT_EQ(tablet_id_in("data/10005/rowset_001_0.txt"), std::nullopt);
    EXPECT_EQ(tablet_id_in("data/10005/rowset_001_0"), std::nullopt);

    // ---- Rejected: component count after "data" matches neither layout ----
    EXPECT_EQ(tablet_id_in("data/10005/rowset_001/extra/0.dat"), std::nullopt);
    EXPECT_EQ(tablet_id_in("/data/10005/rowset_001/extra/0.idx"), std::nullopt);
    EXPECT_EQ(tablet_id_in("prefix_bucket/data/10005/rowset_001/extra/0.dat"), std::nullopt);
    EXPECT_EQ(tablet_id_in("data/10005.dat"), std::nullopt);

    // ---- Rejected: no "data" prefix component at all ----
    EXPECT_EQ(tablet_id_in("10005/rowset_001_0.dat"), std::nullopt);
    EXPECT_EQ(tablet_id_in("0/12345/rowset_001/0.dat"), std::nullopt);

    // ---- Rejected: double slash immediately after the "data" prefix ----
    EXPECT_EQ(tablet_id_in("data//10005/rowset_001_0.dat"), std::nullopt);
    EXPECT_EQ(tablet_id_in("data//0/12345/rowset_001/0.dat"), std::nullopt);
}
} // namespace doris