| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #pragma once |
| |
| #include <atomic> |
| #include <cstdint> |
| #include <iosfwd> |
| #include <memory> |
| #include <optional> |
| #include <string> |
| #include <vector> |
| |
| #include <gflags/gflags_declare.h> |
| #include <glog/logging.h> |
| #include <gtest/gtest_prod.h> |
| |
| #include "kudu/fs/dir_manager.h" |
| #include "kudu/fs/error_manager.h" |
| #include "kudu/gutil/macros.h" |
| #include "kudu/gutil/ref_counted.h" |
| #include "kudu/util/env.h" |
| #include "kudu/util/metrics.h" |
| #include "kudu/util/oid_generator.h" |
| #include "kudu/util/path_util.h" |
| #include "kudu/util/status.h" |
| |
| namespace kudu { |
| namespace security { |
| class KeyProvider; |
| } // namespace security |
| } // namespace kudu |
| |
| DECLARE_bool(enable_data_block_fsync); |
| |
| namespace kudu { |
| |
| class BlockId; |
| class FileCache; |
| class InstanceMetadataPB; |
| class MemTracker; |
| class Timer; |
| |
| namespace fs { |
| |
| class BlockManager; |
| class DataDirManager; |
| class FsManagerTestBase_TestDuplicatePaths_Test; |
| class FsManagerTestBase_TestEIOWhileRunningUpdateDirsTool_Test; |
| class FsManagerTestBase_TestIsolatedMetadataDir_Test; |
| class FsManagerTestBase_TestMetadataDirInDataRoot_Test; |
| class FsManagerTestBase_TestMetadataDirInWALRoot_Test; |
| class FsManagerTestBase_TestOpenWithDuplicateInstanceFiles_Test; |
| class ReadableBlock; |
| class WritableBlock; |
| struct CreateBlockOptions; |
| struct FsReport; |
| |
| } // namespace fs |
| |
| namespace itest { |
| class MiniClusterFsInspector; |
| } // namespace itest |
| |
| namespace tserver { |
| class MiniTabletServerTest_TestFsLayoutEndToEnd_Test; |
| } // namespace tserver |
| |
| // Options that control the behavior of FsManager. |
| struct FsManagerOpts { |
| // Creates a new FsManagerOpts with default values. |
| FsManagerOpts(); |
| |
| // Creates a new FsManagerOpts with default values except 'wal_root' and |
| // 'data_roots', which are both initialized to 'root'. |
| // |
| // Should only be used in unit tests. |
| explicit FsManagerOpts(const std::string& root); |
| |
| // The entity under which all metrics should be grouped. If null, metrics |
| // will not be produced. |
| // |
| // Defaults to null. |
| scoped_refptr<MetricEntity> metric_entity; |
| |
| // The memory tracker under which all new memory trackers will be parented. |
| // If null, new memory trackers will be parented to the root tracker. |
| // |
| // Defaults to null. |
| std::shared_ptr<MemTracker> parent_mem_tracker; |
| |
| // The directory root where WALs will be stored. Cannot be empty. |
| std::string wal_root; |
| |
| // The directory root where data blocks will be stored. If empty, Kudu will |
| // use the WAL root. |
| std::vector<std::string> data_roots; |
| |
| // The directory root where metadata will be stored. If empty, Kudu will use |
| // the WAL root, or the first configured data root if metadata already exists |
| // in it from a previous deployment (the only option in Kudu 1.6 and below |
| // was to use the first data root). |
| std::string metadata_root; |
| |
| // The block manager type. Must be either "file" or "log". |
| // |
| // Defaults to the value of FLAGS_block_manager. |
| std::string block_manager_type; |
| |
| // Whether or not read-write operations should be allowed. |
| // |
| // Defaults to false. |
| bool read_only; |
| |
| // Whether to update the on-disk instances when opening directories if |
| // inconsistencies are detected. |
| // |
| // Defaults to UPDATE_AND_IGNORE_FAILURES. |
| fs::UpdateInstanceBehavior update_instances; |
| |
| // The file cache to be used for long-lived opened files (e.g. in the block |
| // manager). If null, opened files will not be cached. |
| // |
| // Defaults to null. |
| FileCache* file_cache; |
| |
| // Whether or not to skip opening the block manager. FsManager operations that |
| // require the block manager will crash. |
| // |
| // Default to false. |
| bool skip_block_manager; |
| }; |
| |
| // FsManager provides helpers to read data and metadata files, |
| // and it's responsible for abstracting the file-system layout. |
| // |
| // The user should not be aware of where files are placed, |
| // but instead should interact with the storage in terms of "open the block xyz" |
| // or "write a new schema metadata file for table kwz". |
| // |
| // The current layout is: |
| // <kudu.root.dir>/data/ |
| // <kudu.root.dir>/data/<prefix-0>/<prefix-2>/<prefix-4>/<name> |
| class FsManager { |
| public: |
| static const char *kWalFileNamePrefix; |
| static const char *kWalsRecoveryDirSuffix; |
| |
| FsManager(Env* env, FsManagerOpts opts); |
| ~FsManager(); |
| |
| // ========================================================================== |
| // Initialization |
| // ========================================================================== |
| |
| // Initializes and loads the instance metadata files, and verifies that they |
| // are all matching, returning any root paths that do not have metadata |
| // files. Sets 'metadata_' on success, and returns NotFound if none of the |
| // metadata files could be read. This must be called before calling uuid(). |
| // |
| // This only partially initialize the FsManager to expose the file |
| // system's UUID. To do anything more than that, call Open() or |
| // CreateInitialFileSystemLayout(). |
| Status PartialOpen(CanonicalizedRootsList* missing_roots = nullptr); |
| |
| // Initializes and loads the basic filesystem metadata, checking it for |
| // inconsistencies. If found, and if the FsManager was not constructed in |
| // read-only mode, an attempt will be made to repair them. |
| // |
| // If 'report' is not null, it will be populated with the results of the |
| // check (and repair, if applicable); otherwise, the results of the check |
| // will be logged and the presence of fatal inconsistencies will manifest as |
| // a returned error. |
| // |
| // If the filesystem has not been initialized, returns NotFound. In that |
| // case, CreateInitialFileSystemLayout() may be used to initialize the |
| // on-disk and in-memory structures. |
| // |
| // If 'read_instance_metadata_files' and 'read_data_directories' are not nullptr, |
| // they will be populated with time spent reading the instance metadata files |
| // and time spent reading data directories respectively. |
| // |
| // If 'containers_processed' and 'containers_total' are not nullptr, they will |
| // be populated with total containers attempted to be opened/processed and |
| // total containers present respectively in the subsequent calls made to |
| // the block manager. |
| Status Open(fs::FsReport* report = nullptr, |
| Timer* read_instance_metadata_files = nullptr, |
| Timer* read_data_directories = nullptr, |
| std::atomic<int>* containers_processed = nullptr, |
| std::atomic<int>* containers_total = nullptr ); |
| |
| // Create the initial filesystem layout. If 'uuid' is provided, uses it as |
| // uuid of the filesystem. Otherwise generates one at random. If 'server_key', |
| // 'server_key_iv', and 'server_key_version' are provided, they are used as |
| // the server key of the filesystem. Otherwise, if encryption is enabled, |
| // generates one at random. |
| // |
| // Returns an error if the file system is already initialized. |
| Status CreateInitialFileSystemLayout( |
| std::optional<std::string> uuid = std::nullopt, |
| std::optional<std::string> server_key = std::nullopt, |
| std::optional<std::string> server_key_iv = std::nullopt, |
| std::optional<std::string> server_key_version = std::nullopt); |
| |
| // ========================================================================== |
| // Error handling helpers |
| // ========================================================================== |
| |
| // Registers an error-handling callback with the FsErrorManager. |
| // |
| // If a disk failure is detected, this callback will be invoked with the |
| // relevant DataDir's UUID as its input parameter. |
| void SetErrorNotificationCb(fs::ErrorHandlerType e, fs::ErrorNotificationCb cb); |
| |
| // Unregisters the error-handling callback with the FsErrorManager. |
| // |
| // This must be called before the callback's callee is destroyed. Calls to |
| // this are idempotent and are safe even if a callback has not been set. |
| void UnsetErrorNotificationCb(fs::ErrorHandlerType e); |
| |
| // ========================================================================== |
| // Data read/write interfaces |
| // ========================================================================== |
| |
| // Creates a new block based on the options specified in 'opts'. |
| // |
| // Block will be synced on close. |
| Status CreateNewBlock(const fs::CreateBlockOptions& opts, |
| std::unique_ptr<fs::WritableBlock>* block); |
| |
| Status OpenBlock(const BlockId& block_id, |
| std::unique_ptr<fs::ReadableBlock>* block); |
| |
| bool BlockExists(const BlockId& block_id) const; |
| |
| // ========================================================================== |
| // on-disk path |
| // ========================================================================== |
| std::vector<std::string> GetDataRootDirs() const; |
| |
| std::string GetWalsRootDir() const { |
| DCHECK(initted_); |
| return JoinPathSegments(canonicalized_wal_fs_root_.path, kWalDirName); |
| } |
| |
| std::string GetTabletWalDir(const std::string& tablet_id) const { |
| return JoinPathSegments(GetWalsRootDir(), tablet_id); |
| } |
| |
| std::string GetTabletWalRecoveryDir(const std::string& tablet_id) const; |
| |
| std::string GetWalSegmentFileName(const std::string& tablet_id, |
| uint64_t sequence_number) const; |
| |
| // Return the directory where tablet superblocks should be stored. |
| std::string GetTabletMetadataDir() const; |
| |
| // Return the path for a specific tablet's superblock. |
| std::string GetTabletMetadataPath(const std::string& tablet_id) const; |
| |
| // List the tablet IDs in the metadata directory. |
| Status ListTabletIds(std::vector<std::string>* tablet_ids); |
| |
| // Return the path where InstanceMetadataPB is stored. |
| std::string GetInstanceMetadataPath(const std::string& root) const; |
| |
| // Return the directory where the consensus metadata is stored. |
| std::string GetConsensusMetadataDir() const { |
| DCHECK(initted_); |
| return JoinPathSegments(canonicalized_metadata_fs_root_.path, kConsensusMetadataDirName); |
| } |
| |
| // Return the path where ConsensusMetadataPB is stored. |
| std::string GetConsensusMetadataPath(const std::string& tablet_id) const { |
| return JoinPathSegments(GetConsensusMetadataDir(), tablet_id); |
| } |
| |
| Env* env() { return env_; } |
| |
| bool read_only() const { |
| return opts_.read_only; |
| } |
| |
| // Return the UUID persisted in the local filesystem. If PartialOpen() or |
| // Open() have not been called, this will crash. |
| const std::string& uuid() const; |
| |
| // Return the server key persisted on the local filesystem. After the server |
| // key is decrypted, it can be used to encrypt/decrypt file keys on the |
| // filesystem. If PartialOpen() or Open() have not been called, this will |
| // crash. If the file system is not encrypted, it returns an empty string. |
| const std::string& server_key() const; |
| |
| // Return the initialization vector for the server key. |
| const std::string& server_key_iv() const; |
| |
| // Return the version of the server key. |
| const std::string& server_key_version() const; |
| |
| // ========================================================================== |
| // file-system helpers |
| // ========================================================================== |
| bool Exists(const std::string& path) const { |
| return env_->FileExists(path); |
| } |
| |
| Status ListDir(const std::string& path, std::vector<std::string> *objects) const { |
| return env_->GetChildren(path, objects); |
| } |
| |
| fs::DataDirManager* dd_manager() const { |
| return dd_manager_.get(); |
| } |
| |
| fs::BlockManager* block_manager() { |
| DCHECK(block_manager_); |
| return block_manager_.get(); |
| } |
| |
| // Prints the file system trees under the file system roots. |
| void DumpFileSystemTree(std::ostream& out); |
| |
| bool meta_on_xfs() const { |
| return meta_on_xfs_; |
| } |
| |
| private: |
| FRIEND_TEST(fs::FsManagerTestBase, TestDuplicatePaths); |
| FRIEND_TEST(fs::FsManagerTestBase, TestEIOWhileRunningUpdateDirsTool); |
| FRIEND_TEST(fs::FsManagerTestBase, TestIsolatedMetadataDir); |
| FRIEND_TEST(fs::FsManagerTestBase, TestMetadataDirInWALRoot); |
| FRIEND_TEST(fs::FsManagerTestBase, TestMetadataDirInDataRoot); |
| FRIEND_TEST(fs::FsManagerTestBase, TestOpenWithDuplicateInstanceFiles); |
| FRIEND_TEST(tserver::MiniTabletServerTest, TestFsLayoutEndToEnd); |
| friend class itest::MiniClusterFsInspector; // for access to directory names |
| |
| // Initializes, sanitizes, and canonicalizes the filesystem roots. |
| // Determines the correct filesystem root for tablet-specific metadata. |
| Status Init(); |
| |
| // Select and create an instance of the appropriate block manager. |
| // |
| // Does not actually perform any on-disk operations. |
| void InitBlockManager(); |
| |
| // Creates filesystem roots from 'canonicalized_roots', writing new on-disk |
| // instances using 'metadata'. |
| // |
| // |
| // All created directories and files will be appended to 'created_dirs' and |
| // 'created_files' respectively. It is the responsibility of the caller to |
| // synchronize the directories containing these newly created file objects. |
| Status CreateFileSystemRoots(CanonicalizedRootsList canonicalized_roots, |
| const InstanceMetadataPB& metadata, |
| std::vector<std::string>* created_dirs, |
| std::vector<std::string>* created_files); |
| |
| // Create a new InstanceMetadataPB. |
| Status CreateInstanceMetadata(std::optional<std::string> uuid, |
| std::optional<std::string> server_key, |
| std::optional<std::string> server_key_iv, |
| std::optional<std::string> server_key_version, |
| InstanceMetadataPB* metadata); |
| |
| // Save a InstanceMetadataPB to the filesystem. |
| // Does not mutate the current state of the fsmanager. |
| Status WriteInstanceMetadata(const InstanceMetadataPB& metadata, |
| const std::string& root); |
| |
| // ========================================================================== |
| // file-system helpers |
| // ========================================================================== |
| |
| // Prints the file system tree for the objects in 'objects' under the given |
| // 'path'. Prints lines with the given 'prefix'. |
| void DumpFileSystemTree(std::ostream& out, |
| const std::string& prefix, |
| const std::string& path, |
| const std::vector<std::string>& objects); |
| |
| // Deletes leftover temporary files in all "special" top-level directories |
| // (e.g. WAL root directory). |
| // |
| // Logs warnings in case of errors. |
| void CleanTmpFiles(); |
| |
| // Checks that the permissions of the root data directories conform to the |
| // configured umask, and tightens them as necessary if they do not. |
| void CheckAndFixPermissions(); |
| |
| // Returns true if 'fname' is a valid tablet ID. |
| bool IsValidTabletId(const std::string& fname); |
| |
| static const char *kDataDirName; |
| static const char *kTabletMetadataDirName; |
| static const char *kWalDirName; |
| static const char *kInstanceMetadataFileName; |
| static const char *kConsensusMetadataDirName; |
| |
| // The environment to be used for all filesystem operations. |
| Env* env_; |
| |
| // The options that the FsManager was created with. |
| const FsManagerOpts opts_; |
| |
| // Canonicalized forms of the root directories. Constructed during Init() |
| // with ordering maintained. |
| // |
| // - The first data root is used as the metadata root. |
| // - Common roots in the collections have been deduplicated. |
| CanonicalizedRootAndStatus canonicalized_wal_fs_root_; |
| CanonicalizedRootAndStatus canonicalized_metadata_fs_root_; |
| CanonicalizedRootsList canonicalized_data_fs_roots_; |
| CanonicalizedRootsList canonicalized_all_fs_roots_; |
| |
| std::unique_ptr<InstanceMetadataPB> metadata_; |
| |
| std::unique_ptr<fs::FsErrorManager> error_manager_; |
| std::unique_ptr<fs::DataDirManager> dd_manager_; |
| std::unique_ptr<fs::BlockManager> block_manager_; |
| |
| std::unique_ptr<security::KeyProvider> key_provider_; |
| |
| ObjectIdGenerator oid_generator_; |
| |
| bool initted_; |
| |
| // Cache whether or not the metadata directory is on an XFS directory. |
| bool meta_on_xfs_; |
| |
| DISALLOW_COPY_AND_ASSIGN(FsManager); |
| }; |
| |
| } // namespace kudu |
| |