blob: efb2125e7685e3df114584d8650a50989e6a7ef7 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <string>
#include <vector>
#include "kudu/consensus/metadata.pb.h"
#include "kudu/master/master.pb.h"
#include "kudu/tablet/tablet_peer.h"
#include "kudu/util/status.h"
namespace kudu {
// Forward declarations of kudu types that appear in the SysCatalogTable
// declarations further down (Schema in BuildTableSchema() and the schema
// members, FsManager in Load() and CreateNew()).
class Schema;
class FsManager;
// Forward declarations of the tserver request/response types that
// SysCatalogTable passes around by pointer (SyncWrite() and the Req*()
// helpers). The namespace is closed right away so that the declarations
// that follow are not nested inside 'tserver'.
namespace tserver {
class WriteRequestPB;
class WriteResponsePB;
} // namespace tserver
namespace master {
// Forward declarations of master types referenced by SysCatalogTable:
// Master (constructor argument and member), MasterOptions
// (SetupDistributedConfig()), and TableInfo/TabletInfo (the Actions struct
// and the Req*() helpers).
class Master;
struct MasterOptions;
class TableInfo;
class TabletInfo;
// The SysCatalogTable has two separate visitors because the table
// data must be loaded into memory before the tablet data.

// Interface for receiving table entries. SysCatalogTable::VisitTables()
// takes a pointer to an implementation of this interface.
class TableVisitor {
 public:
  // Virtual so that an implementation can be destroyed safely through a
  // TableVisitor pointer.
  virtual ~TableVisitor() {}

  // Receives a table's ID together with its SysTablesEntryPB metadata and
  // reports the outcome as a Status.
  // NOTE(review): presumably invoked once per table entry found during the
  // scan -- confirm against the implementation of VisitTables().
  virtual Status VisitTable(const std::string& table_id,
                            const SysTablesEntryPB& metadata) = 0;
};
// Interface for receiving tablet entries. SysCatalogTable::VisitTablets()
// takes a pointer to an implementation of this interface.
class TabletVisitor {
 public:
  // Virtual so that an implementation can be destroyed safely through a
  // TabletVisitor pointer.
  virtual ~TabletVisitor() {}

  // Receives a table ID, a tablet ID and the tablet's SysTabletsEntryPB
  // metadata, and reports the outcome as a Status.
  // NOTE(review): presumably invoked once per tablet entry found during the
  // scan -- confirm against the implementation of VisitTablets().
  virtual Status VisitTablet(const std::string& table_id,
                             const std::string& tablet_id,
                             const SysTabletsEntryPB& metadata) = 0;
};
// SysCatalogTable is a Kudu table that keeps track of table and
// tablet metadata.
// - SysCatalogTable has only one tablet.
// - SysCatalogTable is managed by the master and not exposed to the user
// as a "normal table"; instead, we have Master APIs to query the table.
class SysCatalogTable {
// Signature of the callback run when this node is elected leader; it takes
// no arguments and returns a Status.
typedef Callback<Status()> ElectedLeaderCallback;
// NOTE(review): the enumerators and the closing brace of this enum are not
// visible in this view of the file -- confirm against the full source.
enum CatalogEntryType {
// 'leader_cb_' is invoked whenever this node is elected as a leader
// of the consensus configuration for this tablet, including for local standalone
// master consensus configurations. It is used to initialize leader state, submit any
// leader-specific tasks and so forth.
//
// NOTE: Since 'leader_cb_' is invoked synchronously and can block
// the consensus configuration's progress, any long running tasks (e.g., scanning
// tablets) should be performed asynchronously (by, e.g., submitting
// them to a separate threadpool).
SysCatalogTable(Master* master, MetricRegistry* metrics,
ElectedLeaderCallback leader_cb);
// Allow for orderly shutdown of tablet peer, etc.
void Shutdown();
// Load the Metadata from disk, and initialize the TabletPeer for the sys-table.
Status Load(FsManager *fs_manager);
// Create the new Metadata and initialize the TabletPeer for the sys-table.
Status CreateNew(FsManager *fs_manager);
// Perform a series of table/tablet actions in one WriteTransaction.
//
// An Actions instance carries at most one table to add, one to update and
// one to delete, plus lists of tablets to add, update and delete.
// NOTE(review): no constructor or member initializers are visible here, so
// the three TableInfo pointers presumably have to be set explicitly by the
// caller (e.g. to NULL when unused) -- confirm against the full source.
struct Actions {
TableInfo* table_to_add;
TableInfo* table_to_update;
TableInfo* table_to_delete;
std::vector<TabletInfo*> tablets_to_add;
std::vector<TabletInfo*> tablets_to_update;
std::vector<TabletInfo*> tablets_to_delete;
Status Write(const Actions& actions);
// Scan of the table-related entries.
Status VisitTables(TableVisitor* visitor);
// Scan of the tablet-related entries.
Status VisitTablets(TabletVisitor* visitor);
// Friend declarations: the named MasterTest case and CatalogManager get
// access to the non-public members of this class.
FRIEND_TEST(MasterTest, TestMasterMetadataConsistentDespiteFailures);
friend class CatalogManager;
// The system catalog table always reports the fixed name "sys.catalog".
const char *table_name() const { return "sys.catalog"; }
// The system catalog table reports an empty table ID.
const char *table_id() const { return ""; }
// Return the schema of the table.
// NOTE: This is the "server-side" schema, so it must have the column IDs.
Schema BuildTableSchema();
// Returns 'Status::OK()' if the WriteTransaction completed.
Status SyncWrite(const tserver::WriteRequestPB *req, tserver::WriteResponsePB *resp);
// NOTE(review): presumably a notification hook for state changes of the sys
// catalog tablet, given the tablet ID and a textual reason -- confirm
// against the implementation.
void SysCatalogStateChanged(const std::string& tablet_id, const std::string& reason);
// NOTE(review): presumably sets up the tablet and its TabletPeer from the
// given tablet metadata -- confirm against the implementation.
Status SetupTablet(const scoped_refptr<tablet::TabletMetadata>& metadata);
// Use the master options to generate a new consensus configuration.
// In addition, resolve all UUIDs of this consensus configuration.
// Note: The current node adds itself to the peers whether leader or
// follower, depending on whether the Master options leader flag is
// set. Even if the local node should be a follower, it should not be listed
// in the Master options followers list, as it will add itself automatically.
// TODO: Revisit this whole thing when integrating leader election.
Status SetupDistributedConfig(const MasterOptions& options,
consensus::RaftConfigPB* committed_config);
// Returns a reference to the 'tablet_peer_' member.
// NOTE(review): the closing brace of this inline body is not visible in
// this view of the file.
const scoped_refptr<tablet::TabletPeer>& tablet_peer() const {
return tablet_peer_;
// Returns the tablet ID reported by 'tablet_peer_'. Dereferences
// 'tablet_peer_', so the peer must already be set when this is called.
// NOTE(review): the closing brace of this inline body is not visible in
// this view of the file.
std::string tablet_id() const {
return tablet_peer_->tablet_id();
// Conventional "T xxx P xxxx..." prefix for logging.
std::string LogPrefix() const;
// Waits for the tablet to reach 'RUNNING' state.
// Contrary to tablet servers, in master we actually wait for the master tablet
// to become online synchronously, this allows us to fail fast if something fails
// and shouldn't induce the all-workers-blocked-waiting-for-tablets problem
// that we've seen in tablet servers since the master only has to boot a few
// tablets.
Status WaitUntilRunning();
// Table related private methods.
//
// Hands a single row to the given TableVisitor.
// NOTE(review): presumably decodes the row into the arguments of
// TableVisitor::VisitTable() -- confirm against the implementation.
Status VisitTableFromRow(const RowBlockRow& row, TableVisitor* visitor);
// Tablet related private methods.
// Add dirty tablet data to the given row operations.
Status AddTabletsToPB(const std::vector<TabletInfo*>& tablets,
RowOperationsPB::Type op_type,
RowOperationsPB* ops) const;
// Hands a single row to the given TabletVisitor.
// NOTE(review): presumably decodes the row into the arguments of
// TabletVisitor::VisitTablet() -- confirm against the implementation.
Status VisitTabletFromRow(const RowBlockRow& row, TabletVisitor* visitor);
// Initializes the RaftPeerPB for the local peer.
// Crashes due to an invariant check if the rpc server is not running.
void InitLocalRaftPeerPB();
// Add an operation to a write adding/updating/deleting a table or tablet.
// Each helper takes the WriteRequestPB to extend and the table (or list of
// tablets) that the operation refers to.
Status ReqAddTable(tserver::WriteRequestPB* req, const TableInfo* table);
Status ReqUpdateTable(tserver::WriteRequestPB* req, const TableInfo* table);
Status ReqDeleteTable(tserver::WriteRequestPB* req, const TableInfo* table);
Status ReqAddTablets(tserver::WriteRequestPB* req,
const std::vector<TabletInfo*>& tablets);
Status ReqUpdateTablets(tserver::WriteRequestPB* req,
const std::vector<TabletInfo*>& tablets);
Status ReqDeleteTablets(tserver::WriteRequestPB* req,
const std::vector<TabletInfo*>& tablets);
// Special string injected into SyncWrite() random failures (if enabled).
// Only useful for tests.
static const char* kInjectedFailureStatusMsg;
// Table schema, without IDs, used to send messages to the TabletPeer.
Schema schema_;
// NOTE(review): presumably the key-column projection of 'schema_' --
// confirm against the implementation.
Schema key_schema_;
// NOTE(review): 'metric_registry_', 'master_' and 'leader_cb_' presumably
// hold the corresponding constructor arguments -- confirm against the
// constructor definition.
MetricRegistry* metric_registry_;
gscoped_ptr<ThreadPool> apply_pool_;
// The TabletPeer of the system catalog tablet; returned by tablet_peer()
// and dereferenced by tablet_id().
scoped_refptr<tablet::TabletPeer> tablet_peer_;
Master* master_;
// Callback invoked when this node is elected leader (see the constructor
// comment).
ElectedLeaderCallback leader_cb_;
// RaftPeerPB describing the local peer.
// NOTE(review): presumably filled in by InitLocalRaftPeerPB() -- confirm.
consensus::RaftPeerPB local_peer_pb_;
} // namespace master
} // namespace kudu