| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| #ifndef KUDU_TABLET_MAINTENANCE_MANAGER_H |
| #define KUDU_TABLET_MAINTENANCE_MANAGER_H |
| |
| #include <stdint.h> |
| |
| #include <map> |
| #include <memory> |
| #include <set> |
| #include <string> |
| #include <vector> |
| |
| #include "kudu/gutil/macros.h" |
| #include "kudu/tablet/mvcc.h" |
| #include "kudu/tablet/tablet.pb.h" |
| #include "kudu/util/condition_variable.h" |
| #include "kudu/util/monotime.h" |
| #include "kudu/util/mutex.h" |
| #include "kudu/util/countdown_latch.h" |
| #include "kudu/util/thread.h" |
| #include "kudu/util/threadpool.h" |
| |
| namespace kudu { |
| |
// Forward declarations. In this header these types are only used as template
// arguments of smart pointers or, for MaintenanceManager, named before the
// class is defined later in the file.
template <typename T>
class AtomicGauge;
class Histogram;
class MaintenanceManager;
class MemTracker;
| |
| class MaintenanceOpStats { |
| public: |
| MaintenanceOpStats(); |
| |
| // Zero all stats. They are invalid until the first setter is called. |
| void Clear(); |
| |
| bool runnable() const { |
| DCHECK(valid_); |
| return runnable_; |
| } |
| |
| void set_runnable(bool runnable) { |
| UpdateLastModified(); |
| runnable_ = runnable; |
| } |
| |
| uint64_t ram_anchored() const { |
| DCHECK(valid_); |
| return ram_anchored_; |
| } |
| |
| void set_ram_anchored(uint64_t ram_anchored) { |
| UpdateLastModified(); |
| ram_anchored_ = ram_anchored; |
| } |
| |
| int64_t logs_retained_bytes() const { |
| DCHECK(valid_); |
| return logs_retained_bytes_; |
| } |
| |
| void set_logs_retained_bytes(int64_t logs_retained_bytes) { |
| UpdateLastModified(); |
| logs_retained_bytes_ = logs_retained_bytes; |
| } |
| |
| double perf_improvement() const { |
| DCHECK(valid_); |
| return perf_improvement_; |
| } |
| |
| void set_perf_improvement(double perf_improvement) { |
| UpdateLastModified(); |
| perf_improvement_ = perf_improvement; |
| } |
| |
| const MonoTime& last_modified() const { |
| DCHECK(valid_); |
| return last_modified_; |
| } |
| |
| bool valid() const { |
| return valid_; |
| } |
| |
| private: |
| void UpdateLastModified() { |
| valid_ = true; |
| last_modified_ = MonoTime::Now(MonoTime::FINE); |
| } |
| |
| // True if these stats are valid. |
| bool valid_; |
| |
| // True if this op can be run now. |
| bool runnable_; |
| |
| // The approximate amount of memory that not doing this operation keeps |
| // around. This number is used to decide when to start freeing memory, so it |
| // should be fairly accurate. May be 0. |
| uint64_t ram_anchored_; |
| |
| // The approximate amount of disk space that not doing this operation keeps us from GCing from |
| // the logs. May be 0. |
| int64_t logs_retained_bytes_; |
| |
| // The estimated performance improvement-- how good it is to do this on some |
| // absolute scale (yet TBD). |
| double perf_improvement_; |
| |
| // The last time that the stats were modified. |
| MonoTime last_modified_; |
| }; |
| |
| // MaintenanceOp objects represent background operations that the |
| // MaintenanceManager can schedule. Once a MaintenanceOp is registered, the |
| // manager will periodically poll it for statistics. The registrant is |
| // responsible for managing the memory associated with the MaintenanceOp object. |
| // Op objects should be unregistered before being de-allocated. |
| class MaintenanceOp { |
| public: |
| friend class MaintenanceManager; |
| |
| // General indicator of how much IO the Op will use. |
| enum IOUsage { |
| LOW_IO_USAGE, // Low impact operations like removing a file, updating metadata. |
| HIGH_IO_USAGE // Everything else. |
| }; |
| |
| explicit MaintenanceOp(std::string name, IOUsage io_usage); |
| virtual ~MaintenanceOp(); |
| |
| // Unregister this op, if it is currently registered. |
| void Unregister(); |
| |
| // Update the op statistics. This will be called every scheduling period |
| // (about a few times a second), so it should not be too expensive. It's |
| // possible for the returned statistics to be invalid; the caller should |
| // call MaintenanceOpStats::valid() before using them. This will be run |
| // under the MaintenanceManager lock. |
| virtual void UpdateStats(MaintenanceOpStats* stats) = 0; |
| |
| // Prepare to perform the operation. This will be run without holding the |
| // maintenance manager lock. It should be short, since it is run from the |
| // context of the maintenance op scheduler thread rather than a worker thread. |
| // If this returns false, we will abort the operation. |
| virtual bool Prepare() = 0; |
| |
| // Perform the operation. This will be run without holding the maintenance |
| // manager lock, and may take a long time. |
| virtual void Perform() = 0; |
| |
| // Returns the histogram for this op that tracks duration. Cannot be NULL. |
| virtual scoped_refptr<Histogram> DurationHistogram() const = 0; |
| |
| // Returns the gauge for this op that tracks when this op is running. Cannot be NULL. |
| virtual scoped_refptr<AtomicGauge<uint32_t> > RunningGauge() const = 0; |
| |
| uint32_t running() { return running_; } |
| |
| std::string name() const { return name_; } |
| |
| IOUsage io_usage() const { return io_usage_; } |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(MaintenanceOp); |
| |
| // The name of the operation. Op names must be unique. |
| const std::string name_; |
| |
| // The number of times that this op is currently running. |
| uint32_t running_; |
| |
| // Condition variable which the UnregisterOp function can wait on. |
| // |
| // Note: 'cond_' is used with the MaintenanceManager's mutex. As such, |
| // it only exists when the op is registered. |
| gscoped_ptr<ConditionVariable> cond_; |
| |
| // The MaintenanceManager with which this op is registered, or null |
| // if it is not registered. |
| std::shared_ptr<MaintenanceManager> manager_; |
| |
| IOUsage io_usage_; |
| }; |
| |
| struct MaintenanceOpComparator { |
| bool operator() (const MaintenanceOp* lhs, |
| const MaintenanceOp* rhs) const { |
| return lhs->name().compare(rhs->name()) < 0; |
| } |
| }; |
| |
| // Holds the information regarding a recently completed operation. |
| struct CompletedOp { |
| std::string name; |
| MonoDelta duration; |
| MonoTime start_mono_time; |
| }; |
| |
| // The MaintenanceManager manages the scheduling of background operations such |
| // as flushes or compactions. It runs these operations in the background, in a |
| // thread pool. It uses information provided in MaintenanceOpStats objects to |
| // decide which operations, if any, to run. |
| class MaintenanceManager : public std::enable_shared_from_this<MaintenanceManager> { |
| public: |
| struct Options { |
| int32_t num_threads; |
| int32_t polling_interval_ms; |
| uint32_t history_size; |
| std::shared_ptr<MemTracker> parent_mem_tracker; |
| }; |
| |
| explicit MaintenanceManager(const Options& options); |
| ~MaintenanceManager(); |
| |
| Status Init(); |
| void Shutdown(); |
| |
| // Register an op with the manager. |
| void RegisterOp(MaintenanceOp* op); |
| |
| // Unregister an op with the manager. |
| // If the Op is currently running, it will not be interrupted. However, this |
| // function will block until the Op is finished. |
| void UnregisterOp(MaintenanceOp* op); |
| |
| void GetMaintenanceManagerStatusDump(tablet::MaintenanceManagerStatusPB* out_pb); |
| |
| static const Options DEFAULT_OPTIONS; |
| |
| private: |
| FRIEND_TEST(MaintenanceManagerTest, TestLogRetentionPrioritization); |
| typedef std::map<MaintenanceOp*, MaintenanceOpStats, |
| MaintenanceOpComparator> OpMapTy; |
| |
| void RunSchedulerThread(); |
| |
| // find the best op, or null if there is nothing we want to run |
| MaintenanceOp* FindBestOp(); |
| |
| void LaunchOp(MaintenanceOp* op); |
| |
| const int32_t num_threads_; |
| OpMapTy ops_; // registered operations |
| Mutex lock_; |
| scoped_refptr<kudu::Thread> monitor_thread_; |
| gscoped_ptr<ThreadPool> thread_pool_; |
| ConditionVariable cond_; |
| bool shutdown_; |
| uint64_t running_ops_; |
| int32_t polling_interval_ms_; |
| // Vector used as a circular buffer for recently completed ops. Elements need to be added at |
| // the completed_ops_count_ % the vector's size and then the count needs to be incremented. |
| std::vector<CompletedOp> completed_ops_; |
| int64_t completed_ops_count_; |
| std::shared_ptr<MemTracker> parent_mem_tracker_; |
| |
| DISALLOW_COPY_AND_ASSIGN(MaintenanceManager); |
| }; |
| |
| } // namespace kudu |
| |
| #endif |