| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include "common/daemon.h" |
| |
| // IWYU pragma: no_include <bthread/errno.h> |
| #include <errno.h> // IWYU pragma: keep |
| #include <gflags/gflags.h> |
| #include <gperftools/malloc_extension.h> // IWYU pragma: keep |
| // IWYU pragma: no_include <bits/std_abs.h> |
| #include <butil/iobuf.h> |
| #include <math.h> |
| #include <signal.h> |
| #include <stdint.h> |
| #include <stdlib.h> |
| #include <string.h> |
| |
| #include <algorithm> |
| // IWYU pragma: no_include <bits/chrono.h> |
| #include <chrono> // IWYU pragma: keep |
| #include <map> |
| #include <ostream> |
| #include <set> |
| #include <string> |
| |
| #include "common/config.h" |
| #include "common/logging.h" |
| #include "common/status.h" |
| #include "olap/memtable_memory_limiter.h" |
| #include "olap/options.h" |
| #include "olap/storage_engine.h" |
| #include "olap/tablet_manager.h" |
| #include "runtime/block_spill_manager.h" |
| #include "runtime/client_cache.h" |
| #include "runtime/exec_env.h" |
| #include "runtime/fragment_mgr.h" |
| #include "runtime/memory/mem_tracker.h" |
| #include "runtime/memory/mem_tracker_limiter.h" |
| #include "runtime/runtime_query_statistics_mgr.h" |
| #include "runtime/task_group/task_group_manager.h" |
| #include "util/cpu_info.h" |
| #include "util/debug_util.h" |
| #include "util/disk_info.h" |
| #include "util/doris_metrics.h" |
| #include "util/mem_info.h" |
| #include "util/metrics.h" |
| #include "util/network_util.h" |
| #include "util/perf_counters.h" |
| #include "util/system_metrics.h" |
| #include "util/thrift_util.h" |
| #include "util/time.h" |
| |
| namespace doris { |
| |
| void Daemon::tcmalloc_gc_thread() { |
    // TODO: GC for all caches should be supported.
| #if !defined(ADDRESS_SANITIZER) && !defined(LEAK_SANITIZER) && !defined(THREAD_SANITIZER) && \ |
| !defined(USE_JEMALLOC) |
| |
    // Limit the size of the tcmalloc cache via release_rate and max_cache_percent.
    // release_rate is adjusted according to memory_pressure, i.e. the percentage of memory in use.
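    // release_rates[i] applies when memory_pressure falls in [i*10%, (i+1)*10%);
    // higher pressure releases memory back to the system more aggressively.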
| int64_t max_cache_percent = 60; |
| double release_rates[10] = {1.0, 1.0, 1.0, 5.0, 5.0, 20.0, 50.0, 100.0, 500.0, 2000.0}; |
| int64_t pressure_limit = 90; |
| bool is_performance_mode = false; |
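    // The effective physical limit is the smaller of the configured mem_limit and
    // physical memory minus the sys_mem_available low water mark.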
| int64_t physical_limit_bytes = |
| std::min(MemInfo::physical_mem() - MemInfo::sys_mem_available_low_water_mark(), |
| MemInfo::mem_limit()); |
| |
| if (config::memory_mode == std::string("performance")) { |
| max_cache_percent = 100; |
| pressure_limit = 90; |
| is_performance_mode = true; |
| physical_limit_bytes = std::min(MemInfo::mem_limit(), MemInfo::physical_mem()); |
| } else if (config::memory_mode == std::string("compact")) { |
| max_cache_percent = 20; |
| pressure_limit = 80; |
| } |
| |
| int last_ms = 0; |
| const int kMaxLastMs = 30000; |
| const int kIntervalMs = 10; |
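    // Releases are throttled: a release only happens after to_free_bytes has stayed above
    // the 2% threshold for kMaxLastMs of accumulated intervals; when memory_pressure exceeds
    // pressure_limit, last_ms is forced to kMaxLastMs so the release happens immediately.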
| size_t init_aggressive_decommit = 0; |
| size_t current_aggressive_decommit = 0; |
| size_t expected_aggressive_decommit = 0; |
| int64_t last_memory_pressure = 0; |
| |
| MallocExtension::instance()->GetNumericProperty("tcmalloc.aggressive_memory_decommit", |
| &init_aggressive_decommit); |
| current_aggressive_decommit = init_aggressive_decommit; |
| |
| while (!_stop_background_threads_latch.wait_for(std::chrono::milliseconds(kIntervalMs))) { |
| size_t tc_used_bytes = 0; |
| size_t tc_alloc_bytes = 0; |
| size_t rss = PerfCounters::get_vm_rss(); |
| |
| MallocExtension::instance()->GetNumericProperty("generic.total_physical_bytes", |
| &tc_alloc_bytes); |
| MallocExtension::instance()->GetNumericProperty("generic.current_allocated_bytes", |
| &tc_used_bytes); |
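        // tc_cached_bytes is memory tcmalloc holds from the system but the application is not
        // using; free the portion that exceeds max_cache_percent of the in-use bytes.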
| int64_t tc_cached_bytes = (int64_t)tc_alloc_bytes - (int64_t)tc_used_bytes; |
| int64_t to_free_bytes = |
| (int64_t)tc_cached_bytes - ((int64_t)tc_used_bytes * max_cache_percent / 100); |
| to_free_bytes = std::max(to_free_bytes, (int64_t)0); |
| |
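        // memory_pressure uses the larger of RSS and tcmalloc's physical bytes, rss_pressure
        // uses RSS alone; both are percentages of physical_limit_bytes.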
| int64_t memory_pressure = 0; |
| int64_t rss_pressure = 0; |
| int64_t alloc_bytes = std::max(rss, tc_alloc_bytes); |
| memory_pressure = alloc_bytes * 100 / physical_limit_bytes; |
| rss_pressure = rss * 100 / physical_limit_bytes; |
| |
| expected_aggressive_decommit = init_aggressive_decommit; |
| if (memory_pressure > pressure_limit) { |
            // We are approaching OOM, so release the cache aggressively.
            // Ideally we would reuse the cache and stop allocating from the system,
            // but it is hard to cap the tcmalloc cache and Doris uses mmap in
            // vectorized mode, so limiting the cache capacity is good enough.
| if (rss_pressure > pressure_limit) { |
| int64_t min_free_bytes = alloc_bytes - physical_limit_bytes * 9 / 10; |
| to_free_bytes = std::max(to_free_bytes, min_free_bytes); |
| to_free_bytes = std::max(to_free_bytes, tc_cached_bytes * 30 / 100); |
                // Ensure that at least 500 MB remains in the cache.
| to_free_bytes = std::min(to_free_bytes, tc_cached_bytes - 500 * 1024 * 1024); |
| expected_aggressive_decommit = 1; |
| } |
| last_ms = kMaxLastMs; |
| } else if (memory_pressure > (pressure_limit - 10)) { |
            // In most cases adjusting the release rate is enough; if memory is being
            // consumed quickly, release some of the cache manually.
| if (last_memory_pressure <= (pressure_limit - 10)) { |
| to_free_bytes = std::max(to_free_bytes, tc_cached_bytes * 10 / 100); |
| } |
| } |
| |
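        // Map the 10%-wide pressure bucket to a tcmalloc release rate; pressures beyond the
        // table fall back to the most aggressive rate.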
| int release_rate_index = memory_pressure / 10; |
| double release_rate = 1.0; |
| if (release_rate_index >= sizeof(release_rates) / sizeof(release_rates[0])) { |
| release_rate = 2000.0; |
| } else { |
| release_rate = release_rates[release_rate_index]; |
| } |
| MallocExtension::instance()->SetMemoryReleaseRate(release_rate); |
| |
| if ((current_aggressive_decommit != expected_aggressive_decommit) && !is_performance_mode) { |
| MallocExtension::instance()->SetNumericProperty("tcmalloc.aggressive_memory_decommit", |
| expected_aggressive_decommit); |
| current_aggressive_decommit = expected_aggressive_decommit; |
| } |
| |
| last_memory_pressure = memory_pressure; |
        // Only release when more than 2% of physical_limit_bytes can be freed; frequent small
        // releases hurt performance.
| if (to_free_bytes > (physical_limit_bytes * 2 / 100)) { |
| last_ms += kIntervalMs; |
| if (last_ms >= kMaxLastMs) { |
| LOG(INFO) << "generic.current_allocated_bytes " << tc_used_bytes |
| << ", generic.total_physical_bytes " << tc_alloc_bytes << ", rss " << rss |
| << ", max_cache_percent " << max_cache_percent << ", release_rate " |
| << release_rate << ", memory_pressure " << memory_pressure |
| << ", physical_limit_bytes " << physical_limit_bytes << ", to_free_bytes " |
| << to_free_bytes << ", current_aggressive_decommit " |
| << current_aggressive_decommit; |
| MallocExtension::instance()->ReleaseToSystem(to_free_bytes); |
| last_ms = 0; |
| } |
| } else { |
| last_ms = 0; |
| } |
| } |
| #endif |
| } |
| |
| void Daemon::memory_maintenance_thread() { |
| int32_t interval_milliseconds = config::memory_maintenance_sleep_time_ms; |
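    // RSS observed the last time memory stats were logged; stats are refreshed and logged
    // again once RSS drifts by more than 256 MB.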
| int64_t last_print_proc_mem = PerfCounters::get_vm_rss(); |
| while (!_stop_background_threads_latch.wait_for( |
| std::chrono::milliseconds(interval_milliseconds))) { |
| // Refresh process memory metrics. |
| doris::PerfCounters::refresh_proc_status(); |
| doris::MemInfo::refresh_proc_meminfo(); |
| doris::MemInfo::refresh_proc_mem_no_allocator_cache(); |
| |
        // Update and print memory stats when process memory changes by more than 256 MB.
| if (abs(last_print_proc_mem - PerfCounters::get_vm_rss()) > 268435456) { |
| last_print_proc_mem = PerfCounters::get_vm_rss(); |
| doris::MemTrackerLimiter::enable_print_log_process_usage(); |
| |
            // Refresh the aggregated counter of each mem tracker type.
| doris::MemTrackerLimiter::refresh_global_counter(); |
| |
| // Refresh allocator memory metrics. |
| #if !defined(ADDRESS_SANITIZER) && !defined(LEAK_SANITIZER) && !defined(THREAD_SANITIZER) |
| doris::MemInfo::refresh_allocator_mem(); |
| if (config::enable_system_metrics) { |
| DorisMetrics::instance()->system_metrics()->update_allocator_metrics(); |
| } |
| #endif |
| |
| ExecEnv::GetInstance()->brpc_iobuf_block_memory_tracker()->set_consumption( |
| butil::IOBuf::block_memory()); |
            LOG(INFO) << MemTrackerLimiter::
                    process_mem_log_str(); // print the mem log when memory changes by 256 MB
| } |
| } |
| } |
| |
| void Daemon::memory_gc_thread() { |
| int32_t interval_milliseconds = config::memory_maintenance_sleep_time_ms; |
| int32_t memory_minor_gc_sleep_time_ms = 0; |
| int32_t memory_full_gc_sleep_time_ms = 0; |
| int32_t memory_gc_sleep_time_ms = config::memory_gc_sleep_time_ms; |
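    // The *_gc_sleep_time_ms counters act as cool-downs: after a GC they are set to
    // memory_gc_sleep_time_ms and counted down on idle iterations, so a GC of that kind
    // does not run again until its cool-down expires (a full GC also blocks minor GC).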
| while (!_stop_background_threads_latch.wait_for( |
| std::chrono::milliseconds(interval_milliseconds))) { |
| if (config::disable_memory_gc) { |
| continue; |
| } |
| auto sys_mem_available = doris::MemInfo::sys_mem_available(); |
| auto proc_mem_no_allocator_cache = doris::MemInfo::proc_mem_no_allocator_cache(); |
| |
        // GC excess memory for resource groups that do not enable overcommit.
| auto tg_free_mem = doris::MemInfo::tg_not_enable_overcommit_group_gc(); |
| sys_mem_available += tg_free_mem; |
| proc_mem_no_allocator_cache -= tg_free_mem; |
| |
| if (memory_full_gc_sleep_time_ms <= 0 && |
| (sys_mem_available < doris::MemInfo::sys_mem_available_low_water_mark() || |
| proc_mem_no_allocator_cache >= doris::MemInfo::mem_limit())) { |
            // Neither full GC nor minor GC will run again during the sleep period.
| memory_full_gc_sleep_time_ms = memory_gc_sleep_time_ms; |
| memory_minor_gc_sleep_time_ms = memory_gc_sleep_time_ms; |
| LOG(INFO) << fmt::format("[MemoryGC] start full GC, {}.", |
| MemTrackerLimiter::process_limit_exceeded_errmsg_str()); |
| doris::MemTrackerLimiter::print_log_process_usage(); |
| if (doris::MemInfo::process_full_gc()) { |
                // If the GC cannot free enough memory, process memory usage will not be
                // printed again during the next consecutive GC.
| doris::MemTrackerLimiter::enable_print_log_process_usage(); |
| } |
| } else if (memory_minor_gc_sleep_time_ms <= 0 && |
| (sys_mem_available < doris::MemInfo::sys_mem_available_warning_water_mark() || |
| proc_mem_no_allocator_cache >= doris::MemInfo::soft_mem_limit())) { |
            // No minor GC during the sleep period, but a full GC is still possible.
| memory_minor_gc_sleep_time_ms = memory_gc_sleep_time_ms; |
| LOG(INFO) << fmt::format("[MemoryGC] start minor GC, {}.", |
| MemTrackerLimiter::process_soft_limit_exceeded_errmsg_str()); |
| doris::MemTrackerLimiter::print_log_process_usage(); |
| if (doris::MemInfo::process_minor_gc()) { |
| doris::MemTrackerLimiter::enable_print_log_process_usage(); |
| } |
| } else { |
| if (memory_full_gc_sleep_time_ms > 0) { |
| memory_full_gc_sleep_time_ms -= interval_milliseconds; |
| } |
| if (memory_minor_gc_sleep_time_ms > 0) { |
| memory_minor_gc_sleep_time_ms -= interval_milliseconds; |
| } |
| } |
| } |
| } |
| |
| void Daemon::memtable_memory_limiter_tracker_refresh_thread() { |
| // Refresh the memory statistics of the load channel tracker more frequently, |
| // which helps to accurately control the memory of LoadChannelMgr. |
| while (!_stop_background_threads_latch.wait_for( |
| std::chrono::milliseconds(config::memtable_mem_tracker_refresh_interval_ms))) { |
| doris::ExecEnv::GetInstance()->memtable_memory_limiter()->refresh_mem_tracker(); |
| } |
| } |
| |
| /* |
| * this thread will calculate some metrics at a fix interval(15 sec) |
| * 1. push bytes per second |
| * 2. scan bytes per second |
| * 3. max io util of all disks |
| * 4. max network send bytes rate |
| * 5. max network receive bytes rate |
| */ |
| void Daemon::calculate_metrics_thread() { |
| int64_t last_ts = -1L; |
| int64_t lst_query_bytes = -1; |
| |
| std::map<std::string, int64_t> lst_disks_io_time; |
| std::map<std::string, int64_t> lst_net_send_bytes; |
| std::map<std::string, int64_t> lst_net_receive_bytes; |
| |
| do { |
| DorisMetrics::instance()->metric_registry()->trigger_all_hooks(true); |
| |
| if (last_ts == -1L) { |
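            // First iteration: only record the baselines; rates are computed on later iterations.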
| last_ts = GetMonoTimeMicros() / 1000; |
| lst_query_bytes = DorisMetrics::instance()->query_scan_bytes->value(); |
| if (config::enable_system_metrics) { |
| DorisMetrics::instance()->system_metrics()->get_disks_io_time(&lst_disks_io_time); |
| DorisMetrics::instance()->system_metrics()->get_network_traffic( |
| &lst_net_send_bytes, &lst_net_receive_bytes); |
| } |
| } else { |
| int64_t current_ts = GetMonoTimeMicros() / 1000; |
| long interval = (current_ts - last_ts) / 1000; |
| last_ts = current_ts; |
| |
| // 1. query bytes per second |
| int64_t current_query_bytes = DorisMetrics::instance()->query_scan_bytes->value(); |
| int64_t qps = (current_query_bytes - lst_query_bytes) / (interval + 1); |
| DorisMetrics::instance()->query_scan_bytes_per_second->set_value(qps < 0 ? 0 : qps); |
| lst_query_bytes = current_query_bytes; |
| |
| if (config::enable_system_metrics) { |
| // 2. max disk io util |
| DorisMetrics::instance()->system_metrics()->update_max_disk_io_util_percent( |
| lst_disks_io_time, 15); |
| |
| // update lst map |
| DorisMetrics::instance()->system_metrics()->get_disks_io_time(&lst_disks_io_time); |
| |
| // 3. max network traffic |
| int64_t max_send = 0; |
| int64_t max_receive = 0; |
| DorisMetrics::instance()->system_metrics()->get_max_net_traffic( |
| lst_net_send_bytes, lst_net_receive_bytes, 15, &max_send, &max_receive); |
| DorisMetrics::instance()->system_metrics()->update_max_network_send_bytes_rate( |
| max_send); |
| DorisMetrics::instance()->system_metrics()->update_max_network_receive_bytes_rate( |
| max_receive); |
| // update lst map |
| DorisMetrics::instance()->system_metrics()->get_network_traffic( |
| &lst_net_send_bytes, &lst_net_receive_bytes); |
| } |
| |
| DorisMetrics::instance()->all_rowsets_num->set_value( |
| StorageEngine::instance()->tablet_manager()->get_rowset_nums()); |
| DorisMetrics::instance()->all_segments_num->set_value( |
| StorageEngine::instance()->tablet_manager()->get_segment_nums()); |
| } |
| } while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(15))); |
| } |
| |
| // clean up stale spilled files |
| void Daemon::block_spill_gc_thread() { |
| while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(60))) { |
| ExecEnv::GetInstance()->block_spill_mgr()->gc(200); |
| } |
| } |
| |
| void Daemon::report_runtime_query_statistics_thread() { |
| while (!_stop_background_threads_latch.wait_for( |
| std::chrono::milliseconds(config::report_query_statistics_interval_ms))) { |
| ExecEnv::GetInstance()->runtime_query_statistics_mgr()->report_runtime_query_statistics(); |
| } |
| } |
| |
| void Daemon::je_purge_dirty_pages_thread() const { |
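    // Wait until a purge is requested via je_purge_dirty_pages_notify (or the daemon is
    // stopping), then purge dirty pages of all jemalloc arenas.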
| do { |
| std::unique_lock<std::mutex> l(doris::MemInfo::je_purge_dirty_pages_lock); |
| while (_stop_background_threads_latch.count() != 0 && |
| !doris::MemInfo::je_purge_dirty_pages_notify.load(std::memory_order_relaxed)) { |
| doris::MemInfo::je_purge_dirty_pages_cv.wait_for(l, std::chrono::seconds(1)); |
| } |
| if (_stop_background_threads_latch.count() == 0) { |
| break; |
| } |
| doris::MemInfo::je_purge_all_arena_dirty_pages(); |
| doris::MemInfo::je_purge_dirty_pages_notify.store(false, std::memory_order_relaxed); |
| } while (true); |
| } |
| |
| void Daemon::start() { |
| Status st; |
| st = Thread::create( |
| "Daemon", "tcmalloc_gc_thread", [this]() { this->tcmalloc_gc_thread(); }, |
| &_threads.emplace_back()); |
| CHECK(st.ok()) << st; |
| st = Thread::create( |
| "Daemon", "memory_maintenance_thread", [this]() { this->memory_maintenance_thread(); }, |
| &_threads.emplace_back()); |
| CHECK(st.ok()) << st; |
| st = Thread::create( |
| "Daemon", "memory_gc_thread", [this]() { this->memory_gc_thread(); }, |
| &_threads.emplace_back()); |
| CHECK(st.ok()) << st; |
| st = Thread::create( |
| "Daemon", "memtable_memory_limiter_tracker_refresh_thread", |
| [this]() { this->memtable_memory_limiter_tracker_refresh_thread(); }, |
| &_threads.emplace_back()); |
| CHECK(st.ok()) << st; |
| |
| if (config::enable_metric_calculator) { |
| st = Thread::create( |
| "Daemon", "calculate_metrics_thread", |
| [this]() { this->calculate_metrics_thread(); }, &_threads.emplace_back()); |
| CHECK(st.ok()) << st; |
| } |
    st = Thread::create(
            "Daemon", "block_spill_gc_thread", [this]() { this->block_spill_gc_thread(); },
            &_threads.emplace_back());
    CHECK(st.ok()) << st;
    st = Thread::create(
            "Daemon", "je_purge_dirty_pages_thread",
            [this]() { this->je_purge_dirty_pages_thread(); }, &_threads.emplace_back());
    CHECK(st.ok()) << st;
| st = Thread::create( |
| "Daemon", "query_runtime_statistics_thread", |
| [this]() { this->report_runtime_query_statistics_thread(); }, &_threads.emplace_back()); |
| CHECK(st.ok()) << st; |
| } |
| |
| void Daemon::stop() { |
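    // Counting down the latch signals every background loop to exit, then join all threads.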
| LOG(INFO) << "Doris daemon is stopping."; |
| if (_stop_background_threads_latch.count() == 0) { |
| LOG(INFO) << "Doris daemon stop returned since no bg threads latch."; |
| return; |
| } |
| _stop_background_threads_latch.count_down(); |
| for (auto&& t : _threads) { |
| if (t) { |
| t->join(); |
| } |
| } |
| LOG(INFO) << "Doris daemon stopped after background threads are joined."; |
| } |
| |
| } // namespace doris |