[fix](memory) `TabletSchema` and `Schema` no longer track memory, only track columns count. (#28149)
TabletSchema and Schema no longer track memory; they only track the column count, because their memory size cannot be tracked accurately.
The TabletMeta MemTracker is changed to track the TabletSchema column count.
Segment::_meta_mem_usage overflows for an unknown reason, which causes the SegmentMeta MemTracker to report values such as -2912341218700198079. So, it is temporarily placed under the experimental-type tracker.
diff --git a/be/src/olap/schema.h b/be/src/olap/schema.h
index 3f2c279..64d6a75 100644
--- a/be/src/olap/schema.h
+++ b/be/src/olap/schema.h
@@ -50,7 +50,6 @@
class Schema {
public:
Schema(TabletSchemaSPtr tablet_schema) {
- SCOPED_MEM_COUNT_BY_HOOK(&_mem_size);
size_t num_columns = tablet_schema->num_columns();
// ignore this column
if (tablet_schema->columns().back().name() == BeConsts::ROW_STORE_COL) {
@@ -86,7 +85,6 @@
// All the columns of one table may exist in the columns param, but col_ids is only a subset.
Schema(const std::vector<TabletColumn>& columns, const std::vector<ColumnId>& col_ids) {
- SCOPED_MEM_COUNT_BY_HOOK(&_mem_size);
size_t num_key_columns = 0;
_unique_ids.resize(columns.size());
for (size_t i = 0; i < columns.size(); ++i) {
@@ -109,7 +107,6 @@
// Only for UT
Schema(const std::vector<TabletColumn>& columns, size_t num_key_columns) {
- SCOPED_MEM_COUNT_BY_HOOK(&_mem_size);
std::vector<ColumnId> col_ids(columns.size());
_unique_ids.resize(columns.size());
for (uint32_t cid = 0; cid < columns.size(); ++cid) {
@@ -121,7 +118,6 @@
}
Schema(const std::vector<const Field*>& cols, size_t num_key_columns) {
- SCOPED_MEM_COUNT_BY_HOOK(&_mem_size);
std::vector<ColumnId> col_ids(cols.size());
_unique_ids.resize(cols.size());
for (uint32_t cid = 0; cid < cols.size(); ++cid) {
@@ -181,6 +177,9 @@
bool has_sequence_col() const { return _has_sequence_col; }
int32_t rowid_col_idx() const { return _rowid_col_idx; }
int32_t version_col_idx() const { return _version_col_idx; }
+ // Don't use.
+ // TODO: memory size of Schema cannot be accurately tracked.
+ // In some places, temporarily use num_columns() as Schema size.
int64_t mem_size() const { return _mem_size; }
private:
diff --git a/be/src/olap/storage_engine.cpp b/be/src/olap/storage_engine.cpp
index c590345..9de4614 100644
--- a/be/src/olap/storage_engine.cpp
+++ b/be/src/olap/storage_engine.cpp
@@ -118,7 +118,8 @@
_is_all_cluster_id_exist(true),
_stopped(false),
_segcompaction_mem_tracker(std::make_shared<MemTracker>("SegCompaction")),
- _segment_meta_mem_tracker(std::make_shared<MemTracker>("SegmentMeta")),
+ _segment_meta_mem_tracker(std::make_shared<MemTracker>(
+ "SegmentMeta", ExecEnv::GetInstance()->experimental_mem_tracker())),
_stop_background_threads_latch(1),
_tablet_manager(new TabletManager(config::tablet_map_shard_size)),
_txn_manager(new TxnManager(config::txn_map_shard_size, config::txn_shard_size)),
diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h
index e10b737..679bcc6 100644
--- a/be/src/olap/storage_engine.h
+++ b/be/src/olap/storage_engine.h
@@ -392,6 +392,9 @@
std::shared_ptr<MemTracker> _segcompaction_mem_tracker;
// This mem tracker is only for tracking memory use by segment meta data such as footer or index page.
// The memory consumed by querying is tracked in segment iterator.
+ // TODO: Segment::_meta_mem_usage overflows for an unknown reason, which makes the SegmentMeta mem tracker
+ // report values such as `-2912341218700198079`. So it is temporarily placed under the experimental type tracker.
+ // We may have to use the ColumnReader count as the segment meta size instead.
std::shared_ptr<MemTracker> _segment_meta_mem_tracker;
CountDownLatch _stop_background_threads_latch;
diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp
index ad9480f..ee060d0 100644
--- a/be/src/olap/tablet_manager.cpp
+++ b/be/src/olap/tablet_manager.cpp
@@ -32,6 +32,7 @@
#include <mutex>
#include <ostream>
+#include "bvar/bvar.h"
#include "common/compiler_util.h" // IWYU pragma: keep
#include "common/config.h"
#include "common/logging.h"
@@ -82,10 +83,10 @@
DEFINE_GAUGE_METRIC_PROTOTYPE_5ARG(tablet_meta_mem_consumption, MetricUnit::BYTES, "",
mem_consumption, Labels({{"type", "tablet_meta"}}));
+bvar::Adder<int64_t> g_tablet_meta_schema_columns_count("tablet_meta_schema_columns_count");
+
TabletManager::TabletManager(int32_t tablet_map_lock_shard_size)
- : _mem_tracker(std::make_shared<MemTracker>(
- "TabletManager", ExecEnv::GetInstance()->experimental_mem_tracker())),
- _tablet_meta_mem_tracker(std::make_shared<MemTracker>(
+ : _tablet_meta_mem_tracker(std::make_shared<MemTracker>(
"TabletMeta", ExecEnv::GetInstance()->experimental_mem_tracker())),
_tablets_shards_size(tablet_map_lock_shard_size),
_tablets_shards_mask(tablet_map_lock_shard_size - 1) {
@@ -93,7 +94,7 @@
CHECK_EQ(_tablets_shards_size & _tablets_shards_mask, 0);
_tablets_shards.resize(_tablets_shards_size);
REGISTER_HOOK_METRIC(tablet_meta_mem_consumption,
- [this]() { return _mem_tracker->consumption(); });
+ [this]() { return _tablet_meta_mem_tracker->consumption(); });
}
TabletManager::~TabletManager() {
@@ -240,6 +241,7 @@
// Because table schema will copy in tablet, there will be double mem cost
// so here multiply 2
_tablet_meta_mem_tracker->consume(tablet->tablet_meta()->mem_size() * 2);
+ g_tablet_meta_schema_columns_count << tablet->tablet_meta()->tablet_columns_num();
COUNTER_UPDATE(ADD_CHILD_TIMER(profile, "RegisterTabletInfo", "AddTablet"),
static_cast<int64_t>(watch.reset()));
@@ -261,7 +263,6 @@
Status TabletManager::create_tablet(const TCreateTabletReq& request, std::vector<DataDir*> stores,
RuntimeProfile* profile) {
- SCOPED_CONSUME_MEM_TRACKER(_mem_tracker);
DorisMetrics::instance()->create_tablet_requests_total->increment(1);
int64_t tablet_id = request.tablet_id;
@@ -515,7 +516,6 @@
if (shard.tablets_under_clone.count(tablet_id) > 0) {
return Status::Aborted("tablet {} is under clone, skip drop task", tablet_id);
}
- SCOPED_CONSUME_MEM_TRACKER(_mem_tracker);
return _drop_tablet_unlocked(tablet_id, replica_id, false, is_drop_table_or_partition);
}
@@ -580,6 +580,7 @@
to_drop_tablet->deregister_tablet_from_dir();
_tablet_meta_mem_tracker->release(to_drop_tablet->tablet_meta()->mem_size() * 2);
+ g_tablet_meta_schema_columns_count << -to_drop_tablet->tablet_meta()->tablet_columns_num();
return Status::OK();
}
@@ -797,7 +798,6 @@
TSchemaHash schema_hash, const string& meta_binary,
bool update_meta, bool force, bool restore,
bool check_path) {
- SCOPED_CONSUME_MEM_TRACKER(_mem_tracker);
TabletMetaSharedPtr tablet_meta(new TabletMeta());
Status status = tablet_meta->deserialize(meta_binary);
if (!status.ok()) {
@@ -880,7 +880,6 @@
Status TabletManager::load_tablet_from_dir(DataDir* store, TTabletId tablet_id,
SchemaHash schema_hash, const string& schema_hash_path,
bool force, bool restore) {
- SCOPED_CONSUME_MEM_TRACKER(_mem_tracker);
LOG(INFO) << "begin to load tablet from dir. "
<< " tablet_id=" << tablet_id << " schema_hash=" << schema_hash
<< " path = " << schema_hash_path << " force = " << force << " restore = " << restore;
@@ -1031,7 +1030,6 @@
return Status::OK();
}
- SCOPED_CONSUME_MEM_TRACKER(_mem_tracker);
for_each_tablet([](const TabletSharedPtr& tablet) { tablet->delete_expired_stale_rowset(); },
filter_all_tablets);
@@ -1191,7 +1189,6 @@
void TabletManager::try_delete_unused_tablet_path(DataDir* data_dir, TTabletId tablet_id,
SchemaHash schema_hash,
const string& schema_hash_path) {
- SCOPED_CONSUME_MEM_TRACKER(_mem_tracker);
// acquire the read lock, so that there is no creating tablet or load tablet from meta tasks
// create tablet and load tablet task should check whether the dir exists
tablets_shard& shard = _get_tablets_shard(tablet_id);
@@ -1252,7 +1249,6 @@
}
void TabletManager::do_tablet_meta_checkpoint(DataDir* data_dir) {
- SCOPED_CONSUME_MEM_TRACKER(_mem_tracker);
auto filter = [data_dir](Tablet* tablet) -> bool {
return tablet->tablet_state() == TABLET_RUNNING &&
tablet->data_dir()->path_hash() == data_dir->path_hash() && tablet->is_used() &&
diff --git a/be/src/olap/tablet_manager.h b/be/src/olap/tablet_manager.h
index 39043f2..47d2293 100644
--- a/be/src/olap/tablet_manager.h
+++ b/be/src/olap/tablet_manager.h
@@ -225,8 +225,8 @@
std::set<int64_t> tablets_under_clone;
};
+ // TODO: memory size of TabletSchema cannot be accurately tracked.
// trace the memory use by meta of tablet
- std::shared_ptr<MemTracker> _mem_tracker;
std::shared_ptr<MemTracker> _tablet_meta_mem_tracker;
const int32_t _tablets_shards_size;
diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp
index 237a125..c463781 100644
--- a/be/src/olap/tablet_meta.cpp
+++ b/be/src/olap/tablet_meta.cpp
@@ -679,7 +679,7 @@
time_series_compaction_time_threshold_seconds());
}
-uint32_t TabletMeta::mem_size() const {
+int64_t TabletMeta::mem_size() const {
auto size = sizeof(TabletMeta);
size += _schema->mem_size();
return size;
diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h
index 60cc485..00886bb 100644
--- a/be/src/olap/tablet_meta.h
+++ b/be/src/olap/tablet_meta.h
@@ -131,7 +131,11 @@
void to_meta_pb(TabletMetaPB* tablet_meta_pb);
void to_json(std::string* json_string, json2pb::Pb2JsonOptions& options);
- uint32_t mem_size() const;
+ // Don't use.
+ // TODO: memory size of TabletSchema cannot be accurately tracked.
+ // In some places, temporarily use num_columns() as TabletSchema size.
+ int64_t mem_size() const;
+ size_t tablet_columns_num() const { return _schema->num_columns(); }
TabletTypePB tablet_type() const { return _tablet_type; }
TabletUid tablet_uid() const;
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index 7d1abd8..63682d8 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -842,7 +842,6 @@
}
void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extracted_columns) {
- SCOPED_MEM_COUNT_BY_HOOK(&_mem_size);
_keys_type = schema.keys_type();
_num_columns = 0;
_num_variant_columns = 0;
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index b0de391..3f56568 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -244,6 +244,9 @@
void add_row_column();
void copy_from(const TabletSchema& tablet_schema);
std::string to_key() const;
+ // Don't use.
+ // TODO: memory size of TabletSchema cannot be accurately tracked.
+ // In some places, temporarily use num_columns() as TabletSchema size.
int64_t mem_size() const { return _mem_size; }
size_t row_size() const;
int32_t field_index(const std::string& field_name) const;
diff --git a/be/src/olap/tablet_schema_cache.cpp b/be/src/olap/tablet_schema_cache.cpp
index 0db1953..ba354b6 100644
--- a/be/src/olap/tablet_schema_cache.cpp
+++ b/be/src/olap/tablet_schema_cache.cpp
@@ -17,8 +17,13 @@
#include "olap/tablet_schema_cache.h"
+#include "bvar/bvar.h"
+
namespace doris {
+bvar::Adder<int64_t> g_tablet_schema_cache_count("tablet_schema_cache_count");
+bvar::Adder<int64_t> g_tablet_schema_cache_columns_count("tablet_schema_cache_columns_count");
+
TabletSchemaSPtr TabletSchemaCache::insert(const std::string& key) {
std::lock_guard guard(_mtx);
auto iter = _cache.find(key);
@@ -28,9 +33,8 @@
pb.ParseFromString(key);
tablet_schema_ptr->init_from_pb(pb);
_cache[key] = tablet_schema_ptr;
- DorisMetrics::instance()->tablet_schema_cache_count->increment(1);
- DorisMetrics::instance()->tablet_schema_cache_memory_bytes->increment(
- tablet_schema_ptr->mem_size());
+ g_tablet_schema_cache_count << 1;
+ g_tablet_schema_cache_columns_count << tablet_schema_ptr->num_columns();
return tablet_schema_ptr;
}
return iter->second;
@@ -69,9 +73,8 @@
LOG(INFO) << "Tablet Schema Cache Capacity " << _cache.size();
for (auto iter = _cache.begin(), last = _cache.end(); iter != last;) {
if (iter->second.unique()) {
- DorisMetrics::instance()->tablet_schema_cache_memory_bytes->increment(
- -iter->second->mem_size());
- DorisMetrics::instance()->tablet_schema_cache_count->increment(-1);
+ g_tablet_schema_cache_count << -1;
+ g_tablet_schema_cache_columns_count << -iter->second->num_columns();
iter = _cache.erase(iter);
} else {
++iter;
diff --git a/be/src/util/doris_metrics.cpp b/be/src/util/doris_metrics.cpp
index ecc74db..53ae69e 100644
--- a/be/src/util/doris_metrics.cpp
+++ b/be/src/util/doris_metrics.cpp
@@ -162,8 +162,6 @@
DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(query_cache_sql_total_count, MetricUnit::NOUNIT);
DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(query_cache_partition_total_count, MetricUnit::NOUNIT);
-DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(tablet_schema_cache_count, MetricUnit::NOUNIT);
-DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(tablet_schema_cache_memory_bytes, MetricUnit::BYTES);
DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(lru_cache_memory_bytes, MetricUnit::BYTES);
DEFINE_GAUGE_CORE_METRIC_PROTOTYPE_2ARG(upload_total_byte, MetricUnit::BYTES);
@@ -288,8 +286,6 @@
INT_UGAUGE_METRIC_REGISTER(_server_metric_entity, query_cache_sql_total_count);
INT_UGAUGE_METRIC_REGISTER(_server_metric_entity, query_cache_partition_total_count);
- INT_COUNTER_METRIC_REGISTER(_server_metric_entity, tablet_schema_cache_count);
- INT_UGAUGE_METRIC_REGISTER(_server_metric_entity, tablet_schema_cache_memory_bytes);
INT_GAUGE_METRIC_REGISTER(_server_metric_entity, lru_cache_memory_bytes);
INT_COUNTER_METRIC_REGISTER(_server_metric_entity, local_file_reader_total);
diff --git a/be/src/util/doris_metrics.h b/be/src/util/doris_metrics.h
index 3aafc5d..da68f53 100644
--- a/be/src/util/doris_metrics.h
+++ b/be/src/util/doris_metrics.h
@@ -193,8 +193,6 @@
UIntGauge* query_cache_sql_total_count = nullptr;
UIntGauge* query_cache_partition_total_count = nullptr;
- IntCounter* tablet_schema_cache_count = nullptr;
- UIntGauge* tablet_schema_cache_memory_bytes = nullptr;
IntGauge* lru_cache_memory_bytes = nullptr;
UIntGauge* scanner_thread_pool_queue_size = nullptr;