blob: 274cde00890ecfebafdf2b36a87d755d407c5906 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.uniffle.server;
import java.util.Map;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.Maps;
import io.prometheus.client.CollectorRegistry;
import io.prometheus.client.Counter;
import io.prometheus.client.Gauge;
import io.prometheus.client.Summary;
import org.apache.commons.lang3.StringUtils;
import org.apache.uniffle.common.metrics.MetricsManager;
import org.apache.uniffle.common.util.Constants;
import org.apache.uniffle.storage.common.LocalStorage;
public class ShuffleServerMetrics {
// Names under which the metrics of this class are created in setUpMetrics(), together with the
// label names and label values used with them. The strings are the externally visible metric
// identifiers, so they must not be changed casually.
private static final String TOTAL_RECEIVED_DATA = "total_received_data";
private static final String TOTAL_WRITE_DATA = "total_write_data";
private static final String TOTAL_WRITE_BLOCK = "total_write_block";
private static final String TOTAL_WRITE_TIME = "total_write_time";
// Despite the "total_" prefix, this name is used for a gauge (gaugeWriteHandler), not a counter.
private static final String TOTAL_WRITE_HANDLER = "total_write_handler";
private static final String TOTAL_WRITE_EXCEPTION = "total_write_exception";
private static final String TOTAL_WRITE_SLOW = "total_write_slow";
private static final String TOTAL_WRITE_NUM = "total_write_num";
private static final String APP_NUM_WITH_NODE = "app_num_with_node";
private static final String PARTITION_NUM_WITH_NODE = "partition_num_with_node";
private static final String EVENT_SIZE_THRESHOLD_LEVEL1 = "event_size_threshold_level1";
private static final String EVENT_SIZE_THRESHOLD_LEVEL2 = "event_size_threshold_level2";
private static final String EVENT_SIZE_THRESHOLD_LEVEL3 = "event_size_threshold_level3";
private static final String EVENT_SIZE_THRESHOLD_LEVEL4 = "event_size_threshold_level4";
private static final String EVENT_QUEUE_SIZE = "event_queue_size";
private static final String HADOOP_FLUSH_THREAD_POOL_QUEUE_SIZE =
"hadoop_flush_thread_pool_queue_size";
private static final String LOCALFILE_FLUSH_THREAD_POOL_QUEUE_SIZE =
"localfile_flush_thread_pool_queue_size";
private static final String FALLBACK_FLUSH_THREAD_POOL_QUEUE_SIZE =
"fallback_flush_thread_pool_queue_size";
private static final String TOTAL_READ_DATA = "total_read_data";
private static final String TOTAL_READ_LOCAL_DATA_FILE = "total_read_local_data_file";
private static final String TOTAL_READ_LOCAL_INDEX_FILE = "total_read_local_index_file";
private static final String TOTAL_READ_MEMORY_DATA = "total_read_memory_data";
private static final String TOTAL_READ_TIME = "total_read_time";
private static final String TOTAL_REQUIRE_READ_MEMORY = "total_require_read_memory_num";
private static final String TOTAL_REQUIRE_READ_MEMORY_RETRY =
"total_require_read_memory_retry_num";
private static final String TOTAL_REQUIRE_READ_MEMORY_FAILED =
"total_require_read_memory_failed_num";
private static final String LOCAL_STORAGE_TOTAL_DIRS_NUM = "local_storage_total_dirs_num";
private static final String LOCAL_STORAGE_CORRUPTED_DIRS_NUM = "local_storage_corrupted_dirs_num";
private static final String LOCAL_STORAGE_TOTAL_SPACE = "local_storage_total_space";
private static final String LOCAL_STORAGE_WHOLE_DISK_USED_SPACE =
"local_storage_whole_disk_used_space";
private static final String LOCAL_STORAGE_SERVICE_USED_SPACE = "local_storage_service_used_space";
private static final String LOCAL_STORAGE_USED_SPACE_RATIO = "local_storage_used_space_ratio";
private static final String IS_HEALTHY = "is_healthy";
private static final String ALLOCATED_BUFFER_SIZE = "allocated_buffer_size";
private static final String IN_FLUSH_BUFFER_SIZE = "in_flush_buffer_size";
private static final String USED_BUFFER_SIZE = "used_buffer_size";
private static final String READ_USED_BUFFER_SIZE = "read_used_buffer_size";
private static final String USED_DIRECT_MEMORY_SIZE = "used_direct_memory_size";
// NOTE(review): the next two names are not referenced anywhere else in this class, so no metric
// is created for them here.
private static final String ALLOCATED_DIRECT_MEMORY_SIZE = "allocated_direct_memory_size";
private static final String PINNED_DIRECT_MEMORY_SIZE = "pinned_direct_memory_size";
private static final String TOTAL_FAILED_WRITTEN_EVENT_NUM = "total_failed_written_event_num";
private static final String TOTAL_DROPPED_EVENT_NUM = "total_dropped_event_num";
private static final String TOTAL_HADOOP_WRITE_DATA = "total_hadoop_write_data";
private static final String TOTAL_HADOOP_WRITE_DATA_FOR_HUGE_PARTITION =
"total_hadoop_write_data_for_huge_partition";
private static final String TOTAL_LOCALFILE_WRITE_DATA = "total_localfile_write_data";
// Label name of the local-file write counter. LOCAL_DISK_PATH_LABEL_ALL is not used inside this
// class; presumably it is a label value supplied by callers for an all-disks total (confirm).
private static final String LOCAL_DISK_PATH_LABEL = "local_disk_path";
public static final String LOCAL_DISK_PATH_LABEL_ALL = "ALL";
private static final String TOTAL_REQUIRE_BUFFER_FAILED = "total_require_buffer_failed";
public static final String TOTAL_REQUIRE_BUFFER_FAILED_FOR_HUGE_PARTITION =
"total_require_buffer_failed_for_huge_partition";
private static final String TOTAL_REQUIRE_BUFFER_FAILED_FOR_REGULAR_PARTITION =
"total_require_buffer_failed_for_regular_partition";
private static final String STORAGE_TOTAL_WRITE_LOCAL = "storage_total_write_local";
private static final String STORAGE_RETRY_WRITE_LOCAL = "storage_retry_write_local";
private static final String STORAGE_FAILED_WRITE_LOCAL = "storage_failed_write_local";
private static final String STORAGE_SUCCESS_WRITE_LOCAL = "storage_success_write_local";
// Label name of the per-host remote-storage and Hadoop counters, and the label value under which
// incHadoopStorageWriteDataSize additionally records every increment.
private static final String STORAGE_HOST_LABEL = "storage_host";
public static final String STORAGE_HOST_LABEL_ALL = "ALL";
public static final String STORAGE_TOTAL_WRITE_REMOTE = "storage_total_write_remote";
public static final String STORAGE_RETRY_WRITE_REMOTE = "storage_retry_write_remote";
public static final String STORAGE_FAILED_WRITE_REMOTE = "storage_failed_write_remote";
public static final String STORAGE_SUCCESS_WRITE_REMOTE = "storage_success_write_remote";
private static final String TOTAL_APP_NUM = "total_app_num";
private static final String TOTAL_APP_WITH_HUGE_PARTITION_NUM =
"total_app_with_huge_partition_num";
private static final String TOTAL_PARTITION_NUM = "total_partition_num";
private static final String TOTAL_HUGE_PARTITION_NUM = "total_huge_partition_num";
private static final String HUGE_PARTITION_NUM = "huge_partition_num";
private static final String APP_WITH_HUGE_PARTITION_NUM = "app_with_huge_partition_num";
private static final String LOCAL_FILE_EVENT_FLUSH_NUM = "local_file_event_flush_num";
private static final String HADOOP_EVENT_FLUSH_NUM = "hadoop_event_flush_num";
private static final String TOTAL_EXPIRED_PRE_ALLOCATED_BUFFER_NUM =
"total_expired_preAllocated_buffer_num";
private static final String TOTAL_REMOVE_RESOURCE_TIME = "total_remove_resource_time";
private static final String TOTAL_REMOVE_RESOURCE_BY_SHUFFLE_IDS_TIME =
"total_remove_resource_by_shuffle_ids_time";
// Names of the gauges that setUpMetrics() builds directly with an "app_id" label.
public static final String TOPN_OF_TOTAL_DATA_SIZE_FOR_APP = "topN_of_total_data_size_for_app";
public static final String TOPN_OF_IN_MEMORY_DATA_SIZE_FOR_APP =
"topN_of_in_memory_data_size_for_app";
public static final String TOPN_OF_ON_LOCALFILE_DATA_SIZE_FOR_APP =
"topN_of_on_localfile_data_size_for_app";
public static final String TOPN_OF_ON_HADOOP_DATA_SIZE_FOR_APP =
"topN_of_on_hadoop_data_size_for_app";
// Metric handles. All of them are assigned in setUpMetrics() and are therefore null until
// register(...) has run.

// Counter children obtained from MetricsManager.addLabeledCounter(...).
// (Presumably already bound to the manager's default labels; confirm in MetricsManager.)
public static Counter.Child counterTotalAppNum;
public static Counter.Child counterTotalAppWithHugePartitionNum;
public static Counter.Child counterTotalPartitionNum;
public static Counter.Child counterTotalHugePartitionNum;
public static Counter.Child counterTotalReceivedDataSize;
public static Counter.Child counterTotalWriteDataSize;
public static Counter.Child counterTotalWriteBlockSize;
public static Counter.Child counterTotalWriteTime;
public static Counter.Child counterWriteException;
public static Counter.Child counterWriteSlow;
public static Counter.Child counterWriteTotal;
public static Counter.Child counterEventSizeThresholdLevel1;
public static Counter.Child counterEventSizeThresholdLevel2;
public static Counter.Child counterEventSizeThresholdLevel3;
public static Counter.Child counterEventSizeThresholdLevel4;
public static Counter.Child counterTotalReadDataSize;
public static Counter.Child counterTotalReadLocalDataFileSize;
public static Counter.Child counterTotalReadLocalIndexFileSize;
public static Counter.Child counterTotalReadMemoryDataSize;
public static Counter.Child counterTotalReadTime;
public static Counter.Child counterTotalFailedWrittenEventNum;
public static Counter.Child counterTotalDroppedEventNum;
public static Counter.Child counterTotalRequireBufferFailed;
public static Counter.Child counterTotalRequireBufferFailedForHugePartition;
public static Counter.Child counterTotalRequireBufferFailedForRegularPartition;
public static Counter.Child counterLocalStorageTotalWrite;
public static Counter.Child counterLocalStorageRetryWrite;
public static Counter.Child counterLocalStorageFailedWrite;
public static Counter.Child counterLocalStorageSuccessWrite;
public static Counter.Child counterTotalRequireReadMemoryNum;
public static Counter.Child counterTotalRequireReadMemoryRetryNum;
public static Counter.Child counterTotalRequireReadMemoryFailedNum;
// Summaries obtained from MetricsManager.addSummary(...).
public static Summary summaryTotalRemoveResourceTime;
public static Summary summaryTotalRemoveResourceByShuffleIdsTime;
// Gauge children obtained from MetricsManager.addLabeledGauge(...).
public static Gauge.Child gaugeHugePartitionNum;
public static Gauge.Child gaugeAppWithHugePartitionNum;
public static Gauge.Child gaugeLocalStorageTotalDirsNum;
public static Gauge.Child gaugeLocalStorageCorruptedDirsNum;
public static Gauge.Child gaugeLocalStorageTotalSpace;
public static Gauge.Child gaugeLocalStorageWholeDiskUsedSpace;
public static Gauge.Child gaugeLocalStorageServiceUsedSpace;
public static Gauge.Child gaugeLocalStorageUsedSpaceRatio;
public static Gauge.Child gaugeIsHealthy;
public static Gauge.Child gaugeAllocatedBufferSize;
public static Gauge.Child gaugeInFlushBufferSize;
public static Gauge.Child gaugeUsedBufferSize;
public static Gauge.Child gaugeReadBufferUsedSize;
public static Gauge.Child gaugeUsedDirectMemorySize;
public static Gauge.Child gaugeWriteHandler;
public static Gauge.Child gaugeEventQueueSize;
public static Gauge.Child gaugeHadoopFlushThreadPoolQueueSize;
public static Gauge.Child gaugeLocalfileFlushThreadPoolQueueSize;
public static Gauge.Child gaugeFallbackFlushThreadPoolQueueSize;
public static Gauge.Child gaugeAppNum;
public static Gauge.Child gaugeTotalPartitionNum;
// Gauges built directly with Gauge.build() and a single "app_id" label name.
public static Gauge gaugeTotalDataSizeUsage;
public static Gauge gaugeInMemoryDataSizeUsage;
public static Gauge gaugeOnDiskDataSizeUsage;
public static Gauge gaugeOnHadoopDataSizeUsage;
// Counters created with label names (Constants.METRICS_TAG_LABEL_NAME, STORAGE_HOST_LABEL);
// this class increments them through labels(tags, storageHost).
public static Counter counterRemoteStorageTotalWrite;
public static Counter counterRemoteStorageRetryWrite;
public static Counter counterRemoteStorageFailedWrite;
public static Counter counterRemoteStorageSuccessWrite;
public static Counter counterTotalHadoopWriteDataSize;
public static Counter counterTotalHadoopWriteDataSizeForHugePartition;
// Created with LOCAL_DISK_PATH_LABEL as its only label name (no tag label).
public static Counter counterTotalLocalFileWriteDataSize;
// Tag value passed to register(...); used as the first label value on the storage-host counters.
private static String tags;
// Counters created through addCounter(name) without any label names.
public static Counter counterLocalFileEventFlush;
public static Counter counterHadoopEventFlush;
public static Counter counterPreAllocatedBufferExpired;
// Registration state: the manager that creates the metrics, and a flag that makes register(...)
// a no-op once it has run (reset by clear()).
private static MetricsManager metricsManager;
private static boolean isRegister = false;
/**
 * Registers the shuffle-server metrics against the given registry.
 *
 * <p>Only the first call has an effect; while the registration flag is set, later calls return
 * without touching any state. {@link #clear()} resets the flag.
 *
 * @param collectorRegistry registry handed to the {@link MetricsManager} that creates the metrics
 * @param tags value stored under {@link Constants#METRICS_TAG_LABEL_NAME} in the manager's labels
 *     and remembered for the storage-host counters
 */
public static synchronized void register(CollectorRegistry collectorRegistry, String tags) {
  if (isRegister) {
    return;
  }
  ShuffleServerMetrics.tags = tags;
  Map<String, String> defaultLabels = Maps.newHashMap();
  defaultLabels.put(Constants.METRICS_TAG_LABEL_NAME, tags);
  metricsManager = new MetricsManager(collectorRegistry, defaultLabels);
  // The flag is raised before the metrics are created, so a failure inside setUpMetrics() still
  // leaves the class marked as registered.
  isRegister = true;
  setUpMetrics();
}
/**
 * Test-only shortcut: registers against {@link CollectorRegistry#defaultRegistry} using
 * {@link Constants#SHUFFLE_SERVER_VERSION} as the tags value.
 */
@VisibleForTesting
public static void register() {
  final CollectorRegistry defaultRegistry = CollectorRegistry.defaultRegistry;
  final String defaultTags = Constants.SHUFFLE_SERVER_VERSION;
  register(defaultRegistry, defaultTags);
}
/**
 * Test-only reset: clears the registration flag and empties
 * {@link CollectorRegistry#defaultRegistry} so that {@code register} can run again.
 *
 * <p>The method takes the same class-level lock as the synchronized {@code register}, so the flag
 * reset cannot interleave with a registration in progress and is visible to the next caller.
 *
 * <p>NOTE(review): only the default registry is cleared. If registration used a different
 * registry, its collectors stay registered; confirm that this is intended.
 */
@VisibleForTesting
public static synchronized void clear() {
  isRegister = false;
  CollectorRegistry.defaultRegistry.clear();
}
/**
 * Returns the registry held by the current {@link MetricsManager}.
 *
 * <p>The manager is created only by {@code register}; calling this before any registration
 * fails with a {@link NullPointerException}.
 *
 * @return the collector registry of the metrics manager
 */
public static CollectorRegistry getCollectorRegistry() {
  final MetricsManager manager = metricsManager;
  return manager.getCollectorRegistry();
}
/**
 * Records one retried storage write.
 *
 * <p>When {@code storageHost} equals {@link LocalStorage#STORAGE_HOST} the local total and retry
 * counters are incremented. Otherwise, for a non-empty host, the remote total and retry counters
 * are incremented under the labels {@code (tags, storageHost)}. A null or empty host is ignored.
 *
 * @param storageHost host identifier of the storage that was written to
 */
public static void incStorageRetryCounter(String storageHost) {
  if (LocalStorage.STORAGE_HOST.equals(storageHost)) {
    counterLocalStorageTotalWrite.inc();
    counterLocalStorageRetryWrite.inc();
    return;
  }
  if (StringUtils.isEmpty(storageHost)) {
    return;
  }
  counterRemoteStorageTotalWrite.labels(tags, storageHost).inc();
  counterRemoteStorageRetryWrite.labels(tags, storageHost).inc();
}
/**
 * Records one successful storage write.
 *
 * <p>When {@code storageHost} equals {@link LocalStorage#STORAGE_HOST} the local total and
 * success counters are incremented. Otherwise, for a non-empty host, the remote total and success
 * counters are incremented under the labels {@code (tags, storageHost)}. A null or empty host is
 * ignored.
 *
 * @param storageHost host identifier of the storage that was written to
 */
public static void incStorageSuccessCounter(String storageHost) {
  if (LocalStorage.STORAGE_HOST.equals(storageHost)) {
    counterLocalStorageTotalWrite.inc();
    counterLocalStorageSuccessWrite.inc();
    return;
  }
  if (StringUtils.isEmpty(storageHost)) {
    return;
  }
  counterRemoteStorageTotalWrite.labels(tags, storageHost).inc();
  counterRemoteStorageSuccessWrite.labels(tags, storageHost).inc();
}
/**
 * Records one failed storage write.
 *
 * <p>When {@code storageHost} equals {@link LocalStorage#STORAGE_HOST} the local total and failed
 * counters are incremented. Otherwise, for a non-empty host, the remote total and failed counters
 * are incremented under the labels {@code (tags, storageHost)}. A null or empty host is ignored.
 *
 * @param storageHost host identifier of the storage that was written to
 */
public static void incStorageFailedCounter(String storageHost) {
  if (LocalStorage.STORAGE_HOST.equals(storageHost)) {
    counterLocalStorageTotalWrite.inc();
    counterLocalStorageFailedWrite.inc();
    return;
  }
  if (StringUtils.isEmpty(storageHost)) {
    return;
  }
  counterRemoteStorageTotalWrite.labels(tags, storageHost).inc();
  counterRemoteStorageFailedWrite.labels(tags, storageHost).inc();
}
/**
 * Adds {@code size} to the Hadoop write-data counters for a storage host.
 *
 * <p>Each increment is recorded twice: once under the given host and once under
 * {@link #STORAGE_HOST_LABEL_ALL}. When the data belongs to a huge partition, the huge-partition
 * counter receives the same two increments. A null or empty host is ignored.
 *
 * @param storageHost host identifier used as the storage-host label value
 * @param size amount added to each affected counter
 * @param isOwnedByHugePartition whether the huge-partition counter is updated as well
 */
public static void incHadoopStorageWriteDataSize(
    String storageHost, long size, boolean isOwnedByHugePartition) {
  if (!StringUtils.isEmpty(storageHost)) {
    // Per-host series first, then the aggregate series.
    final String[] hostLabels = {storageHost, STORAGE_HOST_LABEL_ALL};
    for (String hostLabel : hostLabels) {
      counterTotalHadoopWriteDataSize.labels(tags, hostLabel).inc(size);
    }
    if (isOwnedByHugePartition) {
      for (String hostLabel : hostLabels) {
        counterTotalHadoopWriteDataSizeForHugePartition.labels(tags, hostLabel).inc(size);
      }
    }
  }
}
/**
 * Test-only overload that records the write as not belonging to a huge partition.
 *
 * @param storageHost host identifier used as the storage-host label value
 * @param size amount added to each affected counter
 */
@VisibleForTesting
public static void incHadoopStorageWriteDataSize(String storageHost, long size) {
  final boolean ownedByHugePartition = false;
  incHadoopStorageWriteDataSize(storageHost, size, ownedByHugePartition);
}
/**
 * Creates every shuffle-server metric through {@code metricsManager} and stores the resulting
 * handle in the matching static field. Invoked by {@code register} once the manager exists.
 */
private static void setUpMetrics() {
  final MetricsManager manager = metricsManager;
  final String tagLabel = Constants.METRICS_TAG_LABEL_NAME;

  // Write path and flush-event size buckets.
  counterTotalReceivedDataSize = manager.addLabeledCounter(TOTAL_RECEIVED_DATA);
  counterTotalWriteDataSize = manager.addLabeledCounter(TOTAL_WRITE_DATA);
  counterTotalWriteBlockSize = manager.addLabeledCounter(TOTAL_WRITE_BLOCK);
  counterTotalWriteTime = manager.addLabeledCounter(TOTAL_WRITE_TIME);
  counterWriteException = manager.addLabeledCounter(TOTAL_WRITE_EXCEPTION);
  counterWriteSlow = manager.addLabeledCounter(TOTAL_WRITE_SLOW);
  counterWriteTotal = manager.addLabeledCounter(TOTAL_WRITE_NUM);
  counterEventSizeThresholdLevel1 = manager.addLabeledCounter(EVENT_SIZE_THRESHOLD_LEVEL1);
  counterEventSizeThresholdLevel2 = manager.addLabeledCounter(EVENT_SIZE_THRESHOLD_LEVEL2);
  counterEventSizeThresholdLevel3 = manager.addLabeledCounter(EVENT_SIZE_THRESHOLD_LEVEL3);
  counterEventSizeThresholdLevel4 = manager.addLabeledCounter(EVENT_SIZE_THRESHOLD_LEVEL4);

  // Read path.
  counterTotalReadDataSize = manager.addLabeledCounter(TOTAL_READ_DATA);
  counterTotalReadLocalDataFileSize = manager.addLabeledCounter(TOTAL_READ_LOCAL_DATA_FILE);
  counterTotalReadLocalIndexFileSize = manager.addLabeledCounter(TOTAL_READ_LOCAL_INDEX_FILE);
  counterTotalReadMemoryDataSize = manager.addLabeledCounter(TOTAL_READ_MEMORY_DATA);
  counterTotalReadTime = manager.addLabeledCounter(TOTAL_READ_TIME);

  // Dropped and failed flush events.
  counterTotalDroppedEventNum = manager.addLabeledCounter(TOTAL_DROPPED_EVENT_NUM);
  counterTotalFailedWrittenEventNum = manager.addLabeledCounter(TOTAL_FAILED_WRITTEN_EVENT_NUM);

  // Written data size per storage host / local disk path; label values are supplied at use site.
  counterTotalHadoopWriteDataSize =
      manager.addCounter(TOTAL_HADOOP_WRITE_DATA, tagLabel, STORAGE_HOST_LABEL);
  counterTotalHadoopWriteDataSizeForHugePartition =
      manager.addCounter(
          TOTAL_HADOOP_WRITE_DATA_FOR_HUGE_PARTITION, tagLabel, STORAGE_HOST_LABEL);
  counterTotalLocalFileWriteDataSize =
      manager.addCounter(TOTAL_LOCALFILE_WRITE_DATA, LOCAL_DISK_PATH_LABEL);

  // Buffer requirement failures.
  counterTotalRequireBufferFailed = manager.addLabeledCounter(TOTAL_REQUIRE_BUFFER_FAILED);
  counterTotalRequireBufferFailedForRegularPartition =
      manager.addLabeledCounter(TOTAL_REQUIRE_BUFFER_FAILED_FOR_REGULAR_PARTITION);
  counterTotalRequireBufferFailedForHugePartition =
      manager.addLabeledCounter(TOTAL_REQUIRE_BUFFER_FAILED_FOR_HUGE_PARTITION);

  // Local storage write outcomes.
  counterLocalStorageTotalWrite = manager.addLabeledCounter(STORAGE_TOTAL_WRITE_LOCAL);
  counterLocalStorageRetryWrite = manager.addLabeledCounter(STORAGE_RETRY_WRITE_LOCAL);
  counterLocalStorageFailedWrite = manager.addLabeledCounter(STORAGE_FAILED_WRITE_LOCAL);
  counterLocalStorageSuccessWrite = manager.addLabeledCounter(STORAGE_SUCCESS_WRITE_LOCAL);

  // Remote storage write outcomes, labelled by tag and storage host.
  counterRemoteStorageTotalWrite =
      manager.addCounter(STORAGE_TOTAL_WRITE_REMOTE, tagLabel, STORAGE_HOST_LABEL);
  counterRemoteStorageRetryWrite =
      manager.addCounter(STORAGE_RETRY_WRITE_REMOTE, tagLabel, STORAGE_HOST_LABEL);
  counterRemoteStorageFailedWrite =
      manager.addCounter(STORAGE_FAILED_WRITE_REMOTE, tagLabel, STORAGE_HOST_LABEL);
  counterRemoteStorageSuccessWrite =
      manager.addCounter(STORAGE_SUCCESS_WRITE_REMOTE, tagLabel, STORAGE_HOST_LABEL);

  // Read-memory requirement attempts.
  counterTotalRequireReadMemoryNum = manager.addLabeledCounter(TOTAL_REQUIRE_READ_MEMORY);
  counterTotalRequireReadMemoryRetryNum =
      manager.addLabeledCounter(TOTAL_REQUIRE_READ_MEMORY_RETRY);
  counterTotalRequireReadMemoryFailedNum =
      manager.addLabeledCounter(TOTAL_REQUIRE_READ_MEMORY_FAILED);

  // Cumulative app and partition counts.
  counterTotalAppNum = manager.addLabeledCounter(TOTAL_APP_NUM);
  counterTotalAppWithHugePartitionNum =
      manager.addLabeledCounter(TOTAL_APP_WITH_HUGE_PARTITION_NUM);
  counterTotalPartitionNum = manager.addLabeledCounter(TOTAL_PARTITION_NUM);
  counterTotalHugePartitionNum = manager.addLabeledCounter(TOTAL_HUGE_PARTITION_NUM);

  // Local storage gauges.
  gaugeLocalStorageTotalDirsNum = manager.addLabeledGauge(LOCAL_STORAGE_TOTAL_DIRS_NUM);
  gaugeLocalStorageCorruptedDirsNum = manager.addLabeledGauge(LOCAL_STORAGE_CORRUPTED_DIRS_NUM);
  gaugeLocalStorageTotalSpace = manager.addLabeledGauge(LOCAL_STORAGE_TOTAL_SPACE);
  gaugeLocalStorageWholeDiskUsedSpace =
      manager.addLabeledGauge(LOCAL_STORAGE_WHOLE_DISK_USED_SPACE);
  gaugeLocalStorageServiceUsedSpace = manager.addLabeledGauge(LOCAL_STORAGE_SERVICE_USED_SPACE);
  gaugeLocalStorageUsedSpaceRatio = manager.addLabeledGauge(LOCAL_STORAGE_USED_SPACE_RATIO);

  // Health, buffer and memory gauges.
  gaugeIsHealthy = manager.addLabeledGauge(IS_HEALTHY);
  gaugeAllocatedBufferSize = manager.addLabeledGauge(ALLOCATED_BUFFER_SIZE);
  gaugeInFlushBufferSize = manager.addLabeledGauge(IN_FLUSH_BUFFER_SIZE);
  gaugeUsedBufferSize = manager.addLabeledGauge(USED_BUFFER_SIZE);
  gaugeReadBufferUsedSize = manager.addLabeledGauge(READ_USED_BUFFER_SIZE);
  gaugeUsedDirectMemorySize = manager.addLabeledGauge(USED_DIRECT_MEMORY_SIZE);

  // Write handler and queue gauges.
  gaugeWriteHandler = manager.addLabeledGauge(TOTAL_WRITE_HANDLER);
  gaugeEventQueueSize = manager.addLabeledGauge(EVENT_QUEUE_SIZE);
  gaugeHadoopFlushThreadPoolQueueSize =
      manager.addLabeledGauge(HADOOP_FLUSH_THREAD_POOL_QUEUE_SIZE);
  gaugeLocalfileFlushThreadPoolQueueSize =
      manager.addLabeledGauge(LOCALFILE_FLUSH_THREAD_POOL_QUEUE_SIZE);
  gaugeFallbackFlushThreadPoolQueueSize =
      manager.addLabeledGauge(FALLBACK_FLUSH_THREAD_POOL_QUEUE_SIZE);

  // Current app and partition gauges.
  gaugeAppNum = manager.addLabeledGauge(APP_NUM_WITH_NODE);
  gaugeTotalPartitionNum = manager.addLabeledGauge(PARTITION_NUM_WITH_NODE);
  gaugeHugePartitionNum = manager.addLabeledGauge(HUGE_PARTITION_NUM);
  gaugeAppWithHugePartitionNum = manager.addLabeledGauge(APP_WITH_HUGE_PARTITION_NUM);

  // Counters created without label names.
  counterLocalFileEventFlush = manager.addCounter(LOCAL_FILE_EVENT_FLUSH_NUM);
  counterHadoopEventFlush = manager.addCounter(HADOOP_EVENT_FLUSH_NUM);
  counterPreAllocatedBufferExpired = manager.addCounter(TOTAL_EXPIRED_PRE_ALLOCATED_BUFFER_NUM);

  // Resource removal timings.
  summaryTotalRemoveResourceTime = manager.addSummary(TOTAL_REMOVE_RESOURCE_TIME);
  summaryTotalRemoveResourceByShuffleIdsTime =
      manager.addSummary(TOTAL_REMOVE_RESOURCE_BY_SHUFFLE_IDS_TIME);

  // Per-app top-N gauges, registered directly on the manager's registry.
  gaugeTotalDataSizeUsage =
      registerAppLevelGauge(
          TOPN_OF_TOTAL_DATA_SIZE_FOR_APP, "top N of total shuffle data size for app level");
  gaugeInMemoryDataSizeUsage =
      registerAppLevelGauge(
          TOPN_OF_IN_MEMORY_DATA_SIZE_FOR_APP,
          "top N of in memory shuffle data size for app level");
  gaugeOnDiskDataSizeUsage =
      registerAppLevelGauge(
          TOPN_OF_ON_LOCALFILE_DATA_SIZE_FOR_APP,
          "top N of on disk shuffle data size for app level");
  gaugeOnHadoopDataSizeUsage =
      registerAppLevelGauge(
          TOPN_OF_ON_HADOOP_DATA_SIZE_FOR_APP,
          "top N of on hadoop shuffle data size for app level");
}

/** Builds a gauge with the single label name "app_id" and registers it on the manager's registry. */
private static Gauge registerAppLevelGauge(String name, String help) {
  return Gauge.build()
      .name(name)
      .help(help)
      .labelNames("app_id")
      .register(metricsManager.getCollectorRegistry());
}
}