{
"layouts": [
{
"layout_name": "default_hdfs_dashboard",
"display_name": "Standard HDFS Dashboard",
"section_name": "HDFS_SUMMARY",
"widgetLayoutInfo": [
{
"widget_name": "NameNode GC count",
"description": "Count of total garbage collections and count of major type garbage collections of the JVM.",
"widget_type": "GRAPH",
"is_visible": true,
"metrics": [
{
"name": "jvm.JvmMetrics.GcCount._rate",
"metric_path": "metrics/jvm/gcCount._rate",
"service_name": "HDFS",
"component_name": "NAMENODE",
"host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active"
},
{
"name": "jvm.JvmMetrics.GcCountConcurrentMarkSweep._rate",
"metric_path": "metrics/jvm/GcCountConcurrentMarkSweep._rate",
"service_name": "HDFS",
"component_name": "NAMENODE",
"host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active"
}
],
"values": [
{
"name": "GC total count",
"value": "${jvm.JvmMetrics.GcCount._rate}"
},
{
"name": "GC count of type major collection",
"value": "${jvm.JvmMetrics.GcCountConcurrentMarkSweep._rate}"
}
],
"properties": {
"graph_type": "LINE",
"time_range": "1"
}
},
{
"widget_name": "NameNode GC time",
"description": "Total time taken by major type garbage collections in milliseconds.",
"widget_type": "GRAPH",
"is_visible": true,
"metrics": [
{
"name": "jvm.JvmMetrics.GcTimeMillisConcurrentMarkSweep._rate",
"metric_path": "metrics/jvm/GcTimeMillisConcurrentMarkSweep._rate",
"service_name": "HDFS",
"component_name": "NAMENODE",
"host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active"
}
],
"values": [
{
"name": "GC time in major collection",
"value": "${jvm.JvmMetrics.GcTimeMillisConcurrentMarkSweep._rate}"
}
],
"properties": {
"display_unit": "ms",
"graph_type": "LINE",
"time_range": "1"
}
},
{
"widget_name": "NN Connection Load",
"description": "Number of open RPC connections being managed by NameNode.",
"widget_type": "GRAPH",
"is_visible": true,
"metrics": [
{
"name": "rpc.rpc.client.NumOpenConnections",
"metric_path": "metrics/rpc/client/NumOpenConnections",
"category": "",
"service_name": "HDFS",
"component_name": "NAMENODE",
"host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active"
},
{
"name": "rpc.rpc.datanode.NumOpenConnections",
"metric_path": "metrics/rpc/datanode/NumOpenConnections",
"category": "",
"service_name": "HDFS",
"component_name": "NAMENODE",
"host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active"
}
],
"values": [
{
"name": "Open Client Connections",
"value": "${rpc.rpc.client.NumOpenConnections}"
},
{
"name": "Open Datanode Connections",
"value": "${rpc.rpc.datanode.NumOpenConnections}"
}
],
"properties": {
"graph_type": "LINE",
"time_range": "1"
}
},
{
"widget_name": "NameNode Heap",
"description": "Heap memory committed and Heap memory used with respect to time.",
"widget_type": "GRAPH",
"is_visible": true,
"metrics": [
{
"name": "jvm.JvmMetrics.MemHeapCommittedM",
"metric_path": "metrics/jvm/memHeapCommittedM",
"service_name": "HDFS",
"component_name": "NAMENODE",
"host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active"
},
{
"name": "jvm.JvmMetrics.MemHeapUsedM",
"metric_path": "metrics/jvm/memHeapUsedM",
"service_name": "HDFS",
"component_name": "NAMENODE",
"host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active"
}
],
"values": [
{
"name": "JVM heap committed",
"value": "${jvm.JvmMetrics.MemHeapCommittedM}"
},
{
"name": "JVM heap used",
"value": "${jvm.JvmMetrics.MemHeapUsedM}"
}
],
"properties": {
"display_unit": "MB",
"graph_type": "LINE",
"time_range": "1"
}
},
{
"widget_name": "NameNode Host Load",
"description": "Percentage of CPU and Memory resources being consumed on NameNode host.",
"widget_type": "GRAPH",
"is_visible": true,
"metrics": [
{
"name": "cpu_system",
"metric_path": "metrics/cpu/cpu_system",
"service_name": "HDFS",
"component_name": "NAMENODE",
"host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active"
},
{
"name": "cpu_user",
"metric_path": "metrics/cpu/cpu_user",
"service_name": "HDFS",
"component_name": "NAMENODE",
"host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active"
},
{
"name": "cpu_nice",
"metric_path": "metrics/cpu/cpu_nice",
"service_name": "HDFS",
"component_name": "NAMENODE",
"host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active"
},
{
"name": "cpu_idle",
"metric_path": "metrics/cpu/cpu_idle",
"service_name": "HDFS",
"component_name": "NAMENODE",
"host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active"
},
{
"name": "cpu_wio",
"metric_path": "metrics/cpu/cpu_wio",
"service_name": "HDFS",
"component_name": "NAMENODE",
"host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active"
},
{
"name": "mem_total",
"metric_path": "metrics/memory/mem_total",
"service_name": "HDFS",
"component_name": "NAMENODE",
"host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active"
},
{
"name": "mem_free",
"metric_path": "metrics/memory/mem_free",
"service_name": "HDFS",
"component_name": "NAMENODE",
"host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active"
}
],
"values": [
{
"name": "CPU utilization",
"value": "${((cpu_system + cpu_user + cpu_nice)/(cpu_system + cpu_user + cpu_nice + cpu_idle + cpu_wio)) * 100}"
},
{
"name": "Memory utilization",
"value": "${((mem_total - mem_free)/mem_total) * 100}"
}
],
"properties": {
"graph_type": "LINE",
"time_range": "1",
"display_unit": "%"
}
},
{
"widget_name": "NameNode RPC",
"description": "Compares the average time spent for RPC request in a queue and RPC request being processed.",
"widget_type": "GRAPH",
"is_visible": true,
"metrics": [
{
"name": "rpc.rpc.client.RpcQueueTimeAvgTime",
"metric_path": "metrics/rpc/client/RpcQueueTime_avg_time",
"service_name": "HDFS",
"component_name": "NAMENODE",
"host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active"
},
{
"name": "rpc.rpc.client.RpcProcessingTimeAvgTime",
"metric_path": "metrics/rpc/client/RpcProcessingTime_avg_time",
"service_name": "HDFS",
"component_name": "NAMENODE",
"host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active"
},
{
"name": "rpc.rpc.datanode.RpcQueueTimeAvgTime",
"metric_path": "metrics/rpc/datanode/RpcQueueTime_avg_time",
"service_name": "HDFS",
"component_name": "NAMENODE",
"host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active"
},
{
"name": "rpc.rpc.datanode.RpcProcessingTimeAvgTime",
"metric_path": "metrics/rpc/datanode/RpcProcessingTime_avg_time",
"service_name": "HDFS",
"component_name": "NAMENODE",
"host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active"
}
],
"values": [
{
"name": "Client RPC Queue Wait time",
"value": "${rpc.rpc.client.RpcQueueTimeAvgTime}"
},
{
"name": "Client RPC Processing time",
"value": "${rpc.rpc.client.RpcProcessingTimeAvgTime}"
},
{
"name": "Datanode RPC Queue Wait time",
"value": "${rpc.rpc.datanode.RpcQueueTimeAvgTime}"
},
{
"name": "Datanode RPC Processing time",
"value": "${rpc.rpc.datanode.RpcProcessingTimeAvgTime}"
}
],
"properties": {
"graph_type": "LINE",
"time_range": "1",
"display_unit": "ms"
}
},
{
"widget_name": "NameNode Operations",
"description": "Rate per second of number of file operation over time.",
"widget_type": "GRAPH",
"is_visible": false,
"metrics": [
{
"name": "dfs.namenode.TotalFileOps._rate",
"metric_path": "metrics/dfs/namenode/TotalFileOps._rate",
"service_name": "HDFS",
"component_name": "NAMENODE",
"host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active"
}
],
"values": [
{
"name": "NameNode File Operations",
"value": "${dfs.namenode.TotalFileOps._rate}"
}
],
"properties": {
"graph_type": "LINE",
"time_range": "1"
}
},
{
"widget_name": "Failed disk volumes",
"description": "Number of Failed disk volumes across all DataNodes. Its indicative of HDFS bad health.",
"widget_type": "NUMBER",
"is_visible": true,
"metrics": [
{
"name": "FSDatasetState.org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.NumFailedVolumes._sum",
"metric_path": "metrics/dfs/datanode/NumFailedVolumes",
"service_name": "HDFS",
"component_name": "DATANODE"
}
],
"values": [
{
"name": "Failed disk volumes",
"value": "${FSDatasetState.org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.NumFailedVolumes._sum}"
}
],
"properties": {
"display_unit": ""
}
},
{
"widget_name": "Blocks With Corrupted Replicas",
"description": "Number represents data blocks with at least one corrupted replica (but not all of them). Its indicative of HDFS bad health.",
"widget_type": "NUMBER",
"is_visible": true,
"metrics": [
{
"name": "Hadoop:service=NameNode,name=FSNamesystem.CorruptBlocks",
"metric_path": "metrics/dfs/FSNamesystem/CorruptBlocks",
"service_name": "HDFS",
"component_name": "NAMENODE",
"host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active"
}
],
"values": [
{
"name": "Blocks With Corrupted Replicas",
"value": "${Hadoop:service=NameNode,name=FSNamesystem.CorruptBlocks}"
}
],
"properties": {
"warning_threshold": "0",
"error_threshold": "50"
}
},
{
"widget_name": "Under Replicated Blocks",
"description": "Number represents file blocks that does not meet the replication factor criteria. Its indicative of HDFS bad health.",
"widget_type": "NUMBER",
"is_visible": true,
"metrics": [
{
"name": "Hadoop:service=NameNode,name=FSNamesystem.UnderReplicatedBlocks",
"metric_path": "metrics/dfs/FSNamesystem/UnderReplicatedBlocks",
"service_name": "HDFS",
"component_name": "NAMENODE",
"host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active"
}
],
"values": [
{
"name": "Under Replicated Blocks",
"value": "${Hadoop:service=NameNode,name=FSNamesystem.UnderReplicatedBlocks}"
}
],
"properties": {
"warning_threshold": "0",
"error_threshold": "50"
}
},
{
"widget_name": "HDFS Space Utilization",
"description": "Percentage of available space used in the DFS.",
"widget_type": "GAUGE",
"is_visible": true,
"metrics": [
{
"name": "FSDatasetState.org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.DfsUsed",
"metric_path": "metrics/dfs/datanode/DfsUsed",
"service_name": "HDFS",
"component_name": "DATANODE"
},
{
"name": "FSDatasetState.org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.Capacity",
"metric_path": "metrics/dfs/datanode/Capacity",
"service_name": "HDFS",
"component_name": "DATANODE"
}
],
"values": [
{
"name": "HDFS Space Utilization",
"value": "${FSDatasetState.org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.DfsUsed/FSDatasetState.org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.Capacity}"
}
],
"properties": {
"warning_threshold": "0.75",
"error_threshold": "0.9"
}
}
]
},
{
"layout_name": "default_hdfs_heatmap",
"section_name": "HDFS_HEATMAPS",
"display_name": "HDFS Heatmaps",
"widgetLayoutInfo": [
{
"widget_name": "HDFS Bytes Read",
"default_section_name": "HDFS_HEATMAPS",
"description": "",
"widget_type": "HEATMAP",
"is_visible": true,
"metrics": [
{
"name": "dfs.datanode.BytesRead._rate",
"metric_path": "metrics/dfs/datanode/bytes_read._rate",
"service_name": "HDFS",
"component_name": "DATANODE"
}
],
"values": [
{
"name": "HDFS Bytes Read",
"value": "${dfs.datanode.BytesRead._rate}"
}
],
"properties": {
"display_unit": "MB",
"max_limit": "1024"
}
},
{
"widget_name": "HDFS Bytes Written",
"description": "",
"widget_type": "HEATMAP",
"is_visible": false,
"metrics": [
{
"name": "dfs.datanode.BytesWritten._rate",
"metric_path": "metrics/dfs/datanode/bytes_written._rate",
"service_name": "HDFS",
"component_name": "DATANODE"
}
],
"values": [
{
"name": "HDFS Bytes Written",
"value": "${dfs.datanode.BytesWritten._rate}"
}
],
"properties": {
"display_unit": "MB",
"max_limit": "1024"
}
},
{
"widget_name": "DataNode Garbage Collection Time",
"description": "",
"widget_type": "HEATMAP",
"is_visible": false,
"metrics": [
{
"name": "Hadoop:service=DataNode,name=JvmMetrics.GcTimeMillis",
"metric_path": "metrics/jvm/gcTimeMillis",
"service_name": "HDFS",
"component_name": "DATANODE"
}
],
"values": [
{
"name": "DataNode Garbage Collection Time",
"value": "${Hadoop:service=DataNode,name=JvmMetrics.GcTimeMillis}"
}
],
"properties": {
"display_unit": "ms",
"max_limit": "10000"
}
},
{
"widget_name": "DataNode JVM Heap Memory Used",
"description": "",
"widget_type": "HEATMAP",
"is_visible": false,
"metrics": [
{
"name": "Hadoop:service=DataNode,name=JvmMetrics.MemHeapUsedM",
"metric_path": "metrics/jvm/memHeapUsedM",
"service_name": "HDFS",
"component_name": "DATANODE"
}
],
"values": [
{
"name": "DataNode JVM Heap Memory Used",
"value": "${Hadoop:service=DataNode,name=JvmMetrics.MemHeapUsedM}"
}
],
"properties": {
"display_unit": "MB",
"max_limit": "512"
}
},
{
"widget_name": "DataNode JVM Heap Memory Committed",
"description": "",
"widget_type": "HEATMAP",
"is_visible": false,
"metrics": [
{
"name": "Hadoop:service=DataNode,name=JvmMetrics.MemHeapCommittedM",
"metric_path": "metrics/jvm/memHeapCommittedM",
"service_name": "HDFS",
"component_name": "DATANODE"
}
],
"values": [
{
"name": "DataNode JVM Heap Memory Committed",
"value": "${Hadoop:service=DataNode,name=JvmMetrics.MemHeapCommittedM}"
}
],
"properties": {
"display_unit": "MB",
"max_limit": "512"
}
},
{
"widget_name": "DataNode Process Disk I/O Utilization",
"default_section_name": "HDFS_HEATMAPS",
"description": "",
"widget_type": "HEATMAP",
"is_visible": false,
"metrics": [
{
"name": "dfs.datanode.BytesRead._rate",
"metric_path": "metrics/dfs/datanode/bytes_read._rate",
"service_name": "HDFS",
"component_name": "DATANODE"
},
{
"name": "dfs.datanode.BytesWritten._rate",
"metric_path": "metrics/dfs/datanode/bytes_written._rate",
"service_name": "HDFS",
"component_name": "DATANODE"
},
{
"name": "dfs.datanode.TotalReadTime._rate",
"metric_path": "metrics/dfs/datanode/TotalReadTime._rate",
"service_name": "HDFS",
"component_name": "DATANODE"
},
{
"name": "dfs.datanode.TotalWriteTime._rate",
"metric_path": "metrics/dfs/datanode/TotalWriteTime._rate",
"service_name": "HDFS",
"component_name": "DATANODE"
}
],
"values": [
{
"name": "DataNode Process Disk I/O Utilization",
"value": "${((dfs.datanode.BytesRead._rate/dfs.datanode.TotalReadTime._rate)+(dfs.datanode.BytesWritten._rate/dfs.datanode.TotalWriteTime._rate))*50}"
}
],
"properties": {
"display_unit": "%",
"max_limit": "100"
}
},
{
"widget_name": "DataNode Process Network I/O Utilization",
"description": "",
"widget_type": "HEATMAP",
"is_visible": false,
"metrics": [
{
"name": "dfs.datanode.RemoteBytesRead._rate",
"metric_path": "metrics/dfs/datanode/RemoteBytesRead._rate",
"service_name": "HDFS",
"component_name": "DATANODE"
},
{
"name": "dfs.datanode.ReadsFromRemoteClient._rate",
"metric_path": "metrics/dfs/datanode/reads_from_remote_client._rate",
"service_name": "HDFS",
"component_name": "DATANODE"
},
{
"name": "dfs.datanode.RemoteBytesWritten._rate",
"metric_path": "metrics/dfs/datanode/RemoteBytesWritten._rate",
"service_name": "HDFS",
"component_name": "DATANODE"
},
{
"name": "dfs.datanode.WritesFromRemoteClient._rate",
"metric_path": "metrics/dfs/datanode/writes_from_remote_client._rate",
"service_name": "HDFS",
"component_name": "DATANODE"
}
],
"values": [
{
"name": "DataNode Process Network I/O Utilization",
"value": "${((dfs.datanode.RemoteBytesRead._rate/dfs.datanode.ReadsFromRemoteClient._rate)+(dfs.datanode.RemoteBytesWritten._rate/dfs.datanode.WritesFromRemoteClient._rate))*50}"
}
],
"properties": {
"display_unit": "%",
"max_limit": "100"
}
},
{
"widget_name": "HDFS Space Utilization",
"widget_type": "HEATMAP",
"is_visible": false,
"metrics": [
{
"name": "FSDatasetState.org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.DfsUsed",
"metric_path": "metrics/dfs/datanode/DfsUsed",
"service_name": "HDFS",
"component_name": "DATANODE"
},
{
"name": "FSDatasetState.org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.Capacity",
"metric_path": "metrics/dfs/datanode/Capacity",
"service_name": "HDFS",
"component_name": "DATANODE"
}
],
"values": [
{
"name": "HDFS Space Utilization",
"value": "${(FSDatasetState.org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.DfsUsed/FSDatasetState.org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.Capacity) * 100}"
}
],
"properties": {
"display_unit": "%",
"max_limit": "100"
}
}
]
}
]
}