| { |
| "layouts": [ |
| { |
| "layout_name": "default_hdfs_dashboard", |
| "display_name": "Standard HDFS Dashboard", |
| "section_name": "HDFS_SUMMARY", |
| "widgetLayoutInfo": [ |
| { |
| "widget_name": "NameNode GC count", |
| "description": "Count of total garbage collections and count of major type garbage collections of the JVM.", |
| "widget_type": "GRAPH", |
| "is_visible": true, |
| "metrics": [ |
| { |
| "name": "jvm.JvmMetrics.GcCount._rate", |
| "metric_path": "metrics/jvm/gcCount._rate", |
| "service_name": "HDFS", |
| "component_name": "NAMENODE", |
| "host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active" |
| }, |
| { |
| "name": "jvm.JvmMetrics.GcCountConcurrentMarkSweep._rate", |
| "metric_path": "metrics/jvm/GcCountConcurrentMarkSweep._rate", |
| "service_name": "HDFS", |
| "component_name": "NAMENODE", |
| "host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active" |
| } |
| ], |
| "values": [ |
| { |
| "name": "GC total count", |
| "value": "${jvm.JvmMetrics.GcCount._rate}" |
| }, |
| { |
| "name": "GC count of type major collection", |
| "value": "${jvm.JvmMetrics.GcCountConcurrentMarkSweep._rate}" |
| } |
| ], |
| "properties": { |
| "graph_type": "LINE", |
| "time_range": "1" |
| } |
| }, |
| { |
| "widget_name": "NameNode GC time", |
| "description": "Total time taken by major type garbage collections in milliseconds.", |
| "widget_type": "GRAPH", |
| "is_visible": true, |
| "metrics": [ |
| { |
| "name": "jvm.JvmMetrics.GcTimeMillisConcurrentMarkSweep._rate", |
| "metric_path": "metrics/jvm/GcTimeMillisConcurrentMarkSweep._rate", |
| "service_name": "HDFS", |
| "component_name": "NAMENODE", |
| "host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active" |
| } |
| ], |
| "values": [ |
| { |
| "name": "GC time in major collection", |
| "value": "${jvm.JvmMetrics.GcTimeMillisConcurrentMarkSweep._rate}" |
| } |
| ], |
| "properties": { |
| "display_unit": "ms", |
| "graph_type": "LINE", |
| "time_range": "1" |
| } |
| }, |
| { |
| "widget_name": "NN Connection Load", |
| "description": "Number of open RPC connections being managed by NameNode.", |
| "widget_type": "GRAPH", |
| "is_visible": true, |
| "metrics": [ |
| { |
| "name": "rpc.rpc.client.NumOpenConnections", |
| "metric_path": "metrics/rpc/client/NumOpenConnections", |
| "category": "", |
| "service_name": "HDFS", |
| "component_name": "NAMENODE", |
| "host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active" |
| }, |
| { |
| "name": "rpc.rpc.datanode.NumOpenConnections", |
| "metric_path": "metrics/rpc/datanode/NumOpenConnections", |
| "category": "", |
| "service_name": "HDFS", |
| "component_name": "NAMENODE", |
| "host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active" |
| } |
| ], |
| "values": [ |
| { |
| "name": "Open Client Connections", |
| "value": "${rpc.rpc.client.NumOpenConnections}" |
| }, |
| { |
| "name": "Open Datanode Connections", |
| "value": "${rpc.rpc.datanode.NumOpenConnections}" |
| } |
| ], |
| "properties": { |
| "graph_type": "LINE", |
| "time_range": "1" |
| } |
| }, |
| { |
| "widget_name": "NameNode Heap", |
| "description": "Heap memory committed and Heap memory used with respect to time.", |
| "widget_type": "GRAPH", |
| "is_visible": true, |
| "metrics": [ |
| { |
| "name": "jvm.JvmMetrics.MemHeapCommittedM", |
| "metric_path": "metrics/jvm/memHeapCommittedM", |
| "service_name": "HDFS", |
| "component_name": "NAMENODE", |
| "host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active" |
| }, |
| { |
| "name": "jvm.JvmMetrics.MemHeapUsedM", |
| "metric_path": "metrics/jvm/memHeapUsedM", |
| "service_name": "HDFS", |
| "component_name": "NAMENODE", |
| "host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active" |
| } |
| ], |
| "values": [ |
| { |
| "name": "JVM heap committed", |
| "value": "${jvm.JvmMetrics.MemHeapCommittedM}" |
| }, |
| { |
| "name": "JVM heap used", |
| "value": "${jvm.JvmMetrics.MemHeapUsedM}" |
| } |
| ], |
| "properties": { |
| "display_unit": "MB", |
| "graph_type": "LINE", |
| "time_range": "1" |
| } |
| }, |
| { |
| "widget_name": "NameNode Host Load", |
| "description": "Percentage of CPU and Memory resources being consumed on NameNode host.", |
| "widget_type": "GRAPH", |
| "is_visible": true, |
| "metrics": [ |
| { |
| "name": "cpu_system", |
| "metric_path": "metrics/cpu/cpu_system", |
| "service_name": "HDFS", |
| "component_name": "NAMENODE", |
| "host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active" |
| }, |
| { |
| "name": "cpu_user", |
| "metric_path": "metrics/cpu/cpu_user", |
| "service_name": "HDFS", |
| "component_name": "NAMENODE", |
| "host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active" |
| }, |
| { |
| "name": "cpu_nice", |
| "metric_path": "metrics/cpu/cpu_nice", |
| "service_name": "HDFS", |
| "component_name": "NAMENODE", |
| "host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active" |
| }, |
| { |
| "name": "cpu_idle", |
| "metric_path": "metrics/cpu/cpu_idle", |
| "service_name": "HDFS", |
| "component_name": "NAMENODE", |
| "host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active" |
| }, |
| { |
| "name": "cpu_wio", |
| "metric_path": "metrics/cpu/cpu_wio", |
| "service_name": "HDFS", |
| "component_name": "NAMENODE", |
| "host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active" |
| }, |
| { |
| "name": "mem_total", |
| "metric_path": "metrics/memory/mem_total", |
| "service_name": "HDFS", |
| "component_name": "NAMENODE", |
| "host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active" |
| }, |
| { |
| "name": "mem_free", |
| "metric_path": "metrics/memory/mem_free", |
| "service_name": "HDFS", |
| "component_name": "NAMENODE", |
| "host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active" |
| } |
| ], |
| "values": [ |
| { |
| "name": "CPU utilization", |
| "value": "${((cpu_system + cpu_user + cpu_nice)/(cpu_system + cpu_user + cpu_nice + cpu_idle + cpu_wio)) * 100}" |
| }, |
| { |
| "name": "Memory utilization", |
| "value": "${((mem_total - mem_free)/mem_total) * 100}" |
| } |
| ], |
| "properties": { |
| "graph_type": "LINE", |
| "time_range": "1", |
| "display_unit": "%" |
| } |
| }, |
| { |
| "widget_name": "NameNode RPC", |
| "description": "Compares the average time an RPC request spends waiting in the queue with the average time spent processing it.", |
| "widget_type": "GRAPH", |
| "is_visible": true, |
| "metrics": [ |
| { |
| "name": "rpc.rpc.client.RpcQueueTimeAvgTime", |
| "metric_path": "metrics/rpc/client/RpcQueueTime_avg_time", |
| "service_name": "HDFS", |
| "component_name": "NAMENODE", |
| "host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active" |
| }, |
| { |
| "name": "rpc.rpc.client.RpcProcessingTimeAvgTime", |
| "metric_path": "metrics/rpc/client/RpcProcessingTime_avg_time", |
| "service_name": "HDFS", |
| "component_name": "NAMENODE", |
| "host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active" |
| }, |
| { |
| "name": "rpc.rpc.datanode.RpcQueueTimeAvgTime", |
| "metric_path": "metrics/rpc/datanode/RpcQueueTime_avg_time", |
| "service_name": "HDFS", |
| "component_name": "NAMENODE", |
| "host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active" |
| }, |
| { |
| "name": "rpc.rpc.datanode.RpcProcessingTimeAvgTime", |
| "metric_path": "metrics/rpc/datanode/RpcProcessingTime_avg_time", |
| "service_name": "HDFS", |
| "component_name": "NAMENODE", |
| "host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active" |
| } |
| ], |
| "values": [ |
| { |
| "name": "Client RPC Queue Wait time", |
| "value": "${rpc.rpc.client.RpcQueueTimeAvgTime}" |
| }, |
| { |
| "name": "Client RPC Processing time", |
| "value": "${rpc.rpc.client.RpcProcessingTimeAvgTime}" |
| }, |
| { |
| "name": "Datanode RPC Queue Wait time", |
| "value": "${rpc.rpc.datanode.RpcQueueTimeAvgTime}" |
| }, |
| { |
| "name": "Datanode RPC Processing time", |
| "value": "${rpc.rpc.datanode.RpcProcessingTimeAvgTime}" |
| } |
| ], |
| "properties": { |
| "graph_type": "LINE", |
| "time_range": "1", |
| "display_unit": "ms" |
| } |
| }, |
| { |
| "widget_name": "NameNode Operations", |
| "description": "Number of file operations per second over time.", |
| "widget_type": "GRAPH", |
| "is_visible": false, |
| "metrics": [ |
| { |
| "name": "dfs.namenode.TotalFileOps._rate", |
| "metric_path": "metrics/dfs/namenode/TotalFileOps._rate", |
| "service_name": "HDFS", |
| "component_name": "NAMENODE", |
| "host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active" |
| } |
| ], |
| "values": [ |
| { |
| "name": "NameNode File Operations", |
| "value": "${dfs.namenode.TotalFileOps._rate}" |
| } |
| ], |
| "properties": { |
| "graph_type": "LINE", |
| "time_range": "1" |
| } |
| }, |
| { |
| "widget_name": "Failed disk volumes", |
| "description": "Number of failed disk volumes across all DataNodes. It's indicative of HDFS bad health.", |
| "widget_type": "NUMBER", |
| "is_visible": true, |
| "metrics": [ |
| { |
| "name": "FSDatasetState.org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.NumFailedVolumes._sum", |
| "metric_path": "metrics/dfs/datanode/NumFailedVolumes", |
| "service_name": "HDFS", |
| "component_name": "DATANODE" |
| } |
| ], |
| "values": [ |
| { |
| "name": "Failed disk volumes", |
| "value": "${FSDatasetState.org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.NumFailedVolumes._sum}" |
| } |
| ], |
| "properties": { |
| "display_unit": "" |
| } |
| }, |
| { |
| "widget_name": "Blocks With Corrupted Replicas", |
| "description": "Number of data blocks with at least one corrupted replica (but not all of them). It's indicative of HDFS bad health.", |
| "widget_type": "NUMBER", |
| "is_visible": true, |
| "metrics": [ |
| { |
| "name": "Hadoop:service=NameNode,name=FSNamesystem.CorruptBlocks", |
| "metric_path": "metrics/dfs/FSNamesystem/CorruptBlocks", |
| "service_name": "HDFS", |
| "component_name": "NAMENODE", |
| "host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active" |
| } |
| ], |
| "values": [ |
| { |
| "name": "Blocks With Corrupted Replicas", |
| "value": "${Hadoop:service=NameNode,name=FSNamesystem.CorruptBlocks}" |
| } |
| ], |
| "properties": { |
| "warning_threshold": "0", |
| "error_threshold": "50" |
| } |
| }, |
| { |
| "widget_name": "Under Replicated Blocks", |
| "description": "Number of file blocks that do not meet the replication factor criteria. It's indicative of HDFS bad health.", |
| "widget_type": "NUMBER", |
| "is_visible": true, |
| "metrics": [ |
| { |
| "name": "Hadoop:service=NameNode,name=FSNamesystem.UnderReplicatedBlocks", |
| "metric_path": "metrics/dfs/FSNamesystem/UnderReplicatedBlocks", |
| "service_name": "HDFS", |
| "component_name": "NAMENODE", |
| "host_component_criteria": "host_components/metrics/dfs/FSNamesystem/HAState=active" |
| } |
| ], |
| "values": [ |
| { |
| "name": "Under Replicated Blocks", |
| "value": "${Hadoop:service=NameNode,name=FSNamesystem.UnderReplicatedBlocks}" |
| } |
| ], |
| "properties": { |
| "warning_threshold": "0", |
| "error_threshold": "50" |
| } |
| }, |
| { |
| "widget_name": "HDFS Space Utilization", |
| "description": "Percentage of available space used in the DFS.", |
| "widget_type": "GAUGE", |
| "is_visible": true, |
| "metrics": [ |
| { |
| "name": "FSDatasetState.org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.DfsUsed", |
| "metric_path": "metrics/dfs/datanode/DfsUsed", |
| "service_name": "HDFS", |
| "component_name": "DATANODE" |
| }, |
| { |
| "name": "FSDatasetState.org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.Capacity", |
| "metric_path": "metrics/dfs/datanode/Capacity", |
| "service_name": "HDFS", |
| "component_name": "DATANODE" |
| } |
| ], |
| "values": [ |
| { |
| "name": "HDFS Space Utilization", |
| "value": "${FSDatasetState.org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.DfsUsed/FSDatasetState.org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.Capacity}" |
| } |
| ], |
| "properties": { |
| "warning_threshold": "0.75", |
| "error_threshold": "0.9" |
| } |
| } |
| ] |
| }, |
| { |
| "layout_name": "default_hdfs_heatmap", |
| "section_name": "HDFS_HEATMAPS", |
| "display_name": "HDFS Heatmaps", |
| "widgetLayoutInfo": [ |
| { |
| "widget_name": "HDFS Bytes Read", |
| "default_section_name": "HDFS_HEATMAPS", |
| "description": "", |
| "widget_type": "HEATMAP", |
| "is_visible": true, |
| "metrics": [ |
| { |
| "name": "dfs.datanode.BytesRead._rate", |
| "metric_path": "metrics/dfs/datanode/bytes_read._rate", |
| "service_name": "HDFS", |
| "component_name": "DATANODE" |
| } |
| ], |
| "values": [ |
| { |
| "name": "HDFS Bytes Read", |
| "value": "${dfs.datanode.BytesRead._rate}" |
| } |
| ], |
| "properties": { |
| "display_unit": "MB", |
| "max_limit": "1024" |
| } |
| }, |
| { |
| "widget_name": "HDFS Bytes Written", |
| "description": "", |
| "widget_type": "HEATMAP", |
| "is_visible": false, |
| "metrics": [ |
| { |
| "name": "dfs.datanode.BytesWritten._rate", |
| "metric_path": "metrics/dfs/datanode/bytes_written._rate", |
| "service_name": "HDFS", |
| "component_name": "DATANODE" |
| } |
| ], |
| "values": [ |
| { |
| "name": "HDFS Bytes Written", |
| "value": "${dfs.datanode.BytesWritten._rate}" |
| } |
| ], |
| "properties": { |
| "display_unit": "MB", |
| "max_limit": "1024" |
| } |
| }, |
| { |
| "widget_name": "DataNode Garbage Collection Time", |
| "description": "", |
| "widget_type": "HEATMAP", |
| "is_visible": false, |
| "metrics": [ |
| { |
| "name": "Hadoop:service=DataNode,name=JvmMetrics.GcTimeMillis", |
| "metric_path": "metrics/jvm/gcTimeMillis", |
| "service_name": "HDFS", |
| "component_name": "DATANODE" |
| } |
| ], |
| "values": [ |
| { |
| "name": "DataNode Garbage Collection Time", |
| "value": "${Hadoop:service=DataNode,name=JvmMetrics.GcTimeMillis}" |
| } |
| ], |
| "properties": { |
| "display_unit": "ms", |
| "max_limit": "10000" |
| } |
| }, |
| { |
| "widget_name": "DataNode JVM Heap Memory Used", |
| "description": "", |
| "widget_type": "HEATMAP", |
| "is_visible": false, |
| "metrics": [ |
| { |
| "name": "Hadoop:service=DataNode,name=JvmMetrics.MemHeapUsedM", |
| "metric_path": "metrics/jvm/memHeapUsedM", |
| "service_name": "HDFS", |
| "component_name": "DATANODE" |
| } |
| ], |
| "values": [ |
| { |
| "name": "DataNode JVM Heap Memory Used", |
| "value": "${Hadoop:service=DataNode,name=JvmMetrics.MemHeapUsedM}" |
| } |
| ], |
| "properties": { |
| "display_unit": "MB", |
| "max_limit": "512" |
| } |
| }, |
| { |
| "widget_name": "DataNode JVM Heap Memory Committed", |
| "description": "", |
| "widget_type": "HEATMAP", |
| "is_visible": false, |
| "metrics": [ |
| { |
| "name": "Hadoop:service=DataNode,name=JvmMetrics.MemHeapCommittedM", |
| "metric_path": "metrics/jvm/memHeapCommittedM", |
| "service_name": "HDFS", |
| "component_name": "DATANODE" |
| } |
| ], |
| "values": [ |
| { |
| "name": "DataNode JVM Heap Memory Committed", |
| "value": "${Hadoop:service=DataNode,name=JvmMetrics.MemHeapCommittedM}" |
| } |
| ], |
| "properties": { |
| "display_unit": "MB", |
| "max_limit": "512" |
| } |
| }, |
| { |
| "widget_name": "DataNode Process Disk I/O Utilization", |
| "default_section_name": "HDFS_HEATMAPS", |
| "description": "", |
| "widget_type": "HEATMAP", |
| "is_visible": false, |
| "metrics": [ |
| { |
| "name": "dfs.datanode.BytesRead._rate", |
| "metric_path": "metrics/dfs/datanode/bytes_read._rate", |
| "service_name": "HDFS", |
| "component_name": "DATANODE" |
| }, |
| { |
| "name": "dfs.datanode.BytesWritten._rate", |
| "metric_path": "metrics/dfs/datanode/bytes_written._rate", |
| "service_name": "HDFS", |
| "component_name": "DATANODE" |
| }, |
| { |
| "name": "dfs.datanode.TotalReadTime._rate", |
| "metric_path": "metrics/dfs/datanode/TotalReadTime._rate", |
| "service_name": "HDFS", |
| "component_name": "DATANODE" |
| }, |
| { |
| "name": "dfs.datanode.TotalWriteTime._rate", |
| "metric_path": "metrics/dfs/datanode/TotalWriteTime._rate", |
| "service_name": "HDFS", |
| "component_name": "DATANODE" |
| } |
| ], |
| "values": [ |
| { |
| "name": "DataNode Process Disk I/O Utilization", |
| "value": "${((dfs.datanode.BytesRead._rate/dfs.datanode.TotalReadTime._rate)+(dfs.datanode.BytesWritten._rate/dfs.datanode.TotalWriteTime._rate))*50}" |
| } |
| ], |
| "properties": { |
| "display_unit": "%", |
| "max_limit": "100" |
| } |
| }, |
| { |
| "widget_name": "DataNode Process Network I/O Utilization", |
| "description": "", |
| "widget_type": "HEATMAP", |
| "is_visible": false, |
| "metrics": [ |
| { |
| "name": "dfs.datanode.RemoteBytesRead._rate", |
| "metric_path": "metrics/dfs/datanode/RemoteBytesRead._rate", |
| "service_name": "HDFS", |
| "component_name": "DATANODE" |
| }, |
| { |
| "name": "dfs.datanode.ReadsFromRemoteClient._rate", |
| "metric_path": "metrics/dfs/datanode/reads_from_remote_client._rate", |
| "service_name": "HDFS", |
| "component_name": "DATANODE" |
| }, |
| { |
| "name": "dfs.datanode.RemoteBytesWritten._rate", |
| "metric_path": "metrics/dfs/datanode/RemoteBytesWritten._rate", |
| "service_name": "HDFS", |
| "component_name": "DATANODE" |
| }, |
| { |
| "name": "dfs.datanode.WritesFromRemoteClient._rate", |
| "metric_path": "metrics/dfs/datanode/writes_from_remote_client._rate", |
| "service_name": "HDFS", |
| "component_name": "DATANODE" |
| } |
| ], |
| "values": [ |
| { |
| "name": "DataNode Process Network I/O Utilization", |
| "value": "${((dfs.datanode.RemoteBytesRead._rate/dfs.datanode.ReadsFromRemoteClient._rate)+(dfs.datanode.RemoteBytesWritten._rate/dfs.datanode.WritesFromRemoteClient._rate))*50}" |
| } |
| ], |
| "properties": { |
| "display_unit": "%", |
| "max_limit": "100" |
| } |
| }, |
| { |
| "widget_name": "HDFS Space Utilization", |
| "widget_type": "HEATMAP", |
| "is_visible": false, |
| "metrics": [ |
| { |
| "name": "FSDatasetState.org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.DfsUsed", |
| "metric_path": "metrics/dfs/datanode/DfsUsed", |
| "service_name": "HDFS", |
| "component_name": "DATANODE" |
| }, |
| { |
| "name": "FSDatasetState.org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.Capacity", |
| "metric_path": "metrics/dfs/datanode/Capacity", |
| "service_name": "HDFS", |
| "component_name": "DATANODE" |
| } |
| ], |
| "values": [ |
| { |
| "name": "HDFS Space Utilization", |
| "value": "${(FSDatasetState.org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.DfsUsed/FSDatasetState.org.apache.hadoop.hdfs.server.datanode.fsdataset.impl.FsDatasetImpl.Capacity) * 100}" |
| } |
| ], |
| "properties": { |
| "display_unit": "%", |
| "max_limit": "100" |
| } |
| } |
| ] |
| } |
| ] |
| } |