blob: 869dad627e0330754cf05ff82608bcec5b4253b1 [file] [log] [blame]
import{_ as r,r as n,o as u,c as s,b as t,d as e,a as o,e as a}from"./app-Bp5kEZWW.js";const c={},i=a('<h1 id="监控告警" tabindex="-1"><a class="header-anchor" href="#监控告警"><span>监控告警</span></a></h1><p>在 IoTDB 的运行过程中,我们希望对 IoTDB 的状态进行观测,以便于排查系统问题或者及时发现系统潜在的风险,能够**反映系统运行状态的一系列指标<br> **就是系统监控指标。</p><h2 id="_1-什么场景下会使用到监控" tabindex="-1"><a class="header-anchor" href="#_1-什么场景下会使用到监控"><span>1. 什么场景下会使用到监控?</span></a></h2><p>那么什么时候会用到监控框架呢?下面列举一些常见的场景。</p><ol><li><p>系统变慢了</p><p>系统变慢几乎是最常见也最头疼的问题,这时候我们需要尽可能多的信息来帮助我们找到系统变慢的原因,比如:</p><ul><li>JVM信息:是不是有FGC?GC耗时多少?GC后内存有没有恢复?是不是有大量的线程?</li><li>系统信息:CPU使用率是不是太高了?磁盘IO是不是很频繁?</li><li>连接数:当前连接是不是太多?</li><li>接口:当前TPS是多少?各个接口耗时有没有变化?</li><li>线程池:系统中各种任务是否有积压?</li><li>缓存命中率</li></ul></li><li><p>磁盘快满了</p><p>这时候我们迫切想知道最近一段时间数据文件的增长情况,看看是不是某种文件有突增。</p></li><li><p>系统运行是否正常</p><p>此时我们可能需要通过错误日志的数量、集群节点的状态等指标来判断系统是否在正常运行。</p></li></ol><h2 id="_2-什么人需要使用监控" tabindex="-1"><a class="header-anchor" href="#_2-什么人需要使用监控"><span>2. 什么人需要使用监控?</span></a></h2><p>所有关注系统状态的人员都可以使用,包括但不限于研发、测试、运维、DBA等等</p><h2 id="_3-什么是监控指标" tabindex="-1"><a class="header-anchor" href="#_3-什么是监控指标"><span>3. 什么是监控指标?</span></a></h2><h3 id="_3-1-监控指标名词解释" tabindex="-1"><a class="header-anchor" href="#_3-1-监控指标名词解释"><span>3.1. 
监控指标名词解释</span></a></h3><p>在 IoTDB 的监控模块,每个监控指标被 <code>Metric Name</code> 和 <code>Tags</code> 唯一标识。</p><ul><li><code>Metric Name</code>:指标类型名称,比如<code>logback_events</code>表示日志事件。</li><li><code>Tags</code>:指标分类,形式为Key-Value对,每个指标下面可以有0到多个分类,常见的Key-Value对: <ul><li><code>name = xxx</code>:被监控对象的名称,是对<strong>业务逻辑</strong>的说明。比如对于<code>Metric Name = entry_seconds_count</code><br> 类型的监控项,name的含义是指被监控的业务接口。</li><li><code>type = xxx</code>:监控指标类型细分,是对<strong>监控指标</strong>本身的说明。比如对于<code>Metric Name = point</code><br> 类型的监控项,type的含义是指监控具体是什么类型的点数。</li><li><code>status = xxx</code>:被监控对象的状态,是对<strong>业务逻辑</strong>的说明。比如对于<code>Metric Name = Task</code>类型的监控项可以通过该参数,从而区分被监控对象的状态。</li><li><code>user = xxx</code>:被监控对象的相关用户,是对<strong>业务逻辑</strong>的说明。比如统计<code>root</code>用户的写入总点数。</li><li>根据具体情况自定义:比如logback_events_total下有一个level的分类,用来表示特定级别下的日志数量。</li></ul></li><li><code>Metric Level</code>:<strong>指标管理级别</strong>,默认启动级别为<code>Core</code>级别,建议启动级别为<code>Important级别</code><br> ,审核严格程度<code>Core &gt; Important &gt; Normal &gt; All</code><ul><li><code>Core</code>:系统的核心指标,供<strong>系统内核和运维人员</strong>使用,关乎系统的<strong>性能、稳定性、安全性</strong>,比如实例的状况,系统的负载等。</li><li><code>Important</code>:模块的重要指标,供<strong>运维和测试人员</strong>使用,直接关乎<strong>每个模块的运行状态</strong>,比如合并文件个数、执行情况等。</li><li><code>Normal</code>:模块的一般指标,供<strong>开发人员</strong>使用,方便在出现问题时<strong>定位模块</strong>,比如合并中的特定关键操作情况。</li><li><code>All</code>:模块的全部指标,供<strong>模块开发人员</strong>使用,往往在复现问题的时候使用,从而快速解决问题。</li></ul></li></ul><h3 id="_3-2-监控指标对外获取数据格式" tabindex="-1"><a class="header-anchor" href="#_3-2-监控指标对外获取数据格式"><span>3.2. 监控指标对外获取数据格式</span></a></h3><ul><li>IoTDB 对外提供 JMX、 Prometheus 和 IoTDB 格式的监控指标: <ul><li>对于 JMX ,可以通过<code>org.apache.iotdb.metrics</code>获取系统监控指标指标。</li><li>对于 Prometheus ,可以通过对外暴露的端口获取监控指标的值</li><li>对于 IoTDB 方式对外暴露:可以通过执行 IoTDB 的查询来获取监控指标</li></ul></li></ul><h2 id="_4-监控指标有哪些" tabindex="-1"><a class="header-anchor" href="#_4-监控指标有哪些"><span>4. 
监控指标有哪些?</span></a></h2>',14),l=t("br",null,null,-1),h=t("br",null,null,-1),m={href:"https://github.com/apache/iotdb/tree/master/metrics",target:"_blank",rel:"noopener noreferrer"},q=a('<h3 id="_4-1-core-级别监控指标" tabindex="-1"><a class="header-anchor" href="#_4-1-core-级别监控指标"><span>4.1. Core 级别监控指标</span></a></h3><p>Core 级别的监控指标在系统运行中默认开启,每一个 Core 级别的监控指标的添加都需要经过谨慎的评估,目前 Core 级别的监控指标如下所述:</p><h4 id="_4-1-1-集群运行状态" tabindex="-1"><a class="header-anchor" href="#_4-1-1-集群运行状态"><span>4.1.1. 集群运行状态</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>up_time</td><td>-</td><td>AutoGauge</td><td>IoTDB 启动的运行时间</td></tr><tr><td>config_node</td><td>name=&quot;total&quot;,status=&quot;Registered/Online/Unknown&quot;</td><td>AutoGauge</td><td>已注册/在线/离线 confignode 的节点数量</td></tr><tr><td>data_node</td><td>name=&quot;total&quot;,status=&quot;Registered/Online/Unknown&quot;</td><td>AutoGauge</td><td>已注册/在线/离线 datanode 的节点数量</td></tr><tr><td>cluster_node_leader_count</td><td>name=&quot;{ip}:{port}&quot;</td><td>Gauge</td><td>节点上共识组Leader的数量</td></tr><tr><td>cluster_node_status</td><td>name=&quot;{ip}:{port}&quot;,type=&quot;ConfigNode/DataNode&quot;</td><td>Gauge</td><td>节点的状态,0=Unknown 1=online</td></tr><tr><td>entry</td><td>name=&quot;{interface}&quot;</td><td>Timer</td><td>Client 建立的 Thrift 的耗时情况</td></tr><tr><td>mem</td><td>name=&quot;IoTConsensus&quot;</td><td>AutoGauge</td><td>IoT共识协议的内存占用,单位为byte</td></tr></tbody></table><h4 id="_4-1-2-接口层统计" tabindex="-1"><a class="header-anchor" href="#_4-1-2-接口层统计"><span>4.1.2. 
接口层统计</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>thrift_connections</td><td>name=&quot;ConfigNodeRPC&quot;</td><td>AutoGauge</td><td>ConfigNode 的内部 Thrift 连接数</td></tr><tr><td>thrift_connections</td><td>name=&quot;InternalRPC&quot;</td><td>AutoGauge</td><td>DataNode 的内部 Thrift 连接数</td></tr><tr><td>thrift_connections</td><td>name=&quot;MPPDataExchangeRPC&quot;</td><td>AutoGauge</td><td>MPP 框架的内部 Thrift 连接数</td></tr><tr><td>thrift_connections</td><td>name=&quot;ClientRPC&quot;</td><td>AutoGauge</td><td>Client 建立的 Thrift 连接数</td></tr><tr><td>thrift_active_threads</td><td>name=&quot;ConfigNodeRPC-Service&quot;</td><td>AutoGauge</td><td>ConfigNode 的内部活跃 Thrift 连接数</td></tr><tr><td>thrift_active_threads</td><td>name=&quot;DataNodeInternalRPC-Service&quot;</td><td>AutoGauge</td><td>DataNode 的内部活跃 Thrift 连接数</td></tr><tr><td>thrift_active_threads</td><td>name=&quot;MPPDataExchangeRPC-Service&quot;</td><td>AutoGauge</td><td>MPP 框架的内部活跃 Thrift 连接数</td></tr><tr><td>thrift_active_threads</td><td>name=&quot;ClientRPC-Service&quot;</td><td>AutoGauge</td><td>Client 建立的活跃 Thrift 连接数</td></tr><tr><td>session_idle_time</td><td>name = &quot;sessionId&quot;</td><td>Histogram</td><td>不同 Session 的空闲时间分布情况</td></tr></tbody></table><h4 id="_4-1-3-节点统计" tabindex="-1"><a class="header-anchor" href="#_4-1-3-节点统计"><span>4.1.3. 
节点统计</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>quantity</td><td>name=&quot;database&quot;</td><td>AutoGauge</td><td>系统数据库数量</td></tr><tr><td>quantity</td><td>name=&quot;timeSeries&quot;</td><td>AutoGauge</td><td>系统时间序列数量</td></tr><tr><td>quantity</td><td>name=&quot;pointsIn&quot;</td><td>Counter</td><td>系统累计写入点数</td></tr><tr><td>points</td><td>database=&quot;{database}&quot;, type=&quot;flush&quot;</td><td>Gauge</td><td>最新一个刷盘的memtable的点数</td></tr></tbody></table><h4 id="_4-1-4-集群全链路" tabindex="-1"><a class="header-anchor" href="#_4-1-4-集群全链路"><span>4.1.4. 集群全链路</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>performance_overview</td><td>interface=&quot;{interface}&quot;, type=&quot;{statement_type}&quot;</td><td>Timer</td><td>客户端执行的操作的耗时情况</td></tr><tr><td>performance_overview_detail</td><td>stage=&quot;authority&quot;</td><td>Timer</td><td>权限认证总耗时</td></tr><tr><td>performance_overview_detail</td><td>stage=&quot;parser&quot;</td><td>Timer</td><td>解析构造总耗时</td></tr><tr><td>performance_overview_detail</td><td>stage=&quot;analyzer&quot;</td><td>Timer</td><td>语句分析总耗时</td></tr><tr><td>performance_overview_detail</td><td>stage=&quot;planner&quot;</td><td>Timer</td><td>请求规划总耗时</td></tr><tr><td>performance_overview_detail</td><td>stage=&quot;scheduler&quot;</td><td>Timer</td><td>请求执行总耗时</td></tr><tr><td>performance_overview_schedule_detail</td><td>stage=&quot;local_scheduler&quot;</td><td>Timer</td><td>本地请求执行总耗时</td></tr><tr><td>performance_overview_schedule_detail</td><td>stage=&quot;remote_scheduler&quot;</td><td>Timer</td><td>远程请求执行总耗时</td></tr><tr><td>performance_overview_local_detail</td><td>stage=&quot;schema_validate&quot;</td><td>Timer</td><td>元数据验证总耗时</td></tr><tr><td>performance_overview_local_detail</td><td>stage=&quot;trigger&quot;</td><td>Timer</td><td>Trigger 
触发总耗时</td></tr><tr><td>performance_overview_local_detail</td><td>stage=&quot;storage&quot;</td><td>Timer</td><td>共识层总耗时</td></tr><tr><td>performance_overview_storage_detail</td><td>stage=&quot;engine&quot;</td><td>Timer</td><td>DataRegion 抢锁总耗时</td></tr><tr><td>performance_overview_engine_detail</td><td>stage=&quot;lock&quot;</td><td>Timer</td><td>DataRegion 抢锁总耗时</td></tr><tr><td>performance_overview_engine_detail</td><td>stage=&quot;create_memtable_block&quot;</td><td>Timer</td><td>创建新的 Memtable 耗时</td></tr><tr><td>performance_overview_engine_detail</td><td>stage=&quot;memory_block&quot;</td><td>Timer</td><td>内存控制阻塞总耗时</td></tr><tr><td>performance_overview_engine_detail</td><td>stage=&quot;wal&quot;</td><td>Timer</td><td>写入 Wal 总耗时</td></tr><tr><td>performance_overview_engine_detail</td><td>stage=&quot;memtable&quot;</td><td>Timer</td><td>写入 Memtable 总耗时</td></tr><tr><td>performance_overview_engine_detail</td><td>stage=&quot;last_cache&quot;</td><td>Timer</td><td>更新 LastCache 总耗时</td></tr></tbody></table><h4 id="_4-1-5-任务统计" tabindex="-1"><a class="header-anchor" href="#_4-1-5-任务统计"><span>4.1.5. 任务统计</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>queue</td><td>name=&quot;compaction_inner&quot;, status=&quot;running/waiting&quot;</td><td>Gauge</td><td>空间内合并任务数</td></tr><tr><td>queue</td><td>name=&quot;compaction_cross&quot;, status=&quot;running/waiting&quot;</td><td>Gauge</td><td>跨空间合并任务数</td></tr><tr><td>queue</td><td>name=&quot;flush&quot;,status=&quot;running/waiting&quot;</td><td>AutoGauge</td><td>刷盘任务数</td></tr><tr><td>cost_task</td><td>name=&quot;inner_compaction/cross_compaction/flush&quot;</td><td>Gauge</td><td>任务耗时情况</td></tr></tbody></table><h4 id="_4-1-6-iotdb-进程运行状态" tabindex="-1"><a class="header-anchor" href="#_4-1-6-iotdb-进程运行状态"><span>4.1.6. 
IoTDB 进程运行状态</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>process_cpu_load</td><td>name=&quot;process&quot;</td><td>AutoGauge</td><td>IoTDB 进程的 CPU 占用率,单位为%</td></tr><tr><td>process_cpu_time</td><td>name=&quot;process&quot;</td><td>AutoGauge</td><td>IoTDB 进程占用的 CPU 时间,单位为ns</td></tr><tr><td>process_max_mem</td><td>name=&quot;memory&quot;</td><td>AutoGauge</td><td>IoTDB 进程最大可用内存</td></tr><tr><td>process_total_mem</td><td>name=&quot;memory&quot;</td><td>AutoGauge</td><td>IoTDB 进程当前已申请内存</td></tr><tr><td>process_free_mem</td><td>name=&quot;memory&quot;</td><td>AutoGauge</td><td>IoTDB 进程当前剩余可用内存</td></tr></tbody></table><h4 id="_4-1-7-系统运行状态" tabindex="-1"><a class="header-anchor" href="#_4-1-7-系统运行状态"><span>4.1.7. 系统运行状态</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>sys_cpu_load</td><td>name=&quot;system&quot;</td><td>AutoGauge</td><td>系统的 CPU 占用率,单位为%</td></tr><tr><td>sys_cpu_cores</td><td>name=&quot;system&quot;</td><td>Gauge</td><td>系统的可用处理器数</td></tr><tr><td>sys_total_physical_memory_size</td><td>name=&quot;memory&quot;</td><td>Gauge</td><td>系统的最大物理内存</td></tr><tr><td>sys_free_physical_memory_size</td><td>name=&quot;memory&quot;</td><td>AutoGauge</td><td>系统的剩余可用内存</td></tr><tr><td>sys_total_swap_space_size</td><td>name=&quot;memory&quot;</td><td>AutoGauge</td><td>系统的交换区最大空间</td></tr><tr><td>sys_free_swap_space_size</td><td>name=&quot;memory&quot;</td><td>AutoGauge</td><td>系统的交换区剩余可用空间</td></tr><tr><td>sys_committed_vm_size</td><td>name=&quot;memory&quot;</td><td>AutoGauge</td><td>系统保证可用于正在运行的进程的虚拟内存量</td></tr><tr><td>sys_disk_total_space</td><td>name=&quot;disk&quot;</td><td>AutoGauge</td><td>系统磁盘总大小</td></tr><tr><td>sys_disk_free_space</td><td>name=&quot;disk&quot;</td><td>AutoGauge</td><td>系统磁盘可用大小</td></tr></tbody></table><h4 id="_4-1-8-iotdb-日志统计" tabindex="-1"><a class="header-anchor" 
href="#_4-1-8-iotdb-日志统计"><span>4.1.8. IoTDB 日志统计</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>logback_events</td><td>level=&quot;trace/debug/info/warn/error&quot;</td><td>Counter</td><td>不同类型的日志个数</td></tr></tbody></table><h4 id="_4-1-9-文件统计信息" tabindex="-1"><a class="header-anchor" href="#_4-1-9-文件统计信息"><span>4.1.9. 文件统计信息</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>file_size</td><td>name=&quot;wal&quot;</td><td>AutoGauge</td><td>写前日志总大小,单位为byte</td></tr><tr><td>file_size</td><td>name=&quot;seq&quot;</td><td>AutoGauge</td><td>顺序TsFile总大小,单位为byte</td></tr><tr><td>file_size</td><td>name=&quot;unseq&quot;</td><td>AutoGauge</td><td>乱序TsFile总大小,单位为byte</td></tr><tr><td>file_size</td><td>name=&quot;inner-seq-temp&quot;</td><td>AutoGauge</td><td>顺序空间内合并临时文件大小,单位为byte</td></tr><tr><td>file_size</td><td>name=&quot;inner-unseq-temp&quot;</td><td>AutoGauge</td><td>乱序空间内合并临时文件大小,单位为byte</td></tr><tr><td>file_size</td><td>name=&quot;cross-temp&quot;</td><td>AutoGauge</td><td>跨空间合并临时文件大小,单位为byte</td></tr><tr><td>file_size</td><td>name=&quot;mods&quot;</td><td>AutoGauge</td><td>Modification 文件的大小</td></tr><tr><td>file_count</td><td>name=&quot;wal&quot;</td><td>AutoGauge</td><td>写前日志文件个数</td></tr><tr><td>file_count</td><td>name=&quot;seq&quot;</td><td>AutoGauge</td><td>顺序TsFile文件个数</td></tr><tr><td>file_count</td><td>name=&quot;unseq&quot;</td><td>AutoGauge</td><td>乱序TsFile文件个数</td></tr><tr><td>file_count</td><td>name=&quot;inner-seq-temp&quot;</td><td>AutoGauge</td><td>顺序空间内合并临时文件个数</td></tr><tr><td>file_count</td><td>name=&quot;inner-unseq-temp&quot;</td><td>AutoGauge</td><td>乱序空间内合并临时文件个数</td></tr><tr><td>file_count</td><td>name=&quot;cross-temp&quot;</td><td>AutoGauge</td><td>跨空间合并临时文件个数</td></tr><tr><td>file_count</td><td>name=&quot;open_file_handlers&quot;</td><td>AutoGauge</td><td>IoTDB 
进程打开文件数,仅支持Linux和MacOS</td></tr><tr><td>file_count</td><td>name=&quot;mods&quot;</td><td>AutoGauge</td><td>Modification 文件的数目</td></tr></tbody></table><h4 id="_4-1-10-jvm-内存统计" tabindex="-1"><a class="header-anchor" href="#_4-1-10-jvm-内存统计"><span>4.1.10. JVM 内存统计</span></a></h4>',21),_=t("table",null,[t("thead",null,[t("tr",null,[t("th",null,"Metric"),t("th",null,"Tags"),t("th",null,"Type"),t("th",null,"Description")])]),t("tbody",null,[t("tr",null,[t("td",null,"jvm_buffer_memory_used_bytes"),t("td",null,'id="direct/mapped"'),t("td",null,"AutoGauge"),t("td",null,"已经使用的缓冲区大小")]),t("tr",null,[t("td",null,"jvm_buffer_total_capacity_bytes"),t("td",null,'id="direct/mapped"'),t("td",null,"AutoGauge"),t("td",null,"最大缓冲区大小")]),t("tr",null,[t("td",null,"jvm_buffer_count_buffers"),t("td",null,'id="direct/mapped"'),t("td",null,"AutoGauge"),t("td",null,"当前缓冲区数量")]),t("tr",null,[t("td",null,"jvm_memory_committed_bytes"),t("td",{area:'heap/nonheap,id="xxx",'}),t("td",null,"AutoGauge"),t("td",null,"当前申请的内存大小")]),t("tr",null,[t("td",null,"jvm_memory_max_bytes"),t("td",{area:'heap/nonheap,id="xxx",'}),t("td",null,"AutoGauge"),t("td",null,"最大内存")]),t("tr",null,[t("td",null,"jvm_memory_used_bytes"),t("td",{area:'heap/nonheap,id="xxx",'}),t("td",null,"AutoGauge"),t("td",null,"已使用内存大小")])])],-1),p=a('<h4 id="_4-1-11-jvm-线程统计" tabindex="-1"><a class="header-anchor" href="#_4-1-11-jvm-线程统计"><span>4.1.11. 
JVM 线程统计</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>jvm_threads_live_threads</td><td></td><td>AutoGauge</td><td>当前线程数</td></tr><tr><td>jvm_threads_daemon_threads</td><td></td><td>AutoGauge</td><td>当前 Daemon 线程数</td></tr><tr><td>jvm_threads_peak_threads</td><td></td><td>AutoGauge</td><td>峰值线程数</td></tr><tr><td>jvm_threads_states_threads</td><td>state=&quot;runnable/blocked/waiting/timed-waiting/new/terminated&quot;</td><td>AutoGauge</td><td>当前处于各种状态的线程数</td></tr></tbody></table><h4 id="_4-1-12-jvm-gc-统计" tabindex="-1"><a class="header-anchor" href="#_4-1-12-jvm-gc-统计"><span>4.1.12. JVM GC 统计</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>jvm_gc_pause</td><td>action=&quot;end of major GC/end of minor GC&quot;,cause=&quot;xxxx&quot;</td><td>Timer</td><td>不同原因的Young GC/Full GC的次数与耗时</td></tr><tr><td></td><td></td><td></td><td></td></tr><tr><td>jvm_gc_concurrent_phase_time</td><td>action=&quot;{action}&quot;,cause=&quot;{cause}&quot;</td><td>Timer</td><td>不同原因的Young GC/Full GC的次数与耗时</td></tr><tr><td></td><td></td><td></td><td></td></tr><tr><td>jvm_gc_max_data_size_bytes</td><td></td><td>AutoGauge</td><td>老年代内存的历史最大值</td></tr><tr><td>jvm_gc_live_data_size_bytes</td><td></td><td>AutoGauge</td><td>老年代内存的使用值</td></tr><tr><td>jvm_gc_memory_promoted_bytes</td><td></td><td>Counter</td><td>老年代内存正向增长累计值</td></tr><tr><td>jvm_gc_memory_allocated_bytes</td><td></td><td>Counter</td><td>GC分配内存正向增长累计值</td></tr></tbody></table><h3 id="_4-2-important-级别监控指标" tabindex="-1"><a class="header-anchor" href="#_4-2-important-级别监控指标"><span>4.2. Important 级别监控指标</span></a></h3><p>目前 Important 级别的监控指标如下所述:</p><h4 id="_4-2-1-节点统计" tabindex="-1"><a class="header-anchor" href="#_4-2-1-节点统计"><span>4.2.1. 
节点统计</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>region</td><td>name=&quot;total&quot;,type=&quot;SchemaRegion&quot;</td><td>AutoGauge</td><td>分区表中 SchemaRegion 总数量</td></tr><tr><td>region</td><td>name=&quot;total&quot;,type=&quot;DataRegion&quot;</td><td>AutoGauge</td><td>分区表中 DataRegion 总数量</td></tr><tr><td>region</td><td>name=&quot;{ip}:{port}&quot;,type=&quot;SchemaRegion&quot;</td><td>Gauge</td><td>分区表中对应节点上 SchemaRegion 总数量</td></tr><tr><td>region</td><td>name=&quot;{ip}:{port}&quot;,type=&quot;DataRegion&quot;</td><td>Gauge</td><td>分区表中对应节点上 DataRegion 总数量</td></tr></tbody></table><h4 id="_4-2-2-ratis共识协议统计" tabindex="-1"><a class="header-anchor" href="#_4-2-2-ratis共识协议统计"><span>4.2.2. Ratis共识协议统计</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>ratis_consensus_write</td><td>stage=&quot;writeLocally&quot;</td><td>Timer</td><td>本地写入阶段的时间</td></tr><tr><td>ratis_consensus_write</td><td>stage=&quot;writeRemotely&quot;</td><td>Timer</td><td>远程写入阶段的时间</td></tr><tr><td>ratis_consensus_write</td><td>stage=&quot;writeStateMachine&quot;</td><td>Timer</td><td>写入状态机阶段的时间</td></tr><tr><td>ratis_server</td><td>clientWriteRequest</td><td>Timer</td><td>处理来自客户端写请求的时间</td></tr><tr><td>ratis_server</td><td>followerAppendEntryLatency</td><td>Timer</td><td>跟随者追加日志条目的总时间</td></tr><tr><td>ratis_log_worker</td><td>appendEntryLatency</td><td>Timer</td><td>领导者追加日志条目的总时间</td></tr><tr><td>ratis_log_worker</td><td>queueingDelay</td><td>Timer</td><td>一个 Raft 日志操作被请求后进入队列的时间,等待队列未满</td></tr><tr><td>ratis_log_worker</td><td>enqueuedTime</td><td>Timer</td><td>一个 Raft 日志操作在队列中的时间</td></tr><tr><td>ratis_log_worker</td><td>writelogExecutionTime</td><td>Timer</td><td>一个 Raft 
日志写入操作完成执行的时间</td></tr><tr><td>ratis_log_worker</td><td>flushTime</td><td>Timer</td><td>刷新日志的时间</td></tr><tr><td>ratis_log_worker</td><td>closedSegmentsSizeInBytes</td><td>Gauge</td><td>关闭的 Raft 日志段的总大小</td></tr><tr><td>ratis_log_worker</td><td>openSegmentSizeInBytes</td><td>Gauge</td><td>打开的 Raft 日志段的总大小</td></tr></tbody></table><h4 id="_4-2-3-iot共识协议统计" tabindex="-1"><a class="header-anchor" href="#_4-2-3-iot共识协议统计"><span>4.2.3. IoT共识协议统计</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>iot_consensus</td><td>name=&quot;logDispatcher-{IP}:{Port}&quot;, region=&quot;{region}&quot;, type=&quot;currentSyncIndex&quot;</td><td>AutoGauge</td><td>副本组同步线程的当前同步进度</td></tr><tr><td>iot_consensus</td><td>name=&quot;logDispatcher-{IP}:{Port}&quot;, region=&quot;{region}&quot;, type=&quot;cachedRequestInMemoryQueue&quot;</td><td>AutoGauge</td><td>副本组同步线程缓存队列请求总大小</td></tr><tr><td>iot_consensus</td><td>name=&quot;IoTConsensusServerImpl&quot;, region=&quot;{region}&quot;, type=&quot;searchIndex&quot;</td><td>AutoGauge</td><td>副本组主流程写入进度</td></tr><tr><td>iot_consensus</td><td>name=&quot;IoTConsensusServerImpl&quot;, region=&quot;{region}&quot;, type=&quot;safeIndex&quot;</td><td>AutoGauge</td><td>副本组同步进度</td></tr><tr><td>iot_consensus</td><td>name=&quot;IoTConsensusServerImpl&quot;, region=&quot;{region}&quot;, type=&quot;syncLag&quot;</td><td>AutoGauge</td><td>副本组写入进度与同步进度差</td></tr><tr><td>iot_consensus</td><td>name=&quot;IoTConsensusServerImpl&quot;, region=&quot;{region}&quot;, type=&quot;LogEntriesFromWAL&quot;</td><td>AutoGauge</td><td>副本组Batch中来自WAL的日志项数量</td></tr><tr><td>iot_consensus</td><td>name=&quot;IoTConsensusServerImpl&quot;, region=&quot;{region}&quot;, type=&quot;LogEntriesFromQueue&quot;</td><td>AutoGauge</td><td>副本组Batch中来自队列的日志项数量</td></tr><tr><td>stage</td><td>name=&quot;iot_consensus&quot;, region=&quot;{region}&quot;, 
type=&quot;getStateMachineLock&quot;</td><td>Histogram</td><td>主流程获取状态机锁耗时</td></tr><tr><td>stage</td><td>name=&quot;iot_consensus&quot;, region=&quot;{region}&quot;, type=&quot;checkingBeforeWrite&quot;</td><td>Histogram</td><td>主流程写入状态机检查耗时</td></tr><tr><td>stage</td><td>name=&quot;iot_consensus&quot;, region=&quot;{region}&quot;, type=&quot;writeStateMachine&quot;</td><td>Histogram</td><td>主流程写入状态机耗时</td></tr><tr><td>stage</td><td>name=&quot;iot_consensus&quot;, region=&quot;{region}&quot;, type=&quot;offerRequestToQueue&quot;</td><td>Histogram</td><td>主流程尝试添加队列耗时</td></tr><tr><td>stage</td><td>name=&quot;iot_consensus&quot;, region=&quot;{region}&quot;, type=&quot;consensusWrite&quot;</td><td>Histogram</td><td>主流程全写入耗时</td></tr><tr><td>stage</td><td>name=&quot;iot_consensus&quot;, region=&quot;{region}&quot;, type=&quot;constructBatch&quot;</td><td>Histogram</td><td>同步线程构造 Batch 耗时</td></tr><tr><td>stage</td><td>name=&quot;iot_consensus&quot;, region=&quot;{region}&quot;, type=&quot;syncLogTimePerRequest&quot;</td><td>Histogram</td><td>异步回调流程同步日志耗时</td></tr></tbody></table><h4 id="_4-2-4-缓存统计" tabindex="-1"><a class="header-anchor" href="#_4-2-4-缓存统计"><span>4.2.4. 
缓存统计</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>cache_hit</td><td>name=&quot;chunk&quot;</td><td>AutoGauge</td><td>ChunkCache的命中率,单位为%</td></tr><tr><td>cache_hit</td><td>name=&quot;timeSeriesMeta&quot;</td><td>AutoGauge</td><td>TimeseriesMetadataCache的命中率,单位为%</td></tr><tr><td>cache_hit</td><td>name=&quot;bloomFilter&quot;</td><td>AutoGauge</td><td>TimeseriesMetadataCache中的bloomFilter的拦截率,单位为%</td></tr><tr><td>cache</td><td>name=&quot;Database&quot;, type=&quot;hit&quot;</td><td>Counter</td><td>Database Cache 的命中次数</td></tr><tr><td>cache</td><td>name=&quot;Database&quot;, type=&quot;all&quot;</td><td>Counter</td><td>Database Cache 的访问次数</td></tr><tr><td>cache</td><td>name=&quot;SchemaPartition&quot;, type=&quot;hit&quot;</td><td>Counter</td><td>SchemaPartition Cache 的命中次数</td></tr><tr><td>cache</td><td>name=&quot;SchemaPartition&quot;, type=&quot;all&quot;</td><td>Counter</td><td>SchemaPartition Cache 的访问次数</td></tr><tr><td>cache</td><td>name=&quot;DataPartition&quot;, type=&quot;hit&quot;</td><td>Counter</td><td>DataPartition Cache 的命中次数</td></tr><tr><td>cache</td><td>name=&quot;DataPartition&quot;, type=&quot;all&quot;</td><td>Counter</td><td>DataPartition Cache 的访问次数</td></tr><tr><td>cache</td><td>name=&quot;schemaCache&quot;, type=&quot;hit&quot;</td><td>Counter</td><td>Schema Cache 的命中次数</td></tr><tr><td>cache</td><td>name=&quot;schemaCache&quot;, type=&quot;all&quot;</td><td>Counter</td><td>Schema Cache 的访问次数</td></tr></tbody></table><h4 id="_4-2-5-内存统计" tabindex="-1"><a class="header-anchor" href="#_4-2-5-内存统计"><span>4.2.5. 
内存统计</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>mem</td><td>name=&quot;database_{name}&quot;</td><td>AutoGauge</td><td>DataNode内对应DataRegion的内存占用,单位为byte</td></tr><tr><td>mem</td><td>name=&quot;chunkMetaData_{name}&quot;</td><td>AutoGauge</td><td>写入TsFile时的ChunkMetaData的内存占用,单位为byte</td></tr><tr><td>mem</td><td>name=&quot;IoTConsensus&quot;</td><td>AutoGauge</td><td>IoT共识协议的内存占用,单位为byte</td></tr><tr><td>mem</td><td>name=&quot;IoTConsensusQueue&quot;</td><td>AutoGauge</td><td>IoT共识协议用于队列的内存占用,单位为byte</td></tr><tr><td>mem</td><td>name=&quot;IoTConsensusSync&quot;</td><td>AutoGauge</td><td>IoT共识协议用于同步的内存占用,单位为byte</td></tr><tr><td>mem</td><td>name=&quot;schema_region_total_usage&quot;</td><td>AutoGauge</td><td>所有SchemaRegion的总内存占用,单位为byte</td></tr></tbody></table><h4 id="_4-2-6-合并统计" tabindex="-1"><a class="header-anchor" href="#_4-2-6-合并统计"><span>4.2.6. 合并统计</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>data_written</td><td>name=&quot;compaction&quot;, type=&quot;aligned/not-aligned/total&quot;</td><td>Counter</td><td>合并时写入量</td></tr><tr><td>data_read</td><td>name=&quot;compaction&quot;</td><td>Counter</td><td>合并时的读取量</td></tr><tr><td>compaction_task_count</td><td>name = &quot;inner_compaction&quot;, type=&quot;sequence&quot;</td><td>Counter</td><td>顺序空间内合并次数</td></tr><tr><td>compaction_task_count</td><td>name = &quot;inner_compaction&quot;, type=&quot;unsequence&quot;</td><td>Counter</td><td>乱序空间内合并次数</td></tr><tr><td>compaction_task_count</td><td>name = &quot;cross_compaction&quot;, type=&quot;cross&quot;</td><td>Counter</td><td>跨空间合并次数</td></tr></tbody></table><h4 id="_4-2-7-iotdb-进程统计" tabindex="-1"><a class="header-anchor" href="#_4-2-7-iotdb-进程统计"><span>4.2.7. 
IoTDB 进程统计</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>process_used_mem</td><td>name=&quot;memory&quot;</td><td>AutoGauge</td><td>IoTDB 进程当前使用内存</td></tr><tr><td>process_mem_ratio</td><td>name=&quot;memory&quot;</td><td>AutoGauge</td><td>IoTDB 进程的内存占用比例</td></tr><tr><td>process_threads_count</td><td>name=&quot;process&quot;</td><td>AutoGauge</td><td>IoTDB 进程当前线程数</td></tr><tr><td>process_status</td><td>name=&quot;process&quot;</td><td>AutoGauge</td><td>IoTDB 进程存活状态,1为存活,0为终止</td></tr></tbody></table><h4 id="_4-2-8-jvm-类加载统计" tabindex="-1"><a class="header-anchor" href="#_4-2-8-jvm-类加载统计"><span>4.2.8. JVM 类加载统计</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>jvm_classes_unloaded_classes</td><td></td><td>AutoGauge</td><td>累计卸载的class数量</td></tr><tr><td>jvm_classes_loaded_classes</td><td></td><td>AutoGauge</td><td>累计加载的class数量</td></tr></tbody></table><h4 id="_4-2-9-jvm-编译时间统计" tabindex="-1"><a class="header-anchor" href="#_4-2-9-jvm-编译时间统计"><span>4.2.9. JVM 编译时间统计</span></a></h4>',23),g=t("table",null,[t("thead",null,[t("tr",null,[t("th",null,"Metric"),t("th",null,"Tags"),t("th",null,"Type"),t("th",null,"Description")])]),t("tbody",null,[t("tr",null,[t("td",null,"jvm_compilation_time_ms"),t("td",{compiler:"HotSpot 64-Bit Tiered Compilers,"}),t("td",null,"AutoGauge"),t("td",null,"耗费在编译上的时间")])])],-1),b=a(`<h4 id="_4-2-10-查询规划耗时统计" tabindex="-1"><a class="header-anchor" href="#_4-2-10-查询规划耗时统计"><span>4.2.10. 
查询规划耗时统计</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>query_plan_cost</td><td>stage=&quot;analyzer&quot;</td><td>Timer</td><td>查询语句分析耗时</td></tr><tr><td>query_plan_cost</td><td>stage=&quot;logical_planner&quot;</td><td>Timer</td><td>查询逻辑计划规划耗时</td></tr><tr><td>query_plan_cost</td><td>stage=&quot;distribution_planner&quot;</td><td>Timer</td><td>查询分布式执行计划规划耗时</td></tr><tr><td>query_plan_cost</td><td>stage=&quot;partition_fetcher&quot;</td><td>Timer</td><td>分区信息拉取耗时</td></tr><tr><td>query_plan_cost</td><td>stage=&quot;schema_fetcher&quot;</td><td>Timer</td><td>元数据信息拉取耗时</td></tr></tbody></table><h4 id="_4-2-11-执行计划分发耗时统计" tabindex="-1"><a class="header-anchor" href="#_4-2-11-执行计划分发耗时统计"><span>4.2.11. 执行计划分发耗时统计</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>dispatcher</td><td>stage=&quot;wait_for_dispatch&quot;</td><td>Timer</td><td>分发执行计划耗时</td></tr><tr><td>dispatcher</td><td>stage=&quot;dispatch_read&quot;</td><td>Timer</td><td>查询执行计划发送耗时</td></tr></tbody></table><h4 id="_4-2-12-查询资源访问统计" tabindex="-1"><a class="header-anchor" href="#_4-2-12-查询资源访问统计"><span>4.2.12. 查询资源访问统计</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>query_resource</td><td>type=&quot;sequence_tsfile&quot;</td><td>Rate</td><td>顺序文件访问频率</td></tr><tr><td>query_resource</td><td>type=&quot;unsequence_tsfile&quot;</td><td>Rate</td><td>乱序文件访问频率</td></tr><tr><td>query_resource</td><td>type=&quot;flushing_memtable&quot;</td><td>Rate</td><td>flushing memtable 访问频率</td></tr><tr><td>query_resource</td><td>type=&quot;working_memtable&quot;</td><td>Rate</td><td>working memtable 访问频率</td></tr></tbody></table><h4 id="_4-2-13-数据传输模块统计" tabindex="-1"><a class="header-anchor" href="#_4-2-13-数据传输模块统计"><span>4.2.13. 
数据传输模块统计</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>data_exchange_cost</td><td>operation=&quot;source_handle_get_tsblock&quot;, type=&quot;local/remote&quot;</td><td>Timer</td><td>source handle 接收 TsBlock 耗时</td></tr><tr><td>data_exchange_cost</td><td>operation=&quot;source_handle_deserialize_tsblock&quot;, type=&quot;local/remote&quot;</td><td>Timer</td><td>source handle 反序列化 TsBlock 耗时</td></tr><tr><td>data_exchange_cost</td><td>operation=&quot;sink_handle_send_tsblock&quot;, type=&quot;local/remote&quot;</td><td>Timer</td><td>sink handle 发送 TsBlock 耗时</td></tr><tr><td>data_exchange_cost</td><td>operation=&quot;send_new_data_block_event_task&quot;, type=&quot;server/caller&quot;</td><td>Timer</td><td>sink handle 发送 TsBlock RPC 耗时</td></tr><tr><td>data_exchange_cost</td><td>operation=&quot;get_data_block_task&quot;, type=&quot;server/caller&quot;</td><td>Timer</td><td>source handle 接收 TsBlock RPC 耗时</td></tr><tr><td>data_exchange_cost</td><td>operation=&quot;on_acknowledge_data_block_event_task&quot;, type=&quot;server/caller&quot;</td><td>Timer</td><td>source handle 确认接收 TsBlock RPC 耗时</td></tr><tr><td>data_exchange_count</td><td>name=&quot;send_new_data_block_num&quot;, type=&quot;server/caller&quot;</td><td>Histogram</td><td>sink handle 发送 TsBlock数量</td></tr><tr><td>data_exchange_count</td><td>name=&quot;get_data_block_num&quot;, type=&quot;server/caller&quot;</td><td>Histogram</td><td>source handle 接收 TsBlock 数量</td></tr><tr><td>data_exchange_count</td><td>name=&quot;on_acknowledge_data_block_num&quot;, type=&quot;server/caller&quot;</td><td>Histogram</td><td>source handle 确认接收 TsBlock 数量</td></tr></tbody></table><h4 id="_4-2-14-查询任务调度统计" tabindex="-1"><a class="header-anchor" href="#_4-2-14-查询任务调度统计"><span>4.2.14. 
查询任务调度统计</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>driver_scheduler</td><td>name=&quot;ready_queued_time&quot;</td><td>Timer</td><td>就绪队列排队时间</td></tr><tr><td>driver_scheduler</td><td>name=&quot;block_queued_time&quot;</td><td>Timer</td><td>阻塞队列排队时间</td></tr><tr><td>driver_scheduler</td><td>name=&quot;ready_queue_task_count&quot;</td><td>AutoGauge</td><td>就绪队列排队任务数</td></tr><tr><td>driver_scheduler</td><td>name=&quot;block_queued_task_count&quot;</td><td>AutoGauge</td><td>阻塞队列排队任务数</td></tr></tbody></table><h4 id="_4-2-15-查询执行耗时统计" tabindex="-1"><a class="header-anchor" href="#_4-2-15-查询执行耗时统计"><span>4.2.15. 查询执行耗时统计</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>query_execution</td><td>stage=&quot;local_execution_planner&quot;</td><td>Timer</td><td>算子树构造耗时</td></tr><tr><td>query_execution</td><td>stage=&quot;query_resource_init&quot;</td><td>Timer</td><td>查询资源初始化耗时</td></tr><tr><td>query_execution</td><td>stage=&quot;get_query_resource_from_mem&quot;</td><td>Timer</td><td>查询资源内存查询与构造耗时</td></tr><tr><td>query_execution</td><td>stage=&quot;driver_internal_process&quot;</td><td>Timer</td><td>Driver 执行耗时</td></tr><tr><td>query_execution</td><td>stage=&quot;wait_for_result&quot;</td><td>Timer</td><td>从resultHandle 获取一次查询结果的耗时</td></tr><tr><td>operator_execution_cost</td><td>name=&quot;{operator_name}&quot;</td><td>Timer</td><td>算子执行耗时</td></tr><tr><td>operator_execution_count</td><td>name=&quot;{operator_name}&quot;</td><td>Counter</td><td>算子调用次数(以 next 方法调用次数计算)</td></tr><tr><td>aggregation</td><td>from=&quot;raw_data&quot;</td><td>Timer</td><td>从一批原始数据进行一次聚合计算的耗时</td></tr><tr><td>aggregation</td><td>from=&quot;statistics&quot;</td><td>Timer</td><td>使用统计信息更新一次聚合值的耗时</td></tr><tr><td>series_scan_cost</td><td>stage=&quot;load_timeseries_metadata&quot;, type=&quot;aligned/non_aligned&quot;, 
from=&quot;mem/disk&quot;</td><td>Timer</td><td>加载 TimeseriesMetadata 耗时</td></tr><tr><td>series_scan_cost</td><td>stage=&quot;read_timeseries_metadata&quot;, type=&quot;&quot;, from=&quot;cache/file&quot;</td><td>Timer</td><td>读取一个文件的 Metadata 耗时</td></tr><tr><td>series_scan_cost</td><td>stage=&quot;timeseries_metadata_modification&quot;, type=&quot;aligned/non_aligned&quot;, from=&quot;null&quot;</td><td>Timer</td><td>过滤删除的 TimeseriesMetadata 耗时</td></tr><tr><td>series_scan_cost</td><td>stage=&quot;load_chunk_metadata_list&quot;, type=&quot;aligned/non_aligned&quot;, from=&quot;mem/disk&quot;</td><td>Timer</td><td>加载 ChunkMetadata 列表耗时</td></tr><tr><td>series_scan_cost</td><td>stage=&quot;chunk_metadata_modification&quot;, type=&quot;aligned/non_aligned&quot;, from=&quot;mem/disk&quot;</td><td>Timer</td><td>过滤删除的 ChunkMetadata 耗时</td></tr><tr><td>series_scan_cost</td><td>stage=&quot;chunk_metadata_filter&quot;, type=&quot;aligned/non_aligned&quot;, from=&quot;mem/disk&quot;</td><td>Timer</td><td>根据查询过滤条件过滤 ChunkMetadata 耗时</td></tr><tr><td>series_scan_cost</td><td>stage=&quot;construct_chunk_reader&quot;, type=&quot;aligned/non_aligned&quot;, from=&quot;mem/disk&quot;</td><td>Timer</td><td>构造 ChunkReader 耗时</td></tr><tr><td>series_scan_cost</td><td>stage=&quot;read_chunk&quot;, type=&quot;&quot;, from=&quot;cache/file&quot;</td><td>Timer</td><td>读取 Chunk 的耗时</td></tr><tr><td>series_scan_cost</td><td>stage=&quot;init_chunk_reader&quot;, type=&quot;aligned/non_aligned&quot;, from=&quot;mem/disk&quot;</td><td>Timer</td><td>初始化 ChunkReader(构造 PageReader) 耗时</td></tr><tr><td>series_scan_cost</td><td>stage=&quot;build_tsblock_from_page_reader&quot;, type=&quot;aligned/non_aligned&quot;, from=&quot;mem/disk&quot;</td><td>Timer</td><td>从 PageReader 构造 Tsblock 耗时</td></tr><tr><td>series_scan_cost</td><td>stage=&quot;build_tsblock_from_merge_reader&quot;, type=&quot;aligned/non_aligned&quot;, from=&quot;null&quot;</td><td>Timer</td><td>从 MergeReader 构造 Tsblock 
(解乱序数据)耗时</td></tr></tbody></table><h4 id="_4-2-16-元数据引擎统计" tabindex="-1"><a class="header-anchor" href="#_4-2-16-元数据引擎统计"><span>4.2.16 元数据引擎统计</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>schema_engine</td><td>name=&quot;schema_region_total_mem_usage&quot;</td><td>AutoGauge</td><td>SchemaRegion 全局内存使用量</td></tr><tr><td>schema_engine</td><td>name=&quot;schema_region_mem_capacity&quot;</td><td>AutoGauge</td><td>SchemaRegion 全局可用内存</td></tr><tr><td>schema_engine</td><td>name=&quot;schema_engine_mode&quot;</td><td>Gauge</td><td>SchemaEngine 模式</td></tr><tr><td>schema_engine</td><td>name=&quot;schema_region_consensus&quot;</td><td>Gauge</td><td>元数据管理引擎共识协议</td></tr><tr><td>schema_engine</td><td>name=&quot;schema_region_number&quot;</td><td>AutoGauge</td><td>SchemaRegion 个数</td></tr><tr><td>quantity</td><td>name=&quot;template_series_cnt&quot;</td><td>AutoGauge</td><td>模板序列数</td></tr><tr><td>schema_region</td><td>name=&quot;schema_region_mem_usage&quot;, region=&quot;SchemaRegion[{regionId}]&quot;</td><td>AutoGauge</td><td>每个 SchemaRegion 分别的内存使用量</td></tr><tr><td>schema_region</td><td>name=&quot;schema_region_series_cnt&quot;, region=&quot;SchemaRegion[{regionId}]&quot;</td><td>AutoGauge</td><td>每个 SchemaRegion 分别的时间序列数</td></tr><tr><td>schema_region</td><td>name=&quot;activated_template_cnt&quot;, region=&quot;SchemaRegion[{regionId}]&quot;</td><td>AutoGauge</td><td>每个 SchemaRegion 激活的模板数</td></tr><tr><td>schema_region</td><td>name=&quot;template_series_cnt&quot;, region=&quot;SchemaRegion[{regionId}]&quot;</td><td>AutoGauge</td><td>每个 SchemaRegion 的模板序列数</td></tr></tbody></table><h3 id="_4-3-normal-级别监控指标" tabindex="-1"><a class="header-anchor" href="#_4-3-normal-级别监控指标"><span>4.3. Normal 级别监控指标</span></a></h3><h4 id="_4-3-1-集群" tabindex="-1"><a class="header-anchor" href="#_4-3-1-集群"><span>4.3.1. 
集群</span></a></h4><table><thead><tr><th>Metric</th><th>Tags</th><th>Type</th><th>Description</th></tr></thead><tbody><tr><td>region</td><td>name=&quot;{DatabaseName}&quot;,type=&quot;SchemaRegion/DataRegion&quot;</td><td>AutoGauge</td><td>特定节点上不同 Database 的 DataRegion/SchemaRegion 个数</td></tr><tr><td>slot</td><td>name=&quot;{DatabaseName}&quot;,type=&quot;schemaSlotNumber/dataSlotNumber&quot;</td><td>AutoGauge</td><td>特定节点上不同 Database 的 DataSlot/SchemaSlot 个数</td></tr></tbody></table><h3 id="_4-4-all-级别监控指标" tabindex="-1"><a class="header-anchor" href="#_4-4-all-级别监控指标"><span>4.4. All 级别监控指标</span></a></h3><p>目前还没有All级别的监控指标,后续会持续添加。</p><h2 id="_5-怎样获取这些系统监控" tabindex="-1"><a class="header-anchor" href="#_5-怎样获取这些系统监控"><span>5. 怎样获取这些系统监控?</span></a></h2><ul><li>监控模块的相关配置均在<code>conf/iotdb-{datanode/confignode}.properties</code>中,所有配置项支持通过<code>load configuration</code>命令热加载。</li></ul><h3 id="_5-1-使用-jmx-方式" tabindex="-1"><a class="header-anchor" href="#_5-1-使用-jmx-方式"><span>5.1. 使用 JMX 方式</span></a></h3><p>对于使用 JMX 对外暴露的指标,可以通过 Jconsole 来进行查看。在进入 Jconsole 监控页面后,首先会看到 IoTDB<br> 的各类运行情况的概览。在这里,您可以看到堆内存信息、线程信息、类信息以及服务器的 CPU 使用情况。</p><h4 id="_5-1-1-获取监控指标数据" tabindex="-1"><a class="header-anchor" href="#_5-1-1-获取监控指标数据"><span>5.1.1. 获取监控指标数据</span></a></h4><p>连接到 JMX 后,您可以通过 &quot;MBeans&quot; 标签找到名为 &quot;org.apache.iotdb.metrics&quot; 的 &quot;MBean&quot;,可以在侧边栏中查看所有监控指标的具体值。</p><img style="width:100%;max-width:800px;max-height:600px;margin-left:auto;margin-right:auto;display:block;" alt="metric-jmx" src="https://alioss.timecho.com/docs/img/github/204018765-6fda9391-ebcf-4c80-98c5-26f34bd74df0.png"><h4 id="_5-1-2-获取其他相关数据" tabindex="-1"><a class="header-anchor" href="#_5-1-2-获取其他相关数据"><span>5.1.2. 
获取其他相关数据</span></a></h4><p>连接到 JMX 后,您可以通过 &quot;MBeans&quot; 标签找到名为 &quot;org.apache.iotdb.service&quot; 的 &quot;MBean&quot;,如下图所示,了解服务的基本状态</p><p><img style="width:100%;max-width:800px;max-height:600px;margin-left:auto;margin-right:auto;display:block;" src="https://alioss.timecho.com/docs/img/github/149951720-707f1ee8-32ee-4fde-9252-048caebd232e.png"> <br></p><p>为了提高查询性能,IOTDB 对 ChunkMetaData 和 TsFileMetaData 进行了缓存。用户可以使用 MXBean<br> ,展开侧边栏<code>org.apache.iotdb.db.service</code>查看缓存命中率:</p><img style="width:100%;max-width:800px;max-height:600px;margin-left:auto;margin-right:auto;display:block;" src="https://alioss.timecho.com/docs/img/github/112426760-73e3da80-8d73-11eb-9a8f-9232d1f2033b.png"><h3 id="_5-2-使用-prometheus-方式" tabindex="-1"><a class="header-anchor" href="#_5-2-使用-prometheus-方式"><span>5.2. 使用 Prometheus 方式</span></a></h3><h4 id="_5-2-1-监控指标的-prometheus-映射关系" tabindex="-1"><a class="header-anchor" href="#_5-2-1-监控指标的-prometheus-映射关系"><span>5.2.1. 监控指标的 Prometheus 映射关系</span></a></h4><blockquote><p>对于 Metric Name 为 name, Tags 为 K1=V1, ..., Kn=Vn 的监控指标有如下映射,其中 value 为具体值</p></blockquote><table><thead><tr><th>监控指标类型</th><th>映射关系</th></tr></thead><tbody><tr><td>Counter</td><td>name_total{cluster=&quot;clusterName&quot;, nodeType=&quot;nodeType&quot;, nodeId=&quot;nodeId&quot;,k1=&quot;V1&quot; , ..., Kn=&quot;Vn&quot;} value</td></tr><tr><td>AutoGauge、Gauge</td><td>name{cluster=&quot;clusterName&quot;, nodeType=&quot;nodeType&quot;, nodeId=&quot;nodeId&quot;,k1=&quot;V1&quot; , ..., Kn=&quot;Vn&quot;} value</td></tr><tr><td>Histogram</td><td>name_max{cluster=&quot;clusterName&quot;, nodeType=&quot;nodeType&quot;, nodeId=&quot;nodeId&quot;,k1=&quot;V1&quot; , ..., Kn=&quot;Vn&quot;} value <br> name_sum{cluster=&quot;clusterName&quot;, nodeType=&quot;nodeType&quot;, nodeId=&quot;nodeId&quot;,k1=&quot;V1&quot; , ..., Kn=&quot;Vn&quot;} value <br> name_count{cluster=&quot;clusterName&quot;, nodeType=&quot;nodeType&quot;, 
nodeId=&quot;nodeId&quot;,k1=&quot;V1&quot; , ..., Kn=&quot;Vn&quot;} value <br> name{cluster=&quot;clusterName&quot;, nodeType=&quot;nodeType&quot;, nodeId=&quot;nodeId&quot;,k1=&quot;V1&quot; , ..., Kn=&quot;Vn&quot;, quantile=&quot;0.5&quot;} value <br> name{cluster=&quot;clusterName&quot;, nodeType=&quot;nodeType&quot;, nodeId=&quot;nodeId&quot;,k1=&quot;V1&quot; , ..., Kn=&quot;Vn&quot;, quantile=&quot;0.99&quot;} value</td></tr><tr><td>Rate</td><td>name_total{cluster=&quot;clusterName&quot;, nodeType=&quot;nodeType&quot;, nodeId=&quot;nodeId&quot;,k1=&quot;V1&quot; , ..., Kn=&quot;Vn&quot;} value <br> name_total{cluster=&quot;clusterName&quot;, nodeType=&quot;nodeType&quot;, nodeId=&quot;nodeId&quot;,k1=&quot;V1&quot; , ..., Kn=&quot;Vn&quot;, rate=&quot;m1&quot;} value <br> name_total{cluster=&quot;clusterName&quot;, nodeType=&quot;nodeType&quot;, nodeId=&quot;nodeId&quot;,k1=&quot;V1&quot; , ..., Kn=&quot;Vn&quot;, rate=&quot;m5&quot;} value <br> name_total{cluster=&quot;clusterName&quot;, nodeType=&quot;nodeType&quot;, nodeId=&quot;nodeId&quot;,k1=&quot;V1&quot; , ..., Kn=&quot;Vn&quot;, rate=&quot;m15&quot;} value <br> name_total{cluster=&quot;clusterName&quot;, nodeType=&quot;nodeType&quot;, nodeId=&quot;nodeId&quot;,k1=&quot;V1&quot; , ..., Kn=&quot;Vn&quot;, rate=&quot;mean&quot;} value</td></tr><tr><td>Timer</td><td>name_seconds_max{cluster=&quot;clusterName&quot;, nodeType=&quot;nodeType&quot;, nodeId=&quot;nodeId&quot;,k1=&quot;V1&quot; , ..., Kn=&quot;Vn&quot;} value <br> name_seconds_sum{cluster=&quot;clusterName&quot;, nodeType=&quot;nodeType&quot;, nodeId=&quot;nodeId&quot;,k1=&quot;V1&quot; , ..., Kn=&quot;Vn&quot;} value <br> name_seconds_count{cluster=&quot;clusterName&quot;, nodeType=&quot;nodeType&quot;, nodeId=&quot;nodeId&quot;,k1=&quot;V1&quot; , ..., Kn=&quot;Vn&quot;} value <br> name_seconds{cluster=&quot;clusterName&quot;, nodeType=&quot;nodeType&quot;, nodeId=&quot;nodeId&quot;,k1=&quot;V1&quot; , ..., Kn=&quot;Vn&quot;, 
quantile=&quot;0.5&quot;} value <br> name_seconds{cluster=&quot;clusterName&quot;, nodeType=&quot;nodeType&quot;, nodeId=&quot;nodeId&quot;,k1=&quot;V1&quot; , ..., Kn=&quot;Vn&quot;, quantile=&quot;0.99&quot;} value</td></tr></tbody></table><h4 id="_5-2-2-修改配置文件" tabindex="-1"><a class="header-anchor" href="#_5-2-2-修改配置文件"><span>5.2.2. 修改配置文件</span></a></h4><ol><li>以 DataNode 为例,修改 iotdb-datanode.properties 配置文件如下:</li></ol><div class="language-properties line-numbers-mode" data-ext="properties" data-title="properties"><pre class="language-properties"><code><span class="token key attr-name">dn_metric_reporter_list</span><span class="token punctuation">=</span><span class="token value attr-value">PROMETHEUS</span>
<span class="token key attr-name">dn_metric_level</span><span class="token punctuation">=</span><span class="token value attr-value">CORE</span>
<span class="token key attr-name">dn_metric_prometheus_reporter_port</span><span class="token punctuation">=</span><span class="token value attr-value">9091</span>
</code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><ol start="2"><li><p>启动 IoTDB DataNode</p></li><li><p>打开浏览器或者用<code>curl</code> 访问 <code>http://server_ip:9091/metrics</code>, 就能得到如下 metric 数据:</p></li></ol><div class="language-text line-numbers-mode" data-ext="text" data-title="text"><pre class="language-text"><code>...
# HELP file_count
# TYPE file_count gauge
file_count{name=&quot;wal&quot;,} 0.0
file_count{name=&quot;unseq&quot;,} 0.0
file_count{name=&quot;seq&quot;,} 2.0
...
</code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><h4 id="_5-2-3-prometheus-grafana" tabindex="-1"><a class="header-anchor" href="#_5-2-3-prometheus-grafana"><span>5.2.3. Prometheus + Grafana</span></a></h4><p>如上所示,IoTDB 对外暴露出标准的 Prometheus 格式的监控指标数据,可以使用 Prometheus 采集并存储监控指标,使用 Grafana<br> 可视化监控指标。</p><p>IoTDB、Prometheus、Grafana三者的关系如下图所示:</p><figure><img src="https://alioss.timecho.com/docs/img/UserGuide/System-Tools/Metrics/iotdb_prometheus_grafana.png" alt="iotdb_prometheus_grafana" tabindex="0" loading="lazy"><figcaption>iotdb_prometheus_grafana</figcaption></figure><ol><li>IoTDB在运行过程中持续收集监控指标数据。</li><li>Prometheus以固定的间隔(可配置)从IoTDB的HTTP接口拉取监控指标数据。</li><li>Prometheus将拉取到的监控指标数据存储到自己的TSDB中。</li><li>Grafana以固定的间隔(可配置)从Prometheus查询监控指标数据并绘图展示。</li></ol><p>从交互流程可以看出,我们需要做一些额外的工作来部署和配置Prometheus和Grafana。</p><p>比如,你可以对Prometheus进行如下的配置(部分参数可以自行调整)来从IoTDB获取监控数据</p><div class="language-yaml line-numbers-mode" data-ext="yml" data-title="yml"><pre class="language-yaml"><code><span class="token key atrule">job_name</span><span class="token punctuation">:</span> pull<span class="token punctuation">-</span>metrics
<span class="token key atrule">honor_labels</span><span class="token punctuation">:</span> <span class="token boolean important">true</span>
<span class="token key atrule">honor_timestamps</span><span class="token punctuation">:</span> <span class="token boolean important">true</span>
<span class="token key atrule">scrape_interval</span><span class="token punctuation">:</span> 15s
<span class="token key atrule">scrape_timeout</span><span class="token punctuation">:</span> 10s
<span class="token key atrule">metrics_path</span><span class="token punctuation">:</span> /metrics
<span class="token key atrule">scheme</span><span class="token punctuation">:</span> http
<span class="token key atrule">follow_redirects</span><span class="token punctuation">:</span> <span class="token boolean important">true</span>
<span class="token key atrule">static_configs</span><span class="token punctuation">:</span>
<span class="token punctuation">-</span> <span class="token key atrule">targets</span><span class="token punctuation">:</span>
<span class="token punctuation">-</span> localhost<span class="token punctuation">:</span><span class="token number">9091</span>
</code></pre><div class="line-numbers" aria-hidden="true"><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div><div class="line-number"></div></div></div><p>更多细节可以参考下面的文档:</p>`,49),y={href:"https://prometheus.io/docs/prometheus/latest/getting_started/",target:"_blank",rel:"noopener noreferrer"},T={href:"https://prometheus.io/docs/prometheus/latest/configuration/configuration/#scrape_config",target:"_blank",rel:"noopener noreferrer"},f={href:"https://grafana.com/docs/grafana/latest/getting-started/getting-started/",target:"_blank",rel:"noopener noreferrer"},v={href:"https://prometheus.io/docs/visualization/grafana/#grafana-support-for-prometheus",target:"_blank",rel:"noopener noreferrer"},k=a('<h4 id="_5-2-4-apache-iotdb-dashboard" tabindex="-1"><a class="header-anchor" href="#_5-2-4-apache-iotdb-dashboard"><span>5.2.4. Apache IoTDB Dashboard</span></a></h4><p>我们提供了Apache IoTDB Dashboard,在Grafana中显示的效果图如下所示:</p><figure><img src="https://alioss.timecho.com/docs/img/UserGuide/System-Tools/Metrics/dashboard.png" alt="Apache IoTDB Dashboard" tabindex="0" loading="lazy"><figcaption>Apache IoTDB Dashboard</figcaption></figure><p>你可以在企业版中获取到 Dashboard 的 Json文件。</p><h3 id="_5-3-使用-iotdb-方式" tabindex="-1"><a class="header-anchor" href="#_5-3-使用-iotdb-方式"><span>5.3. 使用 IoTDB 方式</span></a></h3><h4 id="_5-3-1-监控指标的-iotdb-映射关系" tabindex="-1"><a class="header-anchor" href="#_5-3-1-监控指标的-iotdb-映射关系"><span>5.3.1. 
监控指标的 IoTDB 映射关系</span></a></h4><blockquote><p>对于 Metric Name 为 name, Tags 为 K1=V1, ..., Kn=Vn 的监控指标有如下映射,以默认写到 root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code> 为例</p></blockquote><table><thead><tr><th>监控指标类型</th><th>映射关系</th></tr></thead><tbody><tr><td>Counter</td><td>root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.value</td></tr><tr><td>AutoGauge、Gauge</td><td>root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.value</td></tr><tr><td>Histogram</td><td>root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.count <br> root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.max <br> root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.sum <br> root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.p0 <br> root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.p50 <br> root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.p75 <br> root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.p99 <br> root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.p999</td></tr><tr><td>Rate</td><td>root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.count <br> 
root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.mean <br> root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.m1 <br> root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.m5 <br> root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.m15</td></tr><tr><td>Timer</td><td>root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.count <br> root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.max <br> root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.mean <br> root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.sum <br> root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.p0 <br> root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.p50 <br> root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.p75 <br> root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.p99 <br> root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.p999 <br> 
root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.m1 <br> root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.m5 <br> root.__system.metric.<code>clusterName</code>.<code>nodeType</code>.<code>nodeId</code>.name.<code>K1=V1</code>...<code>Kn=Vn</code>.m15</td></tr></tbody></table><h4 id="_5-3-2-获取监控指标" tabindex="-1"><a class="header-anchor" href="#_5-3-2-获取监控指标"><span>5.3.2. 获取监控指标</span></a></h4><p>根据如上的映射关系,可以构成相关的 IoTDB 查询语句获取监控指标</p>',10);function I(x,G){const d=n("ExternalLinkIcon");return u(),s("div",null,[i,t("p",null,[e("目前,IoTDB 对外提供一些主要模块的监控指标,并且随着新功能的开发以及系统优化或者重构,监控指标也会同步添加和更新。如果想自己在"),l,e(" IoTDB"),h,e(" 中添加更多系统监控指标埋点,可以参考"),t("a",m,[e("IoTDB Metrics Framework"),o(d)]),e("使用说明。")]),q,_,p,g,b,t("p",null,[t("a",y,[e("Prometheus安装使用文档"),o(d)])]),t("p",null,[t("a",T,[e("Prometheus从HTTP接口拉取metrics数据的配置说明"),o(d)])]),t("p",null,[t("a",f,[e("Grafana安装使用文档"),o(d)])]),t("p",null,[t("a",v,[e("Grafana从Prometheus查询数据并绘图的文档"),o(d)])]),k])}const A=r(c,[["render",I],["__file","Metric-Tool.html.vue"]]),V=JSON.parse('{"path":"/zh/UserGuide/V1.1.x/Monitor-Alert/Metric-Tool.html","title":"监控告警","lang":"zh-CN","frontmatter":{"description":"监控告警 在 IoTDB 的运行过程中,我们希望对 IoTDB 的状态进行观测,以便于排查系统问题或者及时发现系统潜在的风险,能够**反映系统运行状态的一系列指标 **就是系统监控指标。 1. 什么场景下会使用到监控? 
那么什么时候会用到监控框架呢?下面列举一些常见的场景。 系统变慢了 系统变慢几乎是最常见也最头疼的问题,这时候我们需要尽可能多的信息来...","head":[["link",{"rel":"alternate","hreflang":"en-us","href":"https://iotdb.apache.org/UserGuide/V1.1.x/Monitor-Alert/Metric-Tool.html"}],["meta",{"property":"og:url","content":"https://iotdb.apache.org/zh/UserGuide/V1.1.x/Monitor-Alert/Metric-Tool.html"}],["meta",{"property":"og:site_name","content":"IoTDB Website"}],["meta",{"property":"og:title","content":"监控告警"}],["meta",{"property":"og:description","content":"监控告警 在 IoTDB 的运行过程中,我们希望对 IoTDB 的状态进行观测,以便于排查系统问题或者及时发现系统潜在的风险,能够**反映系统运行状态的一系列指标 **就是系统监控指标。 1. 什么场景下会使用到监控? 那么什么时候会用到监控框架呢?下面列举一些常见的场景。 系统变慢了 系统变慢几乎是最常见也最头疼的问题,这时候我们需要尽可能多的信息来..."}],["meta",{"property":"og:type","content":"article"}],["meta",{"property":"og:image","content":"https://alioss.timecho.com/docs/img/UserGuide/System-Tools/Metrics/iotdb_prometheus_grafana.png"}],["meta",{"property":"og:locale","content":"zh-CN"}],["meta",{"property":"og:locale:alternate","content":"en-US"}],["meta",{"property":"og:updated_time","content":"2023-07-10T03:11:17.000Z"}],["meta",{"property":"article:modified_time","content":"2023-07-10T03:11:17.000Z"}],["script",{"type":"application/ld+json"},"{\\"@context\\":\\"https://schema.org\\",\\"@type\\":\\"Article\\",\\"headline\\":\\"监控告警\\",\\"image\\":[\\"https://alioss.timecho.com/docs/img/UserGuide/System-Tools/Metrics/iotdb_prometheus_grafana.png\\",\\"https://alioss.timecho.com/docs/img/UserGuide/System-Tools/Metrics/dashboard.png\\"],\\"dateModified\\":\\"2023-07-10T03:11:17.000Z\\",\\"author\\":[]}"]]},"headers":[{"level":2,"title":"1. 什么场景下会使用到监控?","slug":"_1-什么场景下会使用到监控","link":"#_1-什么场景下会使用到监控","children":[]},{"level":2,"title":"2. 什么人需要使用监控?","slug":"_2-什么人需要使用监控","link":"#_2-什么人需要使用监控","children":[]},{"level":2,"title":"3. 什么是监控指标?","slug":"_3-什么是监控指标","link":"#_3-什么是监控指标","children":[{"level":3,"title":"3.1. 监控指标名词解释","slug":"_3-1-监控指标名词解释","link":"#_3-1-监控指标名词解释","children":[]},{"level":3,"title":"3.2. 
监控指标对外获取数据格式","slug":"_3-2-监控指标对外获取数据格式","link":"#_3-2-监控指标对外获取数据格式","children":[]}]},{"level":2,"title":"4. 监控指标有哪些?","slug":"_4-监控指标有哪些","link":"#_4-监控指标有哪些","children":[{"level":3,"title":"4.1. Core 级别监控指标","slug":"_4-1-core-级别监控指标","link":"#_4-1-core-级别监控指标","children":[]},{"level":3,"title":"4.2. Important 级别监控指标","slug":"_4-2-important-级别监控指标","link":"#_4-2-important-级别监控指标","children":[]},{"level":3,"title":"4.3. Normal 级别监控指标","slug":"_4-3-normal-级别监控指标","link":"#_4-3-normal-级别监控指标","children":[]},{"level":3,"title":"4.4. All 级别监控指标","slug":"_4-4-all-级别监控指标","link":"#_4-4-all-级别监控指标","children":[]}]},{"level":2,"title":"5. 怎样获取这些系统监控?","slug":"_5-怎样获取这些系统监控","link":"#_5-怎样获取这些系统监控","children":[{"level":3,"title":"5.1. 使用 JMX 方式","slug":"_5-1-使用-jmx-方式","link":"#_5-1-使用-jmx-方式","children":[]},{"level":3,"title":"5.2. 使用 Prometheus 方式","slug":"_5-2-使用-prometheus-方式","link":"#_5-2-使用-prometheus-方式","children":[]},{"level":3,"title":"5.3. 使用 IoTDB 方式","slug":"_5-3-使用-iotdb-方式","link":"#_5-3-使用-iotdb-方式","children":[]}]}],"git":{"createdTime":1688958677000,"updatedTime":1688958677000,"contributors":[{"name":"CritasWang","email":"critas@outlook.com","commits":1}]},"readingTime":{"minutes":20.58,"words":6173},"filePathRelative":"zh/UserGuide/V1.1.x/Monitor-Alert/Metric-Tool.md","localizedDate":"2023年7月10日","autoDesc":true}');export{A as comp,V as data};