blob: bde5b02f6f1de316451634c9bf3c042d9abd7b0c [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This will parse a textual representation of a duration. The formats
# accepted are based on the ISO-8601 duration format {@code PnDTnHnMn.nS}
# with days considered to be exactly 24 hours.
# <p>
# Examples:
# <pre>
# "PT20.345S" -- parses as "20.345 seconds"
# "PT15M" -- parses as "15 minutes" (where a minute is 60 seconds)
# "PT10H" -- parses as "10 hours" (where an hour is 3600 seconds)
# "P2D" -- parses as "2 days" (where a day is 24 hours or 86400 seconds)
# "P2DT3H4M" -- parses as "2 days, 3 hours and 4 minutes"
# "P-6H3M" -- parses as "-6 hours and +3 minutes"
# "-P6H3M" -- parses as "-6 hours and -3 minutes"
# "-P-6H+3M" -- parses as "+6 hours and -3 minutes"
# </pre>
expSuffix: instance(['app'], ['instance'], Layer.MESH_DP)
metricPrefix: envoy
metricsRules:
- name: heap_memory_used
exp: server_memory_heap_size
- name: heap_memory_max_used
exp: server_memory_heap_size.max(['app', 'instance'])
- name: memory_allocated
exp: server_memory_allocated
- name: memory_allocated_max
exp: server_memory_allocated.max(['app', 'instance'])
- name: memory_physical_size
exp: server_memory_physical_size
- name: memory_physical_size_max
exp: server_memory_physical_size.max(['app', 'instance'])
- name: total_connections_used
exp: server_total_connections.max(['app', 'instance'])
- name: parent_connections_used
exp: server_parent_connections.max(['app', 'instance'])
- name: worker_threads
exp: server_concurrency
- name: worker_threads_max
exp: server_concurrency.max(['app', 'instance'])
- name: bug_failures
exp: server_envoy_bug_failures
# envoy_cluster_metrics
- name: cluster_membership_healthy
exp: envoy_cluster_metrics.tagMatch('metrics_name' , '.+membership_healthy').tagMatch('metrics_name' , 'cluster.outbound.+|cluster.inbound.+').tagNotMatch('cluster_name' , '.+kube-system').sum(['app', 'instance' , 'cluster_name'])
- name: cluster_up_cx_active
exp: envoy_cluster_metrics.tagMatch('metrics_name' , '.+upstream_cx_active').tagMatch('metrics_name' , 'cluster.outbound.+|cluster.inbound.+').sum(['app', 'instance' , 'cluster_name'])
- name: cluster_up_cx_incr
exp: envoy_cluster_metrics.tagMatch('metrics_name' , '.+upstream_cx_total').tagMatch('metrics_name' , 'cluster.outbound.+|cluster.inbound.+').sum(['app', 'instance' , 'cluster_name']).increase('PT1M')
- name: cluster_up_rq_active
exp: envoy_cluster_metrics.tagMatch('metrics_name' , '.+upstream_rq_active').tagMatch('metrics_name' , 'cluster.outbound.+|cluster.inbound.+').sum(['app', 'instance' , 'cluster_name'])
- name: cluster_up_rq_incr
exp: envoy_cluster_metrics.tagMatch('metrics_name' , '.+upstream_rq_total').tagMatch('metrics_name' , 'cluster.outbound.+|cluster.inbound.+').sum(['app', 'instance' , 'cluster_name']).increase('PT1M')
- name: cluster_up_rq_pending_active
exp: envoy_cluster_metrics.tagMatch('metrics_name' , '.+upstream_rq_pending_active').tagMatch('metrics_name' , 'cluster.outbound.+|cluster.inbound.+').sum(['app', 'instance' , 'cluster_name'])
- name: cluster_lb_healthy_panic_incr
exp: envoy_cluster_metrics.tagMatch('metrics_name' , '.+lb_healthy_panic').tagMatch('metrics_name' , 'cluster.outbound.+|cluster.inbound.+').sum(['app', 'instance' , 'cluster_name']).increase('PT1M')
- name: cluster_up_cx_none_healthy_incr
exp: envoy_cluster_metrics.tagMatch('metrics_name' , '.+upstream_cx_none_healthy').tagMatch('metrics_name' , 'cluster.outbound.+|cluster.inbound.+').sum(['app', 'instance' , 'cluster_name']).increase('PT1M')