OAP Self Observability: separate Trace analysis metrics by the `protocol` label, and add Zipkin span dropped metrics. (#13531)
diff --git a/docs/en/changes/changes.md b/docs/en/changes/changes.md
index 5b0d9e8..b28a565 100644
--- a/docs/en/changes/changes.md
+++ b/docs/en/changes/changes.md
@@ -104,6 +104,7 @@
* Aggregate TopN Slow SQL by service dimension.
* BanyanDB: support add group prefix (namespace) for BanyanDB groups.
* BanyanDB: fix when setting `@BanyanDB.TimestampColumn`, the column should not be indexed.
+* OAP Self Observability: separate Trace analysis metrics by the `protocol` label, and add Zipkin span dropped metrics.
#### UI
diff --git a/oap-server/server-starter/src/main/resources/otel-rules/oap.yaml b/oap-server/server-starter/src/main/resources/otel-rules/oap.yaml
index b411f50..6c71522 100644
--- a/oap-server/server-starter/src/main/resources/otel-rules/oap.yaml
+++ b/oap-server/server-starter/src/main/resources/otel-rules/oap.yaml
@@ -51,11 +51,13 @@
.tag({tags -> if (tags['gc'] == 'PS Scavenge' || tags['gc'] == 'Copy' || tags['gc'] == 'ParNew' || tags['gc'] == 'G1 Young Generation') {tags.gc = 'young_gc_time'} })
.tag({tags -> if (tags['gc'] == 'PS MarkSweep' || tags['gc'] == 'MarkSweepCompact' || tags['gc'] == 'ConcurrentMarkSweep' || tags['gc'] == 'G1 Old Generation') {tags.gc = 'old_gc_time'} })
- name: instance_trace_count
- exp: trace_in_latency_count.sum(['service', 'host_name']).increase('PT1M')
+ exp: trace_in_latency_count.sum(['service', 'host_name', 'protocol']).increase('PT1M')
- name: instance_trace_latency_percentile
- exp: trace_in_latency.sum(['le', 'service', 'host_name']).increase('PT1M').histogram().histogram_percentile([50,70,90,99])
+ exp: trace_in_latency.sum(['le', 'service', 'host_name', 'protocol']).increase('PT1M').histogram().histogram_percentile([50,70,90,99])
- name: instance_trace_analysis_error_count
- exp: trace_analysis_error_count.sum(['service', 'host_name']).increase('PT1M')
+ exp: trace_analysis_error_count.sum(['service', 'host_name', 'protocol']).increase('PT1M')
+ - name: instance_spans_dropped_count
+ exp: spans_dropped_count.sum(['service', 'host_name', 'protocol']).increase('PT1M')
- name: instance_mesh_count
exp: mesh_analysis_latency_count.sum(['service', 'host_name']).increase('PT1M')
- name: instance_mesh_latency_percentile
diff --git a/oap-server/server-starter/src/main/resources/ui-initialized-templates/general/general-root.json b/oap-server/server-starter/src/main/resources/ui-initialized-templates/general/general-root.json
index a18f494..95c09f0 100644
--- a/oap-server/server-starter/src/main/resources/ui-initialized-templates/general/general-root.json
+++ b/oap-server/server-starter/src/main/resources/ui-initialized-templates/general/general-root.json
@@ -158,7 +158,7 @@
"x": 0,
"y": 0,
"w": 24,
- "h": 49,
+ "h": 48,
"i": "0",
"type": "Trace"
}
diff --git a/oap-server/server-starter/src/main/resources/ui-initialized-templates/so11y_oap/so11y-instance.json b/oap-server/server-starter/src/main/resources/ui-initialized-templates/so11y_oap/so11y-instance.json
index 852ac36..a1837e0 100644
--- a/oap-server/server-starter/src/main/resources/ui-initialized-templates/so11y_oap/so11y-instance.json
+++ b/oap-server/server-starter/src/main/resources/ui-initialized-templates/so11y_oap/so11y-instance.json
@@ -649,7 +649,7 @@
]
},
{
- "x": 12,
+ "x": 18,
"y": 0,
"w": 6,
"h": 13,
@@ -676,8 +676,8 @@
]
},
{
- "x": 18,
- "y": 0,
+ "x": 0,
+ "y": 13,
"w": 6,
"h": 13,
"i": "12",
@@ -698,7 +698,7 @@
]
},
{
- "x": 0,
+ "x": 6,
"y": 13,
"w": 6,
"h": 13,
@@ -725,7 +725,7 @@
]
},
{
- "x": 12,
+ "x": 18,
"y": 13,
"w": 6,
"h": 13,
@@ -747,7 +747,7 @@
]
},
{
- "x": 6,
+ "x": 0,
"y": 26,
"w": 6,
"h": 13,
@@ -774,8 +774,8 @@
]
},
{
- "x": 0,
- "y": 39,
+ "x": 12,
+ "y": 26,
"w": 6,
"h": 13,
"i": "20",
@@ -796,8 +796,8 @@
}
},
{
- "x": 12,
- "y": 26,
+ "x": 0,
+ "y": 39,
"w": 6,
"h": 13,
"i": "21",
@@ -823,7 +823,7 @@
]
},
{
- "x": 0,
+ "x": 6,
"y": 26,
"w": 6,
"h": 13,
@@ -867,8 +867,8 @@
}
},
{
- "x": 18,
- "y": 13,
+ "x": 6,
+ "y": 39,
"w": 6,
"h": 13,
"i": "24",
@@ -889,7 +889,7 @@
}
},
{
- "x": 6,
+ "x": 12,
"y": 13,
"w": 6,
"h": 13,
@@ -906,6 +906,28 @@
"meter_oap_instance_k8s_als_streams",
"meter_oap_instance_k8s_als_error_streams"
]
+ },
+ {
+ "x": 12,
+ "y": 0,
+ "w": 6,
+ "h": 13,
+ "i": "26",
+ "type": "Widget",
+ "widget": {
+ "title": "Zipkin Span Dropped Count (Per Minute)"
+ },
+ "graph": {
+ "type": "Line",
+ "step": false,
+ "smooth": false,
+ "showSymbol": true,
+ "showXAxis": true,
+ "showYAxis": true
+ },
+ "expressions": [
+ "meter_oap_instance_spans_dropped_count"
+ ]
}
]
}
diff --git a/test/e2e-v2/cases/so11y/expected/metrics-has-value-label-trace.yml b/test/e2e-v2/cases/so11y/expected/metrics-has-value-label-trace.yml
new file mode 100644
index 0000000..3d05bc8
--- /dev/null
+++ b/test/e2e-v2/cases/so11y/expected/metrics-has-value-label-trace.yml
@@ -0,0 +1,36 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+debuggingtrace: null
+type: TIME_SERIES_VALUES
+results:
+ {{- contains .results }}
+ - metric:
+ labels:
+ - key: protocol
+ value: grpc
+ values:
+ {{- contains .values }}
+ - id: {{ notEmpty .id }}
+ value: {{ .value }}
+ owner: null
+ traceid: null
+ - id: {{ notEmpty .id }}
+ value: null
+ owner: null
+ traceid: null
+ {{- end}}
+ {{- end}}
+error: null
diff --git a/test/e2e-v2/cases/so11y/expected/metrics-has-value-percentile.yml b/test/e2e-v2/cases/so11y/expected/metrics-has-value-percentile.yml
index e20e27e..516a18f 100644
--- a/test/e2e-v2/cases/so11y/expected/metrics-has-value-percentile.yml
+++ b/test/e2e-v2/cases/so11y/expected/metrics-has-value-percentile.yml
@@ -21,6 +21,8 @@
labels:
- key: p
value: "90"
+ - key: protocol
+ value: grpc
values:
{{- contains .values }}
- id: {{ notEmpty .id }}
@@ -36,6 +38,8 @@
labels:
- key: p
value: "99"
+ - key: protocol
+ value: grpc
values:
{{- contains .values }}
- id: {{ notEmpty .id }}
diff --git a/test/e2e-v2/cases/so11y/so11y-cases.yaml b/test/e2e-v2/cases/so11y/so11y-cases.yaml
index 1771da9..63379b3 100644
--- a/test/e2e-v2/cases/so11y/so11y-cases.yaml
+++ b/test/e2e-v2/cases/so11y/so11y-cases.yaml
@@ -29,7 +29,7 @@
- query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=meter_oap_instance_jvm_memory_bytes_used --instance-name=http://localhost:1234 --service-name=oap-server
expected: expected/metrics-has-memory-value-label.yml
- query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=meter_oap_instance_trace_count --instance-name=http://localhost:1234 --service-name=oap-server
- expected: expected/metrics-has-value.yml
+ expected: expected/metrics-has-value-label-trace.yml
- query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression="meter_oap_instance_metrics_aggregation{level='L1 aggregation'}" --instance-name=http://localhost:1234 --service-name=oap-server
expected: expected/metrics-has-value-label.yml
- query: swctl --display yaml --base-url=http://${oap_host}:${oap_12800}/graphql metrics exec --expression=meter_oap_instance_persistence_prepare_count --instance-name=http://localhost:1234 --service-name=oap-server
diff --git a/test/e2e-v2/script/env b/test/e2e-v2/script/env
index fd89d8e..a24200e 100644
--- a/test/e2e-v2/script/env
+++ b/test/e2e-v2/script/env
@@ -23,7 +23,7 @@
SW_AGENT_CLIENT_JS_TEST_COMMIT=4f1eb1dcdbde3ec4a38534bf01dded4ab5d2f016
SW_KUBERNETES_COMMIT_SHA=6fe5e6f0d3b7686c6be0457733e825ee68cb9b35
SW_ROVER_COMMIT=79292fe07f17f98f486e0c4471213e1961fb2d1d
-SW_BANYANDB_COMMIT=ac67a23cdbf9c36cd679699c1d7957724c83cce0
+SW_BANYANDB_COMMIT=a3fc5bc16e8c9c3385beb41dcef8b988314ff58d
SW_AGENT_PHP_COMMIT=d1114e7be5d89881eec76e5b56e69ff844691e35
SW_PREDICTOR_COMMIT=54a0197654a3781a6f73ce35146c712af297c994