KUDU-3566 fix summary metrics in Prometheus format
This patch corrects the output of various Kudu metrics backed by HDR
histograms. From the Prometheus perspective, those metrics are output
as summaries [1], not histograms [2]. It's necessary to mark them
accordingly to avoid misinterpretation of the collected statistics.
I updated corresponding unit tests and verified that the updated output
was properly parsed and interpreted by a Prometheus 2.50.0 instance
running on my macOS laptop.
[1] https://prometheus.io/docs/concepts/metric_types/#summary
[2] https://prometheus.io/docs/concepts/metric_types/#histogram
Change-Id: I1375ddf1b0ecd730327cd44b4955813b80107f7b
Reviewed-on: http://gerrit.cloudera.org:8080/21338
Tested-by: Alexey Serbin <alexey@apache.org>
Reviewed-by: Abhishek Chennaka <achennaka@cloudera.com>
diff --git a/src/kudu/util/metrics-test.cc b/src/kudu/util/metrics-test.cc
index 95a6dcf..550bfcd 100644
--- a/src/kudu/util/metrics-test.cc
+++ b/src/kudu/util/metrics-test.cc
@@ -651,24 +651,30 @@
}
TEST_F(MetricsTest, HistogramPrometheusTest) {
+ constexpr const char* const kExpectedOutput =
+ "# HELP test_hist foo\n"
+ "# TYPE test_hist summary\n"
+ "test_hist{unit_type=\"milliseconds\", quantile=\"0\"} 1\n"
+ "test_hist{unit_type=\"milliseconds\", quantile=\"0.75\"} 2\n"
+ "test_hist{unit_type=\"milliseconds\", quantile=\"0.95\"} 3\n"
+ "test_hist{unit_type=\"milliseconds\", quantile=\"0.99\"} 4\n"
+ "test_hist{unit_type=\"milliseconds\", quantile=\"0.999\"} 5\n"
+ "test_hist{unit_type=\"milliseconds\", quantile=\"0.9999\"} 5\n"
+ "test_hist{unit_type=\"milliseconds\", quantile=\"1\"} 5\n"
+ "test_hist_sum 1460\n"
+ "test_hist_count 1000\n";
+
scoped_refptr<Histogram> hist = METRIC_test_hist.Instantiate(entity_);
+ hist->IncrementBy(1, 700);
+ hist->IncrementBy(2, 200);
+ hist->IncrementBy(3, 50);
+ hist->IncrementBy(4, 40);
+ hist->IncrementBy(5, 10);
ostringstream output;
PrometheusWriter writer(&output);
ASSERT_OK(hist->WriteAsPrometheus(&writer, {}));
-
- const string expected_output = "# HELP test_hist foo\n"
- "# TYPE test_hist histogram\n"
- "test_hist_bucket{unit_type=\"milliseconds\", le=\"0.75\"} 0\n"
- "test_hist_bucket{unit_type=\"milliseconds\", le=\"0.95\"} 0\n"
- "test_hist_bucket{unit_type=\"milliseconds\", le=\"0.99\"} 0\n"
- "test_hist_bucket{unit_type=\"milliseconds\", le=\"0.999\"} 0\n"
- "test_hist_bucket{unit_type=\"milliseconds\", le=\"0.9999\"} 0\n"
- "test_hist_bucket{unit_type=\"milliseconds\", le=\"+Inf\"} 0\n"
- "test_hist_sum{unit_type=\"milliseconds\"} 0\n"
- "test_hist_count{unit_type=\"milliseconds\"} 0\n";
-
- ASSERT_EQ(expected_output, output.str());
+ ASSERT_EQ(kExpectedOutput, output.str());
}
TEST_F(MetricsTest, JsonPrintTest) {
diff --git a/src/kudu/util/metrics.cc b/src/kudu/util/metrics.cc
index b24aeb1..a67902a 100644
--- a/src/kudu/util/metrics.cc
+++ b/src/kudu/util/metrics.cc
@@ -749,9 +749,18 @@
void MetricPrototype::WriteHelpAndType(PrometheusWriter* writer,
const string& prefix) const {
+ static constexpr const char* const kSummary = "summary";
+
+ // The way how HdrHistogram-backed stats are presented in Kudu metrics
+ // corresponds to a 'summary' metric in Prometheus, not a 'histogram' one [1].
+ //
+ // [1] https://prometheus.io/docs/concepts/metric_types/#summary
+ const auto m_type = type();
+ const char* const metric_type_str =
+ m_type == MetricType::kHistogram ? kSummary : MetricType::Name(m_type);
writer->WriteEntry(Substitute("# HELP $0$1 $2\n# TYPE $3$4 $5\n",
prefix, name(), description(),
- prefix, name(), MetricType::Name(type())));
+ prefix, name(), metric_type_str));
}
//
@@ -990,18 +999,19 @@
}
void MeanGauge::WriteValue(PrometheusWriter* writer, const string& prefix) const {
- auto output = Substitute("$0$1{unit_type=\"$2\"} $3\n", prefix, prototype_->name(),
- MetricUnit::Name(prototype_->unit()), value());
+ static constexpr const char* const kFmt = "$0$1$2{unit_type=\"$3\"} $4\n";
- SubstituteAndAppend(&output, "$0$1$2{unit_type=\"$3\"} $4\n",
- prefix, prototype_->name(), "_count",
- MetricUnit::Name(prototype_->unit()), total_count());
+ const char* const name = prototype_->name();
+ DCHECK(name);
+ const char* const unit = MetricUnit::Name(prototype_->unit());
+ DCHECK(unit);
- SubstituteAndAppend(&output, "$0$1$2{unit_type=\"$3\"} $4\n",
- prefix, prototype_->name(), "_sum",
- MetricUnit::Name(prototype_->unit()), total_sum());
+ string out;
+ SubstituteAndAppend(&out, kFmt, prefix, name, "", unit, value());
+ SubstituteAndAppend(&out, kFmt, prefix, name, "_count", unit, total_count());
+ SubstituteAndAppend(&out, kFmt, prefix, name, "_sum", unit, total_sum());
- writer->WriteEntry(output);
+ writer->WriteEntry(out);
}
//
@@ -1105,55 +1115,41 @@
return Status::OK();
}
-Status Histogram::WriteAsPrometheus(PrometheusWriter* writer, const string& prefix) const {
- // Snapshot is taken to preserve the consistency across metrics and
- // requirements given by Prometheus. The value for the _bucket in +Inf
- // quantile needs to be equal to the total _count
- HistogramSnapshotPB snapshot;
- RETURN_NOT_OK(GetHistogramSnapshotPB(&snapshot, {}));
+Status Histogram::WriteAsPrometheus(PrometheusWriter* writer,
+ const string& prefix) const {
+ static constexpr struct QuantileInfo {
+ const char* const tag;
+ const double quantile;
+ } kQuantiles[] = {
+ { "0.75", 75.0 },
+ { "0.95", 95.0 },
+ { "0.99", 99.0 },
+ { "0.999", 99.9 },
+ { "0.9999", 99.99 },
+ };
+ static constexpr const char* const kFmt =
+ "$0$1{unit_type=\"$2\", quantile=\"$3\"} $4\n";
- auto output = Substitute("$0$1$2{unit_type=\"$3\", le=\"0.75\"} $4\n",
- prefix, prototype_->name(), "_bucket",
- MetricUnit::Name(prototype_->unit()),
- snapshot.percentile_75());
+ const char* const name = prototype_->name();
+ DCHECK(name);
+ const char* const unit = MetricUnit::Name(prototype_->unit());
+ DCHECK(unit);
- SubstituteAndAppend(&output, "$0$1$2{unit_type=\"$3\", le=\"0.95\"} $4\n",
- prefix, prototype_->name(), "_bucket",
- MetricUnit::Name(prototype_->unit()),
- snapshot.percentile_95());
+ // A snapshot is taken to have more consistent statistics while generating
+ // the output.
+ const HdrHistogram h(*histogram_);
+ string out;
+ SubstituteAndAppend(&out, kFmt, prefix, name, unit, "0", h.MinValue());
+ for (const auto& [tag, q] : kQuantiles) {
+ SubstituteAndAppend(&out, kFmt, prefix, name, unit, tag, h.ValueAtPercentile(q));
+ }
+ SubstituteAndAppend(&out, kFmt, prefix, name, unit, "1", h.MaxValue());
- SubstituteAndAppend(&output, "$0$1$2{unit_type=\"$3\", le=\"0.99\"} $4\n",
- prefix, prototype_->name(), "_bucket",
- MetricUnit::Name(prototype_->unit()),
- snapshot.percentile_99());
-
- SubstituteAndAppend(&output, "$0$1$2{unit_type=\"$3\", le=\"0.999\"} $4\n",
- prefix, prototype_->name(), "_bucket",
- MetricUnit::Name(prototype_->unit()),
- snapshot.percentile_99_9());
-
- SubstituteAndAppend(&output, "$0$1$2{unit_type=\"$3\", le=\"0.9999\"} $4\n",
- prefix, prototype_->name(), "_bucket",
- MetricUnit::Name(prototype_->unit()),
- snapshot.percentile_99_99());
-
- SubstituteAndAppend(&output, "$0$1$2{unit_type=\"$3\", le=\"+Inf\"} $4\n",
- prefix, prototype_->name(), "_bucket",
- MetricUnit::Name(prototype_->unit()),
- snapshot.total_count());
-
- SubstituteAndAppend(&output, "$0$1$2{unit_type=\"$3\"} $4\n",
- prefix, prototype_->name(), "_sum",
- MetricUnit::Name(prototype_->unit()),
- snapshot.total_sum());
-
- SubstituteAndAppend(&output, "$0$1$2{unit_type=\"$3\"} $4\n",
- prefix, prototype_->name(), "_count",
- MetricUnit::Name(prototype_->unit()),
- snapshot.total_count());
+ SubstituteAndAppend(&out, "$0$1_sum $2\n", prefix, name, h.TotalSum());
+ SubstituteAndAppend(&out, "$0$1_count $2\n", prefix, name, h.TotalCount());
prototype_->WriteHelpAndType(writer, prefix);
- writer->WriteEntry(output);
+ writer->WriteEntry(out);
return Status::OK();
}