diff --git a/go_metrics.go b/go_metrics.go
index 0bdb20a..5de7cb8 100644
--- a/go_metrics.go
+++ b/go_metrics.go
@@ -6,6 +6,7 @@ import (
 	"math"
 	"runtime"
 	runtimemetrics "runtime/metrics"
+	"strings"
 
 	"github.com/valyala/histogram"
 )
@@ -104,23 +105,52 @@ func writeRuntimeMetric(w io.Writer, name string, sample *runtimemetrics.Sample)
 }
 
+// writeRuntimeHistogramMetric writes h to w as cumulative `<name>_bucket{le="..."}` lines,
+// finishing with the le="+Inf" bucket, which holds the total number of observations.
+//
+// Bucket bounds are sampled with a step of len(buckets)/maxRuntimeHistogramBuckets in order
+// to limit the number of exposed buckets; counts of the skipped buckets remain included
+// in the cumulative values written after them.
+//
+// It panics if len(h.Buckets) != len(h.Counts)+1.
 func writeRuntimeHistogramMetric(w io.Writer, name string, h *runtimemetrics.Float64Histogram) {
-	// Expose histogram metric as summary, since Go runtime returns too many histogram buckets,
-	// which may lead to high cardinality issues at the scraper side.
 	buckets := h.Buckets
 	counts := h.Counts
-	totalCount := uint64(0)
-	for _, count := range counts {
-		totalCount += count
+	if len(buckets) != len(counts)+1 {
+		panic(fmt.Errorf("the number of buckets must be bigger than the number of counts by 1 in histogram %s; got buckets=%d, counts=%d", name, len(buckets), len(counts)))
 	}
-	for _, q := range defaultSummaryQuantiles {
-		upperBound := uint64(math.Ceil(q * float64(totalCount)))
-		runningCount := uint64(0)
-		for i, count := range counts {
-			runningCount += count
-			if runningCount >= upperBound {
-				fmt.Fprintf(w, `%s{quantile="%g"} %g`+"\n", name, q, buckets[i+1])
-				break
+	tailCount := uint64(0)
+	if strings.HasSuffix(name, "_seconds") {
+		// Limit the maximum bucket to 1 second, since Go runtime exposes buckets with 10K seconds,
+		// which make little sense. At the same time such buckets may lead to high cardinality issues
+		// at the scraper side.
+		// Check len(counts) instead of len(buckets) here: buckets holds one more item than counts,
+		// so draining buckets to zero would index into already empty counts if every bucket
+		// bound exceeds 1 second.
+		for len(counts) > 0 && buckets[len(buckets)-1] > 1 {
+			buckets = buckets[:len(buckets)-1]
+			tailCount += counts[len(counts)-1]
+			counts = counts[:len(counts)-1]
+		}
+	}
+
+	iStep := float64(len(buckets)) / maxRuntimeHistogramBuckets
+
+	totalCount := uint64(0)
+	iNext := 0.0
+	for i, count := range counts {
+		totalCount += count
+		if float64(i) >= iNext {
+			iNext += iStep
+			le := buckets[i+1]
+			// The le="+Inf" bucket is written once after the loop.
+			if !math.IsInf(le, 1) {
+				fmt.Fprintf(w, `%s_bucket{le="%g"} %d`+"\n", name, le, totalCount)
 			}
 		}
 	}
+	totalCount += tailCount
+	fmt.Fprintf(w, `%s_bucket{le="+Inf"} %d`+"\n", name, totalCount)
 }
+
+// Limit the number of buckets for Go runtime histograms in order to prevent high cardinality issues at the scraper side.
+const maxRuntimeHistogramBuckets = 30
diff --git a/go_metrics_test.go b/go_metrics_test.go
index 48adc05..cd510e7 100644
--- a/go_metrics_test.go
+++ b/go_metrics_test.go
@@ -22,40 +22,41 @@ func TestWriteRuntimeHistogramMetricOk(t *testing.T) {
 	f(&runtimemetrics.Float64Histogram{
 		Counts:  []uint64{1, 2, 3},
 		Buckets: []float64{1, 2, 3, 4},
-	}, `foo{quantile="0.5"} 3
-foo{quantile="0.9"} 4
-foo{quantile="0.97"} 4
-foo{quantile="0.99"} 4
-foo{quantile="1"} 4
+	}, `foo_bucket{le="2"} 1
+foo_bucket{le="3"} 3
+foo_bucket{le="4"} 6
+foo_bucket{le="+Inf"} 6
 `)
 
 	f(&runtimemetrics.Float64Histogram{
 		Counts:  []uint64{0, 25, 1, 0},
 		Buckets: []float64{1, 2, 3, 4, math.Inf(1)},
-	}, `foo{quantile="0.5"} 3
-foo{quantile="0.9"} 3
-foo{quantile="0.97"} 4
-foo{quantile="0.99"} 4
-foo{quantile="1"} 4
+	}, `foo_bucket{le="2"} 0
+foo_bucket{le="3"} 25
+foo_bucket{le="4"} 26
+foo_bucket{le="+Inf"} 26
 `)
 
 	f(&runtimemetrics.Float64Histogram{
 		Counts:  []uint64{0, 25, 1, 3, 0, 44, 15, 132, 10, 0},
 		Buckets: []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, math.Inf(1)},
-	}, `foo{quantile="0.5"} 9
-foo{quantile="0.9"} 9
-foo{quantile="0.97"} 10
-foo{quantile="0.99"} 10
-foo{quantile="1"} 10
+	}, `foo_bucket{le="2"} 0
+foo_bucket{le="3"} 25
+foo_bucket{le="4"} 26
+foo_bucket{le="5"} 29
+foo_bucket{le="6"} 29
+foo_bucket{le="7"} 73
+foo_bucket{le="8"} 88
+foo_bucket{le="9"} 220
+foo_bucket{le="10"} 230
+foo_bucket{le="+Inf"} 230
 `)
 
 	f(&runtimemetrics.Float64Histogram{
 		Counts:  []uint64{1, 5, 0},
 		Buckets: []float64{math.Inf(-1), 4, 5, math.Inf(1)},
-	}, `foo{quantile="0.5"} 5
-foo{quantile="0.9"} 5
-foo{quantile="0.97"} 5
-foo{quantile="0.99"} 5
-foo{quantile="1"} 5
+	}, `foo_bucket{le="4"} 1
+foo_bucket{le="5"} 6
+foo_bucket{le="+Inf"} 6
 `)
 }