From 5b58446f57432d632b54957997578525a1328753 Mon Sep 17 00:00:00 2001 From: Aliaksandr Valialkin Date: Thu, 30 Nov 2023 00:56:43 +0200 Subject: [PATCH] go_metrics.go: follow-up for 8870cd36e76c191e1868f59846368468e5913919 - Expose Go runtime histograms as summaries, since they have too many buckets, which may lead to high cardinality issues at scraper side. - Expose go_gc_pauses_seconds summary - Document exposed go_* metrics --- go_metrics.go | 24 ++++++++++++++------- go_metrics_test.go | 52 +++++++++++++++++++++------------------------- metrics.go | 38 +++++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 35 deletions(-) diff --git a/go_metrics.go b/go_metrics.go index 768754d..0bdb20a 100644 --- a/go_metrics.go +++ b/go_metrics.go @@ -16,6 +16,7 @@ var runtimeMetrics = [][2]string{ {"/sync/mutex/wait/total:seconds", "go_mutex_wait_seconds_total"}, {"/cpu/classes/gc/mark/assist:cpu-seconds", "go_gc_mark_assist_cpu_seconds_total"}, {"/cpu/classes/gc/total:cpu-seconds", "go_gc_cpu_seconds_total"}, + {"/gc/pauses:seconds", "go_gc_pauses_seconds"}, {"/cpu/classes/scavenge/total:cpu-seconds", "go_scavenge_cpu_seconds_total"}, {"/gc/gomemlimit:bytes", "go_memlimit_bytes"}, } @@ -103,14 +104,23 @@ func writeRuntimeMetric(w io.Writer, name string, sample *runtimemetrics.Sample) } func writeRuntimeHistogramMetric(w io.Writer, name string, h *runtimemetrics.Float64Histogram) { - runningCount := uint64(0) + // Expose histogram metric as summary, since Go runtime returns too many histogram buckets, + // which may lead to high cardinality issues at the scraper side. 
buckets := h.Buckets - for i, count := range h.Counts { - fmt.Fprintf(w, `%s_bucket{le="%g"} %d`+"\n", name, buckets[i], runningCount) - runningCount += count + counts := h.Counts + totalCount := uint64(0) + for _, count := range counts { + totalCount += count } - fmt.Fprintf(w, `%s_bucket{le="%g"} %d`+"\n", name, buckets[len(buckets)-1], runningCount) - if !math.IsInf(buckets[len(buckets)-1], 1) { - fmt.Fprintf(w, `%s_bucket{le="+Inf"} %d`+"\n", name, runningCount) + for _, q := range defaultSummaryQuantiles { + upperBound := uint64(math.Ceil(q * float64(totalCount))) + runningCount := uint64(0) + for i, count := range counts { + runningCount += count + if runningCount >= upperBound { + fmt.Fprintf(w, `%s{quantile="%g"} %g`+"\n", name, q, buckets[i+1]) + break + } + } } } diff --git a/go_metrics_test.go b/go_metrics_test.go index e4a137c..48adc05 100644 --- a/go_metrics_test.go +++ b/go_metrics_test.go @@ -22,44 +22,40 @@ func TestWriteRuntimeHistogramMetricOk(t *testing.T) { f(&runtimemetrics.Float64Histogram{ Counts: []uint64{1, 2, 3}, Buckets: []float64{1, 2, 3, 4}, - }, `foo_bucket{le="1"} 0 -foo_bucket{le="2"} 1 -foo_bucket{le="3"} 3 -foo_bucket{le="4"} 6 -foo_bucket{le="+Inf"} 6 + }, `foo{quantile="0.5"} 3 +foo{quantile="0.9"} 4 +foo{quantile="0.97"} 4 +foo{quantile="0.99"} 4 +foo{quantile="1"} 4 `) f(&runtimemetrics.Float64Histogram{ - Counts: []uint64{0, 25, 1, 3}, + Counts: []uint64{0, 25, 1, 0}, Buckets: []float64{1, 2, 3, 4, math.Inf(1)}, - }, `foo_bucket{le="1"} 0 -foo_bucket{le="2"} 0 -foo_bucket{le="3"} 25 -foo_bucket{le="4"} 26 -foo_bucket{le="+Inf"} 29 + }, `foo{quantile="0.5"} 3 +foo{quantile="0.9"} 3 +foo{quantile="0.97"} 4 +foo{quantile="0.99"} 4 +foo{quantile="1"} 4 `) f(&runtimemetrics.Float64Histogram{ - Counts: []uint64{0, 25, 1, 3, 0, 44, 15, 132, 10, 11}, + Counts: []uint64{0, 25, 1, 3, 0, 44, 15, 132, 10, 0}, Buckets: []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, math.Inf(1)}, - }, `foo_bucket{le="1"} 0 -foo_bucket{le="2"} 0 
-foo_bucket{le="3"} 25 -foo_bucket{le="4"} 26 -foo_bucket{le="5"} 29 -foo_bucket{le="6"} 29 -foo_bucket{le="7"} 73 -foo_bucket{le="8"} 88 -foo_bucket{le="9"} 220 -foo_bucket{le="10"} 230 -foo_bucket{le="+Inf"} 241 + }, `foo{quantile="0.5"} 9 +foo{quantile="0.9"} 9 +foo{quantile="0.97"} 10 +foo{quantile="0.99"} 10 +foo{quantile="1"} 10 `) f(&runtimemetrics.Float64Histogram{ - Counts: []uint64{1, 5}, - Buckets: []float64{math.Inf(-1), 4, math.Inf(1)}, - }, `foo_bucket{le="-Inf"} 0 -foo_bucket{le="4"} 1 -foo_bucket{le="+Inf"} 6 + Counts: []uint64{1, 5, 0}, + Buckets: []float64{math.Inf(-1), 4, 5, math.Inf(1)}, + }, `foo{quantile="0.5"} 5 +foo{quantile="0.9"} 5 +foo{quantile="0.97"} 5 +foo{quantile="0.99"} 5 +foo{quantile="1"} 5 `) } diff --git a/metrics.go b/metrics.go index 7dfa972..e9560b2 100644 --- a/metrics.go +++ b/metrics.go @@ -134,10 +134,26 @@ func WritePrometheus(w io.Writer, exposeProcessMetrics bool) { // // - process_io_storage_written_bytes_total - the number of bytes actually written to disk // +// - go_sched_latencies_seconds - time spent by goroutines in ready state before they start execution +// +// - go_mutex_wait_seconds_total - total time spent by all the goroutines while waiting for a locked mutex +// +// - go_gc_mark_assist_cpu_seconds_total - total CPU time spent by goroutines in GC mark assist state +// +// - go_gc_cpu_seconds_total - total CPU time spent in GC +// +// - go_gc_pauses_seconds - duration of GC pauses +// +// - go_scavenge_cpu_seconds_total - CPU time spent on returning the memory to the OS +// +// - go_memlimit_bytes - the GOMEMLIMIT env var value +// // - go_memstats_alloc_bytes - memory usage for Go objects in the heap // // - go_memstats_alloc_bytes_total - the cumulative counter for total size of allocated Go objects // +// - go_memstats_buck_hash_sys_bytes - bytes of memory in profiling bucket hash tables +// // - go_memstats_frees_total - the cumulative counter for number of freed Go objects // // - 
go_memstats_gc_cpu_fraction - the fraction of CPU spent in Go garbage collector @@ -148,20 +164,42 @@ func WritePrometheus(w io.Writer, exposeProcessMetrics bool) { // // - go_memstats_heap_idle_bytes - idle memory ready for new Go object allocations // +// - go_memstats_heap_inuse_bytes - bytes in in-use spans +// // - go_memstats_heap_objects - the number of Go objects in the heap // +// - go_memstats_heap_released_bytes - bytes of physical memory returned to the OS +// // - go_memstats_heap_sys_bytes - memory requested for Go objects from the OS // +// - go_memstats_last_gc_time_seconds - unix timestamp when the last garbage collection finished +// +// - go_memstats_lookups_total - the number of pointer lookups performed by the runtime +// // - go_memstats_mallocs_total - the number of allocations for Go objects // +// - go_memstats_mcache_inuse_bytes - bytes of allocated mcache structures +// +// - go_memstats_mcache_sys_bytes - bytes of memory obtained from the OS for mcache structures +// +// - go_memstats_mspan_inuse_bytes - bytes of allocated mspan structures +// +// - go_memstats_mspan_sys_bytes - bytes of memory obtained from the OS for mspan structures +// // - go_memstats_next_gc_bytes - the target heap size when the next garbage collection should start // +// - go_memstats_other_sys_bytes - bytes of memory in miscellaneous off-heap runtime allocations +// // - go_memstats_stack_inuse_bytes - memory used for goroutine stacks // // - go_memstats_stack_sys_bytes - memory requested fromthe OS for goroutine stacks // // - go_memstats_sys_bytes - memory requested by Go runtime from the OS // +// - go_cgo_calls_count - the total number of CGO calls +// +// - go_cpu_count - the number of CPU cores on the host where the app runs +// // The WriteProcessMetrics func is usually called in combination with writing Set metrics // inside "/metrics" handler: //