go_metrics.go: follow-up for 8870cd36e76c191e1868f59846368468e5913919

- Expose Go runtime histograms as summaries, since the histograms have too many buckets,
  which may lead to high cardinality issues on the scraper side.

- Expose go_gc_pauses_seconds summary

- Document exposed go_* metrics
This commit is contained in:
Aliaksandr Valialkin 2023-11-30 00:56:43 +02:00
parent 5ad4f70016
commit 5b58446f57
No known key found for this signature in database
GPG Key ID: 52C003EE2BCDB9EB
3 changed files with 79 additions and 35 deletions

View File

@ -16,6 +16,7 @@ var runtimeMetrics = [][2]string{
{"/sync/mutex/wait/total:seconds", "go_mutex_wait_seconds_total"},
{"/cpu/classes/gc/mark/assist:cpu-seconds", "go_gc_mark_assist_cpu_seconds_total"},
{"/cpu/classes/gc/total:cpu-seconds", "go_gc_cpu_seconds_total"},
{"/gc/pauses:seconds", "go_gc_pauses_seconds"},
{"/cpu/classes/scavenge/total:cpu-seconds", "go_scavenge_cpu_seconds_total"},
{"/gc/gomemlimit:bytes", "go_memlimit_bytes"},
}
@ -103,14 +104,23 @@ func writeRuntimeMetric(w io.Writer, name string, sample *runtimemetrics.Sample)
}
func writeRuntimeHistogramMetric(w io.Writer, name string, h *runtimemetrics.Float64Histogram) {
runningCount := uint64(0)
// Expose histogram metric as summary, since Go runtime returns too many histogram buckets,
// which may lead to high cardinality issues at the scraper side.
buckets := h.Buckets
for i, count := range h.Counts {
fmt.Fprintf(w, `%s_bucket{le="%g"} %d`+"\n", name, buckets[i], runningCount)
runningCount += count
counts := h.Counts
totalCount := uint64(0)
for _, count := range counts {
totalCount += count
}
fmt.Fprintf(w, `%s_bucket{le="%g"} %d`+"\n", name, buckets[len(buckets)-1], runningCount)
if !math.IsInf(buckets[len(buckets)-1], 1) {
fmt.Fprintf(w, `%s_bucket{le="+Inf"} %d`+"\n", name, runningCount)
for _, q := range defaultSummaryQuantiles {
upperBound := uint64(math.Ceil(q * float64(totalCount)))
runningCount := uint64(0)
for i, count := range counts {
runningCount += count
if runningCount >= upperBound {
fmt.Fprintf(w, `%s{quantile="%g"} %g`+"\n", name, q, buckets[i+1])
break
}
}
}
}

View File

@ -22,44 +22,40 @@ func TestWriteRuntimeHistogramMetricOk(t *testing.T) {
f(&runtimemetrics.Float64Histogram{
Counts: []uint64{1, 2, 3},
Buckets: []float64{1, 2, 3, 4},
}, `foo_bucket{le="1"} 0
foo_bucket{le="2"} 1
foo_bucket{le="3"} 3
foo_bucket{le="4"} 6
foo_bucket{le="+Inf"} 6
}, `foo{quantile="0.5"} 3
foo{quantile="0.9"} 4
foo{quantile="0.97"} 4
foo{quantile="0.99"} 4
foo{quantile="1"} 4
`)
f(&runtimemetrics.Float64Histogram{
Counts: []uint64{0, 25, 1, 3},
Counts: []uint64{0, 25, 1, 0},
Buckets: []float64{1, 2, 3, 4, math.Inf(1)},
}, `foo_bucket{le="1"} 0
foo_bucket{le="2"} 0
foo_bucket{le="3"} 25
foo_bucket{le="4"} 26
foo_bucket{le="+Inf"} 29
}, `foo{quantile="0.5"} 3
foo{quantile="0.9"} 3
foo{quantile="0.97"} 4
foo{quantile="0.99"} 4
foo{quantile="1"} 4
`)
f(&runtimemetrics.Float64Histogram{
Counts: []uint64{0, 25, 1, 3, 0, 44, 15, 132, 10, 11},
Counts: []uint64{0, 25, 1, 3, 0, 44, 15, 132, 10, 0},
Buckets: []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, math.Inf(1)},
}, `foo_bucket{le="1"} 0
foo_bucket{le="2"} 0
foo_bucket{le="3"} 25
foo_bucket{le="4"} 26
foo_bucket{le="5"} 29
foo_bucket{le="6"} 29
foo_bucket{le="7"} 73
foo_bucket{le="8"} 88
foo_bucket{le="9"} 220
foo_bucket{le="10"} 230
foo_bucket{le="+Inf"} 241
}, `foo{quantile="0.5"} 9
foo{quantile="0.9"} 9
foo{quantile="0.97"} 10
foo{quantile="0.99"} 10
foo{quantile="1"} 10
`)
f(&runtimemetrics.Float64Histogram{
Counts: []uint64{1, 5},
Buckets: []float64{math.Inf(-1), 4, math.Inf(1)},
}, `foo_bucket{le="-Inf"} 0
foo_bucket{le="4"} 1
foo_bucket{le="+Inf"} 6
Counts: []uint64{1, 5, 0},
Buckets: []float64{math.Inf(-1), 4, 5, math.Inf(1)},
}, `foo{quantile="0.5"} 5
foo{quantile="0.9"} 5
foo{quantile="0.97"} 5
foo{quantile="0.99"} 5
foo{quantile="1"} 5
`)
}

View File

@ -134,10 +134,26 @@ func WritePrometheus(w io.Writer, exposeProcessMetrics bool) {
//
// - process_io_storage_written_bytes_total - the number of bytes actually written to disk
//
// - go_sched_latencies_seconds - time spent by goroutines in ready state before they start execution
//
// - go_mutex_wait_seconds_total - total time spent by all the goroutines while waiting for locked mutexes
//
// - go_gc_mark_assist_cpu_seconds_total - total CPU time spent by goroutines in GC mark assist state
//
// - go_gc_cpu_seconds_total - total CPU time spent in GC
//
// - go_gc_pauses_seconds - duration of GC pauses
//
// - go_scavenge_cpu_seconds_total - CPU time spent on returning the memory to OS
//
// - go_memlimit_bytes - the GOMEMLIMIT env var value
//
// - go_memstats_alloc_bytes - memory usage for Go objects in the heap
//
// - go_memstats_alloc_bytes_total - the cumulative counter for total size of allocated Go objects
//
// - go_memstats_buck_hash_sys_bytes - bytes of memory in profiling bucket hash tables
//
// - go_memstats_frees_total - the cumulative counter for number of freed Go objects
//
// - go_memstats_gc_cpu_fraction - the fraction of CPU spent in Go garbage collector
@ -148,20 +164,42 @@ func WritePrometheus(w io.Writer, exposeProcessMetrics bool) {
//
// - go_memstats_heap_idle_bytes - idle memory ready for new Go object allocations
//
// - go_memstats_heap_inuse_bytes - bytes in in-use spans
//
// - go_memstats_heap_objects - the number of Go objects in the heap
//
// - go_memstats_heap_released_bytes - bytes of physical memory returned to the OS
//
// - go_memstats_heap_sys_bytes - memory requested for Go objects from the OS
//
// - go_memstats_last_gc_time_seconds - unix timestamp the last garbage collection finished
//
// - go_memstats_lookups_total - the number of pointer lookups performed by the runtime
//
// - go_memstats_mallocs_total - the number of allocations for Go objects
//
// - go_memstats_mcache_inuse_bytes - bytes of allocated mcache structures
//
// - go_memstats_mcache_sys_bytes - bytes of memory obtained from the OS for mcache structures
//
// - go_memstats_mspan_inuse_bytes - bytes of allocated mspan structures
//
// - go_memstats_mspan_sys_bytes - bytes of memory obtained from the OS for mspan structures
//
// - go_memstats_next_gc_bytes - the target heap size when the next garbage collection should start
//
// - go_memstats_other_sys_bytes - bytes of memory in miscellaneous off-heap runtime allocations
//
// - go_memstats_stack_inuse_bytes - memory used for goroutine stacks
//
// - go_memstats_stack_sys_bytes - memory requested from the OS for goroutine stacks
//
// - go_memstats_sys_bytes - memory requested by Go runtime from the OS
//
// - go_cgo_calls_count - the total number of CGO calls
//
// - go_cpu_count - the number of CPU cores on the host where the app runs
//
// The WriteProcessMetrics func is usually called in combination with writing Set metrics
// inside "/metrics" handler:
//