metrics/go_metrics.go
go_metrics.go: use histogram buckets instead of a summary for Go runtime histograms

It is unclear how and when a summary should be reset, since it is smoothed over the entire uptime of the Go app. Histogram buckets, on the other hand, can be wrapped in increase() or rate() in order to calculate the histogram distribution over an arbitrary time range.

Limit the number of buckets per Go runtime histogram to 30 in order to prevent high-cardinality issues.
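
For example, a query such as the following (a hypothetical PromQL/MetricsQL query, not part of this file) estimates the 90th percentile of GC pause duration over the last 5 minutes from the go_gc_pauses_seconds buckets exposed below:

    histogram_quantile(0.9, rate(go_gc_pauses_seconds_bucket[5m]))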

package metrics

import (
	"fmt"
	"io"
	"math"
	"runtime"
	runtimemetrics "runtime/metrics"
	"strings"

	"github.com/valyala/histogram"
)
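
// runtimeMetrics maps runtime/metrics sample names to the Prometheus-style
// metric names exposed by this package.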
// See https://pkg.go.dev/runtime/metrics#hdr-Supported_metrics
var runtimeMetrics = [][2]string{
	{"/sched/latencies:seconds", "go_sched_latencies_seconds"},
	{"/sync/mutex/wait/total:seconds", "go_mutex_wait_seconds_total"},
	{"/cpu/classes/gc/mark/assist:cpu-seconds", "go_gc_mark_assist_cpu_seconds_total"},
	{"/cpu/classes/gc/total:cpu-seconds", "go_gc_cpu_seconds_total"},
	{"/gc/pauses:seconds", "go_gc_pauses_seconds"},
	{"/cpu/classes/scavenge/total:cpu-seconds", "go_scavenge_cpu_seconds_total"},
	{"/gc/gomemlimit:bytes", "go_memlimit_bytes"},
}
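
// writeGoMetrics writes Go runtime metrics to w in Prometheus text exposition format.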
func writeGoMetrics(w io.Writer) {
	writeRuntimeMetrics(w)

	var ms runtime.MemStats
	runtime.ReadMemStats(&ms)
	fmt.Fprintf(w, "go_memstats_alloc_bytes %d\n", ms.Alloc)
	fmt.Fprintf(w, "go_memstats_alloc_bytes_total %d\n", ms.TotalAlloc)
	fmt.Fprintf(w, "go_memstats_buck_hash_sys_bytes %d\n", ms.BuckHashSys)
	fmt.Fprintf(w, "go_memstats_frees_total %d\n", ms.Frees)
	fmt.Fprintf(w, "go_memstats_gc_cpu_fraction %g\n", ms.GCCPUFraction)
	fmt.Fprintf(w, "go_memstats_gc_sys_bytes %d\n", ms.GCSys)
	fmt.Fprintf(w, "go_memstats_heap_alloc_bytes %d\n", ms.HeapAlloc)
	fmt.Fprintf(w, "go_memstats_heap_idle_bytes %d\n", ms.HeapIdle)
	fmt.Fprintf(w, "go_memstats_heap_inuse_bytes %d\n", ms.HeapInuse)
	fmt.Fprintf(w, "go_memstats_heap_objects %d\n", ms.HeapObjects)
	fmt.Fprintf(w, "go_memstats_heap_released_bytes %d\n", ms.HeapReleased)
	fmt.Fprintf(w, "go_memstats_heap_sys_bytes %d\n", ms.HeapSys)
	fmt.Fprintf(w, "go_memstats_last_gc_time_seconds %g\n", float64(ms.LastGC)/1e9)
	fmt.Fprintf(w, "go_memstats_lookups_total %d\n", ms.Lookups)
	fmt.Fprintf(w, "go_memstats_mallocs_total %d\n", ms.Mallocs)
	fmt.Fprintf(w, "go_memstats_mcache_inuse_bytes %d\n", ms.MCacheInuse)
	fmt.Fprintf(w, "go_memstats_mcache_sys_bytes %d\n", ms.MCacheSys)
	fmt.Fprintf(w, "go_memstats_mspan_inuse_bytes %d\n", ms.MSpanInuse)
	fmt.Fprintf(w, "go_memstats_mspan_sys_bytes %d\n", ms.MSpanSys)
	fmt.Fprintf(w, "go_memstats_next_gc_bytes %d\n", ms.NextGC)
	fmt.Fprintf(w, "go_memstats_other_sys_bytes %d\n", ms.OtherSys)
	fmt.Fprintf(w, "go_memstats_stack_inuse_bytes %d\n", ms.StackInuse)
	fmt.Fprintf(w, "go_memstats_stack_sys_bytes %d\n", ms.StackSys)
	fmt.Fprintf(w, "go_memstats_sys_bytes %d\n", ms.Sys)

	fmt.Fprintf(w, "go_cgo_calls_count %d\n", runtime.NumCgoCall())
	fmt.Fprintf(w, "go_cpu_count %d\n", runtime.NumCPU())
	gcPauses := histogram.NewFast()
	for _, pauseNs := range ms.PauseNs[:] {
		gcPauses.Update(float64(pauseNs) / 1e9)
	}
	phis := []float64{0, 0.25, 0.5, 0.75, 1}
	quantiles := make([]float64, 0, len(phis))
	for i, q := range gcPauses.Quantiles(quantiles[:0], phis) {
		fmt.Fprintf(w, `go_gc_duration_seconds{quantile="%g"} %g`+"\n", phis[i], q)
	}
	fmt.Fprintf(w, `go_gc_duration_seconds_sum %g`+"\n", float64(ms.PauseTotalNs)/1e9)
	fmt.Fprintf(w, `go_gc_duration_seconds_count %d`+"\n", ms.NumGC)
	fmt.Fprintf(w, `go_gc_forced_count %d`+"\n", ms.NumForcedGC)

	fmt.Fprintf(w, `go_gomaxprocs %d`+"\n", runtime.GOMAXPROCS(0))
	fmt.Fprintf(w, `go_goroutines %d`+"\n", runtime.NumGoroutine())
	numThread, _ := runtime.ThreadCreateProfile(nil)
	fmt.Fprintf(w, `go_threads %d`+"\n", numThread)

	// Export build details.
	fmt.Fprintf(w, "go_info{version=%q} 1\n", runtime.Version())
	fmt.Fprintf(w, "go_info_ext{compiler=%q, GOARCH=%q, GOOS=%q, GOROOT=%q} 1\n",
		runtime.Compiler, runtime.GOARCH, runtime.GOOS, runtime.GOROOT())
}
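
// writeRuntimeMetrics reads the samples listed in runtimeMetrics via the
// runtime/metrics package and writes each of them to w under its Prometheus name.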
func writeRuntimeMetrics(w io.Writer) {
	samples := make([]runtimemetrics.Sample, len(runtimeMetrics))
	for i, rm := range runtimeMetrics {
		samples[i].Name = rm[0]
	}
	runtimemetrics.Read(samples)
	for i, rm := range runtimeMetrics {
		writeRuntimeMetric(w, rm[1], &samples[i])
	}
}
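
// writeRuntimeMetric writes a single runtime/metrics sample to w, dispatching
// on the kind of the sample value.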
func writeRuntimeMetric(w io.Writer, name string, sample *runtimemetrics.Sample) {
	switch sample.Value.Kind() {
	case runtimemetrics.KindBad:
		panic(fmt.Errorf("BUG: unexpected runtimemetrics.KindBad for sample.Name=%q", sample.Name))
	case runtimemetrics.KindUint64:
		fmt.Fprintf(w, "%s %d\n", name, sample.Value.Uint64())
	case runtimemetrics.KindFloat64:
		fmt.Fprintf(w, "%s %g\n", name, sample.Value.Float64())
	case runtimemetrics.KindFloat64Histogram:
		writeRuntimeHistogramMetric(w, name, sample.Value.Float64Histogram())
	}
}
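
// writeRuntimeHistogramMetric converts a runtime/metrics Float64Histogram, which
// stores non-cumulative per-bucket counts, into cumulative Prometheus _bucket
// series with le labels.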
func writeRuntimeHistogramMetric(w io.Writer, name string, h *runtimemetrics.Float64Histogram) {
	buckets := h.Buckets
	counts := h.Counts
	if len(buckets) != len(counts)+1 {
		panic(fmt.Errorf("the number of buckets must be bigger than the number of counts by 1 in histogram %s; got buckets=%d, counts=%d", name, len(buckets), len(counts)))
	}
	tailCount := uint64(0)
	if strings.HasSuffix(name, "_seconds") {
		// Limit the maximum bucket to 1 second, since the Go runtime exposes buckets
		// of up to 10K seconds, which make little sense. At the same time such buckets
		// may lead to high-cardinality issues on the scraper side.
		for len(buckets) > 0 && buckets[len(buckets)-1] > 1 {
			buckets = buckets[:len(buckets)-1]
			tailCount += counts[len(counts)-1]
			counts = counts[:len(counts)-1]
		}
	}
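
	// Thin out the remaining buckets, so at most maxRuntimeHistogramBuckets cumulative
	// buckets are emitted: accumulate counts into totalCount and write a _bucket line
	// only at roughly every iStep-th bucket boundary.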
	iStep := float64(len(buckets)) / maxRuntimeHistogramBuckets
	totalCount := uint64(0)
	iNext := 0.0
	for i, count := range counts {
		totalCount += count
		if float64(i) >= iNext {
			iNext += iStep
			le := buckets[i+1]
			if !math.IsInf(le, 1) {
				fmt.Fprintf(w, `%s_bucket{le="%g"} %d`+"\n", name, le, totalCount)
			}
		}
	}
	totalCount += tailCount
	fmt.Fprintf(w, `%s_bucket{le="+Inf"} %d`+"\n", name, totalCount)
}

// Limit the number of buckets for Go runtime histograms in order to prevent
// high-cardinality issues on the scraper side.
const maxRuntimeHistogramBuckets = 30
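
// A minimal usage sketch (hypothetical, not part of the original file; writeGoMetrics
// is unexported, so such a caller would live in this package):
//
//	var bb bytes.Buffer
//	writeGoMetrics(&bb)
//	os.Stdout.Write(bb.Bytes())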