package metrics

import (
	"bytes"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"os"
	"strconv"
	"strings"
	"sync/atomic"
	"time"
)

// See https://github.com/prometheus/procfs/blob/a4ac0826abceb44c40fc71daed2b301db498b93e/proc_stat.go#L40 .
const userHZ = 100

// See http://man7.org/linux/man-pages/man5/proc.5.html
type procStat struct {
	State       byte
	Ppid        int
	Pgrp        int
	Session     int
	TtyNr       int
	Tpgid       int
	Flags       uint
	Minflt      uint
	Cminflt     uint
	Majflt      uint
	Cmajflt     uint
	Utime       uint
	Stime       uint
	Cutime      int
	Cstime      int
	Priority    int
	Nice        int
	NumThreads  int
	ItrealValue int
	Starttime   uint64
	Vsize       uint
	Rss         int
}
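
// The tail of /proc/self/stat after the ") " separator contains, in this order:
// state, ppid, pgrp, session, tty_nr, tpgid, flags, minflt, cminflt, majflt, cmajflt,
// utime, stime, cutime, cstime, priority, nice, num_threads, itrealvalue, starttime,
// vsize and rss (see proc(5) for the remaining fields). writeProcessMetrics parses
// these fields into procStat and writes the corresponding process_* metrics to w.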
func writeProcessMetrics(w io.Writer) {
	statFilepath := "/proc/self/stat"
	data, err := ioutil.ReadFile(statFilepath)
	if err != nil {
		log.Printf("ERROR: metrics: cannot open %s: %s", statFilepath, err)
		return
	}

	// Search for the end of command.
	n := bytes.LastIndex(data, []byte(") "))
	if n < 0 {
		log.Printf("ERROR: metrics: cannot find command in parentheses in %q read from %s", data, statFilepath)
		return
	}
	data = data[n+2:]

	var p procStat
	bb := bytes.NewBuffer(data)
	_, err = fmt.Fscanf(bb, "%c %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d",
		&p.State, &p.Ppid, &p.Pgrp, &p.Session, &p.TtyNr, &p.Tpgid, &p.Flags, &p.Minflt, &p.Cminflt, &p.Majflt, &p.Cmajflt,
		&p.Utime, &p.Stime, &p.Cutime, &p.Cstime, &p.Priority, &p.Nice, &p.NumThreads, &p.ItrealValue, &p.Starttime, &p.Vsize, &p.Rss)
	if err != nil {
		log.Printf("ERROR: metrics: cannot parse %q read from %s: %s", data, statFilepath, err)
		return
	}

	// Obtaining `process_open_fds` is expensive when a big number of file descriptors is open,
	// so don't do it here.
	// See writeFDMetrics instead.

	utime := float64(p.Utime) / userHZ
	stime := float64(p.Stime) / userHZ
	WriteCounterFloat64(w, "process_cpu_seconds_system_total", stime)
	WriteCounterFloat64(w, "process_cpu_seconds_total", utime+stime)
	WriteCounterFloat64(w, "process_cpu_seconds_user_total", utime)
	WriteCounterUint64(w, "process_major_pagefaults_total", uint64(p.Majflt))
	WriteCounterUint64(w, "process_minor_pagefaults_total", uint64(p.Minflt))
	WriteGaugeUint64(w, "process_num_threads", uint64(p.NumThreads))
	WriteGaugeUint64(w, "process_resident_memory_bytes", uint64(p.Rss)*uint64(os.Getpagesize()))
	WriteGaugeUint64(w, "process_start_time_seconds", uint64(startTimeSeconds))
	WriteGaugeUint64(w, "process_virtual_memory_bytes", uint64(p.Vsize))
	writeProcessMemMetrics(w)
	writeIOMetrics(w)
}
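
// The write* functions in this file are unexported and are normally reached through the
// package's exported entry points. A minimal usage sketch, assuming the standard net/http
// server and this package's exported WritePrometheus helper:
//
//	http.HandleFunc("/metrics", func(w http.ResponseWriter, r *http.Request) {
//		// Passing true also exposes the process_* metrics written in this file.
//		metrics.WritePrometheus(w, true)
//	})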

var procSelfIOErrLogged uint32
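
// writeIOMetrics parses /proc/self/io, where each line has the form "<name>: <value>"
// (for example "read_bytes: 1234", with an illustrative value), and writes the
// corresponding process_io_* metrics to w.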
func writeIOMetrics(w io.Writer) {
	ioFilepath := "/proc/self/io"
	data, err := ioutil.ReadFile(ioFilepath)
	if err != nil {
		// Do not spam the logs with errors - this error cannot be fixed without process restart.
		// See https://github.com/VictoriaMetrics/metrics/issues/42
		if atomic.CompareAndSwapUint32(&procSelfIOErrLogged, 0, 1) {
			log.Printf("ERROR: metrics: cannot read process_io_* metrics from %q, so these metrics won't be updated until the error is fixed; "+
				"see https://github.com/VictoriaMetrics/metrics/issues/42 ; The error: %s", ioFilepath, err)
		}
	}

	getInt := func(s string) int64 {
		n := strings.IndexByte(s, ' ')
		if n < 0 {
			log.Printf("ERROR: metrics: cannot find whitespace in %q at %q", s, ioFilepath)
			return 0
		}
		v, err := strconv.ParseInt(s[n+1:], 10, 64)
		if err != nil {
			log.Printf("ERROR: metrics: cannot parse %q at %q: %s", s, ioFilepath, err)
			return 0
		}
		return v
	}
	var rchar, wchar, syscr, syscw, readBytes, writeBytes int64
	lines := strings.Split(string(data), "\n")
	for _, s := range lines {
		s = strings.TrimSpace(s)
		switch {
		case strings.HasPrefix(s, "rchar: "):
			rchar = getInt(s)
		case strings.HasPrefix(s, "wchar: "):
			wchar = getInt(s)
		case strings.HasPrefix(s, "syscr: "):
			syscr = getInt(s)
		case strings.HasPrefix(s, "syscw: "):
			syscw = getInt(s)
		case strings.HasPrefix(s, "read_bytes: "):
			readBytes = getInt(s)
		case strings.HasPrefix(s, "write_bytes: "):
			writeBytes = getInt(s)
		}
	}
	WriteGaugeUint64(w, "process_io_read_bytes_total", uint64(rchar))
	WriteGaugeUint64(w, "process_io_written_bytes_total", uint64(wchar))
	WriteGaugeUint64(w, "process_io_read_syscalls_total", uint64(syscr))
	WriteGaugeUint64(w, "process_io_write_syscalls_total", uint64(syscw))
	WriteGaugeUint64(w, "process_io_storage_read_bytes_total", uint64(readBytes))
	WriteGaugeUint64(w, "process_io_storage_written_bytes_total", uint64(writeBytes))
}
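
// startTimeSeconds is captured at package initialization and serves as an approximation
// of the process start time for the process_start_time_seconds metric.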
var startTimeSeconds = time.Now().Unix()

// writeFDMetrics writes process_max_fds and process_open_fds metrics to w.
func writeFDMetrics(w io.Writer) {
	totalOpenFDs, err := getOpenFDsCount("/proc/self/fd")
	if err != nil {
		log.Printf("ERROR: metrics: cannot determine open file descriptors count: %s", err)
		return
	}
	maxOpenFDs, err := getMaxFilesLimit("/proc/self/limits")
	if err != nil {
		log.Printf("ERROR: metrics: cannot determine the limit on open file descriptors: %s", err)
		return
	}
	WriteGaugeUint64(w, "process_max_fds", maxOpenFDs)
	WriteGaugeUint64(w, "process_open_fds", totalOpenFDs)
}
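
// getOpenFDsCount returns the number of entries in the directory at path
// (normally /proc/self/fd), i.e. the number of currently open file descriptors.
// The directory is read in chunks of 512 names in order to avoid building
// a single huge slice when many descriptors are open.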
func getOpenFDsCount(path string) (uint64, error) {
	f, err := os.Open(path)
	if err != nil {
		return 0, err
	}
	defer f.Close()
	var totalOpenFDs uint64
	for {
		names, err := f.Readdirnames(512)
		if err == io.EOF {
			break
		}
		if err != nil {
			return 0, fmt.Errorf("unexpected error at Readdirnames: %s", err)
		}
		totalOpenFDs += uint64(len(names))
	}
	return totalOpenFDs, nil
}
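
// getMaxFilesLimit extracts the soft limit from the "Max open files" line of the file
// at path (normally /proc/self/limits). The line has the form
//
//	Max open files            1024                 4096                 files
//
// (illustrative values); the first number is the soft limit and the second one is the hard limit.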
func getMaxFilesLimit(path string) (uint64, error) {
	data, err := ioutil.ReadFile(path)
	if err != nil {
		return 0, err
	}
	lines := strings.Split(string(data), "\n")
	const prefix = "Max open files"
	for _, s := range lines {
		if !strings.HasPrefix(s, prefix) {
			continue
		}
		text := strings.TrimSpace(s[len(prefix):])
		// Extract soft limit.
		n := strings.IndexByte(text, ' ')
		if n < 0 {
			return 0, fmt.Errorf("cannot extract soft limit from %q", s)
		}
		text = text[:n]
		if text == "unlimited" {
			return 1<<64 - 1, nil
		}
		limit, err := strconv.ParseUint(text, 10, 64)
		if err != nil {
			return 0, fmt.Errorf("cannot parse soft limit from %q: %s", s, err)
		}
		return limit, nil
	}
	return 0, fmt.Errorf("cannot find max open files limit")
}

// https://man7.org/linux/man-pages/man5/procfs.5.html
type memStats struct {
	vmPeak   uint64
	rssPeak  uint64
	rssAnon  uint64
	rssFile  uint64
	rssShmem uint64
}
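
// writeProcessMemMetrics writes peak and per-type resident memory metrics parsed from
// /proc/self/status, where the relevant lines look like "VmPeak:    12345 kB"
// (illustrative value).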
func writeProcessMemMetrics(w io.Writer) {
	ms, err := getMemStats("/proc/self/status")
	if err != nil {
		log.Printf("ERROR: metrics: cannot determine memory status: %s", err)
		return
	}
	WriteGaugeUint64(w, "process_virtual_memory_peak_bytes", ms.vmPeak)
	WriteGaugeUint64(w, "process_resident_memory_peak_bytes", ms.rssPeak)
	WriteGaugeUint64(w, "process_resident_memory_anon_bytes", ms.rssAnon)
	WriteGaugeUint64(w, "process_resident_memory_file_bytes", ms.rssFile)
	WriteGaugeUint64(w, "process_resident_memory_shared_bytes", ms.rssShmem)
}
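
// getMemStats parses the Vm* and Rss* lines from the file at path (normally /proc/self/status)
// and returns the corresponding values converted from kB to bytes.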
func getMemStats(path string) (*memStats, error) {
	data, err := ioutil.ReadFile(path)
	if err != nil {
		return nil, err
	}
	var ms memStats
	lines := strings.Split(string(data), "\n")
	for _, s := range lines {
		if !strings.HasPrefix(s, "Vm") && !strings.HasPrefix(s, "Rss") {
			continue
		}
		// Extract key value.
		line := strings.Fields(s)
		if len(line) != 3 {
			return nil, fmt.Errorf("unexpected number of fields found in %q; got %d; want %d", s, len(line), 3)
		}
		memStatName := line[0]
		memStatValue := line[1]
		value, err := strconv.ParseUint(memStatValue, 10, 64)
		if err != nil {
			return nil, fmt.Errorf("cannot parse number from %q: %w", s, err)
		}
		if line[2] != "kB" {
			return nil, fmt.Errorf("expecting kB value in %q; got %q", s, line[2])
		}
		value *= 1024
		switch memStatName {
		case "VmPeak:":
			ms.vmPeak = value
		case "VmHWM:":
			ms.rssPeak = value
		case "RssAnon:":
			ms.rssAnon = value
		case "RssFile:":
			ms.rssFile = value
		case "RssShmem:":
			ms.rssShmem = value
		}
	}
	return &ms, nil
}