process_metrics_linux.go: follow-up after 7921ac9c64
It is better to just log the error once if /proc/self/io cannot be opened, without exposing additional counters. The error message should contain directions on how to fix the error.

Updates https://github.com/VictoriaMetrics/metrics/issues/42
This commit is contained in:
parent
7921ac9c64
commit
fd0a3a0deb
@ -42,27 +42,6 @@ type procStat struct {
|
|||||||
Rss int
|
Rss int
|
||||||
}
|
}
|
||||||
|
|
||||||
var processSelfIONotFoundErrorCount, processSelfIOPermErrorCount, processSelfErrorCount int64
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
ioFilepath := "/proc/self/io"
|
|
||||||
_, err := ioutil.ReadFile(ioFilepath)
|
|
||||||
if err != nil {
|
|
||||||
// Do not spam the logs with errors
|
|
||||||
// This error will not be fixed without process restart
|
|
||||||
var errMsg string
|
|
||||||
switch {
|
|
||||||
case os.IsNotExist(err):
|
|
||||||
errMsg = fmt.Sprintf("ERROR: metrics: cannot open %q: %s. This is expected on kernel without CONFIG_TASK_IO_ACCOUNTING, systems without cgroup controller for IO. This error will be reported once, further errors can be tracked by 'process_io_stats_read_errors_total{reason=\"not_found\"}' metric", ioFilepath, err)
|
|
||||||
case os.IsPermission(err):
|
|
||||||
errMsg = fmt.Sprintf("ERROR: metrics: cannot open %q: %s. This is expected when process is running with limited permissions and capabilities (such as using systemd limitations, cgroups, selinux, apparmor and others). This error will be reported once, further errors can be tracked by 'process_io_stats_read_errors_total{reason=\"permission_denied\"' metric", ioFilepath, err)
|
|
||||||
default:
|
|
||||||
errMsg = fmt.Sprintf("ERROR: metrics: cannot open %s: %s", ioFilepath, err)
|
|
||||||
}
|
|
||||||
log.Print(errMsg)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func writeProcessMetrics(w io.Writer) {
|
func writeProcessMetrics(w io.Writer) {
|
||||||
statFilepath := "/proc/self/stat"
|
statFilepath := "/proc/self/stat"
|
||||||
data, err := ioutil.ReadFile(statFilepath)
|
data, err := ioutil.ReadFile(statFilepath)
|
||||||
@ -108,19 +87,17 @@ func writeProcessMetrics(w io.Writer) {
|
|||||||
writeIOMetrics(w)
|
writeIOMetrics(w)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var procSelfIOErrLogged uint32
|
||||||
|
|
||||||
func writeIOMetrics(w io.Writer) {
|
func writeIOMetrics(w io.Writer) {
|
||||||
ioFilepath := "/proc/self/io"
|
ioFilepath := "/proc/self/io"
|
||||||
data, err := ioutil.ReadFile(ioFilepath)
|
data, err := ioutil.ReadFile(ioFilepath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Do not spam the logs with errors
|
// Do not spam the logs with errors - this error cannot be fixed without process restart.
|
||||||
// This error will not be fixed without process restart
|
// See https://github.com/VictoriaMetrics/metrics/issues/42
|
||||||
switch {
|
if atomic.CompareAndSwapUint32(&procSelfIOErrLogged, 0, 1) {
|
||||||
case os.IsNotExist(err):
|
log.Printf("ERROR: metrics: cannot read process_io_* metrics from %q, so these metrics won't be updated until the error is fixed; "+
|
||||||
atomic.AddInt64(&processSelfIONotFoundErrorCount, 1)
|
"see https://github.com/VictoriaMetrics/metrics/issues/42 ; The error: %s", ioFilepath, err)
|
||||||
case os.IsPermission(err):
|
|
||||||
atomic.AddInt64(&processSelfIOPermErrorCount, 1)
|
|
||||||
default:
|
|
||||||
atomic.AddInt64(&processSelfErrorCount, 1)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -156,10 +133,6 @@ func writeIOMetrics(w io.Writer) {
|
|||||||
writeBytes = getInt(s)
|
writeBytes = getInt(s)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Fprintf(w, "process_io_stats_read_errors_total{reason=\"not_found\"} %d\n", atomic.LoadInt64(&processSelfIONotFoundErrorCount))
|
|
||||||
fmt.Fprintf(w, "process_io_stats_read_errors_total{reason=\"permission_denied\"} %d\n", atomic.LoadInt64(&processSelfIOPermErrorCount))
|
|
||||||
fmt.Fprintf(w, "process_io_stats_read_errors_total{reason=\"other\"} %d\n", atomic.LoadInt64(&processSelfErrorCount))
|
|
||||||
fmt.Fprintf(w, "process_io_read_bytes_total %d\n", rchar)
|
fmt.Fprintf(w, "process_io_read_bytes_total %d\n", rchar)
|
||||||
fmt.Fprintf(w, "process_io_written_bytes_total %d\n", wchar)
|
fmt.Fprintf(w, "process_io_written_bytes_total %d\n", wchar)
|
||||||
fmt.Fprintf(w, "process_io_read_syscalls_total %d\n", syscr)
|
fmt.Fprintf(w, "process_io_read_syscalls_total %d\n", syscr)
|
||||||
|
Loading…
Reference in New Issue
Block a user