diff --git a/process_metrics_linux.go b/process_metrics_linux.go index 895ca72..eae9b24 100644 --- a/process_metrics_linux.go +++ b/process_metrics_linux.go @@ -1,6 +1,7 @@ package metrics import ( + "bufio" "bytes" "fmt" "io" @@ -10,6 +11,7 @@ import ( "strconv" "strings" "time" + "unsafe" ) // See https://github.com/prometheus/procfs/blob/a4ac0826abceb44c40fc71daed2b301db498b93e/proc_stat.go#L40 . @@ -65,6 +67,11 @@ func writeProcessMetrics(w io.Writer) { log.Printf("ERROR: cannot parse %q read from %s: %s", data, statFilepath, err) return } + rssPageCache, rssAnonymous, err := getRSSStats() + if err != nil { + log.Printf("ERROR: cannot obtain RSS page cache bytes: %s", err) + return + } // It is expensive obtaining `process_open_fds` when big number of file descriptors is opened, // so don't do it here. @@ -79,6 +86,8 @@ func writeProcessMetrics(w io.Writer) { fmt.Fprintf(w, "process_minor_pagefaults_total %d\n", p.Minflt) fmt.Fprintf(w, "process_num_threads %d\n", p.NumThreads) fmt.Fprintf(w, "process_resident_memory_bytes %d\n", p.Rss*4096) + fmt.Fprintf(w, "process_resident_memory_anonymous_bytes %d\n", rssAnonymous) + fmt.Fprintf(w, "process_resident_memory_pagecache_bytes %d\n", rssPageCache) fmt.Fprintf(w, "process_start_time_seconds %d\n", startTimeSeconds) fmt.Fprintf(w, "process_virtual_memory_bytes %d\n", p.Vsize) @@ -133,7 +142,7 @@ func writeIOMetrics(w io.Writer) { var startTimeSeconds = time.Now().Unix() -// WriteFDMetrics writes process_max_fds and process_open_fds metrics to w. +// writeFDMetrics writes process_max_fds and process_open_fds metrics to w. func writeFDMetrics(w io.Writer) { totalOpenFDs, err := getOpenFDsCount("/proc/self/fd") if err != nil { @@ -198,3 +207,120 @@ func getMaxFilesLimit(path string) (uint64, error) { } return 0, fmt.Errorf("cannot find max open files limit") } + +// getRSSStats returns RSS bytes for page cache and anonymous memory. 
+func getRSSStats() (uint64, uint64, error) { + filepath := "/proc/self/smaps" + f, err := os.Open(filepath) + if err != nil { + return 0, 0, fmt.Errorf("cannot open %q: %w", filepath, err) + } + defer func() { + _ = f.Close() + }() + rssPageCache, rssAnonymous, err := getRSSStatsFromSmaps(f) + if err != nil { + return 0, 0, fmt.Errorf("cannot read %q: %w", filepath, err) + } + return rssPageCache, rssAnonymous, nil +} + +func getRSSStatsFromSmaps(r io.Reader) (uint64, uint64, error) { + var pageCacheBytes, anonymousBytes uint64 + var se smapsEntry + ses := newSmapsEntryScanner(r) + for ses.Next(&se) { + if se.anonymousBytes == 0 { + pageCacheBytes += se.rssBytes + } else { + anonymousBytes += se.rssBytes + } + } + if err := ses.Err(); err != nil { + return 0, 0, err + } + return pageCacheBytes, anonymousBytes, nil +} + +type smapsEntry struct { + rssBytes uint64 + anonymousBytes uint64 +} + +func (se *smapsEntry) reset() { + se.rssBytes = 0 + se.anonymousBytes = 0 +} + +type smapsEntryScanner struct { + bs *bufio.Scanner + err error +} + +func newSmapsEntryScanner(r io.Reader) *smapsEntryScanner { + return &smapsEntryScanner{ + bs: bufio.NewScanner(r), + } +} + +func (ses *smapsEntryScanner) Err() error { + return ses.err +} + +// Next reads the next smaps entry from ses into se. +// +// It returns true after a successful read and false on error or at the end of the stream. +// Call ses.Err() after Next returns false to find out whether an error occurred. 
+func (ses *smapsEntryScanner) Next(se *smapsEntry) bool { + se.reset() + if !ses.bs.Scan() { + ses.err = ses.bs.Err() + return false + } + for ses.bs.Scan() { + line := unsafeBytesToString(ses.bs.Bytes()) + switch { + case strings.HasPrefix(line, "VmFlags:"): + return true + case strings.HasPrefix(line, "Rss:"): + n, err := getSmapsSize(line[len("Rss:"):]) + if err != nil { + ses.err = fmt.Errorf("cannot read Rss size: %w", err) + return false + } + se.rssBytes = n + case strings.HasPrefix(line, "Anonymous:"): + n, err := getSmapsSize(line[len("Anonymous:"):]) + if err != nil { + ses.err = fmt.Errorf("cannot read Anonymous size: %w", err) + return false + } + se.anonymousBytes = n + } + } + ses.err = ses.bs.Err() + if ses.err == nil { + ses.err = fmt.Errorf("unexpected end of stream") + } + return false +} + +func getSmapsSize(line string) (uint64, error) { + line = strings.TrimSpace(line) + if !strings.HasSuffix(line, " kB") { + return 0, fmt.Errorf("cannot find %q suffix in %q", " kB", line) + } + line = line[:len(line)-len(" kB")] + n, err := strconv.ParseUint(line, 10, 64) + if err != nil { + return 0, fmt.Errorf("cannot parse %q: %w", line, err) + } + if n > ((1<<64)-1)/1024 { + return 0, fmt.Errorf("too big size in %q: %d kB", line, n) + } + return n * 1024, nil +} + +func unsafeBytesToString(b []byte) string { + return *(*string)(unsafe.Pointer(&b)) +} diff --git a/process_metrics_linux_test.go b/process_metrics_linux_test.go index 7d6d3a6..f32bbe1 100644 --- a/process_metrics_linux_test.go +++ b/process_metrics_linux_test.go @@ -1,6 +1,221 @@ package metrics -import "testing" +import ( + "bytes" + "testing" +) + +func TestGetPageCacheRSSFromSmapsFailure(t *testing.T) { + f := func(s string) { + t.Helper() + bb := bytes.NewBufferString(s) + _, _, err := getRSSStatsFromSmaps(bb) + if err == nil { + t.Fatalf("expecting non-nil error") + } + } + f("foobar") + + // Invalid unit for Rss + f(`7ffcdf335000-7ffcdf337000 r-xp 00000000 00:00 0 [vdso] +Size: 80 kB 
+KernelPageSize: 4 kB +MMUPageSize: 4 kB +Rss: 12 MB +Pss: 0 kB +Shared_Clean: 4 kB +Shared_Dirty: 0 kB +Private_Clean: 0 kB +Private_Dirty: 0 kB +Referenced: 4 kB +Anonymous: 0 kB +LazyFree: 0 kB +AnonHugePages: 0 kB +ShmemPmdMapped: 0 kB +Shared_Hugetlb: 0 kB +Private_Hugetlb: 0 kB +Swap: 0 kB +SwapPss: 0 kB +Locked: 0 kB +VmFlags: rd ex mr mw me de sd +`) + + // Invalid unit for Anonymous + f(`7ffcdf335000-7ffcdf337000 r-xp 00000000 00:00 0 [vdso] +Size: 80 kB +KernelPageSize: 4 kB +MMUPageSize: 4 kB +Rss: 12 kB +Pss: 0 kB +Shared_Clean: 4 kB +Shared_Dirty: 0 kB +Private_Clean: 0 kB +Private_Dirty: 0 kB +Referenced: 4 kB +Anonymous: 5 MB +LazyFree: 0 kB +AnonHugePages: 0 kB +ShmemPmdMapped: 0 kB +Shared_Hugetlb: 0 kB +Private_Hugetlb: 0 kB +Swap: 0 kB +SwapPss: 0 kB +Locked: 0 kB +VmFlags: rd ex mr mw me de sd +`) + + // Invalid size for Rss + f(`7ffcdf335000-7ffcdf337000 r-xp 00000000 00:00 0 [vdso] +Size: 80 kB +KernelPageSize: 4 kB +MMUPageSize: 4 kB +Rss: 1.2 kB +Pss: 0 kB +Shared_Clean: 4 kB +Shared_Dirty: 0 kB +Private_Clean: 0 kB +Private_Dirty: 0 kB +Referenced: 4 kB +Anonymous: 0 kB +LazyFree: 0 kB +AnonHugePages: 0 kB +ShmemPmdMapped: 0 kB +Shared_Hugetlb: 0 kB +Private_Hugetlb: 0 kB +Swap: 0 kB +SwapPss: 0 kB +Locked: 0 kB +VmFlags: rd ex mr mw me de sd +`) + + // Too big size for Rss + f(`7ffcdf335000-7ffcdf337000 r-xp 00000000 00:00 0 [vdso] +Size: 80 kB +KernelPageSize: 4 kB +MMUPageSize: 4 kB +Rss: 9999999999999999999 kB +Pss: 0 kB +Shared_Clean: 4 kB +Shared_Dirty: 0 kB +Private_Clean: 0 kB +Private_Dirty: 0 kB +Referenced: 4 kB +Anonymous: 0 kB +LazyFree: 0 kB +AnonHugePages: 0 kB +ShmemPmdMapped: 0 kB +Shared_Hugetlb: 0 kB +Private_Hugetlb: 0 kB +Swap: 0 kB +SwapPss: 0 kB +Locked: 0 kB +VmFlags: rd ex mr mw me de sd +`) + + // Partial entry + f(`7ffcdf335000-7ffcdf337000 r-xp 00000000 00:00 0 [vdso] +Size: 80 kB +KernelPageSize: 4 kB +MMUPageSize: 4 kB +Rss: 12 kB +Pss: 0 kB +Shared_Clean: 4 kB +Shared_Dirty: 0 kB +Private_Clean: 0 kB 
+Private_Dirty: 0 kB +Referenced: 4 kB +Anonymous: 0 kB +LazyFree: 0 kB +`) + + // Partial second entry + f(`7ffcdf335000-7ffcdf337000 r-xp 00000000 00:00 0 [vdso] +Size: 80 kB +KernelPageSize: 4 kB +MMUPageSize: 4 kB +Rss: 12 kB +Pss: 0 kB +Shared_Clean: 4 kB +Shared_Dirty: 0 kB +Private_Clean: 0 kB +Private_Dirty: 0 kB +Referenced: 4 kB +Anonymous: 0 kB +LazyFree: 0 kB +AnonHugePages: 0 kB +ShmemPmdMapped: 0 kB +Shared_Hugetlb: 0 kB +Private_Hugetlb: 0 kB +Swap: 0 kB +SwapPss: 0 kB +Locked: 0 kB +VmFlags: rd ex mr mw me de sd +ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall] +Size: 1024 kB +KernelPageSize: 4 kB +MMUPageSize: 4 kB +Rss: 120 kB +`) +} + +func TestGetPageCacheRSSFromSmapsSuccess(t *testing.T) { + s := `7ffcdf335000-7ffcdf337000 r-xp 00000000 00:00 0 [vdso] +Size: 80 kB +KernelPageSize: 4 kB +MMUPageSize: 4 kB +Rss: 12 kB +Pss: 0 kB +Shared_Clean: 4 kB +Shared_Dirty: 0 kB +Private_Clean: 0 kB +Private_Dirty: 0 kB +Referenced: 4 kB +Anonymous: 0 kB +LazyFree: 0 kB +AnonHugePages: 0 kB +ShmemPmdMapped: 0 kB +Shared_Hugetlb: 0 kB +Private_Hugetlb: 0 kB +Swap: 0 kB +SwapPss: 0 kB +Locked: 0 kB +VmFlags: rd ex mr mw me de sd +ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall] +Size: 1024 kB +KernelPageSize: 4 kB +MMUPageSize: 4 kB +Rss: 120 kB +Pss: 0 kB +Shared_Clean: 0 kB +Shared_Dirty: 0 kB +Private_Clean: 0 kB +Private_Dirty: 0 kB +Referenced: 0 kB +Anonymous: 1024 kB +LazyFree: 0 kB +AnonHugePages: 0 kB +ShmemPmdMapped: 0 kB +Shared_Hugetlb: 0 kB +Private_Hugetlb: 0 kB +Swap: 0 kB +SwapPss: 0 kB +Locked: 0 kB +VmFlags: rd ex +` + bb := bytes.NewBufferString(s) + pageCache, anonymous, err := getRSSStatsFromSmaps(bb) + if err != nil { + t.Fatalf("unexpected error: %s", err) + } + expectedPageCache := uint64(12 * 1024) + if pageCache != expectedPageCache { + t.Fatalf("unexpected page cache rss; got %d; want %d", pageCache, expectedPageCache) + } + expectedAnonymous := uint64(120 * 1024) + if anonymous != 
expectedAnonymous { + t.Fatalf("unexpected anonymous rss; got %d; want %d", anonymous, expectedAnonymous) + } +} func TestGetMaxFilesLimit(t *testing.T) { f := func(want uint64, path string, wantErr bool) {