Switch from log-linear histograms to log-based histograms

Log-based histograms provide lower estimation error for the same number of buckets compared to log-linear histograms.

For example, the current Histogram implementation splits each decimal range (10^n .. 10^(n+1)] into 18 buckets.
These buckets have the following bounds:

- for log-linear histogram: (1 .. 1.5], (1.5 .. 2], (2 .. 2.5], ... (9.5 .. 10]
- for log-based histogram: (1 .. 1.136], (1.136 .. 1.292], ... (8.799 .. 10]

The maximum estimation error for the log-linear histogram is reached in the first bucket of each decimal range and equals 1.5-1=0.5, or 50%.
The maximum estimation error for the log-based histogram is constant across buckets and equals 1.136-1=0.136, or 13.6%.

This means that the log-based histogram improves histogram accuracy by up to 50%/13.6% ≈ 3.7 times when using the same number of buckets.

Further reading - https://linuxczar.net/blog/2020/08/13/histogram-error/
This commit is contained in:
Aliaksandr Valialkin 2021-02-15 14:15:01 +02:00
parent 43691b65a0
commit e7f78fa63c
2 changed files with 117 additions and 213 deletions

View File

@ -9,14 +9,15 @@ import (
)
const (
e10Min = -9
e10Max = 18
decimalMultiplier = 2
bucketSize = 9 * decimalMultiplier
bucketsCount = e10Max - e10Min
decimalPrecision = 1e-12
e10Min = -9
e10Max = 18
bucketsPerDecimal = 18
decimalBucketsCount = e10Max - e10Min
bucketsCount = decimalBucketsCount * bucketsPerDecimal
)
var bucketMultiplier = math.Pow(10, 1.0/bucketsPerDecimal)
// Histogram is a histogram for non-negative values with automatically created buckets.
//
// See https://medium.com/@valyala/improving-histogram-usability-for-prometheus-and-grafana-bc7e5df0e350
@ -48,9 +49,8 @@ type Histogram struct {
// Mu guarantees synchronous update for all the counters and sum.
mu sync.Mutex
buckets [bucketsCount]*histogramBucket
decimalBuckets [decimalBucketsCount]*[bucketsPerDecimal]uint64
zeros uint64
lower uint64
upper uint64
@ -65,15 +65,14 @@ func (h *Histogram) Reset() {
}
func (h *Histogram) resetLocked() {
for _, hb := range h.buckets[:] {
if hb == nil {
for _, db := range h.decimalBuckets[:] {
if db == nil {
continue
}
for offset := range hb.counts[:] {
hb.counts[offset] = 0
for i := range db[:] {
db[i] = 0
}
}
h.zeros = 0
h.lower = 0
h.upper = 0
}
@ -86,31 +85,31 @@ func (h *Histogram) Update(v float64) {
// Skip NaNs and negative values.
return
}
bucketIdx, offset := getBucketIdxAndOffset(v)
bucketIdx := (math.Log10(v) - e10Min) * bucketsPerDecimal
idx := uint(bucketIdx)
if bucketIdx == float64(idx) {
// Edge case for 10^n values, which must go to the lower bucket
// according to Prometheus logic for `le`-based histograms.
idx--
}
decimalBucketIdx := idx / bucketsPerDecimal
offset := idx % bucketsPerDecimal
h.mu.Lock()
h.updateLocked(v, bucketIdx, offset)
h.mu.Unlock()
}
func (h *Histogram) updateLocked(v float64, bucketIdx int, offset uint) {
h.sum += v
if bucketIdx < 0 {
// Special cases for zero, too small or too big value
if offset == 0 {
h.zeros++
} else if offset == 1 {
h.lower++
} else {
h.upper++
h.lower++
} else if bucketIdx >= bucketsCount {
h.upper++
} else {
db := h.decimalBuckets[decimalBucketIdx]
if db == nil {
var b [bucketsPerDecimal]uint64
db = &b
h.decimalBuckets[decimalBucketIdx] = db
}
return
db[offset]++
}
hb := h.buckets[bucketIdx]
if hb == nil {
hb = &histogramBucket{}
h.buckets[bucketIdx] = hb
}
hb.counts[offset]++
h.mu.Unlock()
}
// VisitNonZeroBuckets calls f for all buckets with non-zero counters.
@ -121,38 +120,25 @@ func (h *Histogram) updateLocked(v float64, bucketIdx int, offset uint) {
// with `le` (less or equal) labels.
func (h *Histogram) VisitNonZeroBuckets(f func(vmrange string, count uint64)) {
h.mu.Lock()
h.visitNonZeroBucketsLocked(f)
h.mu.Unlock()
}
func (h *Histogram) visitNonZeroBucketsLocked(f func(vmrange string, count uint64)) {
if h.zeros > 0 {
vmrange := getVMRange(-1, 0)
f(vmrange, h.zeros)
}
if h.lower > 0 {
vmrange := getVMRange(-1, 1)
f(vmrange, h.lower)
f(lowerBucketRange, h.lower)
}
for bucketIdx, hb := range h.buckets[:] {
if hb == nil {
for decimalBucketIdx, db := range h.decimalBuckets[:] {
if db == nil {
continue
}
for offset, count := range hb.counts[:] {
for offset, count := range db[:] {
if count > 0 {
vmrange := getVMRange(bucketIdx, uint(offset))
bucketIdx := decimalBucketIdx*bucketsPerDecimal + offset
vmrange := getVMRange(bucketIdx)
f(vmrange, count)
}
}
}
if h.upper > 0 {
vmrange := getVMRange(-1, 2)
f(vmrange, h.upper)
f(upperBucketRange, h.upper)
}
}
type histogramBucket struct {
counts [bucketSize]uint64
h.mu.Unlock()
}
// NewHistogram creates and returns new histogram with the given name.
@ -193,43 +179,27 @@ func (h *Histogram) UpdateDuration(startTime time.Time) {
h.Update(d)
}
func getVMRange(bucketIdx int, offset uint) string {
func getVMRange(bucketIdx int) string {
bucketRangesOnce.Do(initBucketRanges)
if bucketIdx < 0 {
if offset > 2 {
panic(fmt.Errorf("BUG: offset must be in range [0...2] for negative bucketIdx; got %d", offset))
}
return bucketRanges[offset]
}
idx := 3 + uint(bucketIdx)*bucketSize + offset
return bucketRanges[idx]
return bucketRanges[bucketIdx]
}
func initBucketRanges() {
bucketRanges[0] = "0...0"
bucketRanges[1] = fmt.Sprintf("0...%.1fe%d", 1.0, e10Min)
bucketRanges[2] = fmt.Sprintf("%.1fe%d...+Inf", 1.0, e10Max)
idx := 3
start := fmt.Sprintf("%.1fe%d", 1.0, e10Min)
for bucketIdx := 0; bucketIdx < bucketsCount; bucketIdx++ {
for offset := 0; offset < bucketSize; offset++ {
e10 := e10Min + bucketIdx
m := 1 + float64(offset+1)/decimalMultiplier
if math.Abs(m-10) < decimalPrecision {
m = 1
e10++
}
end := fmt.Sprintf("%.1fe%d", m, e10)
bucketRanges[idx] = start + "..." + end
idx++
start = end
}
v := math.Pow10(e10Min)
start := fmt.Sprintf("%.3e", v)
for i := 0; i < bucketsCount; i++ {
v *= bucketMultiplier
end := fmt.Sprintf("%.3e", v)
bucketRanges[i] = start + "..." + end
start = end
}
}
var (
// 3 additional buckets for zero, lower and upper.
bucketRanges [3 + bucketsCount*bucketSize]string
lowerBucketRange = fmt.Sprintf("0...%.3e", math.Pow10(e10Min))
upperBucketRange = fmt.Sprintf("%.3e...+Inf", math.Pow10(e10Max))
bucketRanges [bucketsCount]string
bucketRangesOnce sync.Once
)
@ -238,21 +208,21 @@ func (h *Histogram) marshalTo(prefix string, w io.Writer) {
h.VisitNonZeroBuckets(func(vmrange string, count uint64) {
tag := fmt.Sprintf("vmrange=%q", vmrange)
metricName := addTag(prefix, tag)
name, filters := splitMetricName(metricName)
fmt.Fprintf(w, "%s_bucket%s %d\n", name, filters, count)
name, labels := splitMetricName(metricName)
fmt.Fprintf(w, "%s_bucket%s %d\n", name, labels, count)
countTotal += count
})
if countTotal == 0 {
return
}
name, filters := splitMetricName(prefix)
name, labels := splitMetricName(prefix)
sum := h.getSum()
if float64(int64(sum)) == sum {
fmt.Fprintf(w, "%s_sum%s %d\n", name, filters, int64(sum))
fmt.Fprintf(w, "%s_sum%s %d\n", name, labels, int64(sum))
} else {
fmt.Fprintf(w, "%s_sum%s %g\n", name, filters, sum)
fmt.Fprintf(w, "%s_sum%s %g\n", name, labels, sum)
}
fmt.Fprintf(w, "%s_count%s %d\n", name, filters, countTotal)
fmt.Fprintf(w, "%s_count%s %d\n", name, labels, countTotal)
}
func (h *Histogram) getSum() float64 {
@ -261,46 +231,3 @@ func (h *Histogram) getSum() float64 {
h.mu.Unlock()
return sum
}
func getBucketIdxAndOffset(v float64) (int, uint) {
if v < 0 {
panic(fmt.Errorf("BUG: v must be positive; got %g", v))
}
if v == 0 {
return -1, 0
}
if math.IsInf(v, 1) {
return -1, 2
}
e10 := int(math.Floor(math.Log10(v)))
bucketIdx := e10 - e10Min
if bucketIdx < 0 {
return -1, 1
}
if bucketIdx >= bucketsCount {
if bucketIdx == bucketsCount && math.Abs(math.Pow10(e10)-v) < decimalPrecision {
// Adjust m to be on par with Prometheus 'le' buckets (aka 'less or equal')
return bucketsCount - 1, bucketSize - 1
}
return -1, 2
}
m := ((v / math.Pow10(e10)) - 1) * decimalMultiplier
offset := int(m)
if offset < 0 {
offset = 0
} else if offset >= bucketSize {
offset = bucketSize - 1
}
if math.Abs(float64(offset)-m) < decimalPrecision {
// Adjust offset to be on par with Prometheus 'le' buckets (aka 'less or equal')
offset--
if offset < 0 {
bucketIdx--
offset = bucketSize - 1
if bucketIdx < 0 {
return -1, 1
}
}
}
return bucketIdx, uint(offset)
}

View File

@ -10,81 +10,21 @@ import (
"time"
)
func TestGetBucketIdxAndOffset(t *testing.T) {
f := func(v float64, bucketIdxExpected int, offsetExpected uint) {
t.Helper()
bucketIdx, offset := getBucketIdxAndOffset(v)
if bucketIdx != bucketIdxExpected {
t.Fatalf("unexpected bucketIdx for %g; got %d; want %d", v, bucketIdx, bucketIdxExpected)
}
if offset != offsetExpected {
t.Fatalf("unexpected offset for %g; got %d; want %d", v, offset, offsetExpected)
}
}
const step = 1.0 / decimalMultiplier
const prec = 2 * decimalPrecision
f(0, -1, 0)
f(math.Pow10(e10Min-10), -1, 1)
f(math.Pow10(e10Min-1), -1, 1)
f(3*math.Pow10(e10Min-1), -1, 1)
f(9*math.Pow10(e10Min-1), -1, 1)
f(9.999*math.Pow10(e10Min-1), -1, 1)
f(math.Pow10(e10Min), -1, 1)
f((1+prec)*math.Pow10(e10Min), 0, 0)
f((1+step)*math.Pow10(e10Min), 0, 0)
f((1+step+prec)*math.Pow10(e10Min), 0, 1)
f((1+2*step+prec)*math.Pow10(e10Min), 0, 2)
f((1+3*step+prec)*math.Pow10(e10Min), 0, 3)
f(math.Pow10(e10Min+1), 0, bucketSize-1)
f((1+prec)*math.Pow10(e10Min+1), 1, 0)
f((1+step)*math.Pow10(e10Min+1), 1, 0)
f((1+step+prec)*math.Pow10(e10Min+1), 1, 1)
f(0.1, -e10Min-2, bucketSize-1)
f((1+prec)*0.1, -e10Min-1, 0)
f((1+step)*0.1, -e10Min-1, 0)
f((1+step+prec)*0.1, -e10Min-1, 1)
f((1+(bucketSize-1)*step)*0.1, -e10Min-1, bucketSize-2)
f((1+(bucketSize-1)*step+prec)*0.1, -e10Min-1, bucketSize-1)
f(math.Pow10(e10Max-2), bucketsCount-3, bucketSize-1)
f((1+prec)*math.Pow10(e10Max-2), bucketsCount-2, 0)
f(math.Pow10(e10Max-1), bucketsCount-2, bucketSize-1)
f((1+prec)*math.Pow10(e10Max-1), bucketsCount-1, 0)
f((1+(bucketSize-1)*step)*math.Pow10(e10Max-1), bucketsCount-1, bucketSize-2)
f((1+(bucketSize-1)*step+prec)*math.Pow10(e10Max-1), bucketsCount-1, bucketSize-1)
f(math.Pow10(e10Max), bucketsCount-1, bucketSize-1)
f((1+prec)*math.Pow10(e10Max), -1, 2)
f((1+3*step+prec)*math.Pow10(e10Max), -1, 2)
f(math.Inf(1), -1, 2)
f(999, 11, 17)
f(1000, 11, 17)
f(1001, 12, 0)
f(1002, 12, 0)
f(1003, 12, 0)
}
func TestGetVMRange(t *testing.T) {
f := func(bucketIdx int, offset uint, vmrangeExpected string) {
f := func(bucketIdx int, vmrangeExpected string) {
t.Helper()
vmrange := getVMRange(bucketIdx, offset)
vmrange := getVMRange(bucketIdx)
if vmrange != vmrangeExpected {
t.Fatalf("unexpected vmrange for bucketIdx=%d, offset=%d; got %s; want %s", bucketIdx, offset, vmrange, vmrangeExpected)
t.Fatalf("unexpected vmrange for bucketIdx=%d; got %s; want %s", bucketIdx, vmrange, vmrangeExpected)
}
}
const step = 1.0 / decimalMultiplier
f(-1, 0, "0...0")
f(-1, 1, fmt.Sprintf("0...1.0e%d", e10Min))
f(-1, 2, fmt.Sprintf("1.0e%d...+Inf", e10Max))
f(0, 0, fmt.Sprintf("1.0e%d...%.1fe%d", e10Min, 1+step, e10Min))
f(0, 1, fmt.Sprintf("%.1fe%d...%.1fe%d", 1+step, e10Min, 1+2*step, e10Min))
f(0, bucketSize-2, fmt.Sprintf("%.1fe%d...%.1fe%d", 1+(bucketSize-2)*step, e10Min, 1+(bucketSize-1)*step, e10Min))
f(0, bucketSize-1, fmt.Sprintf("%.1fe%d...%.1fe%d", 1+(bucketSize-1)*step, e10Min, 1.0, e10Min+1))
f(-e10Min, 0, fmt.Sprintf("%.1fe%d...%.1fe%d", 1.0, 0, 1+step, 0))
f(-e10Min, 1, fmt.Sprintf("%.1fe%d...%.1fe%d", 1+step, 0, 1+2*step, 0))
f(-e10Min, bucketSize-2, fmt.Sprintf("%.1fe%d...%.1fe%d", 1+(bucketSize-2)*step, 0, 1+(bucketSize-1)*step, 0))
f(-e10Min, bucketSize-1, fmt.Sprintf("%.1fe%d...%.1fe%d", 1+(bucketSize-1)*step, 0, 1.0, 1))
f(bucketsCount-1, bucketSize-2, fmt.Sprintf("%.1fe%d...%.1fe%d", 1+(bucketSize-2)*step, e10Max-1, 1+(bucketSize-1)*step, e10Max-1))
f(bucketsCount-1, bucketSize-1, fmt.Sprintf("%.1fe%d...%.1fe%d", 1+(bucketSize-1)*step, e10Max-1, 1.0, e10Max))
f(0, "1.000e-09...1.136e-09")
f(1, "1.136e-09...1.292e-09")
f(bucketsPerDecimal-1, "8.799e-09...1.000e-08")
f(bucketsPerDecimal, "1.000e-08...1.136e-08")
f(bucketsPerDecimal*(-e10Min)-1, "8.799e-01...1.000e+00")
f(bucketsPerDecimal*(-e10Min), "1.000e+00...1.136e+00")
f(bucketsPerDecimal*(e10Max-e10Min)-1, "8.799e+17...1.000e+18")
}
func TestHistogramSerial(t *testing.T) {
@ -104,9 +44,29 @@ func TestHistogramSerial(t *testing.T) {
h.Update(float64(i))
}
// Make sure the histogram prints <prefix>_xbucket on marshalTo call
testMarshalTo(t, h, "prefix", "prefix_bucket{vmrange=\"9.5e1...1.0e2\"} 3\nprefix_bucket{vmrange=\"1.0e2...1.5e2\"} 50\nprefix_bucket{vmrange=\"1.5e2...2.0e2\"} 50\nprefix_bucket{vmrange=\"2.0e2...2.5e2\"} 17\nprefix_sum 18900\nprefix_count 120\n")
testMarshalTo(t, h, ` m{foo="bar"}`, "\t m_bucket{foo=\"bar\",vmrange=\"9.5e1...1.0e2\"} 3\n\t m_bucket{foo=\"bar\",vmrange=\"1.0e2...1.5e2\"} 50\n\t m_bucket{foo=\"bar\",vmrange=\"1.5e2...2.0e2\"} 50\n\t m_bucket{foo=\"bar\",vmrange=\"2.0e2...2.5e2\"} 17\n\t m_sum{foo=\"bar\"} 18900\n\t m_count{foo=\"bar\"} 120\n")
// Make sure the histogram prints <prefix>_bucket on marshalTo call
testMarshalTo(t, h, "prefix", `prefix_bucket{vmrange="8.799e+01...1.000e+02"} 3
prefix_bucket{vmrange="1.000e+02...1.136e+02"} 13
prefix_bucket{vmrange="1.136e+02...1.292e+02"} 16
prefix_bucket{vmrange="1.292e+02...1.468e+02"} 17
prefix_bucket{vmrange="1.468e+02...1.668e+02"} 20
prefix_bucket{vmrange="1.668e+02...1.896e+02"} 23
prefix_bucket{vmrange="1.896e+02...2.154e+02"} 26
prefix_bucket{vmrange="2.154e+02...2.448e+02"} 2
prefix_sum 18900
prefix_count 120
`)
testMarshalTo(t, h, ` m{foo="bar"}`, ` m_bucket{foo="bar",vmrange="8.799e+01...1.000e+02"} 3
m_bucket{foo="bar",vmrange="1.000e+02...1.136e+02"} 13
m_bucket{foo="bar",vmrange="1.136e+02...1.292e+02"} 16
m_bucket{foo="bar",vmrange="1.292e+02...1.468e+02"} 17
m_bucket{foo="bar",vmrange="1.468e+02...1.668e+02"} 20
m_bucket{foo="bar",vmrange="1.668e+02...1.896e+02"} 23
m_bucket{foo="bar",vmrange="1.896e+02...2.154e+02"} 26
m_bucket{foo="bar",vmrange="2.154e+02...2.448e+02"} 2
m_sum{foo="bar"} 18900
m_count{foo="bar"} 120
`)
// Verify Reset
h.Reset()
@ -119,13 +79,13 @@ func TestHistogramSerial(t *testing.T) {
// Verify supported ranges
for e10 := -100; e10 < 100; e10++ {
for offset := 0; offset < bucketSize; offset++ {
m := 1 + float64(offset+1)/decimalMultiplier
for offset := 0; offset < bucketsPerDecimal; offset++ {
m := 1 + math.Pow(bucketMultiplier, float64(offset))
f1 := m * math.Pow10(e10)
h.Update(f1)
f2 := (m + 0.5/decimalMultiplier) * math.Pow10(e10)
f2 := (m + 0.5*bucketMultiplier) * math.Pow10(e10)
h.Update(f2)
f3 := (m + 2*decimalPrecision) * math.Pow10(e10)
f3 := (m + 2*bucketMultiplier) * math.Pow10(e10)
h.Update(f3)
}
}
@ -160,7 +120,16 @@ func TestHistogramConcurrent(t *testing.T) {
if err != nil {
t.Fatal(err)
}
testMarshalTo(t, h, "prefix", "prefix_bucket{vmrange=\"5.5e-1...6.0e-1\"} 5\nprefix_bucket{vmrange=\"6.5e-1...7.0e-1\"} 5\nprefix_bucket{vmrange=\"7.5e-1...8.0e-1\"} 5\nprefix_bucket{vmrange=\"8.5e-1...9.0e-1\"} 5\nprefix_bucket{vmrange=\"9.5e-1...1.0e0\"} 5\nprefix_bucket{vmrange=\"1.0e0...1.5e0\"} 15\nprefix_sum 38\nprefix_count 40\n")
testMarshalTo(t, h, "prefix", `prefix_bucket{vmrange="5.995e-01...6.813e-01"} 5
prefix_bucket{vmrange="6.813e-01...7.743e-01"} 5
prefix_bucket{vmrange="7.743e-01...8.799e-01"} 5
prefix_bucket{vmrange="8.799e-01...1.000e+00"} 10
prefix_bucket{vmrange="1.000e+00...1.136e+00"} 5
prefix_bucket{vmrange="1.136e+00...1.292e+00"} 5
prefix_bucket{vmrange="1.292e+00...1.468e+00"} 5
prefix_sum 38
prefix_count 40
`)
var labels []string
var counts []uint64
@ -168,11 +137,19 @@ func TestHistogramConcurrent(t *testing.T) {
labels = append(labels, label)
counts = append(counts, count)
})
labelsExpected := []string{"5.5e-1...6.0e-1", "6.5e-1...7.0e-1", "7.5e-1...8.0e-1", "8.5e-1...9.0e-1", "9.5e-1...1.0e0", "1.0e0...1.5e0"}
labelsExpected := []string{
"5.995e-01...6.813e-01",
"6.813e-01...7.743e-01",
"7.743e-01...8.799e-01",
"8.799e-01...1.000e+00",
"1.000e+00...1.136e+00",
"1.136e+00...1.292e+00",
"1.292e+00...1.468e+00",
}
if !reflect.DeepEqual(labels, labelsExpected) {
t.Fatalf("unexpected labels; got %v; want %v", labels, labelsExpected)
}
countsExpected := []uint64{5, 5, 5, 5, 5, 15}
countsExpected := []uint64{5, 5, 5, 10, 5, 5, 5}
if !reflect.DeepEqual(counts, countsExpected) {
t.Fatalf("unexpected counts; got %v; want %v", counts, countsExpected)
}
@ -186,7 +163,7 @@ func TestHistogramWithTags(t *testing.T) {
var bb bytes.Buffer
WritePrometheus(&bb, false)
result := bb.String()
namePrefixWithTag := `TestHistogram_bucket{tag="foo",vmrange="1.0e2...1.5e2"} 1` + "\n"
namePrefixWithTag := `TestHistogram_bucket{tag="foo",vmrange="1.136e+02...1.292e+02"} 1` + "\n"
if !strings.Contains(result, namePrefixWithTag) {
t.Fatalf("missing histogram %s in the WritePrometheus output; got\n%s", namePrefixWithTag, result)
}