ceremonyclient/pebble/internal/mkbench/split.go

package main

import (
	"cmp"
	"slices"
)

// increment is the step, in ops/sec, between successive candidate split
// points evaluated by findOptimalSplit.
const increment = 50 // ops/sec

// findOptimalSplit computes and returns a value that separates the given pass
// and fail measurements optimally, such that the number of mis-classified
// passes (pass values that fall above the split) and fails (fail values that
// fall below the split) is minimized.
//
// The following gives a visual representation of the problem:
//
//		                     Optimal partition (=550) -----> |
//	                                                         |
//	  Passes:   o          o        o              o o o oo  |
//	  Fails:                         x             x         |x    x  x     x x        x
//	  |---------|---------|---------|---------|---------|----|----|---------|---------|---------|---> x
//	  0        100       200       300       400       500   |   600       700       800       900
//
// The algorithm works by computing the error (i.e. mis-classifications) at
// candidate points along the x-axis, starting from the smallest pass value and
// increasing by the given increment until the largest fail value is exceeded.
// A measurement that sits exactly on a candidate is treated as correctly
// classified. When several candidates share the minimal error, the midpoint
// between the smallest and largest of them is returned.
//
// The input slices are not modified. -1 is returned when no split can be
// computed: either slice is empty, or every pass value is greater than every
// fail value (in which case there are no candidates to evaluate).
func findOptimalSplit(pass, fail []int) int {
	// Not enough data to compute a sensible score.
	if len(pass) == 0 || len(fail) == 0 {
		return -1
	}

	// Work on sorted copies so that the caller's slices are left untouched.
	p, f := slices.Clone(pass), slices.Clone(fail)
	slices.Sort(p)
	slices.Sort(f)

	// Candidates span the smallest pass through the largest fail. If the
	// smallest pass already exceeds the largest fail the span is empty;
	// bail out rather than indexing into an empty candidate list below.
	lo, hi := p[0], f[len(f)-1]
	if lo > hi {
		return -1
	}

	// candidate pairs a split point with its mis-classification count.
	type candidate struct {
		x, score int
	}
	candidates := make([]candidate, 0, (hi-lo)/increment+1)

	// pi counts the passes at or below x (correctly classified); fi counts
	// the fails strictly below x (mis-classified). Both inputs are sorted, so
	// each cursor only ever moves forward as x grows.
	pi, fi := 0, 0
	for x := lo; x <= hi; x += increment {
		for pi < len(p) && p[pi] <= x {
			pi++
		}
		for fi < len(f) && f[fi] < x {
			fi++
		}
		candidates = append(candidates, candidate{x: x, score: (len(p) - pi) + fi})
	}

	// Order candidates by score ascending, tie-breaking by x ascending, so
	// that all best-scoring candidates form a prefix ordered by x.
	slices.SortFunc(candidates, func(a, b candidate) int {
		if v := cmp.Compare(a.score, b.score); v != 0 {
			return v
		}
		return cmp.Compare(a.x, b.x)
	})

	// If more than one candidate achieves the best score, split the
	// difference between the smallest and the largest of them.
	best := candidates[0]
	splitMin, splitMax := best.x, best.x
	for _, c := range candidates[1:] {
		if c.score != best.score {
			break
		}
		splitMax = c.x
	}

	return (splitMin + splitMax) / 2
}
v1.2.0 (#31) 2024-01-03 07:31:42 +00:00			`package main`

			`import (`
			`"cmp"`
			`"slices"`
			`)`

			`const increment = 50 // ops/sec`

			`// findOptimalSplit computes and returns a value that separates the given pass`
			`// and fail measurements optimally, such that the number of mis-classified`
			`// passes (pass values that fall above the split) and fails (fail values that`
			`// fall below the split) is minimized.`
			`//`
			`// The following gives a visual representation of the problem:`
			`//`
			`// Optimal partition (=550) -----> \|`
			`// \|`
			`// Passes: o o o o o o oo \|`
			`// Fails: x x \|x x x x x x`
			`// \|---------\|---------\|---------\|---------\|---------\|----\|----\|---------\|---------\|---------\|---> x`
			`// 0 100 200 300 400 500 \| 600 700 800 900`
			`//`
			`// The algorithm works by computing the error (i.e. mis-classifications) at`
			`// various points along the x-axis, starting from the origin and increasing by`
			`// the given increment.`
			`func findOptimalSplit(pass, fail []int) int {`
			`// Not enough data to compute a sensible score.`
			`if len(pass) == 0 \|\| len(fail) == 0 {`
			`return -1`
			`}`

			`// Maintain counters for the number of incorrectly classified passes and`
			`// fails. All passes are initially incorrect, as we start at 0. Conversely,`
			`// no fails are incorrectly classified, as all scores are >= 0.`
			`pCount, fCount := len(pass), 0`
			`p, f := make([]int, len(pass)), make([]int, len(fail))`
			`copy(p, pass)`
			`copy(f, fail)`

			`// Sort the inputs.`
			`slices.Sort(p)`
			`slices.Sort(f)`

			`// Find the global min and max.`
			`min, max := p[0], f[len(fail)-1]`

			`// Iterate over the range in increments.`
			`var result [][]int`
			`for x := min; x <= max; x = x + increment {`
			`// Reduce the count of incorrect passes as x increases (i.e. fewer pass`
			`// values are incorrect as x increases).`
			`for len(p) > 0 && p[0] <= x {`
			`pCount--`
			`p = p[1:]`
			`}`

			`// Increase the count of incorrect fails as x increases (i.e. more fail`
			`// values are incorrect as x increases).`
			`for len(f) > 0 && f[0] < x {`
			`fCount++`
			`f = f[1:]`
			`}`

			`// Add a (x, score) tuple to result slice.`
			`result = append(result, []int{x, pCount + fCount})`
			`}`

			`// Sort the (x, score) result slice by score ascending. Tie-break by x`
			`// ascending.`
			`slices.SortFunc(result, func(a, b []int) int {`
			`if v := cmp.Compare(a[1], b[1]); v != 0 {`
			`return v`
			`}`
			`return cmp.Compare(a[0], b[0])`
			`})`

			`// If there is more than one interval, split the difference between the min`
			`// and the max.`
			`splitMin, splitMax := result[0][0], result[0][0]`
			`for i := 1; i < len(result); i++ {`
			`if result[i][1] != result[0][1] {`
			`break`
			`}`
			`splitMax = result[i][0]`
			`}`

			`return (splitMin + splitMax) / 2`
			`}`