// Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use // of this source code is governed by a BSD-style license that can be found in // the LICENSE file. // Package testkeys provides facilities for generating and comparing // human-readable test keys for use in tests and benchmarks. This package // provides a single Comparer implementation that compares all keys generated // by this package. // // Keys generated by this package may optionally have a 'suffix' encoding an // MVCC timestamp. This suffix is of the form "@". Comparisons on the // suffix are performed using integer value, not the byte representation. package testkeys import ( "bytes" "fmt" "math" "strconv" "strings" "github.com/cockroachdb/pebble/internal/base" "github.com/cockroachdb/pebble/shims/cmp" "golang.org/x/exp/rand" ) const alpha = "abcdefghijklmnopqrstuvwxyz" const suffixDelim = '@' var inverseAlphabet = make(map[byte]int64, len(alpha)) func init() { for i := range alpha { inverseAlphabet[alpha[i]] = int64(i) } } // MaxSuffixLen is the maximum length of a suffix generated by this package. var MaxSuffixLen = 1 + len(fmt.Sprintf("%d", int64(math.MaxInt64))) // Comparer is the comparer for test keys generated by this package. var Comparer = &base.Comparer{ Compare: compare, Equal: func(a, b []byte) bool { return compare(a, b) == 0 }, AbbreviatedKey: func(k []byte) uint64 { return base.DefaultComparer.AbbreviatedKey(k[:split(k)]) }, FormatKey: base.DefaultFormatter, Separator: func(dst, a, b []byte) []byte { ai := split(a) if ai == len(a) { return append(dst, a...) } bi := split(b) if bi == len(b) { return append(dst, a...) } // If the keys are the same just return a. if bytes.Equal(a[:ai], b[:bi]) { return append(dst, a...) } n := len(dst) dst = base.DefaultComparer.Separator(dst, a[:ai], b[:bi]) // Did it pick a separator different than a[:ai] -- if not we can't do better than a. buf := dst[n:] if bytes.Equal(a[:ai], buf) { return append(dst[:n], a...) } // The separator is > a[:ai], so return it return dst }, Successor: func(dst, a []byte) []byte { ai := split(a) if ai == len(a) { return append(dst, a...) } n := len(dst) dst = base.DefaultComparer.Successor(dst, a[:ai]) // Did it pick a successor different than a[:ai] -- if not we can't do better than a. buf := dst[n:] if bytes.Equal(a[:ai], buf) { return append(dst[:n], a...) } // The successor is > a[:ai], so return it. return dst }, ImmediateSuccessor: func(dst, a []byte) []byte { // TODO(jackson): Consider changing this Comparer to only support // representable prefix keys containing characters a-z. ai := split(a) if ai != len(a) { panic("pebble: ImmediateSuccessor invoked with a non-prefix key") } return append(append(dst, a...), 0x00) }, Split: split, Name: "pebble.internal.testkeys", } func compare(a, b []byte) int { ai, bi := split(a), split(b) if v := bytes.Compare(a[:ai], b[:bi]); v != 0 { return v } if len(a[ai:]) == 0 { if len(b[bi:]) == 0 { return 0 } return -1 } else if len(b[bi:]) == 0 { return +1 } return compareTimestamps(a[ai:], b[bi:]) } func split(a []byte) int { i := bytes.LastIndexByte(a, suffixDelim) if i >= 0 { return i } return len(a) } func compareTimestamps(a, b []byte) int { ai, err := parseUintBytes(bytes.TrimPrefix(a, []byte{suffixDelim}), 10, 64) if err != nil { panic(fmt.Sprintf("invalid test mvcc timestamp %q", a)) } bi, err := parseUintBytes(bytes.TrimPrefix(b, []byte{suffixDelim}), 10, 64) if err != nil { panic(fmt.Sprintf("invalid test mvcc timestamp %q", b)) } return cmp.Compare(bi, ai) } // Keyspace describes a finite keyspace of unsuffixed test keys. type Keyspace interface { // Count returns the number of keys that exist within this keyspace. Count() int64 // MaxLen returns the maximum length, in bytes, of a key within this // keyspace. This is only guaranteed to return an upper bound. MaxLen() int // Slice returns the sub-keyspace from index i, inclusive, to index j, // exclusive. The receiver is unmodified. Slice(i, j int64) Keyspace // EveryN returns a key space that includes 1 key for every N keys in the // original keyspace. The receiver is unmodified. EveryN(n int64) Keyspace // key writes the i-th key to the buffer and returns the length. key(buf []byte, i int64) int } // Divvy divides the provided keyspace into N equal portions, containing // disjoint keys evenly distributed across the keyspace. func Divvy(ks Keyspace, n int64) []Keyspace { ret := make([]Keyspace, n) for i := int64(0); i < n; i++ { ret[i] = ks.Slice(i, ks.Count()).EveryN(n) } return ret } // Alpha constructs a keyspace consisting of all keys containing characters a-z, // with at most `maxLength` characters. func Alpha(maxLength int) Keyspace { return alphabet{ alphabet: []byte(alpha), maxLength: maxLength, increment: 1, } } // KeyAt returns the i-th key within the keyspace with a suffix encoding the // timestamp t. func KeyAt(k Keyspace, i int64, t int64) []byte { b := make([]byte, k.MaxLen()+MaxSuffixLen) return b[:WriteKeyAt(b, k, i, t)] } // WriteKeyAt writes the i-th key within the keyspace to the buffer dst, with a // suffix encoding the timestamp t suffix. It returns the number of bytes // written. func WriteKeyAt(dst []byte, k Keyspace, i int64, t int64) int { n := WriteKey(dst, k, i) n += WriteSuffix(dst[n:], t) return n } // Suffix returns the test keys suffix representation of timestamp t. func Suffix(t int64) []byte { b := make([]byte, MaxSuffixLen) return b[:WriteSuffix(b, t)] } // SuffixLen returns the exact length of the given suffix when encoded. func SuffixLen(t int64) int { // Begin at 1 for the '@' delimiter, 1 for a single digit. n := 2 t /= 10 for t > 0 { t /= 10 n++ } return n } // ParseSuffix returns the integer representation of the encoded suffix. func ParseSuffix(s []byte) (int64, error) { return strconv.ParseInt(strings.TrimPrefix(string(s), string(suffixDelim)), 10, 64) } // WriteSuffix writes the test keys suffix representation of timestamp t to dst, // returning the number of bytes written. func WriteSuffix(dst []byte, t int64) int { dst[0] = suffixDelim n := 1 n += len(strconv.AppendInt(dst[n:n], t, 10)) return n } // Key returns the i-th unsuffixed key within the keyspace. func Key(k Keyspace, i int64) []byte { b := make([]byte, k.MaxLen()) return b[:k.key(b, i)] } // WriteKey writes the i-th unsuffixed key within the keyspace to the buffer dst. It // returns the number of bytes written. func WriteKey(dst []byte, k Keyspace, i int64) int { return k.key(dst, i) } type alphabet struct { alphabet []byte maxLength int headSkip int64 tailSkip int64 increment int64 } func (a alphabet) Count() int64 { // Calculate the total number of keys, ignoring the increment. total := keyCount(len(a.alphabet), a.maxLength) - a.headSkip - a.tailSkip // The increment dictates that we take every N keys, where N = a.increment. // Consider a total containing the 5 keys: // a b c d e // ^ ^ ^ // If the increment is 2, this keyspace includes 'a', 'c' and 'e'. After // dividing by the increment, there may be remainder. If there is, there's // one additional key in the alphabet. count := total / a.increment if total%a.increment > 0 { count++ } return count } func (a alphabet) MaxLen() int { return a.maxLength } func (a alphabet) Slice(i, j int64) Keyspace { s := a s.headSkip += i s.tailSkip += a.Count() - j return s } func (a alphabet) EveryN(n int64) Keyspace { s := a s.increment *= n return s } func keyCount(n, l int) int64 { if n == 0 { return 0 } else if n == 1 { return int64(l) } // The number of representable keys in the keyspace is a function of the // length of the alphabet n and the max key length l. Consider how the // number of representable keys grows as l increases: // // l = 1: n // l = 2: n + n^2 // l = 3: n + n^2 + n^3 // ... // Σ i=(1...l) n^i = n*(n^l - 1)/(n-1) return (int64(n) * (int64(math.Pow(float64(n), float64(l))) - 1)) / int64(n-1) } func (a alphabet) key(buf []byte, idx int64) int { // This function generates keys of length 1..maxKeyLength, pulling // characters from the alphabet. The idx determines which key to generate, // generating the i-th lexicographically next key. // // The index to use is advanced by `headSkip`, allowing a keyspace to encode // a subregion of the keyspace. // // Eg, alphabet = `ab`, maxKeyLength = 3: // // aaa aab aba abb baa bab bba bbb // aa ab ba bb // a b // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 // return generateAlphabetKey(buf, a.alphabet, (idx*a.increment)+a.headSkip, keyCount(len(a.alphabet), a.maxLength)) } func generateAlphabetKey(buf, alphabet []byte, i, keyCount int64) int { if keyCount == 0 || i > keyCount || i < 0 { return 0 } // Of the keyCount keys in the generative keyspace, how many are there // starting with a particular character? keysPerCharacter := keyCount / int64(len(alphabet)) // Find the character that the key at index i starts with and set it. characterIdx := i / keysPerCharacter buf[0] = alphabet[characterIdx] // Consider characterIdx = 0, pointing to 'a'. // // aaa aab aba abb baa bab bba bbb // aa ab ba bb // a b // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 // \_________________________/ // |keysPerCharacter| keys // // In our recursive call, we reduce the problem to: // // aaa aab aba abb // aa ab // 0 1 2 3 4 5 // \________________________/ // |keysPerCharacter-1| keys // // In the subproblem, there are keysPerCharacter-1 keys (eliminating the // just 'a' key, plus any keys beginning with any other character). // // The index i is also offset, reduced by the count of keys beginning with // characters earlier in the alphabet (keysPerCharacter*characterIdx) and // the key consisting of just the 'a' (-1). i = i - keysPerCharacter*characterIdx - 1 return 1 + generateAlphabetKey(buf[1:], alphabet, i, keysPerCharacter-1) } // computeAlphabetKeyIndex computes the inverse of generateAlphabetKey, // returning the index of a particular key, given the provided alphabet and max // length of a key. // // len(key) must be ≥ 1. func computeAlphabetKeyIndex(key []byte, alphabet map[byte]int64, n int) int64 { i, ok := alphabet[key[0]] if !ok { panic(fmt.Sprintf("unrecognized alphabet character %v", key[0])) } // How many keys exist that start with the preceding i characters? Each of // the i characters themselves are a key, plus the count of all the keys // with one less character for each. ret := i + i*keyCount(len(alphabet), n-1) if len(key) > 1 { ret += 1 + computeAlphabetKeyIndex(key[1:], alphabet, n-1) } return ret } func abs(a int64) int64 { if a < 0 { return -a } return a } // RandomSeparator returns a random alphabetic key k such that a < k < b, // pulling randomness from the provided random number generator. If dst is // provided and the generated key fits within dst's capacity, the returned slice // will use dst's memory. // // If a prefix P exists such that Prefix(a) < P < Prefix(b), the generated key // will consist of the prefix P appended with the provided suffix. A zero suffix // generates an unsuffixed key. If no such prefix P exists, RandomSeparator will // try to find a key k with either Prefix(a) or Prefix(b) such that a < k < b, // but the generated key will not use the provided suffix. Note that it's // possible that no separator key exists (eg, a='a@2', b='a@1'), in which case // RandomSeparator returns nil. // // If RandomSeparator generates a new prefix, the generated prefix will have // length at most MAX(maxLength, len(Prefix(a)), len(Prefix(b))). // // RandomSeparator panics if a or b fails to decode. func RandomSeparator(dst, a, b []byte, suffix int64, maxLength int, rng *rand.Rand) []byte { if Comparer.Compare(a, b) >= 0 { return nil } // Determine both keys' logical prefixes and suffixes. ai := Comparer.Split(a) bi := Comparer.Split(b) ap := a[:ai] bp := b[:bi] if len(ap) > len(bp) { if len(ap) > maxLength { maxLength = len(ap) } } else if len(bp) >= len(ap) { if len(bp) > maxLength { maxLength = len(bp) } } var as, bs int64 var err error if ai != len(a) { as, err = ParseSuffix(a[ai:]) if err != nil { panic(fmt.Sprintf("failed to parse suffix of %q", a)) } } if bi != len(b) { bs, err = ParseSuffix(b[bi:]) if err != nil { panic(fmt.Sprintf("failed to parse suffix of %q", b)) } } apIdx := computeAlphabetKeyIndex(ap, inverseAlphabet, maxLength) bpIdx := computeAlphabetKeyIndex(bp, inverseAlphabet, maxLength) diff := bpIdx - apIdx generatedIdx := bpIdx if diff > 0 { var add int64 = diff + 1 var start int64 = apIdx if as == 1 { // There's no expressible key with prefix a greater than a@1. So, // exclude ap. start = apIdx + 1 add = diff } if bs == 0 { // No key with prefix b can sort before b@0. We don't want to pick b. add-- } // We're allowing generated id to be in the range [start, start + add - 1]. if start > start+add-1 { return nil } // If we can generate a key which is actually in the middle of apIdx // and bpIdx use it so that we don't have to bother about timestamps. generatedIdx = rng.Int63n(add) + start for diff > 1 && generatedIdx == apIdx || generatedIdx == bpIdx { generatedIdx = rng.Int63n(add) + start } } switch { case generatedIdx == apIdx && generatedIdx == bpIdx: if abs(bs-as) <= 1 { // There's no expressible suffix between the two, and there's no // possible separator key. return nil } // The key b is >= key a, but has the same prefix, so b must have the // smaller timestamp, unless a has timestamp of 0. // // NB: The zero suffix (suffix-less) sorts before all other suffixes, so // any suffix we generate will be greater than it. if as == 0 { // bs > as suffix = bs + rng.Int63n(10) + 1 } else { // bs < as. // Generate suffix in range [bs + 1, as - 1] suffix = bs + 1 + rng.Int63n(as-bs-1) } case generatedIdx == apIdx: // NB: The zero suffix (suffix-less) sorts before all other suffixes, so // any suffix we generate will be greater than it. if as == 0 && suffix == 0 { suffix++ } else if as != 0 && suffix >= as { suffix = rng.Int63n(as) } case generatedIdx == bpIdx: if suffix <= bs { suffix = bs + rng.Int63n(10) + 1 } } if sz := maxLength + SuffixLen(suffix); cap(dst) < sz { dst = make([]byte, sz) } else { dst = dst[:cap(dst)] } var w int if suffix == 0 { w = WriteKey(dst, Alpha(maxLength), generatedIdx) } else { w = WriteKeyAt(dst, Alpha(maxLength), generatedIdx, suffix) } return dst[:w] }