mirror of
https://source.quilibrium.com/quilibrium/ceremonyclient.git
synced 2025-01-23 14:15:18 +00:00
522 lines
16 KiB
Go
522 lines
16 KiB
Go
// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use
|
|
// of this source code is governed by a BSD-style license that can be found in
|
|
// the LICENSE file.
|
|
|
|
package keyspan
|
|
|
|
import (
|
|
"fmt"
|
|
|
|
"github.com/cockroachdb/pebble/internal/base"
|
|
"github.com/cockroachdb/pebble/internal/invariants"
|
|
"github.com/cockroachdb/pebble/internal/manifest"
|
|
)
|
|
|
|
// LevelIter provides a merged view of spans from sstables in a level.
|
|
// It takes advantage of level invariants to only have one sstable span block
|
|
// open at one time, opened using the newIter function passed in.
|
|
type LevelIter struct {
|
|
cmp base.Compare
|
|
// Denotes the kind of key the level iterator should read. If the key type
|
|
// is KeyTypePoint, the level iterator will read range tombstones (which
|
|
// only affect point keys). If the key type is KeyTypeRange, the level
|
|
// iterator will read range keys. It is invalid to configure an iterator
|
|
// with the KeyTypePointAndRange key type.
|
|
//
|
|
// If key type is KeyTypePoint, no straddle spans are emitted between files,
|
|
// and point key bounds are used to find files instead of range key bounds.
|
|
//
|
|
// TODO(bilal): Straddle spans can safely be produced in rangedel mode once
|
|
// we can guarantee that we will never read sstables in a level that split
|
|
// user keys across them. This might be guaranteed in a future release, but
|
|
// as of CockroachDB 22.2 it is not guaranteed, so to be safe disable it when
|
|
// keyType == KeyTypePoint
|
|
keyType manifest.KeyType
|
|
// The LSM level this LevelIter is initialized for. Used in logging.
|
|
level manifest.Level
|
|
// The below fields are used to fill in gaps between adjacent files' range
|
|
// key spaces. This is an optimization to avoid unnecessarily loading files
|
|
// in cases where range keys are sparse and rare. dir is set by every
|
|
// positioning operation, straddleDir is set to dir whenever a straddling
|
|
// Span is synthesized and the last positioning operation returned a
|
|
// synthesized straddle span.
|
|
//
|
|
// Note that when a straddle span is initialized, iterFile is modified to
|
|
// point to the next file in the straddleDir direction. A change of direction
|
|
// on a straddle key therefore necessitates the value of iterFile to be
|
|
// reverted.
|
|
dir int
|
|
straddle Span
|
|
straddleDir int
|
|
// The iter for the current file (iterFile). It is nil under any of the
|
|
// following conditions:
|
|
// - files.Current() == nil
|
|
// - err != nil
|
|
// - straddleDir != 0, in which case iterFile is not nil and points to the
|
|
// next file (in the straddleDir direction).
|
|
// - some other constraint, like the bounds in opts, caused the file at index to not
|
|
// be relevant to the iteration.
|
|
iter FragmentIterator
|
|
// iterFile holds the current file.
|
|
// INVARIANT: iterFile = files.Current()
|
|
iterFile *manifest.FileMetadata
|
|
newIter TableNewSpanIter
|
|
files manifest.LevelIterator
|
|
err error
|
|
|
|
// The options that were passed in.
|
|
tableOpts SpanIterOptions
|
|
|
|
// TODO(bilal): Add InternalIteratorStats.
|
|
}
|
|
|
|
// LevelIter implements the keyspan.FragmentIterator interface.
|
|
var _ FragmentIterator = (*LevelIter)(nil)
|
|
|
|
// NewLevelIter returns a LevelIter.
|
|
func NewLevelIter(
|
|
opts SpanIterOptions,
|
|
cmp base.Compare,
|
|
newIter TableNewSpanIter,
|
|
files manifest.LevelIterator,
|
|
level manifest.Level,
|
|
keyType manifest.KeyType,
|
|
) *LevelIter {
|
|
l := &LevelIter{}
|
|
l.Init(opts, cmp, newIter, files, level, keyType)
|
|
return l
|
|
}
|
|
|
|
// Init initializes a LevelIter.
|
|
func (l *LevelIter) Init(
|
|
opts SpanIterOptions,
|
|
cmp base.Compare,
|
|
newIter TableNewSpanIter,
|
|
files manifest.LevelIterator,
|
|
level manifest.Level,
|
|
keyType manifest.KeyType,
|
|
) {
|
|
l.err = nil
|
|
l.level = level
|
|
l.tableOpts = opts
|
|
l.cmp = cmp
|
|
l.iterFile = nil
|
|
l.newIter = newIter
|
|
switch keyType {
|
|
case manifest.KeyTypePoint:
|
|
l.keyType = keyType
|
|
l.files = files.Filter(keyType)
|
|
case manifest.KeyTypeRange:
|
|
l.keyType = keyType
|
|
l.files = files.Filter(keyType)
|
|
default:
|
|
panic(fmt.Sprintf("unsupported key type: %v", keyType))
|
|
}
|
|
}
|
|
|
|
func (l *LevelIter) findFileGE(key []byte) *manifest.FileMetadata {
|
|
// Find the earliest file whose largest key is >= key.
|
|
//
|
|
// If the earliest file has its largest key == key and that largest key is a
|
|
// range deletion sentinel, we know that we manufactured this sentinel to convert
|
|
// the exclusive range deletion end key into an inclusive key (reminder: [start, end)#seqnum
|
|
// is the form of a range deletion sentinel which can contribute a largest key = end#sentinel).
|
|
// In this case we don't return this as the earliest file since there is nothing actually
|
|
// equal to key in it.
|
|
|
|
m := l.files.SeekGE(l.cmp, key)
|
|
for m != nil {
|
|
largestKey := m.LargestRangeKey
|
|
if l.keyType == manifest.KeyTypePoint {
|
|
largestKey = m.LargestPointKey
|
|
}
|
|
if !largestKey.IsExclusiveSentinel() || l.cmp(largestKey.UserKey, key) != 0 {
|
|
break
|
|
}
|
|
m = l.files.Next()
|
|
}
|
|
return m
|
|
}
|
|
|
|
func (l *LevelIter) findFileLT(key []byte) *manifest.FileMetadata {
|
|
// Find the last file whose smallest key is < key.
|
|
return l.files.SeekLT(l.cmp, key)
|
|
}
|
|
|
|
// loadFileReturnIndicator is the result of LevelIter.loadFile. It tells the
// caller whether a usable file iterator is available after the call and, if
// so, whether it was opened by this call or was already open.
type loadFileReturnIndicator int8

const (
	// noFileLoaded means no iterator is available: the requested file was
	// nil, or an error was recorded while closing or opening an iterator.
	// It is the zero value.
	noFileLoaded loadFileReturnIndicator = iota
	// fileAlreadyLoaded means the requested file was already the current
	// file and its open iterator was kept.
	fileAlreadyLoaded
	// newFileLoaded means a new iterator was opened for the requested file.
	newFileLoaded
)
|
|
|
|
func (l *LevelIter) loadFile(file *manifest.FileMetadata, dir int) loadFileReturnIndicator {
|
|
indicator := noFileLoaded
|
|
if l.iterFile == file {
|
|
if l.err != nil {
|
|
return noFileLoaded
|
|
}
|
|
if l.iter != nil {
|
|
// We are already at the file, but we would need to check for bounds.
|
|
// Set indicator accordingly.
|
|
indicator = fileAlreadyLoaded
|
|
}
|
|
// We were already at file, but don't have an iterator, probably because the file was
|
|
// beyond the iteration bounds. It may still be, but it is also possible that the bounds
|
|
// have changed. We handle that below.
|
|
}
|
|
|
|
// Note that LevelIter.Close() can be called multiple times.
|
|
if indicator != fileAlreadyLoaded {
|
|
if err := l.Close(); err != nil {
|
|
return noFileLoaded
|
|
}
|
|
}
|
|
|
|
l.iterFile = file
|
|
if file == nil {
|
|
return noFileLoaded
|
|
}
|
|
if indicator != fileAlreadyLoaded {
|
|
l.iter, l.err = l.newIter(file, l.tableOpts)
|
|
indicator = newFileLoaded
|
|
}
|
|
if l.err != nil {
|
|
return noFileLoaded
|
|
}
|
|
return indicator
|
|
}
|
|
|
|
// SeekGE implements keyspan.FragmentIterator.
|
|
func (l *LevelIter) SeekGE(key []byte) *Span {
|
|
l.dir = +1
|
|
l.straddle = Span{}
|
|
l.straddleDir = 0
|
|
l.err = nil // clear cached iteration error
|
|
|
|
f := l.findFileGE(key)
|
|
if f != nil && l.keyType == manifest.KeyTypeRange && l.cmp(key, f.SmallestRangeKey.UserKey) < 0 {
|
|
// Peek at the previous file.
|
|
prevFile := l.files.Prev()
|
|
l.files.Next()
|
|
if prevFile != nil {
|
|
// We could unconditionally return an empty span between the seek key and
|
|
// f.SmallestRangeKey, however if this span is to the left of all range
|
|
// keys on this level, it could lead to inconsistent behaviour in relative
|
|
// positioning operations. Consider this example, with a b-c range key:
|
|
//
|
|
// SeekGE(a) -> a-b:{}
|
|
// Next() -> b-c{(#5,RANGEKEYSET,@4,foo)}
|
|
// Prev() -> nil
|
|
//
|
|
// Iterators higher up in the iterator stack rely on this sort of relative
|
|
// positioning consistency.
|
|
//
|
|
// TODO(bilal): Investigate ways to be able to return straddle spans in
|
|
// cases similar to the above, while still retaining correctness.
|
|
// Return a straddling key instead of loading the file.
|
|
l.iterFile = f
|
|
if err := l.Close(); err != nil {
|
|
return l.verify(nil)
|
|
}
|
|
l.straddleDir = +1
|
|
l.straddle = Span{
|
|
Start: prevFile.LargestRangeKey.UserKey,
|
|
End: f.SmallestRangeKey.UserKey,
|
|
Keys: nil,
|
|
}
|
|
return l.verify(&l.straddle)
|
|
}
|
|
}
|
|
loadFileIndicator := l.loadFile(f, +1)
|
|
if loadFileIndicator == noFileLoaded {
|
|
return l.verify(nil)
|
|
}
|
|
if span := l.iter.SeekGE(key); span != nil {
|
|
return l.verify(span)
|
|
}
|
|
return l.skipEmptyFileForward()
|
|
}
|
|
|
|
// SeekLT implements keyspan.FragmentIterator.
|
|
func (l *LevelIter) SeekLT(key []byte) *Span {
|
|
l.dir = -1
|
|
l.straddle = Span{}
|
|
l.straddleDir = 0
|
|
l.err = nil // clear cached iteration error
|
|
|
|
f := l.findFileLT(key)
|
|
if f != nil && l.keyType == manifest.KeyTypeRange && l.cmp(f.LargestRangeKey.UserKey, key) < 0 {
|
|
// Peek at the next file.
|
|
nextFile := l.files.Next()
|
|
l.files.Prev()
|
|
if nextFile != nil {
|
|
// We could unconditionally return an empty span between f.LargestRangeKey
|
|
// and the seek key, however if this span is to the right of all range keys
|
|
// on this level, it could lead to inconsistent behaviour in relative
|
|
// positioning operations. Consider this example, with a b-c range key:
|
|
//
|
|
// SeekLT(d) -> c-d:{}
|
|
// Prev() -> b-c{(#5,RANGEKEYSET,@4,foo)}
|
|
// Next() -> nil
|
|
//
|
|
// Iterators higher up in the iterator stack rely on this sort of relative
|
|
// positioning consistency.
|
|
//
|
|
// TODO(bilal): Investigate ways to be able to return straddle spans in
|
|
// cases similar to the above, while still retaining correctness.
|
|
// Return a straddling key instead of loading the file.
|
|
l.iterFile = f
|
|
if err := l.Close(); err != nil {
|
|
return l.verify(nil)
|
|
}
|
|
l.straddleDir = -1
|
|
l.straddle = Span{
|
|
Start: f.LargestRangeKey.UserKey,
|
|
End: nextFile.SmallestRangeKey.UserKey,
|
|
Keys: nil,
|
|
}
|
|
return l.verify(&l.straddle)
|
|
}
|
|
}
|
|
if l.loadFile(f, -1) == noFileLoaded {
|
|
return l.verify(nil)
|
|
}
|
|
if span := l.iter.SeekLT(key); span != nil {
|
|
return l.verify(span)
|
|
}
|
|
return l.skipEmptyFileBackward()
|
|
}
|
|
|
|
// First implements keyspan.FragmentIterator.
|
|
func (l *LevelIter) First() *Span {
|
|
l.dir = +1
|
|
l.straddle = Span{}
|
|
l.straddleDir = 0
|
|
l.err = nil // clear cached iteration error
|
|
|
|
if l.loadFile(l.files.First(), +1) == noFileLoaded {
|
|
return l.verify(nil)
|
|
}
|
|
if span := l.iter.First(); span != nil {
|
|
return l.verify(span)
|
|
}
|
|
return l.skipEmptyFileForward()
|
|
}
|
|
|
|
// Last implements keyspan.FragmentIterator.
|
|
func (l *LevelIter) Last() *Span {
|
|
l.dir = -1
|
|
l.straddle = Span{}
|
|
l.straddleDir = 0
|
|
l.err = nil // clear cached iteration error
|
|
|
|
if l.loadFile(l.files.Last(), -1) == noFileLoaded {
|
|
return l.verify(nil)
|
|
}
|
|
if span := l.iter.Last(); span != nil {
|
|
return l.verify(span)
|
|
}
|
|
return l.skipEmptyFileBackward()
|
|
}
|
|
|
|
// Next implements keyspan.FragmentIterator.
|
|
func (l *LevelIter) Next() *Span {
|
|
if l.err != nil || (l.iter == nil && l.iterFile == nil && l.dir > 0) {
|
|
return l.verify(nil)
|
|
}
|
|
if l.iter == nil && l.iterFile == nil {
|
|
// l.dir <= 0
|
|
return l.First()
|
|
}
|
|
l.dir = +1
|
|
|
|
if l.iter != nil {
|
|
if span := l.iter.Next(); span != nil {
|
|
return l.verify(span)
|
|
}
|
|
}
|
|
return l.skipEmptyFileForward()
|
|
}
|
|
|
|
// Prev implements keyspan.FragmentIterator.
|
|
func (l *LevelIter) Prev() *Span {
|
|
if l.err != nil || (l.iter == nil && l.iterFile == nil && l.dir < 0) {
|
|
return l.verify(nil)
|
|
}
|
|
if l.iter == nil && l.iterFile == nil {
|
|
// l.dir >= 0
|
|
return l.Last()
|
|
}
|
|
l.dir = -1
|
|
|
|
if l.iter != nil {
|
|
if span := l.iter.Prev(); span != nil {
|
|
return l.verify(span)
|
|
}
|
|
}
|
|
return l.skipEmptyFileBackward()
|
|
}
|
|
|
|
func (l *LevelIter) skipEmptyFileForward() *Span {
|
|
if l.straddleDir == 0 && l.keyType == manifest.KeyTypeRange &&
|
|
l.iterFile != nil && l.iter != nil {
|
|
// We were at a file that had spans. Check if the next file that has
|
|
// spans is not directly adjacent to the current file i.e. there is a
|
|
// gap in the span keyspace between the two files. In that case, synthesize
|
|
// a "straddle span" in l.straddle and return that.
|
|
//
|
|
// Straddle spans are not created in rangedel mode.
|
|
if err := l.Close(); err != nil {
|
|
l.err = err
|
|
return l.verify(nil)
|
|
}
|
|
startKey := l.iterFile.LargestRangeKey.UserKey
|
|
// Resetting l.iterFile without loading the file into l.iter is okay and
|
|
// does not change the logic in loadFile() as long as l.iter is also nil;
|
|
// which it should be due to the Close() call above.
|
|
l.iterFile = l.files.Next()
|
|
if l.iterFile == nil {
|
|
return l.verify(nil)
|
|
}
|
|
endKey := l.iterFile.SmallestRangeKey.UserKey
|
|
if l.cmp(startKey, endKey) < 0 {
|
|
// There is a gap between the two files. Synthesize a straddling span
|
|
// to avoid unnecessarily loading the next file.
|
|
l.straddle = Span{
|
|
Start: startKey,
|
|
End: endKey,
|
|
}
|
|
l.straddleDir = +1
|
|
return l.verify(&l.straddle)
|
|
}
|
|
} else if l.straddleDir < 0 {
|
|
// We were at a straddle key, but are now changing directions. l.iterFile
|
|
// was already moved backward by skipEmptyFileBackward, so advance it
|
|
// forward.
|
|
l.iterFile = l.files.Next()
|
|
}
|
|
l.straddle = Span{}
|
|
l.straddleDir = 0
|
|
var span *Span
|
|
for span.Empty() {
|
|
fileToLoad := l.iterFile
|
|
if l.keyType == manifest.KeyTypePoint {
|
|
// We haven't iterated to the next file yet if we're in point key
|
|
// (rangedel) mode.
|
|
fileToLoad = l.files.Next()
|
|
}
|
|
if l.loadFile(fileToLoad, +1) == noFileLoaded {
|
|
return l.verify(nil)
|
|
}
|
|
span = l.iter.First()
|
|
// In rangedel mode, we can expect to get empty files that we'd need to
|
|
// skip over, but not in range key mode.
|
|
if l.keyType == manifest.KeyTypeRange {
|
|
break
|
|
}
|
|
}
|
|
return l.verify(span)
|
|
}
|
|
|
|
func (l *LevelIter) skipEmptyFileBackward() *Span {
|
|
// We were at a file that had spans. Check if the previous file that has
|
|
// spans is not directly adjacent to the current file i.e. there is a
|
|
// gap in the span keyspace between the two files. In that case, synthesize
|
|
// a "straddle span" in l.straddle and return that.
|
|
//
|
|
// Straddle spans are not created in rangedel mode.
|
|
if l.straddleDir == 0 && l.keyType == manifest.KeyTypeRange &&
|
|
l.iterFile != nil && l.iter != nil {
|
|
if err := l.Close(); err != nil {
|
|
l.err = err
|
|
return l.verify(nil)
|
|
}
|
|
endKey := l.iterFile.SmallestRangeKey.UserKey
|
|
// Resetting l.iterFile without loading the file into l.iter is okay and
|
|
// does not change the logic in loadFile() as long as l.iter is also nil;
|
|
// which it should be due to the Close() call above.
|
|
l.iterFile = l.files.Prev()
|
|
if l.iterFile == nil {
|
|
return l.verify(nil)
|
|
}
|
|
startKey := l.iterFile.LargestRangeKey.UserKey
|
|
if l.cmp(startKey, endKey) < 0 {
|
|
// There is a gap between the two files. Synthesize a straddling span
|
|
// to avoid unnecessarily loading the next file.
|
|
l.straddle = Span{
|
|
Start: startKey,
|
|
End: endKey,
|
|
}
|
|
l.straddleDir = -1
|
|
return l.verify(&l.straddle)
|
|
}
|
|
} else if l.straddleDir > 0 {
|
|
// We were at a straddle key, but are now changing directions. l.iterFile
|
|
// was already advanced forward by skipEmptyFileForward, so move it
|
|
// backward.
|
|
l.iterFile = l.files.Prev()
|
|
}
|
|
l.straddle = Span{}
|
|
l.straddleDir = 0
|
|
var span *Span
|
|
for span.Empty() {
|
|
fileToLoad := l.iterFile
|
|
if l.keyType == manifest.KeyTypePoint {
|
|
fileToLoad = l.files.Prev()
|
|
}
|
|
if l.loadFile(fileToLoad, -1) == noFileLoaded {
|
|
return l.verify(nil)
|
|
}
|
|
span = l.iter.Last()
|
|
// In rangedel mode, we can expect to get empty files that we'd need to
|
|
// skip over, but not in range key mode as the filter on the FileMetadata
|
|
// should guarantee we always get a non-empty file.
|
|
if l.keyType == manifest.KeyTypeRange {
|
|
break
|
|
}
|
|
}
|
|
return l.verify(span)
|
|
}
|
|
|
|
// verify is invoked whenever a span is returned from an iterator positioning
|
|
// method to a caller. During invariant builds, it asserts invariants to the
|
|
// caller.
|
|
func (l *LevelIter) verify(s *Span) *Span {
|
|
// NB: Do not add any logic outside the invariants.Enabled conditional to
|
|
// ensure that verify is always compiled away in production builds.
|
|
if invariants.Enabled {
|
|
if f := l.files.Current(); f != l.iterFile {
|
|
panic(fmt.Sprintf("LevelIter.files.Current (%s) and l.iterFile (%s) diverged",
|
|
f, l.iterFile))
|
|
}
|
|
}
|
|
return s
|
|
}
|
|
|
|
// Error implements keyspan.FragmentIterator.
|
|
func (l *LevelIter) Error() error {
|
|
if l.err != nil || l.iter == nil {
|
|
return l.err
|
|
}
|
|
return l.iter.Error()
|
|
}
|
|
|
|
// Close implements keyspan.FragmentIterator.
|
|
func (l *LevelIter) Close() error {
|
|
if l.iter != nil {
|
|
l.err = l.iter.Close()
|
|
l.iter = nil
|
|
}
|
|
return l.err
|
|
}
|
|
|
|
// String implements keyspan.FragmentIterator.
|
|
func (l *LevelIter) String() string {
|
|
if l.iterFile != nil {
|
|
return fmt.Sprintf("%s: fileNum=%s", l.level, l.iterFile.FileNum)
|
|
}
|
|
return fmt.Sprintf("%s: fileNum=<nil>", l.level)
|
|
}
|