diff --git a/nekryptology/pkg/core/curves/bls48581_curve.go b/nekryptology/pkg/core/curves/bls48581_curve.go index 6326408..bec397d 100644 --- a/nekryptology/pkg/core/curves/bls48581_curve.go +++ b/nekryptology/pkg/core/curves/bls48581_curve.go @@ -7,6 +7,7 @@ package curves import ( + "arena" "errors" "fmt" "io" @@ -47,9 +48,9 @@ func (s *ScalarBls48581) Random(reader io.Reader) Scalar { func (s *ScalarBls48581) Hash(bytes []byte) Scalar { DST := []byte("BLS_SIG_BLS48581G1_XMD:SHA-512_SVDW_RO_NUL_") u := bls48581.Hash_to_field(ext.MC_SHA2, bls48581.HASH_TYPE, DST, bytes, 2) - u[0].Add(u[1]) - b := u[0].Redc() - b.Mod(bls48581.NewBIGints(bls48581.CURVE_Order)) + u[0].Add(u[1], nil) + b := u[0].Redc(nil) + b.Mod(bls48581.NewBIGints(bls48581.CURVE_Order, nil), nil) return &ScalarBls48581{ Value: b, point: s.point, @@ -58,14 +59,14 @@ func (s *ScalarBls48581) Hash(bytes []byte) Scalar { func (s *ScalarBls48581) Zero() Scalar { return &ScalarBls48581{ - Value: bls48581.NewBIGint(0), + Value: bls48581.NewBIGint(0, nil), point: s.point, } } func (s *ScalarBls48581) One() Scalar { return &ScalarBls48581{ - Value: bls48581.NewBIGint(1), + Value: bls48581.NewBIGint(1, nil), point: s.point, } } @@ -75,7 +76,7 @@ func (s *ScalarBls48581) IsZero() bool { } func (s *ScalarBls48581) IsOne() bool { - t := bls48581.NewBIGint(1) + t := bls48581.NewBIGint(1, nil) t.Sub(s.Value) return t.IsZero() } @@ -94,15 +95,15 @@ func (s *ScalarBls48581) IsEven() bool { func (s *ScalarBls48581) New(value int) Scalar { if value > 0 { - t := bls48581.NewBIGint(value) - t.Mod(bls48581.NewBIGints(bls48581.CURVE_Order)) + t := bls48581.NewBIGint(value, nil) + t.Mod(bls48581.NewBIGints(bls48581.CURVE_Order, nil), nil) return &ScalarBls48581{ Value: t, point: s.point, } } else { - t := bls48581.NewBIGint(-value) - v := bls48581.NewBIGints(bls48581.CURVE_Order) + t := bls48581.NewBIGint(-value, nil) + v := bls48581.NewBIGints(bls48581.CURVE_Order, nil) v.Sub(t) return &ScalarBls48581{ Value: v, @@ 
-121,8 +122,8 @@ func (s *ScalarBls48581) Cmp(rhs Scalar) int { } func (s *ScalarBls48581) Square() Scalar { - sqr := bls48581.NewBIGcopy(s.Value) - sqr = bls48581.Modsqr(sqr, bls48581.NewBIGints(bls48581.CURVE_Order)) + sqr := bls48581.NewBIGcopy(s.Value, nil) + sqr = bls48581.Modsqr(sqr, bls48581.NewBIGints(bls48581.CURVE_Order, nil), nil) return &ScalarBls48581{ Value: sqr, point: s.point, @@ -130,8 +131,13 @@ func (s *ScalarBls48581) Square() Scalar { } func (s *ScalarBls48581) Double() Scalar { - dbl := bls48581.NewBIGcopy(s.Value) - dbl = bls48581.Modmul(dbl, bls48581.NewBIGint(2), bls48581.NewBIGints(bls48581.CURVE_Order)) + dbl := bls48581.NewBIGcopy(s.Value, nil) + dbl = bls48581.Modmul( + dbl, + bls48581.NewBIGint(2, nil), + bls48581.NewBIGints(bls48581.CURVE_Order, nil), + nil, + ) return &ScalarBls48581{ Value: dbl, point: s.point, @@ -139,8 +145,8 @@ func (s *ScalarBls48581) Double() Scalar { } func (s *ScalarBls48581) Invert() (Scalar, error) { - v := bls48581.NewBIGcopy(s.Value) - v.Invmodp(bls48581.NewBIGints(bls48581.CURVE_Order)) + v := bls48581.NewBIGcopy(s.Value, nil) + v.Invmodp(bls48581.NewBIGints(bls48581.CURVE_Order, nil)) if v == nil { return nil, fmt.Errorf("inverse doesn't exist") } @@ -155,9 +161,9 @@ func (s *ScalarBls48581) Sqrt() (Scalar, error) { } func (s *ScalarBls48581) Cube() Scalar { - value := bls48581.NewBIGcopy(s.Value) - value = bls48581.Modsqr(value, bls48581.NewBIGints(bls48581.CURVE_Order)) - value = bls48581.Modmul(value, s.Value, bls48581.NewBIGints(bls48581.CURVE_Order)) + value := bls48581.NewBIGcopy(s.Value, nil) + value = bls48581.Modsqr(value, bls48581.NewBIGints(bls48581.CURVE_Order, nil), nil) + value = bls48581.Modmul(value, s.Value, bls48581.NewBIGints(bls48581.CURVE_Order, nil), nil) return &ScalarBls48581{ Value: value, point: s.point, @@ -167,8 +173,11 @@ func (s *ScalarBls48581) Cube() Scalar { func (s *ScalarBls48581) Add(rhs Scalar) Scalar { r, ok := rhs.(*ScalarBls48581) if ok { - value := 
bls48581.NewBIGcopy(s.Value) - value = bls48581.ModAdd(value, r.Value, bls48581.NewBIGints(bls48581.CURVE_Order)) + mem := arena.NewArena() + defer mem.Free() + value := bls48581.NewBIGcopy(s.Value, mem) + value = bls48581.ModAdd(value, r.Value, bls48581.NewBIGints(bls48581.CURVE_Order, mem), mem) + value = bls48581.NewBIGcopy(value, nil) return &ScalarBls48581{ Value: value, point: s.point, @@ -181,9 +190,12 @@ func (s *ScalarBls48581) Add(rhs Scalar) Scalar { func (s *ScalarBls48581) Sub(rhs Scalar) Scalar { r, ok := rhs.(*ScalarBls48581) if ok { - value := bls48581.NewBIGcopy(r.Value) - value = bls48581.Modneg(value, bls48581.NewBIGints(bls48581.CURVE_Order)) - value = bls48581.ModAdd(value, s.Value, bls48581.NewBIGints(bls48581.CURVE_Order)) + mem := arena.NewArena() + defer mem.Free() + value := bls48581.NewBIGcopy(r.Value, mem) + value = bls48581.Modneg(value, bls48581.NewBIGints(bls48581.CURVE_Order, mem), mem) + value = bls48581.ModAdd(value, s.Value, bls48581.NewBIGints(bls48581.CURVE_Order, mem), mem) + value = bls48581.NewBIGcopy(value, nil) return &ScalarBls48581{ Value: value, point: s.point, @@ -196,8 +208,11 @@ func (s *ScalarBls48581) Sub(rhs Scalar) Scalar { func (s *ScalarBls48581) Mul(rhs Scalar) Scalar { r, ok := rhs.(*ScalarBls48581) if ok { - value := bls48581.NewBIGcopy(s.Value) - value = bls48581.Modmul(value, r.Value, bls48581.NewBIGints(bls48581.CURVE_Order)) + mem := arena.NewArena() + defer mem.Free() + value := bls48581.NewBIGcopy(s.Value, mem) + value = bls48581.Modmul(value, r.Value, bls48581.NewBIGints(bls48581.CURVE_Order, mem), mem) + value = bls48581.NewBIGcopy(value, nil) return &ScalarBls48581{ Value: value, point: s.point, @@ -214,9 +229,12 @@ func (s *ScalarBls48581) MulAdd(y, z Scalar) Scalar { func (s *ScalarBls48581) Div(rhs Scalar) Scalar { r, ok := rhs.(*ScalarBls48581) if ok { - value := bls48581.NewBIGcopy(r.Value) - value.Invmodp(bls48581.NewBIGints(bls48581.CURVE_Order)) - value = bls48581.Modmul(value, s.Value, 
bls48581.NewBIGints(bls48581.CURVE_Order)) + mem := arena.NewArena() + defer mem.Free() + value := bls48581.NewBIGcopy(r.Value, mem) + value.Invmodp(bls48581.NewBIGints(bls48581.CURVE_Order, mem)) + value = bls48581.Modmul(value, s.Value, bls48581.NewBIGints(bls48581.CURVE_Order, mem), mem) + value = bls48581.NewBIGcopy(value, nil) return &ScalarBls48581{ Value: value, point: s.point, @@ -227,8 +245,11 @@ func (s *ScalarBls48581) Div(rhs Scalar) Scalar { } func (s *ScalarBls48581) Neg() Scalar { - value := bls48581.NewBIGcopy(s.Value) - value = bls48581.Modneg(value, bls48581.NewBIGints(bls48581.CURVE_Order)) + mem := arena.NewArena() + defer mem.Free() + value := bls48581.NewBIGcopy(s.Value, mem) + value = bls48581.Modneg(value, bls48581.NewBIGints(bls48581.CURVE_Order, mem), mem) + value = bls48581.NewBIGcopy(value, nil) return &ScalarBls48581{ Value: value, point: s.point, @@ -244,7 +265,7 @@ func (s *ScalarBls48581) SetBigInt(v *big.Int) (Scalar, error) { copy(t[bls48581.MODBYTES-uint(len(b)):], b) i := bls48581.FromBytes(t) - i.Mod(bls48581.NewBIGints(bls48581.CURVE_Order)) + i.Mod(bls48581.NewBIGints(bls48581.CURVE_Order, nil), nil) return &ScalarBls48581{ Value: i, point: s.point, @@ -298,7 +319,7 @@ func (s *ScalarBls48581) Point() Point { } func (s *ScalarBls48581) Clone() Scalar { - value := bls48581.NewBIGcopy(s.Value) + value := bls48581.NewBIGcopy(s.Value, nil) return &ScalarBls48581{ Value: value, point: s.point, @@ -306,7 +327,7 @@ func (s *ScalarBls48581) Clone() Scalar { } func (s *ScalarBls48581) SetPoint(p Point) PairingScalar { - value := bls48581.NewBIGcopy(s.Value) + value := bls48581.NewBIGcopy(s.Value, nil) return &ScalarBls48581{ Value: value, point: p, @@ -314,7 +335,7 @@ func (s *ScalarBls48581) SetPoint(p Point) PairingScalar { } func (s *ScalarBls48581) Order() *big.Int { - b := bls48581.NewBIGints(bls48581.CURVE_Order) + b := bls48581.NewBIGints(bls48581.CURVE_Order, nil) bytes := make([]byte, bls48581.MODBYTES) b.ToBytes(bytes) return 
new(big.Int).SetBytes(bytes) @@ -369,7 +390,7 @@ func (p *PointBls48581G1) Hash(bytes []byte) Point { func (p *PointBls48581G1) Identity() Point { g1 := bls48581.ECP_generator() - g1 = g1.Mul(bls48581.NewBIGint(0)) + g1 = g1.Mul(bls48581.NewBIGint(0, nil), nil, nil) return &PointBls48581G1{ Value: g1, } @@ -384,7 +405,7 @@ func (p *PointBls48581G1) Generator() Point { } func (p *PointBls48581G1) IsIdentity() bool { - return p.Value.Is_infinity() + return p.Value.Is_infinity(nil) } func (p *PointBls48581G1) IsNegative() bool { @@ -395,18 +416,18 @@ func (p *PointBls48581G1) IsNegative() bool { } func (p *PointBls48581G1) IsOnCurve() bool { - return bls48581.G1member(p.Value) + return bls48581.G1member(p.Value, nil) } func (p *PointBls48581G1) Double() Point { - v := bls48581.NewECP() + v := bls48581.NewECP(nil) v.Copy(p.Value) - v.Dbl() + v.Dbl(nil) return &PointBls48581G1{v} } func (p *PointBls48581G1) Scalar() Scalar { - value := bls48581.NewBIG() + value := bls48581.NewBIG(nil) return &ScalarBls48581{ Value: value, point: new(PointBls48581G1), @@ -414,9 +435,9 @@ func (p *PointBls48581G1) Scalar() Scalar { } func (p *PointBls48581G1) Neg() Point { - v := bls48581.NewECP() + v := bls48581.NewECP(nil) v.Copy(p.Value) - v.Neg() + v.Neg(nil) return &PointBls48581G1{v} } @@ -426,9 +447,9 @@ func (p *PointBls48581G1) Add(rhs Point) Point { } r, ok := rhs.(*PointBls48581G1) if ok { - v := bls48581.NewECP() + v := bls48581.NewECP(nil) v.Copy(p.Value) - v.Add(r.Value) + v.Add(r.Value, nil) return &PointBls48581G1{v} } else { return nil @@ -441,9 +462,9 @@ func (p *PointBls48581G1) Sub(rhs Point) Point { } r, ok := rhs.(*PointBls48581G1) if ok { - v := bls48581.NewECP() + v := bls48581.NewECP(nil) v.Copy(p.Value) - v.Sub(r.Value) + v.Sub(r.Value, nil) return &PointBls48581G1{v} } else { return nil @@ -456,9 +477,11 @@ func (p *PointBls48581G1) Mul(rhs Scalar) Point { } r, ok := rhs.(*ScalarBls48581) if ok { - v := bls48581.NewECP() + mem := arena.NewArena() + defer 
mem.Free() + v := bls48581.NewECP(mem) v.Copy(p.Value) - v = v.Mul(r.Value) + v = v.Mul(r.Value, nil, mem) return &PointBls48581G1{v} } else { return nil @@ -481,7 +504,7 @@ func (p *PointBls48581G1) Set(x, y *big.Int) (Point, error) { y.FillBytes(yBytes) xBig := bls48581.FromBytes(xBytes) yBig := bls48581.FromBytes(yBytes) - v := bls48581.NewECPbigs(xBig, yBig) + v := bls48581.NewECPbigs(xBig, yBig, nil) if v == nil { return nil, fmt.Errorf("invalid coordinates") } @@ -504,7 +527,7 @@ func (p *PointBls48581G1) FromAffineCompressed(bytes []byte) (Point, error) { var b [bls48581.MODBYTES + 1]byte copy(b[:], bytes) value := bls48581.ECP_fromBytes(b[:]) - if value == nil || value.Is_infinity() { + if value == nil || value.Is_infinity(nil) { return nil, errors.New("could not decode") } return &PointBls48581G1{value}, nil @@ -514,7 +537,7 @@ func (p *PointBls48581G1) FromAffineUncompressed(bytes []byte) (Point, error) { var b [bls48581.MODBYTES*2 + 1]byte copy(b[:], bytes) value := bls48581.ECP_fromBytes(b[:]) - if value == nil || value.Is_infinity() { + if value == nil || value.Is_infinity(nil) { return nil, errors.New("could not decode") } return &PointBls48581G1{value}, nil @@ -541,8 +564,10 @@ func (p *PointBls48581G1) SumOfProducts(points []Point, scalars []Scalar) Point } nScalars[i] = s.Value } - value := bls48581.ECP_muln(len(points), nPoints, nScalars) - if value == nil || value.Is_infinity() { + mem := arena.NewArena() + defer mem.Free() + value := bls48581.ECP_muln(len(points), nPoints, nScalars, mem) + if value == nil || value.Is_infinity(mem) { return nil } return &PointBls48581G1{value} @@ -563,77 +588,60 @@ func (p *PointBls48581G1) Pairing(rhs PairingPoint) Scalar { return &ScalarBls48581Gt{pair} } +func (p *PointBls48581G1) Ate2Pairing( + rhs *PointBls48581G2, + lhs2 *PointBls48581G1, + rhs2 *PointBls48581G2, +) Scalar { + ate2 := bls48581.Ate2(rhs.Value, p.Value, rhs2.Value, lhs2.Value) + + return &ScalarBls48581Gt{ate2} +} + func (p *PointBls48581G1) 
MultiPairing(points ...PairingPoint) Scalar { return bls48multiPairing(points...) } func (p *PointBls48581G1) X() *big.Int { bytes := make([]byte, bls48581.MODBYTES) - p.Value.GetX().ToBytes(bytes[:]) + p.Value.GetX(nil).ToBytes(bytes[:]) return new(big.Int).SetBytes(bytes) } func (p *PointBls48581G1) Y() *big.Int { bytes := make([]byte, bls48581.MODBYTES) - p.Value.GetY().ToBytes(bytes[:]) + p.Value.GetY(nil).ToBytes(bytes[:]) return new(big.Int).SetBytes(bytes) } func (p *PointBls48581G1) Modulus() *big.Int { - b := bls48581.NewBIGints(bls48581.Modulus) + b := bls48581.NewBIGints(bls48581.Modulus, nil) bytes := make([]byte, bls48581.MODBYTES) b.ToBytes(bytes) return new(big.Int).SetBytes(bytes) } func (p *PointBls48581G1) MarshalBinary() ([]byte, error) { - return pointMarshalBinary(p) + return nil, nil } func (p *PointBls48581G1) UnmarshalBinary(input []byte) error { - pt, err := pointUnmarshalBinary(input) - if err != nil { - return err - } - ppt, ok := pt.(*PointBls48581G1) - if !ok { - return fmt.Errorf("invalid point") - } - p.Value = ppt.Value return nil } func (p *PointBls48581G1) MarshalText() ([]byte, error) { - return pointMarshalText(p) + return nil, nil } func (p *PointBls48581G1) UnmarshalText(input []byte) error { - pt, err := pointUnmarshalText(input) - if err != nil { - return err - } - ppt, ok := pt.(*PointBls48581G1) - if !ok { - return fmt.Errorf("invalid point") - } - p.Value = ppt.Value return nil } func (p *PointBls48581G1) MarshalJSON() ([]byte, error) { - return pointMarshalJson(p) + return nil, nil } func (p *PointBls48581G1) UnmarshalJSON(input []byte) error { - pt, err := pointUnmarshalJson(input) - if err != nil { - return err - } - P, ok := pt.(*PointBls48581G1) - if !ok { - return fmt.Errorf("invalid type") - } - p.Value = P.Value return nil } @@ -646,15 +654,15 @@ func (p *PointBls48581G2) Random(reader io.Reader) Point { func (p *PointBls48581G2) Hash(bytes []byte) Point { DST := 
[]byte("BLS_SIG_BLS48581G2_XMD:SHA-512_SVDW_RO_NUL_") u := bls48581.Hash_to_field(ext.MC_SHA2, bls48581.HASH_TYPE, DST, bytes, 2) - u[0].Add(u[1]) - fp8 := bls48581.NewFP8fp(u[0]) + u[0].Add(u[1], nil) + fp8 := bls48581.NewFP8fp(u[0], nil) v := bls48581.ECP8_map2point(fp8) return &PointBls48581G2{v} } func (p *PointBls48581G2) Identity() Point { g2 := bls48581.ECP8_generator() - g2 = g2.Mul(bls48581.NewBIGint(0)) + g2 = g2.Mul(bls48581.NewBIGint(0, nil), nil) return &PointBls48581G2{ Value: g2, } @@ -669,7 +677,7 @@ func (p *PointBls48581G2) Generator() Point { } func (p *PointBls48581G2) IsIdentity() bool { - return p.Value.Is_infinity() + return p.Value.Is_infinity(nil) } func (p *PointBls48581G2) IsNegative() bool { @@ -680,18 +688,18 @@ func (p *PointBls48581G2) IsNegative() bool { } func (p *PointBls48581G2) IsOnCurve() bool { - return bls48581.G2member(p.Value) + return bls48581.G2member(p.Value, nil) } func (p *PointBls48581G2) Double() Point { - v := bls48581.NewECP8() + v := bls48581.NewECP8(nil) v.Copy(p.Value) - v.Dbl() + v.Dbl(nil) return &PointBls48581G2{v} } func (p *PointBls48581G2) Scalar() Scalar { - value := bls48581.NewBIG() + value := bls48581.NewBIG(nil) return &ScalarBls48581{ Value: value, point: new(PointBls48581G2), @@ -699,9 +707,9 @@ func (p *PointBls48581G2) Scalar() Scalar { } func (p *PointBls48581G2) Neg() Point { - v := bls48581.NewECP8() + v := bls48581.NewECP8(nil) v.Copy(p.Value) - v.Neg() + v.Neg(nil) return &PointBls48581G2{v} } @@ -711,9 +719,9 @@ func (p *PointBls48581G2) Add(rhs Point) Point { } r, ok := rhs.(*PointBls48581G2) if ok { - v := bls48581.NewECP8() + v := bls48581.NewECP8(nil) v.Copy(p.Value) - v.Add(r.Value) + v.Add(r.Value, nil) return &PointBls48581G2{v} } else { return nil @@ -726,9 +734,9 @@ func (p *PointBls48581G2) Sub(rhs Point) Point { } r, ok := rhs.(*PointBls48581G2) if ok { - v := bls48581.NewECP8() + v := bls48581.NewECP8(nil) v.Copy(p.Value) - v.Sub(r.Value) + v.Sub(r.Value, nil) return 
&PointBls48581G2{v} } else { return nil @@ -741,11 +749,11 @@ func (p *PointBls48581G2) Mul(rhs Scalar) Point { } r, ok := rhs.(*ScalarBls48581) if ok { - v := bls48581.NewECP8() + mem := arena.NewArena() + defer mem.Free() + v := bls48581.NewECP8(nil) v.Copy(p.Value) - bytes := make([]byte, bls48581.MODBYTES) - r.Value.ToBytes(bytes) - v = v.Mul(bls48581.FromBytes(bytes)) + v = v.Mul(r.Value, mem) return &PointBls48581G2{v} } else { return nil @@ -768,8 +776,8 @@ func (p *PointBls48581G2) Set(x, y *big.Int) (Point, error) { y.FillBytes(yBytes) xBig := bls48581.FP8_fromBytes(xBytes) yBig := bls48581.FP8_fromBytes(yBytes) - v := bls48581.NewECP8fp8s(xBig, yBig) - if v == nil || v.Is_infinity() { + v := bls48581.NewECP8fp8s(xBig, yBig, nil) + if v == nil || v.Is_infinity(nil) { return nil, fmt.Errorf("invalid coordinates") } return &PointBls48581G2{v}, nil @@ -791,7 +799,7 @@ func (p *PointBls48581G2) FromAffineCompressed(bytes []byte) (Point, error) { var b [bls48581.MODBYTES*8 + 1]byte copy(b[:], bytes) value := bls48581.ECP8_fromBytes(b[:]) - if value == nil || value.Is_infinity() { + if value == nil || value.Is_infinity(nil) { return nil, errors.New("could not decode") } return &PointBls48581G2{value}, nil @@ -801,7 +809,7 @@ func (p *PointBls48581G2) FromAffineUncompressed(bytes []byte) (Point, error) { var b [bls48581.MODBYTES*16 + 1]byte copy(b[:], bytes) value := bls48581.ECP8_fromBytes(b[:]) - if value == nil || value.Is_infinity() { + if value == nil || value.Is_infinity(nil) { return nil, errors.New("could not decode") } return &PointBls48581G2{value}, nil @@ -828,8 +836,8 @@ func (p *PointBls48581G2) SumOfProducts(points []Point, scalars []Scalar) Point } nScalars[i] = s.Value } - value := bls48581.Mul16(nPoints, nScalars) - if value == nil || value.Is_infinity() { + value := bls48581.Mul16(nPoints, nScalars, nil) + if value == nil || value.Is_infinity(nil) { return nil } return &PointBls48581G2{value} @@ -855,74 +863,47 @@ func (p *PointBls48581G2) 
MultiPairing(points ...PairingPoint) Scalar { } func (p *PointBls48581G2) X() *big.Int { - x := p.Value.GetX() + x := p.Value.GetX(nil) bytes := make([]byte, 8*bls48581.MODBYTES) x.ToBytes(bytes) return new(big.Int).SetBytes(bytes) } func (p *PointBls48581G2) Y() *big.Int { - y := p.Value.GetY() + y := p.Value.GetY(nil) bytes := make([]byte, 8*bls48581.MODBYTES) y.ToBytes(bytes) return new(big.Int).SetBytes(bytes) } func (p *PointBls48581G2) Modulus() *big.Int { - b := bls48581.NewBIGints(bls48581.Modulus) + b := bls48581.NewBIGints(bls48581.Modulus, nil) bytes := make([]byte, bls48581.MODBYTES) b.ToBytes(bytes) return new(big.Int).SetBytes(bytes) } func (p *PointBls48581G2) MarshalBinary() ([]byte, error) { - return pointMarshalBinary(p) + return nil, nil } func (p *PointBls48581G2) UnmarshalBinary(input []byte) error { - pt, err := pointUnmarshalBinary(input) - if err != nil { - return err - } - ppt, ok := pt.(*PointBls48581G2) - if !ok { - return fmt.Errorf("invalid point") - } - p.Value = ppt.Value return nil } func (p *PointBls48581G2) MarshalText() ([]byte, error) { - return pointMarshalText(p) + return nil, nil } func (p *PointBls48581G2) UnmarshalText(input []byte) error { - pt, err := pointUnmarshalText(input) - if err != nil { - return err - } - ppt, ok := pt.(*PointBls48581G2) - if !ok { - return fmt.Errorf("invalid point") - } - p.Value = ppt.Value return nil } func (p *PointBls48581G2) MarshalJSON() ([]byte, error) { - return pointMarshalJson(p) + return nil, nil } func (p *PointBls48581G2) UnmarshalJSON(input []byte) error { - pt, err := pointUnmarshalJson(input) - if err != nil { - return err - } - P, ok := pt.(*PointBls48581G2) - if !ok { - return fmt.Errorf("invalid type") - } - p.Value = P.Value return nil } @@ -931,21 +912,25 @@ func bls48multiPairing(points ...PairingPoint) Scalar { return nil } valid := true - r := bls48581.Initmp() + mem := arena.NewArena() + defer mem.Free() + r := bls48581.Initmp(mem) for i := 0; i < len(points); i += 2 { 
pt1, ok := points[i].(*PointBls48581G1) valid = valid && ok pt2, ok := points[i+1].(*PointBls48581G2) valid = valid && ok if valid { - bls48581.Another(r, pt2.Value, pt1.Value) + inner := arena.NewArena() + bls48581.Another(r, pt2.Value, pt1.Value, inner) + inner.Free() } } if !valid { return nil } - v := bls48581.Miller(r) + v := bls48581.Miller(r, mem) v = bls48581.Fexp(v) return &ScalarBls48581Gt{v} } @@ -973,15 +958,15 @@ func (s *ScalarBls48581Gt) Hash(bytes []byte) Scalar { } func (s *ScalarBls48581Gt) Zero() Scalar { - return &ScalarBls48581Gt{bls48581.NewFP48int(0)} + return &ScalarBls48581Gt{bls48581.NewFP48int(0, nil)} } func (s *ScalarBls48581Gt) One() Scalar { - return &ScalarBls48581Gt{bls48581.NewFP48int(1)} + return &ScalarBls48581Gt{bls48581.NewFP48int(1, nil)} } func (s *ScalarBls48581Gt) IsZero() bool { - return s.Value.IsZero() + return s.Value.IsZero(nil) } func (s *ScalarBls48581Gt) IsOne() bool { @@ -1034,7 +1019,7 @@ func (s *ScalarBls48581Gt) IsEven() bool { } func (s *ScalarBls48581Gt) New(input int) Scalar { - fp := bls48581.NewFP48int(input) + fp := bls48581.NewFP48int(input, nil) return &ScalarBls48581Gt{fp} } @@ -1048,20 +1033,20 @@ func (s *ScalarBls48581Gt) Cmp(rhs Scalar) int { } func (s *ScalarBls48581Gt) Square() Scalar { - v := bls48581.NewFP48copy(s.Value) - v.Sqr() + v := bls48581.NewFP48copy(s.Value, nil) + v.Sqr(nil) return &ScalarBls48581Gt{v} } func (s *ScalarBls48581Gt) Double() Scalar { - v := bls48581.NewFP48copy(s.Value) - v.Mul(bls48581.NewFP48int(2)) + v := bls48581.NewFP48copy(s.Value, nil) + v.Mul(bls48581.NewFP48int(2, nil), nil) return &ScalarBls48581Gt{v} } func (s *ScalarBls48581Gt) Invert() (Scalar, error) { - v := bls48581.NewFP48copy(s.Value) - v.Invert() + v := bls48581.NewFP48copy(s.Value, nil) + v.Invert(nil) if v == nil { return nil, fmt.Errorf("not invertible") } @@ -1074,9 +1059,9 @@ func (s *ScalarBls48581Gt) Sqrt() (Scalar, error) { } func (s *ScalarBls48581Gt) Cube() Scalar { - v := 
bls48581.NewFP48copy(s.Value) - v.Sqr() - v.Mul(s.Value) + v := bls48581.NewFP48copy(s.Value, nil) + v.Sqr(nil) + v.Mul(s.Value, nil) return &ScalarBls48581Gt{v} } @@ -1093,8 +1078,8 @@ func (s *ScalarBls48581Gt) Sub(rhs Scalar) Scalar { func (s *ScalarBls48581Gt) Mul(rhs Scalar) Scalar { r, ok := rhs.(*ScalarBls48581Gt) if ok { - v := bls48581.NewFP48copy(s.Value) - v.Mul(r.Value) + v := bls48581.NewFP48copy(s.Value, nil) + v.Mul(r.Value, nil) return &ScalarBls48581Gt{v} } else { return nil @@ -1108,9 +1093,9 @@ func (s *ScalarBls48581Gt) MulAdd(y, z Scalar) Scalar { func (s *ScalarBls48581Gt) Div(rhs Scalar) Scalar { r, ok := rhs.(*ScalarBls48581Gt) if ok { - v := bls48581.NewFP48copy(r.Value) - v.Invert() - v.Mul(s.Value) + v := bls48581.NewFP48copy(r.Value, nil) + v.Invert(nil) + v.Mul(s.Value, nil) return &ScalarBls48581Gt{v} } else { return nil @@ -1160,7 +1145,7 @@ func (s *ScalarBls48581Gt) SetBytesWide(bytes []byte) (Scalar, error) { } func (s *ScalarBls48581Gt) Clone() Scalar { - fp := bls48581.NewFP48copy(s.Value) + fp := bls48581.NewFP48copy(s.Value, nil) return &ScalarBls48581Gt{ Value: fp, } diff --git a/nekryptology/pkg/core/curves/bls48581_curve_test.go b/nekryptology/pkg/core/curves/bls48581_curve_test.go index d7540c5..72e24f3 100644 --- a/nekryptology/pkg/core/curves/bls48581_curve_test.go +++ b/nekryptology/pkg/core/curves/bls48581_curve_test.go @@ -78,9 +78,9 @@ func TestScalarBls48581G1Invert(t *testing.T) { nine := bls48581G1.Scalar.New(9) actual, _ := nine.Invert() sa, _ := actual.(*ScalarBls48581) - expected, err := bls48581G1.Scalar.SetBigInt(bhex("ab22a52d6e7108e9eabb0e17e8139cf4b9392413a05486ec3dcef3b90bea3db988c1478b9ec2b4f1382ab890f18c0c9a0f85d504cc493f9b79f8c84e41d01ae5070000000000000000")) + expected, err := bls48581G1.Scalar.SetBigInt(bhex("000000000000000007e51ad0414ec8f8799b3f49cc04d5850f9a0c8cf190b82a38f1b4c29e8b47c188b93dea0bb9f3ce3dec8654a0132439b9f49c13e8170ebbeae908716e2da522ab")) require.NoError(t, err) - require.Equal(t, 
sa.Cmp(expected), 0) + require.Equal(t, sa.Value.ToString(), expected.(*ScalarBls48581).Value.ToString()) } func TestScalarBls48581G1Add(t *testing.T) { @@ -91,11 +91,11 @@ func TestScalarBls48581G1Add(t *testing.T) { require.NotNil(t, fifteen) expected := bls48581G1.Scalar.New(15) require.Equal(t, expected.Cmp(fifteen), 0) - qq := bls48581.NewBIGints(bls48581.CURVE_Order) - qq.Sub(bls48581.NewBIGint(3)) + qq := bls48581.NewBIGints(bls48581.CURVE_Order, nil) + qq.Sub(bls48581.NewBIGint(3, nil)) upper := &ScalarBls48581{ - Value: bls48581.NewBIGcopy(qq), + Value: bls48581.NewBIGcopy(qq, nil), } actual := upper.Add(nine) require.NotNil(t, actual) @@ -106,8 +106,8 @@ func TestScalarBls48581G1Sub(t *testing.T) { bls48581G1 := BLS48581G1() nine := bls48581G1.Scalar.New(9) six := bls48581G1.Scalar.New(6) - n := bls48581.NewFPbig(bls48581.NewBIGints(bls48581.CURVE_Order)) - n.Sub(bls48581.NewFPint(3)) + n := bls48581.NewFPbig(bls48581.NewBIGints(bls48581.CURVE_Order, nil), nil) + n.Sub(bls48581.NewFPint(3, nil), nil) expected := bls48581G1.Scalar.New(0).Sub(bls48581G1.Scalar.New(3)) actual := six.Sub(nine) @@ -138,7 +138,7 @@ func TestScalarBls48581G1Serialize(t *testing.T) { sc := bls48581G1.Scalar.New(255) sequence := sc.Bytes() require.Equal(t, len(sequence), 73) - require.Equal(t, sequence, []byte{0xff, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}) + require.Equal(t, sequence, []byte{0x00, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xff}) ret, err := bls48581G1.Scalar.SetBytes(sequence) require.NoError(t, err) require.Equal(t, ret.Cmp(sc), 0) diff --git a/nekryptology/pkg/core/curves/curve.go b/nekryptology/pkg/core/curves/curve.go index 420c4a4..424203b 100644 --- a/nekryptology/pkg/core/curves/curve.go +++ b/nekryptology/pkg/core/curves/curve.go @@ -575,11 +575,11 @@ func BLS48581G1() *Curve { func bls48581g1Init() { bls48581g1 = Curve{ Scalar: &ScalarBls48581{ - Value: bls48581.NewBIGint(1), + Value: bls48581.NewBIGint(1, nil), point: new(PointBls48581G1), }, Point: new(PointBls48581G1).Identity(), - Name: BLS12381G1Name, + Name: BLS48581G1Name, } } @@ -592,7 +592,7 @@ func BLS48581G2() *Curve { func bls48581g2Init() { bls48581g2 = Curve{ Scalar: &ScalarBls48581{ - Value: bls48581.NewBIGint(1), + Value: bls48581.NewBIGint(1, nil), point: new(PointBls48581G2), }, Point: new(PointBls48581G2).Identity(), @@ -603,7 +603,7 @@ func bls48581g2Init() { func BLS48581(preferredPoint Point) *PairingCurve { return &PairingCurve{ Scalar: &ScalarBls48581{ - Value: bls48581.NewBIG(), + Value: bls48581.NewBIG(nil), point: preferredPoint, }, PointG1: &PointBls48581G1{ @@ -613,7 +613,7 @@ func BLS48581(preferredPoint Point) *PairingCurve { Value: bls48581.ECP8_generator(), }, GT: &ScalarBls48581Gt{ - Value: bls48581.NewFP48int(1), + Value: bls48581.NewFP48int(1, nil), }, Name: BLS48581Name, } @@ -863,38 +863,40 @@ type sswuParams struct { // Let `n` be a number of point-scalar pairs. // Let `w` be a window of bits (6..8, chosen based on `n`, see cost factor). // -// 1. Prepare `2^(w-1) - 1` buckets with indices `[1..2^(w-1))` initialized with identity points. -// Bucket 0 is not needed as it would contain points multiplied by 0. -// 2. Convert scalars to a radix-`2^w` representation with signed digits in `[-2^w/2, 2^w/2]`. -// Note: only the last digit may equal `2^w/2`. -// 3. 
Starting with the last window, for each point `i=[0..n)` add it to a a bucket indexed by -// the point's scalar's value in the window. -// 4. Once all points in a window are sorted into buckets, add buckets by multiplying each -// by their index. Efficient way of doing it is to start with the last bucket and compute two sums: -// intermediate sum from the last to the first, and the full sum made of all intermediate sums. -// 5. Shift the resulting sum of buckets by `w` bits by using `w` doublings. -// 6. Add to the return value. -// 7. Repeat the loop. +// 1. Prepare `2^(w-1) - 1` buckets with indices `[1..2^(w-1))` initialized with identity points. +// Bucket 0 is not needed as it would contain points multiplied by 0. +// 2. Convert scalars to a radix-`2^w` representation with signed digits in `[-2^w/2, 2^w/2]`. +// Note: only the last digit may equal `2^w/2`. +// 3. Starting with the last window, for each point `i=[0..n)` add it to a a bucket indexed by +// the point's scalar's value in the window. +// 4. Once all points in a window are sorted into buckets, add buckets by multiplying each +// by their index. Efficient way of doing it is to start with the last bucket and compute two sums: +// intermediate sum from the last to the first, and the full sum made of all intermediate sums. +// 5. Shift the resulting sum of buckets by `w` bits by using `w` doublings. +// 6. Add to the return value. +// 7. Repeat the loop. 
// // Approximate cost w/o wNAF optimizations (A = addition, D = doubling): // // ```ascii // cost = (n*A + 2*(2^w/2)*A + w*D + A)*256/w -// | | | | | -// | | | | looping over 256/w windows -// | | | adding to the result -// sorting points | shifting the sum by w bits (to the next window, starting from last window) -// one by one | -// into buckets adding/subtracting all buckets -// multiplied by their indexes -// using a sum of intermediate sums +// +// | | | | | +// | | | | looping over 256/w windows +// | | | adding to the result +// sorting points | shifting the sum by w bits (to the next window, starting from last window) +// one by one | +// into buckets adding/subtracting all buckets +// multiplied by their indexes +// using a sum of intermediate sums +// // ``` // // For large `n`, dominant factor is (n*256/w) additions. // However, if `w` is too big and `n` is not too big, then `(2^w/2)*A` could dominate. // Therefore, the optimal choice of `w` grows slowly as `n` grows. // -// For constant time we use a fixed window of 6 +// # For constant time we use a fixed window of 6 // // This algorithm is adapted from section 4 of . // and https://cacr.uwaterloo.ca/techreports/2010/cacr2010-26.pdf diff --git a/nekryptology/pkg/core/curves/native/bls48581/arch.go b/nekryptology/pkg/core/curves/native/bls48581/arch_32.go similarity index 97% rename from nekryptology/pkg/core/curves/native/bls48581/arch.go rename to nekryptology/pkg/core/curves/native/bls48581/arch_32.go index ac0c2e7..436f2a7 100644 --- a/nekryptology/pkg/core/curves/native/bls48581/arch.go +++ b/nekryptology/pkg/core/curves/native/bls48581/arch_32.go @@ -1,3 +1,5 @@ +//go:build js && wasm + /* * Copyright (c) 2012-2020 MIRACL UK Ltd. 
* diff --git a/nekryptology/pkg/core/curves/native/bls48581/arch_64.go b/nekryptology/pkg/core/curves/native/bls48581/arch_64.go new file mode 100644 index 0000000..2736e68 --- /dev/null +++ b/nekryptology/pkg/core/curves/native/bls48581/arch_64.go @@ -0,0 +1,28 @@ +//go:build !js && !wasm + +/* + * Copyright (c) 2012-2020 MIRACL UK Ltd. + * + * This file is part of MIRACL Core + * (see https://github.com/miracl/core). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* core BIG number class */ + +package bls48581 + +type Chunk int64 + +const CHUNK int = 64 /* Set word size */ diff --git a/nekryptology/pkg/core/curves/native/bls48581/big.go b/nekryptology/pkg/core/curves/native/bls48581/big_32.go similarity index 99% rename from nekryptology/pkg/core/curves/native/bls48581/big.go rename to nekryptology/pkg/core/curves/native/bls48581/big_32.go index 2540cb9..c556db2 100644 --- a/nekryptology/pkg/core/curves/native/bls48581/big.go +++ b/nekryptology/pkg/core/curves/native/bls48581/big_32.go @@ -1,3 +1,5 @@ +//go:build js && wasm + /* * Copyright (c) 2012-2020 MIRACL UK Ltd. * diff --git a/nekryptology/pkg/core/curves/native/bls48581/big_64.go b/nekryptology/pkg/core/curves/native/bls48581/big_64.go new file mode 100644 index 0000000..31d016c --- /dev/null +++ b/nekryptology/pkg/core/curves/native/bls48581/big_64.go @@ -0,0 +1,999 @@ +//go:build !js && !wasm + +/* + * Copyright (c) 2012-2020 MIRACL UK Ltd. 
+ * + * This file is part of MIRACL Core + * (see https://github.com/miracl/core). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* core BIG number class */ + +package bls48581 + +import ( + "arena" + "math/bits" + "strconv" + + "source.quilibrium.com/quilibrium/monorepo/nekryptology/pkg/core/curves/native/bls48581/ext" +) + +//import "fmt" + +type BIG struct { + w [NLEN]Chunk +} + +type DBIG struct { + w [2 * NLEN]Chunk +} + +/***************** 64-bit specific code ****************/ + +/* First the 32/64-bit dependent BIG code */ +/* Note that because of the lack of a 128-bit integer, 32 and 64-bit code needs to be done differently */ + +/* return a*b as DBIG */ +func mul(a *BIG, b *BIG, mem *arena.Arena) *DBIG { + c := NewDBIG(mem) + carry := Chunk(0) + + for i := 0; i < NLEN; i++ { + carry = 0 + for j := 0; j < NLEN; j++ { + carry, c.w[i+j] = mulAdd(a.w[i], b.w[j], carry, c.w[i+j]) + } + c.w[NLEN+i] = carry + } + + return c +} + +/* return a^2 as DBIG */ +func sqr(a *BIG, mem *arena.Arena) *DBIG { + c := NewDBIG(mem) + carry := Chunk(0) + + for i := 0; i < NLEN; i++ { + carry = 0 + for j := i + 1; j < NLEN; j++ { + //if a.w[i]<0 {fmt.Printf("Negative m i in sqr\n")} + //if a.w[j]<0 {fmt.Printf("Negative m j in sqr\n")} + carry, c.w[i+j] = mulAdd(2*a.w[i], a.w[j], carry, c.w[i+j]) + } + c.w[NLEN+i] = carry + } + + for i := 0; i < NLEN; i++ { + //if a.w[i]<0 {fmt.Printf("Negative m s in sqr\n")} + top, bot := mulAdd(a.w[i], a.w[i], 0, c.w[2*i]) + 
+ c.w[2*i] = bot + c.w[2*i+1] += top + } + c.norm() + return c +} + +func monty(md *BIG, mc Chunk, d *DBIG, mem *arena.Arena) *BIG { + carry := Chunk(0) + m := Chunk(0) + for i := 0; i < NLEN; i++ { + if mc == -1 { + m = (-d.w[i]) & BMASK + } else { + if mc == 1 { + m = d.w[i] + } else { + m = (mc * d.w[i]) & BMASK + } + } + + carry = 0 + for j := 0; j < NLEN; j++ { + carry, d.w[i+j] = mulAdd(m, md.w[j], carry, d.w[i+j]) + //if m<0 {fmt.Printf("Negative m in monty\n")} + //if md.w[j]<0 {fmt.Printf("Negative m in monty\n")} + } + d.w[NLEN+i] += carry + } + + b := NewBIG(mem) + for i := 0; i < NLEN; i++ { + b.w[i] = d.w[NLEN+i] + } + b.norm() + return b +} + +/* set this[i]+=x*y+c, and return high part */ +func mulAdd(a Chunk, b Chunk, c Chunk, r Chunk) (Chunk, Chunk) { + + tp, bt := bits.Mul64(uint64(a), uint64(b)) // use math/bits intrinsic + bot := Chunk(bt & uint64(BMASK)) + top := Chunk((tp << (64 - BASEBITS)) | (bt >> BASEBITS)) + bot += c + bot += r + carry := bot >> BASEBITS + bot &= BMASK + top += carry + return top, bot + +} + +/************************************************************/ + +func (r *BIG) get(i int) Chunk { + return r.w[i] +} + +func (r *BIG) set(i int, x Chunk) { + r.w[i] = x +} + +func (r *BIG) xortop(x Chunk) { + r.w[NLEN-1] ^= x +} + +/* normalise BIG - force all digits < 2^BASEBITS */ +func (r *BIG) norm() Chunk { + carry := Chunk(0) + for i := 0; i < NLEN-1; i++ { + d := r.w[i] + carry + r.w[i] = d & BMASK + carry = d >> BASEBITS + } + r.w[NLEN-1] = (r.w[NLEN-1] + carry) + return (r.w[NLEN-1] >> ((8 * MODBYTES) % BASEBITS)) +} + +/* Shift right by less than a word */ +func (r *BIG) fshr(k uint) int { + w := r.w[0] & ((Chunk(1) << k) - 1) /* shifted out part */ + for i := 0; i < NLEN-1; i++ { + r.w[i] = (r.w[i] >> k) | ((r.w[i+1] << (BASEBITS - k)) & BMASK) + } + r.w[NLEN-1] = r.w[NLEN-1] >> k + return int(w) +} + +/* Shift right by less than a word */ +func (r *BIG) fshl(k uint) int { + r.w[NLEN-1] = (r.w[NLEN-1] << k) | (r.w[NLEN-2] 
>> (BASEBITS - k)) + for i := NLEN - 2; i > 0; i-- { + r.w[i] = ((r.w[i] << k) & BMASK) | (r.w[i-1] >> (BASEBITS - k)) + } + r.w[0] = (r.w[0] << k) & BMASK + return int(r.w[NLEN-1] >> ((8 * MODBYTES) % BASEBITS)) /* return excess - only used in ff.c */ +} + +func NewBIG(mem *arena.Arena) *BIG { + var b *BIG + if mem != nil { + b = arena.New[BIG](mem) + } else { + b = new(BIG) + } + for i := 0; i < NLEN; i++ { + b.w[i] = 0 + } + return b +} + +func NewBIGints(x [NLEN]Chunk, mem *arena.Arena) *BIG { + var b *BIG + if mem != nil { + b = arena.New[BIG](mem) + } else { + b = new(BIG) + } + for i := 0; i < NLEN; i++ { + b.w[i] = x[i] + } + return b +} + +func NewBIGint(x int, mem *arena.Arena) *BIG { + var b *BIG + if mem != nil { + b = arena.New[BIG](mem) + } else { + b = new(BIG) + } + b.w[0] = Chunk(x) + for i := 1; i < NLEN; i++ { + b.w[i] = 0 + } + return b +} + +func NewBIGcopy(x *BIG, mem *arena.Arena) *BIG { + var b *BIG + if mem != nil { + b = arena.New[BIG](mem) + } else { + b = new(BIG) + } + for i := 0; i < NLEN; i++ { + b.w[i] = x.w[i] + } + return b +} + +func NewBIGdcopy(x *DBIG, mem *arena.Arena) *BIG { + var b *BIG + if mem != nil { + b = arena.New[BIG](mem) + } else { + b = new(BIG) + } + for i := 0; i < NLEN; i++ { + b.w[i] = x.w[i] + } + return b +} + +/* test for zero */ +func (r *BIG) IsZero() bool { + d := Chunk(0) + for i := 0; i < NLEN; i++ { + d |= r.w[i] + } + return (1 & ((d - 1) >> BASEBITS)) != 0 +} + +/* set to zero */ +func (r *BIG) zero() { + for i := 0; i < NLEN; i++ { + r.w[i] = 0 + } +} + +/* Test for equal to one */ +func (r *BIG) isunity() bool { + d := Chunk(0) + for i := 1; i < NLEN; i++ { + d |= r.w[i] + } + return (1 & ((d - 1) >> BASEBITS) & (((r.w[0] ^ 1) - 1) >> BASEBITS)) != 0 +} + +/* set to one */ +func (r *BIG) one() { + r.w[0] = 1 + for i := 1; i < NLEN; i++ { + r.w[i] = 0 + } +} + +/* Copy from another BIG */ +func (r *BIG) copy(x *BIG) { + for i := 0; i < NLEN; i++ { + r.w[i] = x.w[i] + } +} + +/* Copy from another DBIG 
*/ +func (r *BIG) dcopy(x *DBIG) { + for i := 0; i < NLEN; i++ { + r.w[i] = x.w[i] + } +} + +/* Conditional swap of two bigs depending on d using XOR - no branches */ +func (r *BIG) cswap(b *BIG, d int) Chunk { + c := Chunk(-d) + s := Chunk(0) + v := r.w[0] ^ b.w[1] + va := v + v + va >>= 1 + for i := 0; i < NLEN; i++ { + t := c & (r.w[i] ^ b.w[i]) + t ^= v + e := r.w[i] ^ t + s ^= e // to force calculation of e + r.w[i] = e ^ va + e = b.w[i] ^ t + s ^= e + b.w[i] = e ^ va + } + return s +} + +func (r *BIG) cmove(g *BIG, d int) Chunk { + b := Chunk(-d) + s := Chunk(0) + v := r.w[0] ^ g.w[1] + va := v + v + va >>= 1 + for i := 0; i < NLEN; i++ { + t := (r.w[i] ^ g.w[i]) & b + t ^= v + e := r.w[i] ^ t + s ^= e + r.w[i] = e ^ va + } + return s +} + +/* general shift right */ +func (r *BIG) shr(k uint) { + n := (k % BASEBITS) + m := int(k / BASEBITS) + for i := 0; i < NLEN-m-1; i++ { + r.w[i] = (r.w[m+i] >> n) | ((r.w[m+i+1] << (BASEBITS - n)) & BMASK) + } + r.w[NLEN-m-1] = r.w[NLEN-1] >> n + for i := NLEN - m; i < NLEN; i++ { + r.w[i] = 0 + } +} + +/* general shift left */ +func (r *BIG) shl(k uint) { + n := k % BASEBITS + m := int(k / BASEBITS) + + r.w[NLEN-1] = (r.w[NLEN-1-m] << n) + if NLEN >= m+2 { + r.w[NLEN-1] |= (r.w[NLEN-m-2] >> (BASEBITS - n)) + } + for i := NLEN - 2; i > m; i-- { + r.w[i] = ((r.w[i-m] << n) & BMASK) | (r.w[i-m-1] >> (BASEBITS - n)) + } + r.w[m] = (r.w[0] << n) & BMASK + for i := 0; i < m; i++ { + r.w[i] = 0 + } +} + +/* return number of bits */ +func (r *BIG) nbits() int { + t := NewBIGcopy(r, nil) + k := NLEN - 1 + t.norm() + for k >= 0 && t.w[k] == 0 { + k-- + } + if k < 0 { + return 0 + } + bts := int(BASEBITS) * k + c := t.w[k] + for c != 0 { + c /= 2 + bts++ + } + return bts +} + +func (r *BIG) Nbits() int { + return r.nbits() +} + +/* Convert to Hex String */ +func (r *BIG) ToString() string { + s := "" + len := r.nbits() + + if len%4 == 0 { + len /= 4 + } else { + len /= 4 + len++ + + } + MB := int(MODBYTES * 2) + if len < MB { + len 
= MB + } + + for i := len - 1; i >= 0; i-- { + b := NewBIGcopy(r, nil) + + b.shr(uint(i * 4)) + s += strconv.FormatInt(int64(b.w[0]&15), 16) + } + return s +} + +func (r *BIG) Add(x *BIG) { + for i := 0; i < NLEN; i++ { + r.w[i] = r.w[i] + x.w[i] + } +} + +func (r *BIG) or(x *BIG) { + for i := 0; i < NLEN; i++ { + r.w[i] = r.w[i] | x.w[i] + } +} + +/* return this+x */ +func (r *BIG) Plus(x *BIG) *BIG { + s := new(BIG) + for i := 0; i < NLEN; i++ { + s.w[i] = r.w[i] + x.w[i] + } + s.norm() + return s +} + +/* this+=x, where x is int */ +func (r *BIG) inc(x int) { + r.norm() + r.w[0] += Chunk(x) +} + +/* this*=c and catch overflow in DBIG */ +func (r *BIG) pxmul(c int, mem *arena.Arena) *DBIG { + m := NewDBIG(mem) + carry := Chunk(0) + for j := 0; j < NLEN; j++ { + carry, m.w[j] = mulAdd(r.w[j], Chunk(c), carry, m.w[j]) + //if c<0 {fmt.Printf("Negative c in pxmul\n")} + //if r.w[j]<0 {fmt.Printf("Negative c in pxmul\n")} + } + m.w[NLEN] = carry + return m +} + +/* return this-x */ +func (r *BIG) Minus(x *BIG) *BIG { + d := new(BIG) + for i := 0; i < NLEN; i++ { + d.w[i] = r.w[i] - x.w[i] + } + return d +} + +/* this-=x */ +func (r *BIG) Sub(x *BIG) { + for i := 0; i < NLEN; i++ { + r.w[i] = r.w[i] - x.w[i] + } +} + +/* reverse subtract this=x-this */ +func (r *BIG) rsub(x *BIG) { + for i := 0; i < NLEN; i++ { + r.w[i] = x.w[i] - r.w[i] + } +} + +/* this-=x, where x is int */ +func (r *BIG) dec(x int) { + r.norm() + r.w[0] -= Chunk(x) +} + +/* this*=x, where x is small intNEXCESS */ +func (r *BIG) pmul(c int) Chunk { + carry := Chunk(0) + // r.norm(); + for i := 0; i < NLEN; i++ { + ak := r.w[i] + r.w[i] = 0 + carry, r.w[i] = mulAdd(ak, Chunk(c), carry, r.w[i]) + //if c<0 {fmt.Printf("Negative c in pmul\n")} + //if ak<0 {fmt.Printf("Negative c in pmul\n")} + } + return carry +} + +/* convert this BIG to byte array */ +func (r *BIG) tobytearray(b []byte, n int) { + //r.norm(); + c := NewBIGcopy(r, nil) + c.norm() + + for i := int(MODBYTES) - 1; i >= 0; i-- { + b[i+n] = 
byte(c.w[0]) + c.fshr(8) + } +} + +/* convert from byte array to BIG */ +func frombytearray(b []byte, n int) *BIG { + m := NewBIG(nil) + l := len(b) + for i := 0; i < int(MODBYTES); i++ { + m.fshl(8) + if i < l { + m.w[0] += Chunk(int(b[i+n] & 0xff)) + } else { + m.w[0] += Chunk(int(0 & 0xff)) + } + } + return m +} + +func (r *BIG) ToBytes(b []byte) { + r.tobytearray(b, 0) +} + +func FromBytes(b []byte) *BIG { + return frombytearray(b, 0) +} + +/* divide by 3 */ +func (r *BIG) div3() int { + carry := Chunk(0) + r.norm() + base := (Chunk(1) << BASEBITS) + for i := NLEN - 1; i >= 0; i-- { + ak := (carry*base + r.w[i]) + r.w[i] = ak / 3 + carry = ak % 3 + } + return int(carry) +} + +/* return a*b where result fits in a BIG */ +func smul(a *BIG, b *BIG) *BIG { + carry := Chunk(0) + c := NewBIG(nil) + for i := 0; i < NLEN; i++ { + carry = 0 + for j := 0; j < NLEN; j++ { + if i+j < NLEN { + carry, c.w[i+j] = mulAdd(a.w[i], b.w[j], carry, c.w[i+j]) + } + } + } + return c +} + +/* Compare a and b, return 0 if a==b, -1 if a<b, +1 if a>b. 
Inputs must be normalised */ +func Comp(a *BIG, b *BIG) int { + gt := Chunk(0) + eq := Chunk(1) + for i := NLEN - 1; i >= 0; i-- { + gt |= ((b.w[i] - a.w[i]) >> BASEBITS) & eq + eq &= ((b.w[i] ^ a.w[i]) - 1) >> BASEBITS + } + return int(gt + gt + eq - 1) +} + +/* return parity */ +func (r *BIG) parity() int { + return int(r.w[0] % 2) +} + +/* return n-th bit */ +func (r *BIG) bit(n int) int { + return int((r.w[n/int(BASEBITS)] & (Chunk(1) << (uint(n) % BASEBITS))) >> (uint(n) % BASEBITS)) + // if (r.w[n/int(BASEBITS)] & (Chunk(1) << (uint(n) % BASEBITS))) > 0 { + // return 1 + // } + // return 0 +} + +/* return n last bits */ +func (r *BIG) lastbits(n int) int { + msk := (1 << uint(n)) - 1 + r.norm() + return (int(r.w[0])) & msk +} + +/* set x = x mod 2^m */ +func (r *BIG) mod2m(m uint) { + wd := int(m / BASEBITS) + bt := m % BASEBITS + msk := (Chunk(1) << bt) - 1 + r.w[wd] &= msk + for i := wd + 1; i < NLEN; i++ { + r.w[i] = 0 + } +} + +/* a=1/a mod 2^256. This is very fast! */ +func (r *BIG) invmod2m() { + U := NewBIG(nil) + b := NewBIG(nil) + c := NewBIG(nil) + + U.inc(invmod256(r.lastbits(8))) + + for i := 8; i < BIGBITS; i <<= 1 { + U.norm() + ui := uint(i) + b.copy(r) + b.mod2m(ui) + t1 := smul(U, b) + t1.shr(ui) + c.copy(r) + c.shr(ui) + c.mod2m(ui) + + t2 := smul(U, c) + t2.mod2m(ui) + t1.Add(t2) + t1.norm() + b = smul(t1, U) + t1.copy(b) + t1.mod2m(ui) + + t2.one() + t2.shl(ui) + t1.rsub(t2) + t1.norm() + t1.shl(ui) + U.Add(t1) + } + U.mod2m(8 * MODBYTES) + r.copy(U) + r.norm() +} + +func (r *BIG) ctmod(m *BIG, bd uint, mem *arena.Arena) { + k := bd + sr := NewBIG(mem) + c := NewBIGcopy(m, mem) + r.norm() + + c.shl(k) + + for { + sr.copy(r) + sr.Sub(c) + sr.norm() + r.cmove(sr, int(1-((sr.w[NLEN-1]>>uint(CHUNK-1))&1))) + if k == 0 { + break + } + c.fshr(1) + k -= 1 + } +} + +/* reduce this mod m */ +func (r *BIG) Mod(m *BIG, mem *arena.Arena) { + k := r.nbits() - m.nbits() + if k < 0 { + k = 0 + } + r.ctmod(m, uint(k), mem) +} + +func (r *BIG) ctdiv(m 
*BIG, bd uint, mem *arena.Arena) { + k := bd + e := NewBIGint(1, mem) + sr := NewBIG(mem) + a := NewBIGcopy(r, mem) + c := NewBIGcopy(m, mem) + r.norm() + r.zero() + + c.shl(k) + e.shl(k) + + for { + sr.copy(a) + sr.Sub(c) + sr.norm() + d := int(1 - ((sr.w[NLEN-1] >> uint(CHUNK-1)) & 1)) + a.cmove(sr, d) + sr.copy(r) + sr.Add(e) + sr.norm() + r.cmove(sr, d) + if k == 0 { + break + } + c.fshr(1) + e.fshr(1) + k -= 1 + } +} + +/* divide this by m */ +func (r *BIG) div(m *BIG, mem *arena.Arena) { + k := r.nbits() - m.nbits() + if k < 0 { + k = 0 + } + r.ctdiv(m, uint(k), mem) +} + +/* get 8*MODBYTES size random number */ +func Random(rng *ext.RAND) *BIG { + m := NewBIG(nil) + var j int = 0 + var r byte = 0 + /* generate random BIG */ + for i := 0; i < 8*int(MODBYTES); i++ { + if j == 0 { + r = rng.GetByte() + } else { + r >>= 1 + } + + b := Chunk(int(r & 1)) + m.shl(1) + m.w[0] += b + j++ + j &= 7 + } + return m +} + +/* Create random BIG in portable way, one bit at a time */ +func Randomnum(q *BIG, rng *ext.RAND) *BIG { + d := NewDBIG(nil) + var j int = 0 + var r byte = 0 + for i := 0; i < 2*q.nbits(); i++ { + if j == 0 { + r = rng.GetByte() + } else { + r >>= 1 + } + + b := Chunk(int(r & 1)) + d.shl(1) + d.w[0] += b + j++ + j &= 7 + } + m := d.Mod(q, nil) + return m +} + +func Randtrunc(q *BIG, trunc int, rng *ext.RAND) *BIG { + m := Randomnum(q, rng) + if q.nbits() > trunc { + m.mod2m(uint(trunc)) + } + return m +} + +/* return a*b mod m */ +func Modmul(a1, b1, m *BIG, mem *arena.Arena) *BIG { + a := NewBIGcopy(a1, mem) + b := NewBIGcopy(b1, mem) + a.Mod(m, mem) + b.Mod(m, mem) + d := mul(a, b, mem) + return d.ctmod(m, uint(m.nbits()), mem) +} + +/* return a^2 mod m */ +func Modsqr(a1, m *BIG, mem *arena.Arena) *BIG { + a := NewBIGcopy(a1, mem) + a.Mod(m, mem) + d := sqr(a, mem) + return d.ctmod(m, uint(m.nbits()), mem) +} + +/* return -a mod m */ +func Modneg(a1, m *BIG, mem *arena.Arena) *BIG { + a := NewBIGcopy(a1, mem) + a.Mod(m, mem) + a.rsub(m) + a.norm() + 
return a +} + +/* return a+b mod m */ +func ModAdd(a1, b1, m *BIG, mem *arena.Arena) *BIG { + a := NewBIGcopy(a1, mem) + b := NewBIGcopy(b1, mem) + a.Mod(m, mem) + b.Mod(m, mem) + a.Add(b) + a.norm() + a.ctmod(m, 1, mem) + return a +} + +/* Jacobi Symbol (this/p). Returns 0, 1 or -1 */ +func (r *BIG) Jacobi(p *BIG) int { + mem := arena.NewArena() + defer mem.Free() + m := 0 + t := NewBIGint(0, mem) + x := NewBIGint(0, mem) + n := NewBIGint(0, mem) + zilch := NewBIGint(0, mem) + one := NewBIGint(1, mem) + if p.parity() == 0 || Comp(r, zilch) == 0 || Comp(p, one) <= 0 { + return 0 + } + r.norm() + x.copy(r) + n.copy(p) + x.Mod(p, mem) + + for Comp(n, one) > 0 { + if Comp(x, zilch) == 0 { + return 0 + } + n8 := n.lastbits(3) + k := 0 + for x.parity() == 0 { + k++ + x.shr(1) + } + if k%2 == 1 { + m += (n8*n8 - 1) / 8 + } + m += (n8 - 1) * (x.lastbits(2) - 1) / 4 + t.copy(n) + t.Mod(x, mem) + n.copy(x) + x.copy(t) + m %= 2 + + } + if m == 0 { + return 1 + } + return -1 +} + +/* this=1/this mod p. 
Binary method */ +func (r *BIG) Invmodp(p *BIG) { + mem := arena.NewArena() + defer mem.Free() + r.Mod(p, mem) + if r.IsZero() { + return + } + u := NewBIGcopy(r, mem) + v := NewBIGcopy(p, mem) + x1 := NewBIGint(1, mem) + x2 := NewBIGint(0, mem) + t := NewBIGint(0, mem) + one := NewBIGint(1, mem) + for Comp(u, one) != 0 && Comp(v, one) != 0 { + for u.parity() == 0 { + u.fshr(1) + t.copy(x1) + t.Add(p) + x1.cmove(t, x1.parity()) + x1.norm() + x1.fshr(1) + } + for v.parity() == 0 { + v.fshr(1) + t.copy(x2) + t.Add(p) + x2.cmove(t, x2.parity()) + x2.norm() + x2.fshr(1) + } + if Comp(u, v) >= 0 { + u.Sub(v) + u.norm() + t.copy(x1) + t.Add(p) + x1.cmove(t, (Comp(x1, x2)>>1)&1) + x1.Sub(x2) + x1.norm() + } else { + v.Sub(u) + v.norm() + t.copy(x2) + t.Add(p) + x2.cmove(t, (Comp(x2, x1)>>1)&1) + x2.Sub(x1) + x2.norm() + } + } + r.copy(x1) + r.cmove(x2, Comp(u, one)&1) +} + +/* return this^e mod m */ +func (r *BIG) Powmod(e1 *BIG, m *BIG, mem *arena.Arena) *BIG { + e := NewBIGcopy(e1, mem) + r.norm() + e.norm() + a := NewBIGint(1, mem) + z := NewBIGcopy(e, mem) + s := NewBIGcopy(r, mem) + for true { + bt := z.parity() + z.fshr(1) + if bt == 1 { + a = Modmul(a, s, m, mem) + } + if z.IsZero() { + break + } + s = Modsqr(s, m, mem) + } + return a +} + +/* Arazi and Qi inversion mod 256 */ +func invmod256(a int) int { + var t1 int = 0 + c := (a >> 1) & 1 + t1 += c + t1 &= 1 + t1 = 2 - t1 + t1 <<= 1 + U := t1 + 1 + + // i=2 + b := a & 3 + t1 = U * b + t1 >>= 2 + c = (a >> 2) & 3 + t2 := (U * c) & 3 + t1 += t2 + t1 *= U + t1 &= 3 + t1 = 4 - t1 + t1 <<= 2 + U += t1 + + // i=4 + b = a & 15 + t1 = U * b + t1 >>= 4 + c = (a >> 4) & 15 + t2 = (U * c) & 15 + t1 += t2 + t1 *= U + t1 &= 15 + t1 = 16 - t1 + t1 <<= 4 + U += t1 + + return U +} + +func logb2(w uint32) uint { + v := w + v |= (v >> 1) + v |= (v >> 2) + v |= (v >> 4) + v |= (v >> 8) + v |= (v >> 16) + + v = v - ((v >> 1) & 0x55555555) + v = (v & 0x33333333) + ((v >> 2) & 0x33333333) + r := uint((((v + (v >> 4)) & 0xF0F0F0F) * 
0x1010101) >> 24) + return (r) +} + +// Optimized combined shift, subtract and norm +func ssn(r *BIG, a *BIG, m *BIG) int { + n := NLEN - 1 + m.w[0] = (m.w[0] >> 1) | ((m.w[1] << (BASEBITS - 1)) & BMASK) + r.w[0] = a.w[0] - m.w[0] + carry := r.w[0] >> BASEBITS + r.w[0] &= BMASK + for i := 1; i < n; i++ { + m.w[i] = (m.w[i] >> 1) | ((m.w[i+1] << (BASEBITS - 1)) & BMASK) + r.w[i] = a.w[i] - m.w[i] + carry + carry = r.w[i] >> BASEBITS + r.w[i] &= BMASK + } + m.w[n] >>= 1 + r.w[n] = a.w[n] - m.w[n] + carry + return int((r.w[n] >> uint(CHUNK-1)) & 1) +} diff --git a/nekryptology/pkg/core/curves/native/bls48581/bls256.go b/nekryptology/pkg/core/curves/native/bls48581/bls256.go index c397a91..8d30ea1 100644 --- a/nekryptology/pkg/core/curves/native/bls48581/bls256.go +++ b/nekryptology/pkg/core/curves/native/bls48581/bls256.go @@ -42,7 +42,7 @@ func ceil(a int, b int) int { /* output u \in F_p */ func Hash_to_field(hash int, hlen int, DST []byte, M []byte, ctr int) []*FP { - q := NewBIGints(Modulus) + q := NewBIGints(Modulus, nil) nbq := q.nbits() L := ceil(nbq+AESKEY*8, 8) var u []*FP @@ -53,7 +53,7 @@ func Hash_to_field(hash int, hlen int, DST []byte, M []byte, ctr int) []*FP { for j := 0; j < L; j++ { fd[j] = OKM[i*L+j] } - u = append(u, NewFPbig(DBIG_fromBytes(fd).ctmod(q, uint(8*L-nbq)))) + u = append(u, NewFPbig(DBIG_fromBytes(fd).ctmod(q, uint(8*L-nbq), nil), nil)) } return u } @@ -65,15 +65,15 @@ func Bls256_hash_to_point(M []byte) *ECP { P := ECP_map2point(u[0]) P1 := ECP_map2point(u[1]) - P.Add(P1) + P.Add(P1, nil) P.Cfp() - P.Affine() + P.Affine(nil) return P } func Init() int { G := ECP8_generator() - if G.Is_infinity() { + if G.Is_infinity(nil) { return BLS_FAIL } G2_TAB = precomp(G) @@ -82,7 +82,7 @@ func Init() int { /* generate key pair, private key S, public key W */ func KeyPairGenerate(IKM []byte, S []byte, W []byte) int { - r := NewBIGints(CURVE_Order) + r := NewBIGints(CURVE_Order, nil) nbr := r.nbits() L := ceil(3*ceil(nbr, 8), 2) LEN := 
ext.InttoBytes(L, 2) @@ -93,7 +93,7 @@ func KeyPairGenerate(IKM []byte, S []byte, W []byte) int { AIKM[len(IKM)] = 0 G := ECP8_generator() - if G.Is_infinity() { + if G.Is_infinity(nil) { return BLS_FAIL } SALT := []byte("BLS-SIG-KEYGEN-SALT-") @@ -101,10 +101,10 @@ func KeyPairGenerate(IKM []byte, S []byte, W []byte) int { OKM := ext.HKDF_Expand(ext.MC_SHA2, HASH_TYPE, L, PRK, LEN) dx := DBIG_fromBytes(OKM[:]) - s := dx.ctmod(r, uint(8*L-nbr)) + s := dx.ctmod(r, uint(8*L-nbr), nil) s.ToBytes(S) // SkToPk - G = G2mul(G, s) + G = G2mul(G, s, nil) G.ToBytes(W, true) return BLS_OK } @@ -113,7 +113,7 @@ func KeyPairGenerate(IKM []byte, S []byte, W []byte) int { func Core_Sign(SIG []byte, M []byte, S []byte) int { D := Bls256_hash_to_point(M) s := FromBytes(S) - D = G1mul(D, s) + D = G1mul(D, s, nil) D.ToBytes(SIG, true) return BLS_OK } @@ -124,21 +124,21 @@ func Core_Verify(SIG []byte, M []byte, W []byte) int { HM := Bls256_hash_to_point(M) D := ECP_fromBytes(SIG) - if !G1member(D) { + if !G1member(D, nil) { return BLS_FAIL } - D.Neg() + D.Neg(nil) PK := ECP8_fromBytes(W) - if !G2member(PK) { + if !G2member(PK, nil) { return BLS_FAIL } // Use new multi-pairing mechanism - r := Initmp() + r := Initmp(nil) Another_pc(r, G2_TAB, D) - Another(r, PK, HM) - v := Miller(r) + Another(r, PK, HM, nil) + v := Miller(r, nil) //.. or alternatively // G := ECP8_generator() diff --git a/nekryptology/pkg/core/curves/native/bls48581/config_big.go b/nekryptology/pkg/core/curves/native/bls48581/config_big_32.go similarity index 98% rename from nekryptology/pkg/core/curves/native/bls48581/config_big.go rename to nekryptology/pkg/core/curves/native/bls48581/config_big_32.go index 699a6e2..b814453 100644 --- a/nekryptology/pkg/core/curves/native/bls48581/config_big.go +++ b/nekryptology/pkg/core/curves/native/bls48581/config_big_32.go @@ -1,3 +1,5 @@ +//go:build js && wasm + /* * Copyright (c) 2012-2020 MIRACL UK Ltd. 
* diff --git a/nekryptology/pkg/core/curves/native/bls48581/config_big_64.go b/nekryptology/pkg/core/curves/native/bls48581/config_big_64.go new file mode 100644 index 0000000..d31bd4d --- /dev/null +++ b/nekryptology/pkg/core/curves/native/bls48581/config_big_64.go @@ -0,0 +1,36 @@ +//go:build !js && !wasm + +/* + * Copyright (c) 2012-2020 MIRACL UK Ltd. + * + * This file is part of MIRACL Core + * (see https://github.com/miracl/core). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package bls48581 + +// BIG length in bytes and number base +const MODBYTES uint = 73 +const BASEBITS uint = 60 + +// BIG lengths and Masks +const NLEN int = int((1 + ((8*MODBYTES - 1) / BASEBITS))) +const DNLEN int = 2 * NLEN +const BMASK Chunk = ((Chunk(1) << BASEBITS) - 1) +const HBITS uint = (BASEBITS / 2) +const HMASK Chunk = ((Chunk(1) << HBITS) - 1) +const NEXCESS int = (1 << (uint(CHUNK) - BASEBITS - 1)) + +const BIGBITS int = int(MODBYTES * 8) diff --git a/nekryptology/pkg/core/curves/native/bls48581/config_curve.go b/nekryptology/pkg/core/curves/native/bls48581/config_curve.go index dc23507..7a39de1 100644 --- a/nekryptology/pkg/core/curves/native/bls48581/config_curve.go +++ b/nekryptology/pkg/core/curves/native/bls48581/config_curve.go @@ -19,11 +19,6 @@ package bls48581 -// Curve types -const WEIERSTRASS int = 0 -const EDWARDS int = 1 -const MONTGOMERY int = 2 - // Pairing Friendly? 
const NOT int = 0 const BN int = 1 @@ -31,10 +26,6 @@ const BLS12 int = 2 const BLS24 int = 3 const BLS48 int = 4 -// Pairing Twist type -const D_TYPE int = 0 -const M_TYPE int = 1 - // Sparsity const FP_ZERO int = 0 const FP_ONE int = 1 @@ -43,34 +34,16 @@ const FP_SPARSER int = 3 const FP_SPARSE int = 4 const FP_DENSE int = 5 -// Pairing x parameter sign -const POSITIVEX int = 0 -const NEGATIVEX int = 1 - -// Curve type - -const CURVETYPE int = WEIERSTRASS const CURVE_A int = 0 -const CURVE_PAIRING_TYPE int = BLS48 -// Pairings only - -const SEXTIC_TWIST int = D_TYPE -const SIGN_OF_X int = NEGATIVEX const ATE_BITS int = 33 const G2_TABLE int = 36 const HTC_ISO int = 0 const HTC_ISO_G2 int = 0 -// associated hash function and AES key size - const HASH_TYPE int = 64 const AESKEY int = 32 -const ALLOW_ALT_COMPRESS bool = false - -// These are manually decided policy decisions. To block any potential patent issues set to false. - const USE_GLV bool = true const USE_GS_G2 bool = true const USE_GS_GT bool = true diff --git a/nekryptology/pkg/core/curves/native/bls48581/config_field.go b/nekryptology/pkg/core/curves/native/bls48581/config_field_32.go similarity index 98% rename from nekryptology/pkg/core/curves/native/bls48581/config_field.go rename to nekryptology/pkg/core/curves/native/bls48581/config_field_32.go index 1d47ce8..81d262c 100644 --- a/nekryptology/pkg/core/curves/native/bls48581/config_field.go +++ b/nekryptology/pkg/core/curves/native/bls48581/config_field_32.go @@ -1,3 +1,5 @@ +//go:build js && wasm + /* * Copyright (c) 2012-2020 MIRACL UK Ltd. * diff --git a/nekryptology/pkg/core/curves/native/bls48581/config_field_64.go b/nekryptology/pkg/core/curves/native/bls48581/config_field_64.go new file mode 100644 index 0000000..186ddd5 --- /dev/null +++ b/nekryptology/pkg/core/curves/native/bls48581/config_field_64.go @@ -0,0 +1,49 @@ +//go:build !js && !wasm + +/* + * Copyright (c) 2012-2020 MIRACL UK Ltd. 
+ * + * This file is part of MIRACL Core + * (see https://github.com/miracl/core). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package bls48581 + +// Modulus types +const NOT_SPECIAL int = 0 +const PSEUDO_MERSENNE int = 1 +const MONTGOMERY_FRIENDLY int = 2 +const GENERALISED_MERSENNE int = 3 + +const NEGATOWER int = 0 +const POSITOWER int = 1 + +// Modulus details +const MODBITS uint = 581 /* Number of bits in Modulus */ +const PM1D2 uint = 1 /* Modulus mod 8 */ +const RIADZ int = 2 /* hash-to-point Z */ +const RIADZG2A int = 2 /* G2 hash-to-point Z */ +const RIADZG2B int = 0 /* G2 hash-to-point Z */ +const MODTYPE int = NOT_SPECIAL //NOT_SPECIAL +const QNRI int = 0 // Fp2 QNR +const TOWER int = POSITOWER // Tower type +const FEXCESS int32 = ((int32(1) << 19) - 1) + +// Modulus Masks +const OMASK Chunk = ((Chunk(-1)) << (MODBITS % BASEBITS)) +const TBITS uint = MODBITS % BASEBITS // Number of active bits in top word +const TMASK Chunk = (Chunk(1) << TBITS) - 1 + +const BIG_ENDIAN_SIGN bool = false diff --git a/nekryptology/pkg/core/curves/native/bls48581/dbig.go b/nekryptology/pkg/core/curves/native/bls48581/dbig.go index 2e9a2ae..755cdd2 100644 --- a/nekryptology/pkg/core/curves/native/bls48581/dbig.go +++ b/nekryptology/pkg/core/curves/native/bls48581/dbig.go @@ -21,28 +21,46 @@ package bls48581 -import "strconv" +import ( + "arena" + "strconv" +) //import "fmt" -func NewDBIG() *DBIG { - b := new(DBIG) +func NewDBIG(mem *arena.Arena) *DBIG { + 
var b *DBIG + if mem != nil { + b = arena.New[DBIG](mem) + } else { + b = new(DBIG) + } for i := 0; i < DNLEN; i++ { b.w[i] = 0 } return b } -func NewDBIGcopy(x *DBIG) *DBIG { - b := new(DBIG) +func NewDBIGcopy(x *DBIG, mem *arena.Arena) *DBIG { + var b *DBIG + if mem != nil { + b = arena.New[DBIG](mem) + } else { + b = new(DBIG) + } for i := 0; i < DNLEN; i++ { b.w[i] = x.w[i] } return b } -func NewDBIGscopy(x *BIG) *DBIG { - b := new(DBIG) +func NewDBIGscopy(x *BIG, mem *arena.Arena) *DBIG { + var b *DBIG + if mem != nil { + b = arena.New[DBIG](mem) + } else { + b = new(DBIG) + } for i := 0; i < NLEN-1; i++ { b.w[i] = x.w[i] } @@ -67,8 +85,8 @@ func (r *DBIG) norm() { } /* split DBIG at position n, return higher half, keep lower half */ -func (r *DBIG) split(n uint) *BIG { - t := NewBIG() +func (r *DBIG) split(n uint, mem *arena.Arena) *BIG { + t := NewBIG(mem) m := n % BASEBITS carry := r.w[DNLEN-1] << (BASEBITS - m) @@ -173,11 +191,11 @@ func (r *DBIG) shr(k uint) { } } -func (r *DBIG) ctmod(m *BIG, bd uint) *BIG { +func (r *DBIG) ctmod(m *BIG, bd uint, mem *arena.Arena) *BIG { k := bd r.norm() - c := NewDBIGscopy(m) - dr := NewDBIG() + c := NewDBIGscopy(m, mem) + dr := NewDBIG(mem) c.shl(k) @@ -192,25 +210,25 @@ func (r *DBIG) ctmod(m *BIG, bd uint) *BIG { k -= 1 c.shr(1) } - return NewBIGdcopy(r) + return NewBIGdcopy(r, mem) } /* reduces this DBIG mod a BIG, and returns the BIG */ -func (r *DBIG) Mod(m *BIG) *BIG { +func (r *DBIG) Mod(m *BIG, mem *arena.Arena) *BIG { k := r.nbits() - m.nbits() if k < 0 { k = 0 } - return r.ctmod(m, uint(k)) + return r.ctmod(m, uint(k), mem) } -func (r *DBIG) ctdiv(m *BIG, bd uint) *BIG { +func (r *DBIG) ctdiv(m *BIG, bd uint, mem *arena.Arena) *BIG { k := bd - c := NewDBIGscopy(m) - a := NewBIGint(0) - e := NewBIGint(1) - sr := NewBIG() - dr := NewDBIG() + c := NewDBIGscopy(m, mem) + a := NewBIGint(0, mem) + e := NewBIGint(1, mem) + sr := NewBIG(mem) + dr := NewDBIG(mem) r.norm() c.shl(k) @@ -237,12 +255,12 @@ func (r *DBIG) 
ctdiv(m *BIG, bd uint) *BIG { } /* return this/c */ -func (r *DBIG) div(m *BIG) *BIG { +func (r *DBIG) div(m *BIG, mem *arena.Arena) *BIG { k := r.nbits() - m.nbits() if k < 0 { k = 0 } - return r.ctdiv(m, uint(k)) + return r.ctdiv(m, uint(k), mem) } /* Convert to Hex String */ @@ -259,7 +277,7 @@ func (r *DBIG) toString() string { } for i := len - 1; i >= 0; i-- { - b := NewDBIGcopy(r) + b := NewDBIGcopy(r, nil) b.shr(uint(i * 4)) s += strconv.FormatInt(int64(b.w[0]&15), 16) @@ -270,7 +288,7 @@ func (r *DBIG) toString() string { /* return number of bits */ func (r *DBIG) nbits() int { k := DNLEN - 1 - t := NewDBIGcopy(r) + t := NewDBIGcopy(r, nil) t.norm() for k >= 0 && t.w[k] == 0 { k-- @@ -289,7 +307,7 @@ func (r *DBIG) nbits() int { /* convert from byte array to BIG */ func DBIG_fromBytes(b []byte) *DBIG { - m := NewDBIG() + m := NewDBIG(nil) for i := 0; i < len(b); i++ { m.shl(8) m.w[0] += Chunk(int(b[i] & 0xff)) diff --git a/nekryptology/pkg/core/curves/native/bls48581/ecdh.go b/nekryptology/pkg/core/curves/native/bls48581/ecdh.go deleted file mode 100644 index 0480043..0000000 --- a/nekryptology/pkg/core/curves/native/bls48581/ecdh.go +++ /dev/null @@ -1,381 +0,0 @@ -/* - * Copyright (c) 2012-2020 MIRACL UK Ltd. - * - * This file is part of MIRACL Core - * (see https://github.com/miracl/ext.. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -/* ECDH/ECIES/ECDSA API Functions */ - -package bls48581 - -//import "fmt" -import "source.quilibrium.com/quilibrium/monorepo/nekryptology/pkg/core/curves/native/bls48581/ext" - -const INVALID_PUBLIC_KEY int = -2 -const ERROR int = -3 - -//const INVALID int = -4 -const EFS int = int(MODBYTES) -const EGS int = int(MODBYTES) - -// Transform a point multiplier to RFC7748 form -func RFC7748(r *BIG) { - lg := 0 - t := NewBIGint(1) - c := CURVE_Cof_I - for c != 1 { - lg++ - c /= 2 - } - n := uint(8*EGS - lg + 1) - r.mod2m(n) - t.shl(n) - r.Add(t) - c = r.lastbits(lg) - r.dec(c) -} - -/* return true if S is in ranger 0 < S < order , else return false */ -func ECDH_IN_RANGE(S []byte) bool { - r := NewBIGints(CURVE_Order) - s := FromBytes(S) - if s.IsZero() { - return false - } - if Comp(s, r) >= 0 { - return false - } - return true -} - -/* Calculate a public/private EC GF(p) key pair W,S where W=S.G mod EC(p), - * where S is the secret key and W is the public key - * and G is fixed generator. 
- * If RNG is NULL then the private key is provided externally in S - * otherwise it is generated randomly internally */ -func ECDH_KEY_PAIR_GENERATE(RNG *ext.RAND, S []byte, W []byte) int { - res := 0 - var s *BIG - var G *ECP - - G = ECP_generator() - r := NewBIGints(CURVE_Order) - - if RNG == nil { - s = FromBytes(S) - } else { - if CURVETYPE != WEIERSTRASS { - s = Random(RNG) // from random bytes - } else { - s = Randomnum(r, RNG) // Removes biases - } - } - - if CURVETYPE != WEIERSTRASS { - RFC7748(s) // For Montgomery or Edwards, apply RFC7748 transformation - } - - s.ToBytes(S) - WP := G.clmul(s, r) - WP.ToBytes(W, false) // To use point compression on public keys, change to true - - return res -} - -/* validate public key */ -func ECDH_PUBLIC_KEY_VALIDATE(W []byte) int { - WP := ECP_fromBytes(W) - res := 0 - - r := NewBIGints(CURVE_Order) - - if WP.Is_infinity() { - res = INVALID_PUBLIC_KEY - } - if res == 0 { - - q := NewBIGints(Modulus) - nb := q.nbits() - k := NewBIGint(1) - k.shl(uint((nb + 4) / 2)) - k.Add(q) - k.div(r) - - for k.parity() == 0 { - k.shr(1) - WP.Dbl() - } - - if !k.isunity() { - WP = WP.lmul(k) - } - if WP.Is_infinity() { - res = INVALID_PUBLIC_KEY - } - - } - return res -} - -/* IEEE-1363 Diffie-Hellman online calculation Z=S.WD */ -// type = 0 is just x coordinate output -// type = 1 for standard compressed output -// type = 2 for standard uncompress output 04|x|y -func ECDH_ECPSVDP_DH(S []byte, WD []byte, Z []byte, typ int) int { - res := 0 - - s := FromBytes(S) - - W := ECP_fromBytes(WD) - if W.Is_infinity() { - res = ERROR - } - - if res == 0 { - r := NewBIGints(CURVE_Order) - W = W.clmul(s, r) - if W.Is_infinity() { - res = ERROR - } else { - if CURVETYPE != MONTGOMERY { - if typ > 0 { - if typ == 1 { - W.ToBytes(Z, true) - } else { - W.ToBytes(Z, false) - } - } else { - W.GetX().ToBytes(Z) - } - return res - } else { - W.GetX().ToBytes(Z) - } - } - } - return res -} - -/* IEEE ECDSA Signature, C and D are signature on F using 
private key S */ -func ECDH_ECPSP_DSA(sha int, RNG *ext.RAND, S []byte, F []byte, C []byte, D []byte) int { - var T [EGS]byte - - B := ext.GPhashit(ext.MC_SHA2, sha, EGS, 0, F, -1, nil) - G := ECP_generator() - - r := NewBIGints(CURVE_Order) - s := FromBytes(S) - f := FromBytes(B[:]) - - c := NewBIGint(0) - d := NewBIGint(0) - V := NewECP() - - for d.IsZero() { - u := Randomnum(r, RNG) - w := Randomnum(r, RNG) /* IMPORTANT - side channel masking to protect invmodp() */ - - V.Copy(G) - V = V.clmul(u, r) - vx := V.GetX() - c.copy(vx) - c.Mod(r) - if c.IsZero() { - continue - } - u.copy(Modmul(u, w, r)) - u.Invmodp(r) - d.copy(Modmul(s, c, r)) - d.copy(ModAdd(d, f, r)) - d.copy(Modmul(d, w, r)) - d.copy(Modmul(u, d, r)) - } - - c.ToBytes(T[:]) - for i := 0; i < EGS; i++ { - C[i] = T[i] - } - d.ToBytes(T[:]) - for i := 0; i < EGS; i++ { - D[i] = T[i] - } - return 0 -} - -/* IEEE1363 ECDSA Signature Verification. Signature C and D on F is verified using public key W */ -func ECDH_ECPVP_DSA(sha int, W []byte, F []byte, C []byte, D []byte) int { - res := 0 - - B := ext.GPhashit(ext.MC_SHA2, sha, EGS, 0, F, -1, nil) - - G := ECP_generator() - r := NewBIGints(CURVE_Order) - - c := FromBytes(C) - d := FromBytes(D) - f := FromBytes(B[:]) - - if c.IsZero() || Comp(c, r) >= 0 || d.IsZero() || Comp(d, r) >= 0 { - res = ERROR - } - - if res == 0 { - d.Invmodp(r) - f.copy(Modmul(f, d, r)) - h2 := Modmul(c, d, r) - - WP := ECP_fromBytes(W) - if WP.Is_infinity() { - res = ERROR - } else { - P := NewECP() - P.Copy(WP) - - P = P.Mul2(h2, G, f) - - if P.Is_infinity() { - res = ERROR - } else { - d = P.GetX() - d.Mod(r) - - if Comp(d, c) != 0 { - res = ERROR - } - } - } - } - - return res -} - -/* IEEE1363 ECIES encryption. 
Encryption of plaintext M uses public key W and produces ciphertext V,C,T */ -func ECDH_ECIES_ENCRYPT(sha int, P1 []byte, P2 []byte, RNG *ext.RAND, W []byte, M []byte, V []byte, T []byte) []byte { - var Z [EFS]byte - var VZ [3*EFS + 1]byte - var K1 [AESKEY]byte - var K2 [AESKEY]byte - var U [EGS]byte - - if ECDH_KEY_PAIR_GENERATE(RNG, U[:], V) != 0 { - return nil - } - if ECDH_ECPSVDP_DH(U[:], W, Z[:], 0) != 0 { - return nil - } - - for i := 0; i < 2*EFS+1; i++ { - VZ[i] = V[i] - } - for i := 0; i < EFS; i++ { - VZ[2*EFS+1+i] = Z[i] - } - - K := ext.KDF2(ext.MC_SHA2, sha, VZ[:], P1, 2*AESKEY) - - for i := 0; i < AESKEY; i++ { - K1[i] = K[i] - K2[i] = K[AESKEY+i] - } - - C := ext.AES_CBC_IV0_ENCRYPT(K1[:], M) - - L2 := ext.InttoBytes(len(P2), 8) - - var AC []byte - - for i := 0; i < len(C); i++ { - AC = append(AC, C[i]) - } - for i := 0; i < len(P2); i++ { - AC = append(AC, P2[i]) - } - for i := 0; i < 8; i++ { - AC = append(AC, L2[i]) - } - - ext.HMAC(ext.MC_SHA2, sha, T, len(T), K2[:], AC) - - return C -} - -/* constant time n-byte compare */ -func ncomp(T1 []byte, T2 []byte, n int) bool { - res := 0 - for i := 0; i < n; i++ { - res |= int(T1[i] ^ T2[i]) - } - if res == 0 { - return true - } - return false -} - -/* IEEE1363 ECIES decryption. 
Decryption of ciphertext V,C,T using private key U outputs plaintext M */ -func ECDH_ECIES_DECRYPT(sha int, P1 []byte, P2 []byte, V []byte, C []byte, T []byte, U []byte) []byte { - var Z [EFS]byte - var VZ [3*EFS + 1]byte - var K1 [AESKEY]byte - var K2 [AESKEY]byte - - var TAG []byte = T[:] - - if ECDH_ECPSVDP_DH(U, V, Z[:], 0) != 0 { - return nil - } - - for i := 0; i < 2*EFS+1; i++ { - VZ[i] = V[i] - } - for i := 0; i < EFS; i++ { - VZ[2*EFS+1+i] = Z[i] - } - - K := ext.KDF2(ext.MC_SHA2, sha, VZ[:], P1, 2*AESKEY) - - for i := 0; i < AESKEY; i++ { - K1[i] = K[i] - K2[i] = K[AESKEY+i] - } - - M := ext.AES_CBC_IV0_DECRYPT(K1[:], C) - - if M == nil { - return nil - } - - L2 := ext.InttoBytes(len(P2), 8) - - var AC []byte - - for i := 0; i < len(C); i++ { - AC = append(AC, C[i]) - } - for i := 0; i < len(P2); i++ { - AC = append(AC, P2[i]) - } - for i := 0; i < 8; i++ { - AC = append(AC, L2[i]) - } - - ext.HMAC(ext.MC_SHA2, sha, TAG, len(TAG), K2[:], AC) - - if !ncomp(T, TAG, len(T)) { - return nil - } - - return M -} diff --git a/nekryptology/pkg/core/curves/native/bls48581/fp.go b/nekryptology/pkg/core/curves/native/bls48581/fp.go index c42feca..2cab741 100644 --- a/nekryptology/pkg/core/curves/native/bls48581/fp.go +++ b/nekryptology/pkg/core/curves/native/bls48581/fp.go @@ -22,7 +22,11 @@ package bls48581 -import "source.quilibrium.com/quilibrium/monorepo/nekryptology/pkg/core/curves/native/bls48581/ext" +import ( + "arena" + + "source.quilibrium.com/quilibrium/monorepo/nekryptology/pkg/core/curves/native/bls48581/ext" +) type FP struct { x *BIG @@ -31,84 +35,119 @@ type FP struct { /* Constructors */ -func NewFP() *FP { - F := new(FP) - F.x = NewBIG() - F.XES = 1 - return F -} - -func NewFPint(a int) *FP { - F := new(FP) - if a < 0 { - m := NewBIGints(Modulus) - m.inc(a) - m.norm() - F.x = NewBIGcopy(m) +func NewFP(mem *arena.Arena) *FP { + if mem != nil { + F := arena.New[FP](mem) + F.x = NewBIG(mem) + F.XES = 1 + return F } else { - F.x = NewBIGint(a) + F := 
new(FP) + F.x = NewBIG(nil) + F.XES = 1 + return F } - F.nres() - return F } -func NewFPbig(a *BIG) *FP { - F := new(FP) - F.x = NewBIGcopy(a) - F.nres() - return F +func NewFPint(a int, mem *arena.Arena) *FP { + if mem != nil { + F := arena.New[FP](mem) + if a < 0 { + m := NewBIGints(Modulus, mem) + m.inc(a) + m.norm() + F.x = NewBIGcopy(m, mem) + } else { + F.x = NewBIGint(a, mem) + } + F.nres(mem) + return F + } else { + F := new(FP) + if a < 0 { + m := NewBIGints(Modulus, nil) + m.inc(a) + m.norm() + F.x = NewBIGcopy(m, nil) + } else { + F.x = NewBIGint(a, nil) + } + F.nres(nil) + return F + } } -func NewFPcopy(a *FP) *FP { - F := new(FP) - F.x = NewBIGcopy(a.x) - F.XES = a.XES - return F +func NewFPbig(a *BIG, mem *arena.Arena) *FP { + if mem != nil { + F := arena.New[FP](mem) + F.x = NewBIGcopy(a, mem) + F.nres(mem) + return F + } else { + F := new(FP) + F.x = NewBIGcopy(a, nil) + F.nres(nil) + return F + } +} + +func NewFPcopy(a *FP, mem *arena.Arena) *FP { + if mem != nil { + F := arena.New[FP](mem) + F.x = NewBIGcopy(a.x, mem) + F.XES = a.XES + return F + } else { + F := new(FP) + F.x = NewBIGcopy(a.x, nil) + F.XES = a.XES + return F + } } func NewFPrand(rng *ext.RAND) *FP { - m := NewBIGints(Modulus) + m := NewBIGints(Modulus, nil) w := Randomnum(m, rng) - F := NewFPbig(w) + F := NewFPbig(w, nil) return F } func (F *FP) ToString() string { - F.reduce() - return F.Redc().ToString() + F.reduce(nil) + return F.Redc(nil).ToString() } /* convert to Montgomery n-residue form */ -func (F *FP) nres() { +func (F *FP) nres(mem *arena.Arena) { if MODTYPE != PSEUDO_MERSENNE && MODTYPE != GENERALISED_MERSENNE { - r := NewBIGints(R2modp) - d := mul(F.x, r) - F.x.copy(mod(d)) + r := NewBIGints(R2modp, mem) + d := mul(F.x, r, mem) + F.x.copy(mod(d, mem)) F.XES = 2 } else { - md := NewBIGints(Modulus) - F.x.Mod(md) + md := NewBIGints(Modulus, mem) + F.x.Mod(md, mem) F.XES = 1 } } /* convert back to regular form */ -func (F *FP) Redc() *BIG { +func (F *FP) Redc(mem 
*arena.Arena) *BIG { if MODTYPE != PSEUDO_MERSENNE && MODTYPE != GENERALISED_MERSENNE { - d := NewDBIGscopy(F.x) - return mod(d) + d := NewDBIGscopy(F.x, mem) + return mod(d, mem) } else { - r := NewBIGcopy(F.x) + r := NewBIGcopy(F.x, mem) return r } } /* reduce a DBIG to a BIG using the appropriate form of the modulus */ -func mod(d *DBIG) *BIG { +func mod(d *DBIG, mem *arena.Arena) *BIG { if MODTYPE == PSEUDO_MERSENNE { - t := d.split(MODBITS) - b := NewBIGdcopy(d) + t := d.split(MODBITS, mem) + b := NewBIGdcopy(d, mem) v := t.pmul(int(MConst)) @@ -128,7 +167,7 @@ func mod(d *DBIG) *BIG { d.w[NLEN+i-1] = bot d.w[NLEN+i] += top } - b := NewBIG() + b := NewBIG(mem) for i := 0; i < NLEN; i++ { b.w[i] = d.w[NLEN+i] @@ -138,14 +177,14 @@ func mod(d *DBIG) *BIG { } if MODTYPE == GENERALISED_MERSENNE { // GoldiLocks only - t := d.split(MODBITS) - b := NewBIGdcopy(d) + t := d.split(MODBITS, mem) + b := NewBIGdcopy(d, mem) b.Add(t) - dd := NewDBIGscopy(t) + dd := NewDBIGscopy(t, mem) dd.shl(MODBITS / 2) - tt := dd.split(MODBITS) - lo := NewBIGdcopy(dd) + tt := dd.split(MODBITS, mem) + lo := NewBIGdcopy(dd, mem) b.Add(tt) b.Add(lo) b.norm() @@ -163,10 +202,10 @@ func mod(d *DBIG) *BIG { } if MODTYPE == NOT_SPECIAL { - md := NewBIGints(Modulus) - return monty(md, MConst, d) + md := NewBIGints(Modulus, mem) + return monty(md, MConst, d, mem) } - return NewBIG() + return NewBIG(mem) } // find appoximation to quotient of a/m @@ -189,9 +228,9 @@ func quo(n *BIG, m *BIG) int { } /* reduce this mod Modulus */ -func (F *FP) reduce() { - m := NewBIGints(Modulus) - r := NewBIGints(Modulus) +func (F *FP) reduce(mem *arena.Arena) { + m := NewBIGints(Modulus, mem) + r := NewBIGints(Modulus, mem) var sb uint F.x.norm() @@ -217,43 +256,49 @@ func (F *FP) reduce() { } /* test this=0? 
*/ -func (F *FP) IsZero() bool { - W := NewFPcopy(F) - W.reduce() +func (F *FP) IsZero(mem *arena.Arena) bool { + W := NewFPcopy(F, mem) + W.reduce(mem) return W.x.IsZero() } func (F *FP) IsOne() bool { - W := NewFPcopy(F) - W.reduce() - T := NewFPint(1) + mem := arena.NewArena() + defer mem.Free() + W := NewFPcopy(F, mem) + W.reduce(mem) + T := NewFPint(1, mem) return W.Equals(T) } func (F *FP) islarger() int { - if F.IsZero() { + mem := arena.NewArena() + defer mem.Free() + if F.IsZero(mem) { return 0 } - sx := NewBIGints(Modulus) - fx := F.Redc() + sx := NewBIGints(Modulus, mem) + fx := F.Redc(mem) sx.Sub(fx) sx.norm() return Comp(fx, sx) } func (F *FP) ToBytes(b []byte) { - F.Redc().ToBytes(b) + F.Redc(nil).ToBytes(b) } func FP_fromBytes(b []byte) *FP { t := FromBytes(b) - return NewFPbig(t) + return NewFPbig(t, nil) } func (F *FP) isunity() bool { - W := NewFPcopy(F) - W.reduce() - return W.Redc().isunity() + mem := arena.NewArena() + defer mem.Free() + W := NewFPcopy(F, mem) + W.reduce(mem) + return W.Redc(mem).isunity() } /* copy from FP b */ @@ -270,25 +315,27 @@ func (F *FP) zero() { /* set this=1 */ func (F *FP) one() { + mem := arena.NewArena() + defer mem.Free() F.x.one() - F.nres() + F.nres(mem) } /* return sign */ -func (F *FP) sign() int { +func (F *FP) sign(mem *arena.Arena) int { if BIG_ENDIAN_SIGN { - m := NewBIGints(Modulus) + m := NewBIGints(Modulus, mem) m.dec(1) m.fshr(1) - n := NewFPcopy(F) - n.reduce() - w := n.Redc() + n := NewFPcopy(F, mem) + n.reduce(mem) + w := n.Redc(mem) cp := Comp(w, m) return ((cp + 1) & 2) >> 1 } else { - W := NewFPcopy(F) - W.reduce() - return W.Redc().parity() + W := NewFPcopy(F, mem) + W.reduce(mem) + return W.Redc(mem).parity() } } @@ -315,20 +362,20 @@ func (F *FP) cmove(b *FP, d int) { } /* this*=b mod Modulus */ -func (F *FP) Mul(b *FP) { +func (F *FP) Mul(b *FP, mem *arena.Arena) { if int64(F.XES)*int64(b.XES) > int64(FEXCESS) { - F.reduce() + F.reduce(mem) } - d := mul(F.x, b.x) - F.x.copy(mod(d)) + d := 
mul(F.x, b.x, mem) + F.x.copy(mod(d, mem)) F.XES = 2 } /* this = -this mod Modulus */ -func (F *FP) Neg() { - m := NewBIGints(Modulus) +func (F *FP) Neg(mem *arena.Arena) { + m := NewBIGints(Modulus, mem) sb := logb2(uint32(F.XES - 1)) m.fshl(sb) @@ -336,12 +383,12 @@ func (F *FP) Neg() { F.XES = (1 << sb) + 1 if F.XES > FEXCESS { - F.reduce() + F.reduce(mem) } } /* this*=c mod Modulus, where c is a small int */ -func (F *FP) imul(c int) { +func (F *FP) imul(c int, mem *arena.Arena) { // F.norm() s := false if c < 0 { @@ -350,60 +397,60 @@ func (F *FP) imul(c int) { } if MODTYPE == PSEUDO_MERSENNE || MODTYPE == GENERALISED_MERSENNE { - d := F.x.pxmul(c) - F.x.copy(mod(d)) + d := F.x.pxmul(c, mem) + F.x.copy(mod(d, mem)) F.XES = 2 } else { if F.XES*int32(c) <= FEXCESS { F.x.pmul(c) F.XES *= int32(c) } else { - n := NewFPint(c) - F.Mul(n) + n := NewFPint(c, mem) + F.Mul(n, mem) } } if s { - F.Neg() + F.Neg(mem) F.norm() } } /* this*=this mod Modulus */ -func (F *FP) Sqr() { +func (F *FP) Sqr(mem *arena.Arena) { if int64(F.XES)*int64(F.XES) > int64(FEXCESS) { - F.reduce() + F.reduce(mem) } - d := sqr(F.x) - F.x.copy(mod(d)) + d := sqr(F.x, mem) + F.x.copy(mod(d, mem)) F.XES = 2 } /* this+=b */ -func (F *FP) Add(b *FP) { +func (F *FP) Add(b *FP, mem *arena.Arena) { F.x.Add(b.x) F.XES += b.XES if F.XES > FEXCESS { - F.reduce() + F.reduce(mem) } } /* this-=b */ -func (F *FP) Sub(b *FP) { - n := NewFPcopy(b) - n.Neg() - F.Add(n) +func (F *FP) Sub(b *FP, mem *arena.Arena) { + n := NewFPcopy(b, mem) + n.Neg(mem) + F.Add(n, mem) } -func (F *FP) rsub(b *FP) { - F.Neg() - F.Add(b) +func (F *FP) rsub(b *FP, mem *arena.Arena) { + F.Neg(mem) + F.Add(b, mem) } /* this/=2 mod Modulus */ -func (F *FP) div2() { - p := NewBIGints(Modulus) +func (F *FP) div2(mem *arena.Arena) { + p := NewBIGints(Modulus, mem) pr := F.x.parity() - w := NewBIGcopy(F.x) + w := NewBIGcopy(F.x, mem) F.x.fshr(1) w.Add(p) w.norm() @@ -413,18 +460,22 @@ func (F *FP) div2() { /* return jacobi symbol 
(this/Modulus) */ func (F *FP) jacobi() int { - w := F.Redc() - p := NewBIGints(Modulus) + mem := arena.NewArena() + defer mem.Free() + w := F.Redc(mem) + p := NewBIGints(Modulus, mem) return w.Jacobi(p) } /* return TRUE if this==a */ func (F *FP) Equals(a *FP) bool { - f := NewFPcopy(F) - s := NewFPcopy(a) + mem := arena.NewArena() + defer mem.Free() + f := NewFPcopy(F, mem) + s := NewFPcopy(a, mem) - s.reduce() - f.reduce() + s.reduce(mem) + f.reduce(mem) if Comp(s.x, f.x) == 0 { return true } @@ -432,20 +483,22 @@ func (F *FP) Equals(a *FP) bool { } func (F *FP) Comp(a *FP) int { - f := NewFPcopy(F) - s := NewFPcopy(a) + mem := arena.NewArena() + defer mem.Free() + f := NewFPcopy(F, mem) + s := NewFPcopy(a, mem) - s.reduce() - f.reduce() + s.reduce(mem) + f.reduce(mem) return Comp(s.x, f.x) } -func (F *FP) pow(e *BIG) *FP { +func (F *FP) pow(e *BIG, mem *arena.Arena) *FP { var tb []*FP var w [1 + (NLEN*int(BASEBITS)+3)/4]int8 F.norm() - t := NewBIGcopy(e) + t := NewBIGcopy(e, mem) t.norm() nb := 1 + (t.nbits()+3)/4 @@ -456,51 +509,51 @@ func (F *FP) pow(e *BIG) *FP { w[i] = int8(lsbs) t.fshr(4) } - tb = append(tb, NewFPint(1)) - tb = append(tb, NewFPcopy(F)) + tb = append(tb, NewFPint(1, mem)) + tb = append(tb, NewFPcopy(F, mem)) for i := 2; i < 16; i++ { - tb = append(tb, NewFPcopy(tb[i-1])) - tb[i].Mul(F) + tb = append(tb, NewFPcopy(tb[i-1], mem)) + tb[i].Mul(F, mem) } - r := NewFPcopy(tb[w[nb-1]]) + r := NewFPcopy(tb[w[nb-1]], mem) for i := nb - 2; i >= 0; i-- { - r.Sqr() - r.Sqr() - r.Sqr() - r.Sqr() - r.Mul(tb[w[i]]) + r.Sqr(mem) + r.Sqr(mem) + r.Sqr(mem) + r.Sqr(mem) + r.Mul(tb[w[i]], mem) } - r.reduce() + r.reduce(mem) return r } // See https://eprint.iacr.org/2018/1038 // return this^(p-3)/4 or this^(p-5)/8 -func (F *FP) fpow() *FP { +func (F *FP) fpow(mem *arena.Arena) *FP { ac := [11]int{1, 2, 3, 6, 12, 15, 30, 60, 120, 240, 255} - var xp []*FP + xp := arena.MakeSlice[*FP](mem, 11, 11) // phase 1 - xp = append(xp, NewFPcopy(F)) - xp = append(xp, 
NewFPcopy(F)) - xp[1].Sqr() - xp = append(xp, NewFPcopy(xp[1])) - xp[2].Mul(F) - xp = append(xp, NewFPcopy(xp[2])) - xp[3].Sqr() - xp = append(xp, NewFPcopy(xp[3])) - xp[4].Sqr() - xp = append(xp, NewFPcopy(xp[4])) - xp[5].Mul(xp[2]) - xp = append(xp, NewFPcopy(xp[5])) - xp[6].Sqr() - xp = append(xp, NewFPcopy(xp[6])) - xp[7].Sqr() - xp = append(xp, NewFPcopy(xp[7])) - xp[8].Sqr() - xp = append(xp, NewFPcopy(xp[8])) - xp[9].Sqr() - xp = append(xp, NewFPcopy(xp[9])) - xp[10].Mul(xp[5]) + xp[0] = NewFPcopy(F, mem) + xp[1] = NewFPcopy(F, mem) + xp[1].Sqr(mem) + xp[2] = NewFPcopy(xp[1], mem) + xp[2].Mul(F, mem) + xp[3] = NewFPcopy(xp[2], mem) + xp[3].Sqr(mem) + xp[4] = NewFPcopy(xp[3], mem) + xp[4].Sqr(mem) + xp[5] = NewFPcopy(xp[4], mem) + xp[5].Mul(xp[2], mem) + xp[6] = NewFPcopy(xp[5], mem) + xp[6].Sqr(mem) + xp[7] = NewFPcopy(xp[6], mem) + xp[7].Sqr(mem) + xp[8] = NewFPcopy(xp[7], mem) + xp[8].Sqr(mem) + xp[9] = NewFPcopy(xp[8], mem) + xp[9].Sqr(mem) + xp[10] = NewFPcopy(xp[9], mem) + xp[10].Mul(xp[5], mem) var n, c int e := int(PM1D2) @@ -529,7 +582,7 @@ func (F *FP) fpow() *FP { k := w - c i := 10 - key := NewFP() + key := NewFP(mem) if k != 0 { for ac[i] > k { @@ -544,7 +597,7 @@ func (F *FP) fpow() *FP { if ac[i] > k { continue } - key.Mul(xp[i]) + key.Mul(xp[i], mem) k -= ac[i] } // phase 2 @@ -555,19 +608,19 @@ func (F *FP) fpow() *FP { j := 3 m := 8 nw := n - bw - t := NewFP() + t := NewFP(mem) for 2*m < nw { t.copy(xp[j]) j++ for i = 0; i < m; i++ { - t.Sqr() + t.Sqr(mem) } xp[j].copy(xp[j-1]) - xp[j].Mul(t) + xp[j].Mul(t, mem) m *= 2 } lo := nw - m - r := NewFPcopy(xp[j]) + r := NewFPcopy(xp[j], mem) for lo != 0 { m /= 2 @@ -578,84 +631,86 @@ func (F *FP) fpow() *FP { lo -= m t.copy(r) for i = 0; i < m; i++ { - t.Sqr() + t.Sqr(mem) } r.copy(t) - r.Mul(xp[j]) + r.Mul(xp[j], mem) } // phase 3 if bw != 0 { for i = 0; i < bw; i++ { - r.Sqr() + r.Sqr(mem) } - r.Mul(key) + r.Mul(key, mem) } if MODTYPE == GENERALISED_MERSENNE { // Goldilocks ONLY key.copy(r) - 
r.Sqr() - r.Mul(F) + r.Sqr(mem) + r.Mul(F, mem) for i = 0; i < n+1; i++ { - r.Sqr() + r.Sqr(mem) } - r.Mul(key) + r.Mul(key, mem) } for nd > 0 { - r.Sqr() + r.Sqr(mem) nd-- } return r } // calculates r=x^(p-1-2^e)/2^{e+1) where 2^e|p-1 -func (F *FP) progen() { +func (F *FP) progen(mem *arena.Arena) { if MODTYPE == PSEUDO_MERSENNE || MODTYPE == GENERALISED_MERSENNE { - F.copy(F.fpow()) + F.copy(F.fpow(mem)) return } e := uint(PM1D2) - m := NewBIGints(Modulus) + m := NewBIGints(Modulus, mem) m.dec(1) m.shr(e) m.dec(1) m.fshr(1) - F.copy(F.pow(m)) + F.copy(F.pow(m, mem)) } /* this=1/this mod Modulus */ -func (F *FP) Invert(h *FP) { +func (F *FP) Invert(h *FP, mem *arena.Arena) { e := int(PM1D2) F.norm() - s := NewFPcopy(F) + s := NewFPcopy(F, mem) for i := 0; i < e-1; i++ { - s.Sqr() - s.Mul(F) + s.Sqr(mem) + s.Mul(F, mem) } if h == nil { - F.progen() + F.progen(mem) } else { F.copy(h) } for i := 0; i <= e; i++ { - F.Sqr() + F.Sqr(mem) } - F.Mul(s) - F.reduce() + F.Mul(s, mem) + F.reduce(mem) } /* test for Quadratic residue */ func (F *FP) qr(h *FP) int { - r := NewFPcopy(F) + mem := arena.NewArena() + defer mem.Free() + r := NewFPcopy(F, mem) e := int(PM1D2) - r.progen() + r.progen(mem) if h != nil { h.copy(r) } - r.Sqr() - r.Mul(F) + r.Sqr(mem) + r.Mul(F, mem) for i := 0; i < e-1; i++ { - r.Sqr() + r.Sqr(mem) } if r.isunity() { @@ -666,29 +721,29 @@ func (F *FP) qr(h *FP) int { } /* return sqrt(this) mod Modulus */ -func (F *FP) Sqrt(h *FP) *FP { +func (F *FP) Sqrt(h *FP, mem *arena.Arena) *FP { e := int(PM1D2) - g := NewFPcopy(F) + g := NewFPcopy(F, mem) if h == nil { - g.progen() + g.progen(mem) } else { g.copy(h) } - m := NewBIGints(ROI) - v := NewFPbig(m) + m := NewBIGints(ROI, mem) + v := NewFPbig(m, mem) - t := NewFPcopy(g) - t.Sqr() - t.Mul(F) + t := NewFPcopy(g, mem) + t.Sqr(mem) + t.Mul(F, mem) - r := NewFPcopy(F) - r.Mul(g) - b := NewFPcopy(t) + r := NewFPcopy(F, mem) + r.Mul(g, mem) + b := NewFPcopy(t, mem) for k := e; k > 1; k-- { for j := 1; j < k-1; 
j++ { - b.Sqr() + b.Sqr(mem) } var u int if b.isunity() { @@ -697,41 +752,43 @@ func (F *FP) Sqrt(h *FP) *FP { u = 1 } g.copy(r) - g.Mul(v) + g.Mul(v, mem) r.cmove(g, u) - v.Sqr() + v.Sqr(mem) g.copy(t) - g.Mul(v) + g.Mul(v, mem) t.cmove(g, u) b.copy(t) } - sgn := r.sign() - nr := NewFPcopy(r) - nr.Neg() + sgn := r.sign(mem) + nr := NewFPcopy(r, mem) + nr.Neg(mem) nr.norm() r.cmove(nr, sgn) return r } func (F *FP) invsqrt(i *FP, s *FP) int { - h := NewFP() + mem := arena.NewArena() + defer mem.Free() + h := NewFP(mem) qr := F.qr(h) - s.copy(F.Sqrt(h)) + s.copy(F.Sqrt(h, mem)) i.copy(F) - i.Invert(h) + i.Invert(h, mem) return qr } // Two for the price of one - See Hamburg https://eprint.iacr.org/2012/309.pdf // Calculate Invert of i and square root of s, return QR func FP_tpo(i *FP, s *FP) int { - w := NewFPcopy(s) - t := NewFPcopy(i) - w.Mul(i) - t.Mul(w) + w := NewFPcopy(s, nil) + t := NewFPcopy(i, nil) + w.Mul(i, nil) + t.Mul(w, nil) qr := t.invsqrt(i, s) - i.Mul(w) - s.Mul(i) + i.Mul(w, nil) + s.Mul(i, nil) return qr } diff --git a/nekryptology/pkg/core/curves/native/bls48581/fp16.go b/nekryptology/pkg/core/curves/native/bls48581/fp16.go index 5f21e49..197ff43 100644 --- a/nekryptology/pkg/core/curves/native/bls48581/fp16.go +++ b/nekryptology/pkg/core/curves/native/bls48581/fp16.go @@ -23,6 +23,8 @@ package bls48581 +import "arena" + //import "fmt" type FP16 struct { @@ -30,46 +32,81 @@ type FP16 struct { b *FP8 } -func NewFP16() *FP16 { - F := new(FP16) - F.a = NewFP8() - F.b = NewFP8() - return F +func NewFP16(mem *arena.Arena) *FP16 { + if mem != nil { + F := arena.New[FP16](mem) + F.a = NewFP8(mem) + F.b = NewFP8(mem) + return F + } else { + F := new(FP16) + F.a = NewFP8(nil) + F.b = NewFP8(nil) + return F + } } /* Constructors */ -func NewFP16int(a int) *FP16 { - F := new(FP16) - F.a = NewFP8int(a) - F.b = NewFP8() - return F +func NewFP16int(a int, mem *arena.Arena) *FP16 { + if mem != nil { + F := arena.New[FP16](mem) + F.a = NewFP8int(a, mem) + F.b = 
NewFP8(mem) + return F + } else { + F := new(FP16) + F.a = NewFP8int(a, nil) + F.b = NewFP8(nil) + return F + } } -func NewFP16copy(x *FP16) *FP16 { - F := new(FP16) - F.a = NewFP8copy(x.a) - F.b = NewFP8copy(x.b) - return F +func NewFP16copy(x *FP16, mem *arena.Arena) *FP16 { + if mem != nil { + F := arena.New[FP16](mem) + F.a = NewFP8copy(x.a, mem) + F.b = NewFP8copy(x.b, mem) + return F + } else { + F := new(FP16) + F.a = NewFP8copy(x.a, nil) + F.b = NewFP8copy(x.b, nil) + return F + } } -func NewFP16fp8s(c *FP8, d *FP8) *FP16 { - F := new(FP16) - F.a = NewFP8copy(c) - F.b = NewFP8copy(d) - return F +func NewFP16fp8s(c *FP8, d *FP8, mem *arena.Arena) *FP16 { + if mem != nil { + F := arena.New[FP16](mem) + F.a = c + F.b = d + return F + } else { + F := new(FP16) + F.a = c + F.b = d + return F + } } -func NewFP16fp8(c *FP8) *FP16 { - F := new(FP16) - F.a = NewFP8copy(c) - F.b = NewFP8() - return F +func NewFP16fp8(c *FP8, mem *arena.Arena) *FP16 { + if mem != nil { + F := arena.New[FP16](mem) + F.a = c + F.b = NewFP8(mem) + return F + } else { + F := new(FP16) + F.a = c + F.b = NewFP8(nil) + return F + } } /* reduce all components of this mod Modulus */ -func (F *FP16) reduce() { - F.a.reduce() - F.b.reduce() +func (F *FP16) reduce(mem *arena.Arena) { + F.a.reduce(mem) + F.b.reduce(mem) } /* normalise all components of this mod Modulus */ @@ -79,8 +116,8 @@ func (F *FP16) norm() { } /* test this==0 ? */ -func (F *FP16) IsZero() bool { - return F.a.IsZero() && F.b.IsZero() +func (F *FP16) IsZero(mem *arena.Arena) bool { + return F.a.IsZero(mem) && F.b.IsZero(mem) } func (F *FP16) ToBytes(bf []byte) { @@ -107,7 +144,7 @@ func FP16_fromBytes(bf []byte) *FP16 { t[i] = bf[i+MB] } ta := FP8_fromBytes(t[:]) - return NewFP16fp8s(ta, tb) + return NewFP16fp8s(ta, tb, nil) } /* Conditional move */ @@ -118,13 +155,15 @@ func (F *FP16) cmove(g *FP16, d int) { /* test this==1 ? 
*/ func (F *FP16) isunity() bool { - one := NewFP8int(1) - return F.a.Equals(one) && F.b.IsZero() + mem := arena.NewArena() + defer mem.Free() + one := NewFP8int(1, mem) + return F.a.Equals(one) && F.b.IsZero(mem) } /* test is w real? That is in a+ib test b is zero */ func (F *FP16) isreal() bool { - return F.b.IsZero() + return F.b.IsZero(nil) } /* extract real part a */ @@ -165,137 +204,137 @@ func (F *FP16) one() { } /* set this=-this */ -func (F *FP16) Neg() { +func (F *FP16) Neg(mem *arena.Arena) { F.norm() - m := NewFP8copy(F.a) - t := NewFP8() - m.Add(F.b) - m.Neg() + m := NewFP8copy(F.a, mem) + t := NewFP8(mem) + m.Add(F.b, mem) + m.Neg(mem) t.copy(m) - t.Add(F.b) + t.Add(F.b, mem) F.b.copy(m) - F.b.Add(F.a) + F.b.Add(F.a, mem) F.a.copy(t) F.norm() } /* this=conjugate(this) */ -func (F *FP16) conj() { - F.b.Neg() +func (F *FP16) conj(mem *arena.Arena) { + F.b.Neg(mem) F.norm() } /* this=-conjugate(this) */ -func (F *FP16) nconj() { - F.a.Neg() +func (F *FP16) nconj(mem *arena.Arena) { + F.a.Neg(mem) F.norm() } /* this+=x */ -func (F *FP16) Add(x *FP16) { - F.a.Add(x.a) - F.b.Add(x.b) +func (F *FP16) Add(x *FP16, mem *arena.Arena) { + F.a.Add(x.a, mem) + F.b.Add(x.b, mem) } /* this-=x */ -func (F *FP16) Sub(x *FP16) { - m := NewFP16copy(x) - m.Neg() - F.Add(m) +func (F *FP16) Sub(x *FP16, mem *arena.Arena) { + m := NewFP16copy(x, mem) + m.Neg(mem) + F.Add(m, mem) } /* this-=x */ -func (F *FP16) rsub(x *FP16) { - F.Neg() - F.Add(x) +func (F *FP16) rsub(x *FP16, mem *arena.Arena) { + F.Neg(mem) + F.Add(x, mem) } /* this*=s where s is FP8 */ -func (F *FP16) pmul(s *FP8) { - F.a.Mul(s) - F.b.Mul(s) +func (F *FP16) pmul(s *FP8, mem *arena.Arena) { + F.a.Mul(s, mem) + F.b.Mul(s, mem) } /* this*=s where s is FP2 */ -func (F *FP16) qmul(s *FP2) { - F.a.qmul(s) - F.b.qmul(s) +func (F *FP16) qmul(s *FP2, mem *arena.Arena) { + F.a.qmul(s, mem) + F.b.qmul(s, mem) } /* this*=s where s is FP */ -func (F *FP16) tmul(s *FP) { - F.a.tmul(s) - F.b.tmul(s) +func (F *FP16) 
tmul(s *FP, mem *arena.Arena) { + F.a.tmul(s, mem) + F.b.tmul(s, mem) } /* this*=c where c is int */ -func (F *FP16) imul(c int) { - F.a.imul(c) - F.b.imul(c) +func (F *FP16) imul(c int, mem *arena.Arena) { + F.a.imul(c, mem) + F.b.imul(c, mem) } /* this*=this */ -func (F *FP16) Sqr() { - t1 := NewFP8copy(F.a) - t2 := NewFP8copy(F.b) - t3 := NewFP8copy(F.a) +func (F *FP16) Sqr(mem *arena.Arena) { + t1 := NewFP8copy(F.a, mem) + t2 := NewFP8copy(F.b, mem) + t3 := NewFP8copy(F.a, mem) - t3.Mul(F.b) - t1.Add(F.b) - t2.times_i() + t3.Mul(F.b, mem) + t1.Add(F.b, mem) + t2.times_i(mem) - t2.Add(F.a) + t2.Add(F.a, mem) t1.norm() t2.norm() F.a.copy(t1) - F.a.Mul(t2) + F.a.Mul(t2, mem) t2.copy(t3) - t2.times_i() - t2.Add(t3) + t2.times_i(mem) + t2.Add(t3, mem) t2.norm() - t2.Neg() - F.a.Add(t2) + t2.Neg(mem) + F.a.Add(t2, mem) F.b.copy(t3) - F.b.Add(t3) + F.b.Add(t3, mem) F.norm() } /* this*=y */ -func (F *FP16) Mul(y *FP16) { - t1 := NewFP8copy(F.a) - t2 := NewFP8copy(F.b) - t3 := NewFP8() - t4 := NewFP8copy(F.b) +func (F *FP16) Mul(y *FP16, mem *arena.Arena) { + t1 := NewFP8copy(F.a, mem) + t2 := NewFP8copy(F.b, mem) + t3 := NewFP8(mem) + t4 := NewFP8copy(F.b, mem) - t1.Mul(y.a) - t2.Mul(y.b) + t1.Mul(y.a, mem) + t2.Mul(y.b, mem) t3.copy(y.b) - t3.Add(y.a) - t4.Add(F.a) + t3.Add(y.a, mem) + t4.Add(F.a, mem) t3.norm() t4.norm() - t4.Mul(t3) + t4.Mul(t3, mem) t3.copy(t1) - t3.Neg() - t4.Add(t3) + t3.Neg(mem) + t4.Add(t3, mem) t4.norm() t3.copy(t2) - t3.Neg() + t3.Neg(mem) F.b.copy(t4) - F.b.Add(t3) + F.b.Add(t3, mem) - t2.times_i() + t2.times_i(mem) F.a.copy(t2) - F.a.Add(t1) + F.a.Add(t1, mem) F.norm() } @@ -306,77 +345,77 @@ func (F *FP16) toString() string { } /* this=1/this */ -func (F *FP16) Invert() { - t1 := NewFP8copy(F.a) - t2 := NewFP8copy(F.b) +func (F *FP16) Invert(mem *arena.Arena) { + t1 := NewFP8copy(F.a, mem) + t2 := NewFP8copy(F.b, mem) - t1.Sqr() - t2.Sqr() - t2.times_i() + t1.Sqr(mem) + t2.Sqr(mem) + t2.times_i(mem) t2.norm() - t1.Sub(t2) + t1.Sub(t2, mem) 
t1.norm() - t1.Invert(nil) + t1.Invert(nil, mem) - F.a.Mul(t1) - t1.Neg() + F.a.Mul(t1, mem) + t1.Neg(mem) t1.norm() - F.b.Mul(t1) + F.b.Mul(t1, mem) } /* this*=i where i = sqrt(sqrt(-1+sqrt(-1))) */ -func (F *FP16) times_i() { - s := NewFP8copy(F.b) - t := NewFP8copy(F.a) - s.times_i() +func (F *FP16) times_i(mem *arena.Arena) { + s := NewFP8copy(F.b, mem) + t := NewFP8copy(F.a, mem) + s.times_i(mem) F.a.copy(s) F.b.copy(t) F.norm() } -func (F *FP16) times_i2() { - F.a.times_i() - F.b.times_i() +func (F *FP16) times_i2(mem *arena.Arena) { + F.a.times_i(mem) + F.b.times_i(mem) } -func (F *FP16) times_i4() { - F.a.times_i2() - F.b.times_i2() +func (F *FP16) times_i4(mem *arena.Arena) { + F.a.times_i2(mem) + F.b.times_i2(mem) } /* this=this^p using Frobenius */ -func (F *FP16) frob(f *FP2) { - ff := NewFP2copy(f) - ff.Sqr() +func (F *FP16) frob(f *FP2, mem *arena.Arena) { + ff := NewFP2copy(f, mem) + ff.Sqr(mem) ff.norm() - F.a.frob(ff) - F.b.frob(ff) - F.b.qmul(f) - F.b.times_i() + F.a.frob(ff, mem) + F.b.frob(ff, mem) + F.b.qmul(f, mem) + F.b.times_i(mem) } /* this=this^e */ -func (F *FP16) pow(e *BIG) *FP16 { - w := NewFP16copy(F) +func (F *FP16) pow(e *BIG, mem *arena.Arena) *FP16 { + w := NewFP16copy(F, mem) w.norm() - z := NewBIGcopy(e) - r := NewFP16int(1) + z := NewBIGcopy(e, mem) + r := NewFP16int(1, mem) z.norm() for true { bt := z.parity() z.fshr(1) if bt == 1 { - r.Mul(w) + r.Mul(w, mem) } if z.IsZero() { break } - w.Sqr() + w.Sqr(mem) } - r.reduce() + r.reduce(mem) return r } diff --git a/nekryptology/pkg/core/curves/native/bls48581/fp2.go b/nekryptology/pkg/core/curves/native/bls48581/fp2.go index 861445d..1824f4b 100644 --- a/nekryptology/pkg/core/curves/native/bls48581/fp2.go +++ b/nekryptology/pkg/core/curves/native/bls48581/fp2.go @@ -23,7 +23,11 @@ package bls48581 -import "source.quilibrium.com/quilibrium/monorepo/nekryptology/pkg/core/curves/native/bls48581/ext" +import ( + "arena" + + 
"source.quilibrium.com/quilibrium/monorepo/nekryptology/pkg/core/curves/native/bls48581/ext" +) //import "fmt" @@ -32,72 +36,128 @@ type FP2 struct { b *FP } -func NewFP2() *FP2 { - F := new(FP2) - F.a = NewFP() - F.b = NewFP() - return F +func NewFP2(mem *arena.Arena) *FP2 { + if mem != nil { + F := arena.New[FP2](mem) + F.a = NewFP(mem) + F.b = NewFP(mem) + return F + } else { + F := new(FP2) + F.a = NewFP(nil) + F.b = NewFP(nil) + return F + } } /* Constructors */ -func NewFP2int(a int) *FP2 { - F := new(FP2) - F.a = NewFPint(a) - F.b = NewFP() - return F +func NewFP2int(a int, mem *arena.Arena) *FP2 { + if mem != nil { + F := arena.New[FP2](mem) + F.a = NewFPint(a, mem) + F.b = NewFP(mem) + return F + } else { + F := new(FP2) + F.a = NewFPint(a, nil) + F.b = NewFP(nil) + return F + } } -func NewFP2ints(a int, b int) *FP2 { - F := new(FP2) - F.a = NewFPint(a) - F.b = NewFPint(b) - return F +func NewFP2ints(a int, b int, mem *arena.Arena) *FP2 { + if mem != nil { + F := arena.New[FP2](mem) + F.a = NewFPint(a, mem) + F.b = NewFPint(b, mem) + return F + } else { + F := new(FP2) + F.a = NewFPint(a, nil) + F.b = NewFPint(b, nil) + return F + } } -func NewFP2copy(x *FP2) *FP2 { - F := new(FP2) - F.a = NewFPcopy(x.a) - F.b = NewFPcopy(x.b) - return F +func NewFP2copy(x *FP2, mem *arena.Arena) *FP2 { + if mem != nil { + F := arena.New[FP2](mem) + F.a = NewFPcopy(x.a, mem) + F.b = NewFPcopy(x.b, mem) + return F + } else { + F := new(FP2) + F.a = NewFPcopy(x.a, nil) + F.b = NewFPcopy(x.b, nil) + return F + } } -func NewFP2fps(c *FP, d *FP) *FP2 { - F := new(FP2) - F.a = NewFPcopy(c) - F.b = NewFPcopy(d) - return F +func NewFP2fps(c *FP, d *FP, mem *arena.Arena) *FP2 { + if mem != nil { + F := arena.New[FP2](mem) + F.a = NewFPcopy(c, mem) + F.b = NewFPcopy(d, mem) + return F + } else { + F := new(FP2) + F.a = NewFPcopy(c, nil) + F.b = NewFPcopy(d, nil) + return F + } } -func NewFP2bigs(c *BIG, d *BIG) *FP2 { - F := new(FP2) - F.a = NewFPbig(c) - F.b = NewFPbig(d) - return 
F +func NewFP2bigs(c *BIG, d *BIG, mem *arena.Arena) *FP2 { + if mem != nil { + F := arena.New[FP2](mem) + F.a = NewFPbig(c, mem) + F.b = NewFPbig(d, mem) + return F + } else { + F := new(FP2) + F.a = NewFPbig(c, nil) + F.b = NewFPbig(d, nil) + return F + } } -func NewFP2fp(c *FP) *FP2 { - F := new(FP2) - F.a = NewFPcopy(c) - F.b = NewFP() - return F +func NewFP2fp(c *FP, mem *arena.Arena) *FP2 { + if mem != nil { + F := arena.New[FP2](mem) + F.a = NewFPcopy(c, mem) + F.b = NewFP(mem) + return F + } else { + F := new(FP2) + F.a = NewFPcopy(c, nil) + F.b = NewFP(nil) + return F + } } -func NewFP2big(c *BIG) *FP2 { - F := new(FP2) - F.a = NewFPbig(c) - F.b = NewFP() - return F +func NewFP2big(c *BIG, mem *arena.Arena) *FP2 { + if mem != nil { + F := arena.New[FP2](mem) + F.a = NewFPbig(c, mem) + F.b = NewFP(mem) + return F + } else { + F := new(FP2) + F.a = NewFPbig(c, nil) + F.b = NewFP(nil) + return F + } } func NewFP2rand(rng *ext.RAND) *FP2 { - F := NewFP2fps(NewFPrand(rng), NewFPrand(rng)) + F := NewFP2fps(NewFPrand(rng), NewFPrand(rng), nil) return F } /* reduce components mod Modulus */ -func (F *FP2) reduce() { - F.a.reduce() - F.b.reduce() +func (F *FP2) reduce(mem *arena.Arena) { + F.a.reduce(mem) + F.b.reduce(mem) } /* normalise components of w */ @@ -107,12 +167,12 @@ func (F *FP2) norm() { } /* test this=0 ? */ -func (F *FP2) IsZero() bool { - return (F.a.IsZero() && F.b.IsZero()) +func (F *FP2) IsZero(mem *arena.Arena) bool { + return (F.a.IsZero(mem) && F.b.IsZero(mem)) } func (F *FP2) islarger() int { - if F.IsZero() { + if F.IsZero(nil) { return 0 } cmp := F.b.islarger() @@ -146,7 +206,7 @@ func FP2_fromBytes(bf []byte) *FP2 { t[i] = bf[i+MB] } ta := FP_fromBytes(t[:]) - return NewFP2fps(ta, tb) + return NewFP2fps(ta, tb, nil) } func (F *FP2) cmove(g *FP2, d int) { @@ -156,8 +216,10 @@ func (F *FP2) cmove(g *FP2, d int) { /* test this=1 ? 
*/ func (F *FP2) isunity() bool { - one := NewFPint(1) - return (F.a.Equals(one) && F.b.IsZero()) + mem := arena.NewArena() + defer mem.Free() + one := NewFPint(1, mem) + return (F.a.Equals(one) && F.b.IsZero(mem)) } /* test this=x */ @@ -166,13 +228,13 @@ func (F *FP2) Equals(x *FP2) bool { } /* extract a */ -func (F *FP2) GetA() *BIG { - return F.a.Redc() +func (F *FP2) GetA(mem *arena.Arena) *BIG { + return F.a.Redc(mem) } /* extract b */ -func (F *FP2) GetB() *BIG { - return F.b.Redc() +func (F *FP2) GetB(mem *arena.Arena) *BIG { + return F.b.Redc(mem) } /* copy this=x */ @@ -194,12 +256,12 @@ func (F *FP2) one() { } /* Return sign */ -func (F *FP2) sign() int { - p1 := F.a.sign() - p2 := F.b.sign() +func (F *FP2) sign(mem *arena.Arena) int { + p1 := F.a.sign(mem) + p2 := F.b.sign(mem) var u int if BIG_ENDIAN_SIGN { - if F.b.IsZero() { + if F.b.IsZero(mem) { u = 1 } else { u = 0 @@ -207,7 +269,7 @@ func (F *FP2) sign() int { p2 ^= (p1 ^ p2) & u return p2 } else { - if F.a.IsZero() { + if F.a.IsZero(mem) { u = 1 } else { u = 0 @@ -218,106 +280,106 @@ func (F *FP2) sign() int { } /* negate this mod Modulus */ -func (F *FP2) Neg() { - m := NewFPcopy(F.a) - t := NewFP() +func (F *FP2) Neg(mem *arena.Arena) { + m := NewFPcopy(F.a, mem) + t := NewFP(mem) - m.Add(F.b) - m.Neg() + m.Add(F.b, mem) + m.Neg(mem) t.copy(m) - t.Add(F.b) + t.Add(F.b, mem) F.b.copy(m) - F.b.Add(F.a) + F.b.Add(F.a, mem) F.a.copy(t) } /* set to a-ib */ -func (F *FP2) conj() { - F.b.Neg() +func (F *FP2) conj(mem *arena.Arena) { + F.b.Neg(mem) F.b.norm() } /* this+=a */ -func (F *FP2) Add(x *FP2) { - F.a.Add(x.a) - F.b.Add(x.b) +func (F *FP2) Add(x *FP2, mem *arena.Arena) { + F.a.Add(x.a, mem) + F.b.Add(x.b, mem) } /* this-=a */ -func (F *FP2) Sub(x *FP2) { - m := NewFP2copy(x) - m.Neg() - F.Add(m) +func (F *FP2) Sub(x *FP2, mem *arena.Arena) { + m := NewFP2copy(x, mem) + m.Neg(mem) + F.Add(m, mem) } /* this-=a */ -func (F *FP2) rsub(x *FP2) { - F.Neg() - F.Add(x) +func (F *FP2) rsub(x *FP2, mem 
*arena.Arena) { + F.Neg(mem) + F.Add(x, mem) } /* this*=s, where s is an FP */ -func (F *FP2) pmul(s *FP) { - F.a.Mul(s) - F.b.Mul(s) +func (F *FP2) pmul(s *FP, mem *arena.Arena) { + F.a.Mul(s, mem) + F.b.Mul(s, mem) } /* this*=i, where i is an int */ -func (F *FP2) imul(c int) { - F.a.imul(c) - F.b.imul(c) +func (F *FP2) imul(c int, mem *arena.Arena) { + F.a.imul(c, mem) + F.b.imul(c, mem) } /* this*=this */ -func (F *FP2) Sqr() { - w1 := NewFPcopy(F.a) - w3 := NewFPcopy(F.a) - mb := NewFPcopy(F.b) - w1.Add(F.b) +func (F *FP2) Sqr(mem *arena.Arena) { + w1 := NewFPcopy(F.a, mem) + w3 := NewFPcopy(F.a, mem) + mb := NewFPcopy(F.b, mem) + w1.Add(F.b, mem) - w3.Add(F.a) + w3.Add(F.a, mem) w3.norm() - F.b.Mul(w3) + F.b.Mul(w3, mem) - mb.Neg() - F.a.Add(mb) + mb.Neg(mem) + F.a.Add(mb, mem) w1.norm() F.a.norm() - F.a.Mul(w1) + F.a.Mul(w1, mem) } /* this*=y */ /* Now using Lazy reduction */ -func (F *FP2) Mul(y *FP2) { +func (F *FP2) Mul(y *FP2, mem *arena.Arena) { if int64(F.a.XES+F.b.XES)*int64(y.a.XES+y.b.XES) > int64(FEXCESS) { if F.a.XES > 1 { - F.a.reduce() + F.a.reduce(mem) } if F.b.XES > 1 { - F.b.reduce() + F.b.reduce(mem) } } - pR := NewDBIG() - C := NewBIGcopy(F.a.x) - D := NewBIGcopy(y.a.x) - p := NewBIGints(Modulus) + pR := NewDBIG(mem) + C := NewBIGcopy(F.a.x, mem) + D := NewBIGcopy(y.a.x, mem) + p := NewBIGints(Modulus, mem) pR.ucopy(p) - A := mul(F.a.x, y.a.x) - B := mul(F.b.x, y.b.x) + A := mul(F.a.x, y.a.x, mem) + B := mul(F.b.x, y.b.x, mem) C.Add(F.b.x) C.norm() D.Add(y.b.x) D.norm() - E := mul(C, D) - FF := NewDBIGcopy(A) + E := mul(C, D, mem) + FF := NewDBIGcopy(A, mem) FF.Add(B) B.rsub(pR) @@ -326,82 +388,84 @@ func (F *FP2) Mul(y *FP2) { E.Sub(FF) E.norm() - F.a.x.copy(mod(A)) + F.a.x.copy(mod(A, mem)) F.a.XES = 3 - F.b.x.copy(mod(E)) + F.b.x.copy(mod(E, mem)) F.b.XES = 2 } /* -func (F *FP2) pow(b *BIG) { - w := NewFP2copy(F); - r := NewFP2int(1) - z := NewBIGcopy(b) - for true { - bt := z.parity() - z.shr(1) - if bt==1 { - r.Mul(w) + func (F *FP2) 
pow(b *BIG) { + w := NewFP2copy(F); + r := NewFP2int(1) + z := NewBIGcopy(b) + for true { + bt := z.parity() + z.shr(1) + if bt==1 { + r.Mul(w) + } + if z.IsZero() {break} + w.Sqr() } - if z.IsZero() {break} - w.Sqr() + r.reduce() + F.copy(r) } - r.reduce() - F.copy(r) -} */ func (F *FP2) qr(h *FP) int { - c := NewFP2copy(F) - c.conj() - c.Mul(F) + mem := arena.NewArena() + defer mem.Free() + c := NewFP2copy(F, mem) + c.conj(mem) + c.Mul(F, mem) return c.a.qr(h) } /* sqrt(a+ib) = sqrt(a+sqrt(a*a-n*b*b)/2)+ib/(2*sqrt(a+sqrt(a*a-n*b*b)/2)) */ -func (F *FP2) Sqrt(h *FP) { - if F.IsZero() { +func (F *FP2) Sqrt(h *FP, mem *arena.Arena) { + if F.IsZero(mem) { return } - w1 := NewFPcopy(F.b) - w2 := NewFPcopy(F.a) - w3 := NewFP() - w4 := NewFP() - hint := NewFP() - w1.Sqr() - w2.Sqr() - w1.Add(w2) + w1 := NewFPcopy(F.b, mem) + w2 := NewFPcopy(F.a, mem) + w3 := NewFP(mem) + w4 := NewFP(mem) + hint := NewFP(mem) + w1.Sqr(mem) + w2.Sqr(mem) + w1.Add(w2, mem) w1.norm() - w1 = w1.Sqrt(h) + w1 = w1.Sqrt(h, mem) w2.copy(F.a) w3.copy(F.a) - w2.Add(w1) + w2.Add(w1, mem) w2.norm() - w2.div2() + w2.div2(mem) w1.copy(F.b) - w1.div2() + w1.div2(mem) qr := w2.qr(hint) // tweak hint w3.copy(hint) - w3.Neg() + w3.Neg(mem) w3.norm() w4.copy(w2) - w4.Neg() + w4.Neg(mem) w4.norm() w2.cmove(w4, 1-qr) hint.cmove(w3, 1-qr) - F.a.copy(w2.Sqrt(hint)) + F.a.copy(w2.Sqrt(hint, mem)) w3.copy(w2) - w3.Invert(hint) - w3.Mul(F.a) + w3.Invert(hint, mem) + w3.Mul(F.a, mem) F.b.copy(w3) - F.b.Mul(w1) + F.b.Mul(w1, mem) w4.copy(F.a) F.a.cmove(F.b, 1-qr) @@ -425,9 +489,9 @@ func (F *FP2) Sqrt(h *FP) { F.b.cmove(w4,1-qr) */ - sgn := F.sign() - nr := NewFP2copy(F) - nr.Neg() + sgn := F.sign(mem) + nr := NewFP2copy(F, mem) + nr.Neg(mem) nr.norm() F.cmove(nr, sgn) } @@ -443,63 +507,63 @@ func (F *FP2) toString() string { } /* this=1/this */ -func (F *FP2) Invert(h *FP) { +func (F *FP2) Invert(h *FP, mem *arena.Arena) { F.norm() - w1 := NewFPcopy(F.a) - w2 := NewFPcopy(F.b) + w1 := NewFPcopy(F.a, mem) + w2 := 
NewFPcopy(F.b, mem) - w1.Sqr() - w2.Sqr() - w1.Add(w2) - w1.Invert(h) - F.a.Mul(w1) - w1.Neg() + w1.Sqr(mem) + w2.Sqr(mem) + w1.Add(w2, mem) + w1.Invert(h, mem) + F.a.Mul(w1, mem) + w1.Neg(mem) w1.norm() - F.b.Mul(w1) + F.b.Mul(w1, mem) } /* this/=2 */ -func (F *FP2) div2() { - F.a.div2() - F.b.div2() +func (F *FP2) div2(mem *arena.Arena) { + F.a.div2(mem) + F.b.div2(mem) } /* this*=sqrt(-1) */ -func (F *FP2) times_i() { - z := NewFPcopy(F.a) +func (F *FP2) times_i(mem *arena.Arena) { + z := NewFPcopy(F.a, mem) F.a.copy(F.b) - F.a.Neg() + F.a.Neg(mem) F.b.copy(z) } /* w*=(1+sqrt(-1)) */ /* where X*2-(2^i+sqrt(-1)) is irreducible for FP4 */ -func (F *FP2) Mul_ip() { - t := NewFP2copy(F) +func (F *FP2) Mul_ip(mem *arena.Arena) { + t := NewFP2copy(F, mem) i := QNRI - F.times_i() + F.times_i(mem) for i > 0 { - t.Add(t) + t.Add(t, mem) t.norm() i-- } - F.Add(t) + F.Add(t, mem) if TOWER == POSITOWER { F.norm() - F.Neg() + F.Neg(mem) } } /* w/=(2^i+sqrt(-1)) */ -func (F *FP2) div_ip() { - z := NewFP2ints(1<= 0; i-- { if v.bit(i) != 1 { t.copy(b) - sf.conj() - c.conj() - b.xtr_A(a, sf, c) - sf.conj() + sf.conj(mem) + c.conj(mem) + b.xtr_A(a, sf, c, mem) + sf.conj(mem) c.copy(t) - c.xtr_D() - a.xtr_D() + c.xtr_D(mem) + a.xtr_D(mem) } else { t.copy(a) - t.conj() + t.conj(mem) a.copy(b) - a.xtr_D() - b.xtr_A(c, sf, t) - c.xtr_D() + a.xtr_D(mem) + b.xtr_A(c, sf, t, mem) + c.xtr_D(mem) } } if par == 0 { @@ -492,25 +549,25 @@ func (F *FP4) xtr_pow(n *BIG) *FP4 { } else { r.copy(b) } - r.reduce() + r.reduce(mem) return r } /* r=ck^a.cl^n using XTR double exponentiation method on traces of FP12s. See Stam thesis. 
*/ -func (F *FP4) xtr_pow2(ck *FP4, ckml *FP4, ckm2l *FP4, a *BIG, b *BIG) *FP4 { +func (F *FP4) xtr_pow2(ck *FP4, ckml *FP4, ckm2l *FP4, a *BIG, b *BIG, mem *arena.Arena) *FP4 { - e := NewBIGcopy(a) - d := NewBIGcopy(b) - w := NewBIGint(0) + e := NewBIGcopy(a, mem) + d := NewBIGcopy(b, mem) + w := NewBIGint(0, mem) e.norm() d.norm() - cu := NewFP4copy(ck) // can probably be passed in w/o copying - cv := NewFP4copy(F) - cumv := NewFP4copy(ckml) - cum2v := NewFP4copy(ckm2l) - r := NewFP4() - t := NewFP4() + cu := NewFP4copy(ck, mem) // can probably be passed in w/o copying + cv := NewFP4copy(F, mem) + cumv := NewFP4copy(ckml, mem) + cum2v := NewFP4copy(ckm2l, mem) + r := NewFP4(mem) + t := NewFP4(mem) f2 := 0 for d.parity() == 0 && e.parity() == 0 { @@ -531,9 +588,9 @@ func (F *FP4) xtr_pow2(ck *FP4, ckml *FP4, ckm2l *FP4, a *BIG, b *BIG) *FP4 { e.norm() t.copy(cv) - t.xtr_A(cu, cumv, cum2v) + t.xtr_A(cu, cumv, cum2v, mem) cum2v.copy(cumv) - cum2v.conj() + cum2v.conj(mem) cumv.copy(cv) cv.copy(cu) cu.copy(t) @@ -541,24 +598,24 @@ func (F *FP4) xtr_pow2(ck *FP4, ckml *FP4, ckm2l *FP4, a *BIG, b *BIG) *FP4 { if d.parity() == 0 { d.fshr(1) r.copy(cum2v) - r.conj() + r.conj(mem) t.copy(cumv) - t.xtr_A(cu, cv, r) + t.xtr_A(cu, cv, r, mem) cum2v.copy(cumv) - cum2v.xtr_D() + cum2v.xtr_D(mem) cumv.copy(t) - cu.xtr_D() + cu.xtr_D(mem) } else { if e.parity() == 1 { d.Sub(e) d.norm() d.fshr(1) t.copy(cv) - t.xtr_A(cu, cumv, cum2v) - cu.xtr_D() + t.xtr_A(cu, cumv, cum2v, mem) + cu.xtr_D(mem) cum2v.copy(cv) - cum2v.xtr_D() - cum2v.conj() + cum2v.xtr_D(mem) + cum2v.conj(mem) cv.copy(t) } else { w.copy(d) @@ -566,13 +623,13 @@ func (F *FP4) xtr_pow2(ck *FP4, ckml *FP4, ckm2l *FP4, a *BIG, b *BIG) *FP4 { d.fshr(1) e.copy(w) t.copy(cumv) - t.xtr_D() + t.xtr_D(mem) cumv.copy(cum2v) - cumv.conj() + cumv.conj(mem) cum2v.copy(t) - cum2v.conj() + cum2v.conj(mem) t.copy(cv) - t.xtr_D() + t.xtr_D(mem) cv.copy(cu) cu.copy(t) } @@ -587,7 +644,7 @@ func (F *FP4) xtr_pow2(ck *FP4, ckml *FP4, 
ckm2l *FP4, a *BIG, b *BIG) *FP4 { e.Sub(d) e.norm() t.copy(cv) - t.xtr_A(cu, cumv, cum2v) + t.xtr_A(cu, cumv, cum2v, mem) cum2v.copy(cumv) cumv.copy(cu) cu.copy(t) @@ -598,13 +655,13 @@ func (F *FP4) xtr_pow2(ck *FP4, ckml *FP4, ckm2l *FP4, a *BIG, b *BIG) *FP4 { d.fshr(1) e.copy(w) t.copy(cumv) - t.xtr_D() + t.xtr_D(mem) cumv.copy(cum2v) - cumv.conj() + cumv.conj(mem) cum2v.copy(t) - cum2v.conj() + cum2v.conj(mem) t.copy(cv) - t.xtr_D() + t.xtr_D(mem) cv.copy(cu) cu.copy(t) } else { @@ -616,52 +673,52 @@ func (F *FP4) xtr_pow2(ck *FP4, ckml *FP4, ckm2l *FP4, a *BIG, b *BIG) *FP4 { d.copy(w) d.fshr(1) t.copy(cv) - t.xtr_A(cu, cumv, cum2v) - cumv.conj() + t.xtr_A(cu, cumv, cum2v, mem) + cumv.conj(mem) cum2v.copy(cu) - cum2v.xtr_D() - cum2v.conj() + cum2v.xtr_D(mem) + cum2v.conj(mem) cu.copy(cv) - cu.xtr_D() + cu.xtr_D(mem) cv.copy(t) } else { d.fshr(1) r.copy(cum2v) - r.conj() + r.conj(mem) t.copy(cumv) - t.xtr_A(cu, cv, r) + t.xtr_A(cu, cv, r, mem) cum2v.copy(cumv) - cum2v.xtr_D() + cum2v.xtr_D(mem) cumv.copy(t) - cu.xtr_D() + cu.xtr_D(mem) } } } } } r.copy(cv) - r.xtr_A(cu, cumv, cum2v) + r.xtr_A(cu, cumv, cum2v, mem) for i := 0; i < f2; i++ { - r.xtr_D() + r.xtr_D(mem) } - r = r.xtr_pow(d) + r = r.xtr_pow(d, mem) return r } /* this/=2 */ -func (F *FP4) div2() { - F.a.div2() - F.b.div2() +func (F *FP4) div2(mem *arena.Arena) { + F.a.div2(mem) + F.b.div2(mem) } -func (F *FP4) div_i() { - u := NewFP2copy(F.a) - v := NewFP2copy(F.b) - u.div_ip() +func (F *FP4) div_i(mem *arena.Arena) { + u := NewFP2copy(F.a, mem) + v := NewFP2copy(F.b, mem) + u.div_ip(mem) F.a.copy(v) F.b.copy(u) if TOWER == POSITOWER { - F.Neg() + F.Neg(mem) F.norm() } } @@ -688,70 +745,72 @@ func (F *FP4) pow(b *BIG) { /* */ // Test for Quadratic Residue func (F *FP4) qr(h *FP) int { - c := NewFP4copy(F) - c.conj() - c.Mul(F) + mem := arena.NewArena() + defer mem.Free() + c := NewFP4copy(F, mem) + c.conj(mem) + c.Mul(F, mem) return c.a.qr(h) } // sqrt(a+ib) = 
sqrt(a+sqrt(a*a-n*b*b)/2)+ib/(2*sqrt(a+sqrt(a*a-n*b*b)/2)) -func (F *FP4) Sqrt(h *FP) { - if F.IsZero() { +func (F *FP4) Sqrt(h *FP, mem *arena.Arena) { + if F.IsZero(mem) { return } - a := NewFP2copy(F.a) - b := NewFP2() - s := NewFP2copy(F.b) - t := NewFP2copy(F.a) - hint := NewFP() + a := NewFP2copy(F.a, mem) + b := NewFP2(mem) + s := NewFP2copy(F.b, mem) + t := NewFP2copy(F.a, mem) + hint := NewFP(mem) - s.Sqr() - a.Sqr() - s.Mul_ip() + s.Sqr(mem) + a.Sqr(mem) + s.Mul_ip(mem) s.norm() - a.Sub(s) + a.Sub(s, mem) s.copy(a) s.norm() - s.Sqrt(h) + s.Sqrt(h, mem) a.copy(t) b.copy(t) - a.Add(s) + a.Add(s, mem) a.norm() - a.div2() + a.div2(mem) b.copy(F.b) - b.div2() + b.div2(mem) qr := a.qr(hint) // tweak hint - multiply old hint by Norm(1/Beta)^e where Beta is irreducible polynomial s.copy(a) - twk := NewFPbig(NewBIGints(TWK)) - twk.Mul(hint) - s.div_ip() + twk := NewFPbig(NewBIGints(TWK, mem), mem) + twk.Mul(hint, mem) + s.div_ip(mem) s.norm() a.cmove(s, 1-qr) hint.cmove(twk, 1-qr) F.a.copy(a) - F.a.Sqrt(hint) + F.a.Sqrt(hint, mem) s.copy(a) - s.Invert(hint) - s.Mul(F.a) + s.Invert(hint, mem) + s.Mul(F.a, mem) F.b.copy(s) - F.b.Mul(b) + F.b.Mul(b, mem) t.copy(F.a) F.a.cmove(F.b, 1-qr) F.b.cmove(t, 1-qr) - sgn := F.sign() - nr := NewFP4copy(F) - nr.Neg() + sgn := F.sign(mem) + nr := NewFP4copy(F, mem) + nr.Neg(mem) nr.norm() F.cmove(nr, sgn) } diff --git a/nekryptology/pkg/core/curves/native/bls48581/fp48.go b/nekryptology/pkg/core/curves/native/bls48581/fp48.go index fd8df7f..50e9d85 100644 --- a/nekryptology/pkg/core/curves/native/bls48581/fp48.go +++ b/nekryptology/pkg/core/curves/native/bls48581/fp48.go @@ -22,6 +22,8 @@ package bls48581 +import "arena" + //import "fmt" type FP48 struct { @@ -32,29 +34,52 @@ type FP48 struct { } /* Constructors */ -func NewFP48fp16(d *FP16) *FP48 { - F := new(FP48) - F.a = NewFP16copy(d) - F.b = NewFP16() - F.c = NewFP16() - F.stype = FP_SPARSEST - return F +func NewFP48fp16(d *FP16, mem *arena.Arena) *FP48 { + if mem != nil { + 
F := arena.New[FP48](mem) + F.a = NewFP16copy(d, mem) + F.b = NewFP16(mem) + F.c = NewFP16(mem) + F.stype = FP_SPARSEST + return F + } else { + F := new(FP48) + F.a = NewFP16copy(d, nil) + F.b = NewFP16(nil) + F.c = NewFP16(nil) + F.stype = FP_SPARSEST + return F + } } -func NewFP48() *FP48 { - F := new(FP48) - F.a = NewFP16() - F.b = NewFP16() - F.c = NewFP16() - F.stype = FP_ZERO - return F +func NewFP48(mem *arena.Arena) *FP48 { + if mem != nil { + F := arena.New[FP48](mem) + F.a = NewFP16(mem) + F.b = NewFP16(mem) + F.c = NewFP16(mem) + F.stype = FP_ZERO + return F + } else { + F := new(FP48) + F.a = NewFP16(nil) + F.b = NewFP16(nil) + F.c = NewFP16(nil) + F.stype = FP_ZERO + return F + } } -func NewFP48int(d int) *FP48 { - F := new(FP48) - F.a = NewFP16int(d) - F.b = NewFP16() - F.c = NewFP16() +func NewFP48int(d int, mem *arena.Arena) *FP48 { + var F *FP48 + if mem != nil { + F = arena.New[FP48](mem) + } else { + F = new(FP48) + } + F.a = NewFP16int(d, mem) + F.b = NewFP16(mem) + F.c = NewFP16(mem) if d == 1 { F.stype = FP_ONE } else { @@ -63,29 +88,39 @@ func NewFP48int(d int) *FP48 { return F } -func NewFP48fp16s(d *FP16, e *FP16, f *FP16) *FP48 { - F := new(FP48) - F.a = NewFP16copy(d) - F.b = NewFP16copy(e) - F.c = NewFP16copy(f) +func NewFP48fp16s(d *FP16, e *FP16, f *FP16, mem *arena.Arena) *FP48 { + var F *FP48 + if mem != nil { + F = arena.New[FP48](mem) + } else { + F = new(FP48) + } + F.a = d + F.b = e + F.c = f F.stype = FP_DENSE return F } -func NewFP48copy(x *FP48) *FP48 { - F := new(FP48) - F.a = NewFP16copy(x.a) - F.b = NewFP16copy(x.b) - F.c = NewFP16copy(x.c) +func NewFP48copy(x *FP48, mem *arena.Arena) *FP48 { + var F *FP48 + if mem != nil { + F = arena.New[FP48](mem) + } else { + F = new(FP48) + } + F.a = NewFP16copy(x.a, mem) + F.b = NewFP16copy(x.b, mem) + F.c = NewFP16copy(x.c, mem) F.stype = x.stype return F } /* reduce all components of this mod Modulus */ -func (F *FP48) reduce() { - F.a.reduce() - F.b.reduce() - F.c.reduce() +func (F 
*FP48) reduce(mem *arena.Arena) { + F.a.reduce(mem) + F.b.reduce(mem) + F.c.reduce(mem) } /* normalise all components of this */ @@ -96,8 +131,8 @@ func (F *FP48) norm() { } /* test x==0 ? */ -func (F *FP48) IsZero() bool { - return (F.a.IsZero() && F.b.IsZero() && F.c.IsZero()) +func (F *FP48) IsZero(mem *arena.Arena) bool { + return (F.a.IsZero(mem) && F.b.IsZero(mem) && F.c.IsZero(mem)) } /* Conditional move */ @@ -126,15 +161,17 @@ func (F *FP48) selector(g []*FP48, b int32) { F.cmove(g[6], teq(babs, 6)) F.cmove(g[7], teq(babs, 7)) - invF := NewFP48copy(F) - invF.conj() + invF := NewFP48copy(F, nil) + invF.conj(nil) F.cmove(invF, int(m&1)) } /* test x==1 ? */ func (F *FP48) Isunity() bool { - one := NewFP16int(1) - return (F.a.Equals(one) && F.b.IsZero() && F.c.IsZero()) + mem := arena.NewArena() + defer mem.Free() + one := NewFP16int(1, mem) + return (F.a.Equals(one) && F.b.IsZero(mem) && F.c.IsZero(mem)) } /* return 1 if x==y, else 0 */ @@ -182,94 +219,94 @@ func (F *FP48) zero() { } /* this=conj(this) */ -func (F *FP48) conj() { - F.a.conj() - F.b.nconj() - F.c.conj() +func (F *FP48) conj(mem *arena.Arena) { + F.a.conj(mem) + F.b.nconj(mem) + F.c.conj(mem) } /* Granger-Scott Unitary Squaring */ -func (F *FP48) uSqr() { - A := NewFP16copy(F.a) - B := NewFP16copy(F.c) - C := NewFP16copy(F.b) - D := NewFP16() +func (F *FP48) uSqr(mem *arena.Arena) { + A := NewFP16copy(F.a, mem) + B := NewFP16copy(F.c, mem) + C := NewFP16copy(F.b, mem) + D := NewFP16(mem) - F.a.Sqr() + F.a.Sqr(mem) D.copy(F.a) - D.Add(F.a) - F.a.Add(D) + D.Add(F.a, mem) + F.a.Add(D, mem) F.a.norm() - A.nconj() + A.nconj(mem) - A.Add(A) - F.a.Add(A) - B.Sqr() - B.times_i() + A.Add(A, mem) + F.a.Add(A, mem) + B.Sqr(mem) + B.times_i(mem) D.copy(B) - D.Add(B) - B.Add(D) + D.Add(B, mem) + B.Add(D, mem) B.norm() - C.Sqr() + C.Sqr(mem) D.copy(C) - D.Add(C) - C.Add(D) + D.Add(C, mem) + C.Add(D, mem) C.norm() - F.b.conj() - F.b.Add(F.b) - F.c.nconj() + F.b.conj(mem) + F.b.Add(F.b, mem) + F.c.nconj(mem) - 
F.c.Add(F.c) - F.b.Add(B) - F.c.Add(C) - F.reduce() + F.c.Add(F.c, mem) + F.b.Add(B, mem) + F.c.Add(C, mem) + F.reduce(mem) F.stype = FP_DENSE } /* Chung-Hasan SQR2 method from http://cacr.uwaterloo.ca/techreports/2006/cacr2006-24.pdf */ -func (F *FP48) Sqr() { +func (F *FP48) Sqr(mem *arena.Arena) { if F.stype == FP_ONE { return } - A := NewFP16copy(F.a) - B := NewFP16copy(F.b) - C := NewFP16copy(F.c) - D := NewFP16copy(F.a) + A := NewFP16copy(F.a, mem) + B := NewFP16copy(F.b, mem) + C := NewFP16copy(F.c, mem) + D := NewFP16copy(F.a, mem) - A.Sqr() - B.Mul(F.c) - B.Add(B) + A.Sqr(mem) + B.Mul(F.c, mem) + B.Add(B, mem) B.norm() - C.Sqr() - D.Mul(F.b) - D.Add(D) + C.Sqr(mem) + D.Mul(F.b, mem) + D.Add(D, mem) - F.c.Add(F.a) - F.c.Add(F.b) + F.c.Add(F.a, mem) + F.c.Add(F.b, mem) F.c.norm() - F.c.Sqr() + F.c.Sqr(mem) F.a.copy(A) - A.Add(B) + A.Add(B, mem) A.norm() - A.Add(C) - A.Add(D) + A.Add(C, mem) + A.Add(D, mem) A.norm() - A.Neg() - B.times_i() - C.times_i() + A.Neg(mem) + B.times_i(mem) + C.times_i(mem) - F.a.Add(B) + F.a.Add(B, mem) F.b.copy(C) - F.b.Add(D) - F.c.Add(A) + F.b.Add(D, mem) + F.c.Add(A, mem) if F.stype == FP_SPARSER || F.stype == FP_SPARSEST { F.stype = FP_SPARSE } else { @@ -279,70 +316,70 @@ func (F *FP48) Sqr() { } /* FP48 full multiplication this=this*y */ -func (F *FP48) Mul(y *FP48) { - z0 := NewFP16copy(F.a) - z1 := NewFP16() - z2 := NewFP16copy(F.b) - z3 := NewFP16() - t0 := NewFP16copy(F.a) - t1 := NewFP16copy(y.a) +func (F *FP48) Mul(y *FP48, mem *arena.Arena) { + z0 := NewFP16copy(F.a, mem) + z1 := NewFP16(mem) + z2 := NewFP16copy(F.b, mem) + z3 := NewFP16(mem) + t0 := NewFP16copy(F.a, mem) + t1 := NewFP16copy(y.a, mem) - z0.Mul(y.a) - z2.Mul(y.b) + z0.Mul(y.a, mem) + z2.Mul(y.b, mem) - t0.Add(F.b) + t0.Add(F.b, mem) t0.norm() - t1.Add(y.b) + t1.Add(y.b, mem) t1.norm() z1.copy(t0) - z1.Mul(t1) + z1.Mul(t1, mem) t0.copy(F.b) - t0.Add(F.c) + t0.Add(F.c, mem) t0.norm() t1.copy(y.b) - t1.Add(y.c) + t1.Add(y.c, mem) t1.norm() z3.copy(t0) - 
z3.Mul(t1) + z3.Mul(t1, mem) t0.copy(z0) - t0.Neg() + t0.Neg(mem) t1.copy(z2) - t1.Neg() + t1.Neg(mem) - z1.Add(t0) + z1.Add(t0, mem) //z1.norm(); F.b.copy(z1) - F.b.Add(t1) + F.b.Add(t1, mem) - z3.Add(t1) - z2.Add(t0) + z3.Add(t1, mem) + z2.Add(t0, mem) t0.copy(F.a) - t0.Add(F.c) + t0.Add(F.c, mem) t0.norm() t1.copy(y.a) - t1.Add(y.c) + t1.Add(y.c, mem) t1.norm() - t0.Mul(t1) - z2.Add(t0) + t0.Mul(t1, mem) + z2.Add(t0, mem) t0.copy(F.c) - t0.Mul(y.c) + t0.Mul(y.c, mem) t1.copy(t0) - t1.Neg() + t1.Neg(mem) F.c.copy(z2) - F.c.Add(t1) - z3.Add(t1) - t0.times_i() - F.b.Add(t0) + F.c.Add(t1, mem) + z3.Add(t1, mem) + t0.times_i(mem) + F.b.Add(t0, mem) z3.norm() - z3.times_i() + z3.times_i(mem) F.a.copy(z0) - F.a.Add(z3) + F.a.Add(z3, mem) F.stype = FP_DENSE F.norm() } @@ -350,7 +387,7 @@ func (F *FP48) Mul(y *FP48) { /* FP48 full multiplication w=w*y */ /* Supports sparse multiplicands */ /* Usually w is denser than y */ -func (F *FP48) ssmul(y *FP48) { +func (F *FP48) ssmul(y *FP48, mem *arena.Arena) { if F.stype == FP_ONE { F.Copy(y) return @@ -359,483 +396,307 @@ func (F *FP48) ssmul(y *FP48) { return } if y.stype >= FP_SPARSE { - z0 := NewFP16copy(F.a) - z1 := NewFP16() - z2 := NewFP16() - z3 := NewFP16() - z0.Mul(y.a) + z0 := NewFP16copy(F.a, mem) + z1 := NewFP16(mem) + z2 := NewFP16(mem) + z3 := NewFP16(mem) + z0.Mul(y.a, mem) - if SEXTIC_TWIST == M_TYPE { - if y.stype == FP_SPARSE || F.stype == FP_SPARSE { - z2.getb().copy(F.b.getb()) - z2.getb().Mul(y.b.getb()) - z2.geta().zero() - if y.stype != FP_SPARSE { - z2.geta().copy(F.b.getb()) - z2.geta().Mul(y.b.geta()) - } - if F.stype != FP_SPARSE { - z2.geta().copy(F.b.geta()) - z2.geta().Mul(y.b.getb()) - } - z2.times_i() - } else { - z2.copy(F.b) - z2.Mul(y.b) - } - } else { - z2.copy(F.b) - z2.Mul(y.b) - } - t0 := NewFP16copy(F.a) - t1 := NewFP16copy(y.a) - t0.Add(F.b) + z2.copy(F.b) + z2.Mul(y.b, mem) + t0 := NewFP16copy(F.a, mem) + t1 := NewFP16copy(y.a, mem) + t0.Add(F.b, mem) t0.norm() - t1.Add(y.b) + 
t1.Add(y.b, mem) t1.norm() z1.copy(t0) - z1.Mul(t1) + z1.Mul(t1, mem) t0.copy(F.b) - t0.Add(F.c) + t0.Add(F.c, mem) t0.norm() t1.copy(y.b) - t1.Add(y.c) + t1.Add(y.c, mem) t1.norm() z3.copy(t0) - z3.Mul(t1) + z3.Mul(t1, mem) t0.copy(z0) - t0.Neg() + t0.Neg(mem) t1.copy(z2) - t1.Neg() + t1.Neg(mem) - z1.Add(t0) + z1.Add(t0, mem) F.b.copy(z1) - F.b.Add(t1) + F.b.Add(t1, mem) - z3.Add(t1) - z2.Add(t0) + z3.Add(t1, mem) + z2.Add(t0, mem) t0.copy(F.a) - t0.Add(F.c) + t0.Add(F.c, mem) t0.norm() t1.copy(y.a) - t1.Add(y.c) + t1.Add(y.c, mem) t1.norm() - t0.Mul(t1) - z2.Add(t0) + t0.Mul(t1, mem) + z2.Add(t0, mem) - if SEXTIC_TWIST == D_TYPE { - if y.stype == FP_SPARSE || F.stype == FP_SPARSE { - t0.geta().copy(F.c.geta()) - t0.geta().Mul(y.c.geta()) - t0.getb().zero() - if y.stype != FP_SPARSE { - t0.getb().copy(F.c.geta()) - t0.getb().Mul(y.c.getb()) - } - if F.stype != FP_SPARSE { - t0.getb().copy(F.c.getb()) - t0.getb().Mul(y.c.geta()) - } - } else { - t0.copy(F.c) - t0.Mul(y.c) + if y.stype == FP_SPARSE || F.stype == FP_SPARSE { + t0.geta().copy(F.c.geta()) + t0.geta().Mul(y.c.geta(), mem) + t0.getb().zero() + if y.stype != FP_SPARSE { + t0.getb().copy(F.c.geta()) + t0.getb().Mul(y.c.getb(), mem) + } + if F.stype != FP_SPARSE { + t0.getb().copy(F.c.getb()) + t0.getb().Mul(y.c.geta(), mem) } } else { t0.copy(F.c) - t0.Mul(y.c) + t0.Mul(y.c, mem) } t1.copy(t0) - t1.Neg() + t1.Neg(mem) F.c.copy(z2) - F.c.Add(t1) - z3.Add(t1) - t0.times_i() - F.b.Add(t0) + F.c.Add(t1, mem) + z3.Add(t1, mem) + t0.times_i(mem) + F.b.Add(t0, mem) z3.norm() - z3.times_i() + z3.times_i(mem) F.a.copy(z0) - F.a.Add(z3) + F.a.Add(z3, mem) } else { if F.stype == FP_SPARSER || F.stype == FP_SPARSEST { - F.smul(y) + F.smul(y, mem) return } - if SEXTIC_TWIST == D_TYPE { // dense by sparser - 13m - z0 := NewFP16copy(F.a) - z2 := NewFP16copy(F.b) - z3 := NewFP16copy(F.b) - t0 := NewFP16() - t1 := NewFP16copy(y.a) - z0.Mul(y.a) + z0 := NewFP16copy(F.a, mem) + z2 := NewFP16copy(F.b, mem) + z3 := 
NewFP16copy(F.b, mem) + t0 := NewFP16(mem) + t1 := NewFP16copy(y.a, mem) + z0.Mul(y.a, mem) - if y.stype == FP_SPARSEST { - z2.tmul(y.b.a.a.a.a) - } else { - z2.pmul(y.b.geta()) - } - F.b.Add(F.a) - t1.geta().Add(y.b.geta()) - - t1.norm() - F.b.norm() - F.b.Mul(t1) - z3.Add(F.c) - z3.norm() - - if y.stype == FP_SPARSEST { - z3.tmul(y.b.a.a.a.a) - } else { - z3.pmul(y.b.geta()) - } - - t0.copy(z0) - t0.Neg() - t1.copy(z2) - t1.Neg() - - F.b.Add(t0) - - F.b.Add(t1) - z3.Add(t1) - z2.Add(t0) - - t0.copy(F.a) - t0.Add(F.c) - t0.norm() - z3.norm() - t0.Mul(y.a) - F.c.copy(z2) - F.c.Add(t0) - - z3.times_i() - F.a.copy(z0) - F.a.Add(z3) + if y.stype == FP_SPARSEST { + z2.tmul(y.b.a.a.a.a, mem) + } else { + z2.pmul(y.b.geta(), mem) } - if SEXTIC_TWIST == M_TYPE { - z0 := NewFP16copy(F.a) - z1 := NewFP16() - z2 := NewFP16() - z3 := NewFP16() - t0 := NewFP16copy(F.a) - t1 := NewFP16() + F.b.Add(F.a, mem) + t1.geta().Add(y.b.geta(), mem) - z0.Mul(y.a) - t0.Add(F.b) - t0.norm() + t1.norm() + F.b.norm() + F.b.Mul(t1, mem) + z3.Add(F.c, mem) + z3.norm() - z1.copy(t0) - z1.Mul(y.a) - t0.copy(F.b) - t0.Add(F.c) - t0.norm() - - z3.copy(t0) - - if y.stype == FP_SPARSEST { - z3.tmul(y.c.b.a.a.a) - } else { - z3.pmul(y.c.getb()) - } - z3.times_i() - - t0.copy(z0) - t0.Neg() - z1.Add(t0) - F.b.copy(z1) - z2.copy(t0) - - t0.copy(F.a) - t0.Add(F.c) - t0.norm() - t1.copy(y.a) - t1.Add(y.c) - t1.norm() - - t0.Mul(t1) - z2.Add(t0) - t0.copy(F.c) - - if y.stype == FP_SPARSEST { - t0.tmul(y.c.b.a.a.a) - } else { - t0.pmul(y.c.getb()) - } - t0.times_i() - t1.copy(t0) - t1.Neg() - - F.c.copy(z2) - F.c.Add(t1) - z3.Add(t1) - t0.times_i() - F.b.Add(t0) - z3.norm() - z3.times_i() - F.a.copy(z0) - F.a.Add(z3) + if y.stype == FP_SPARSEST { + z3.tmul(y.b.a.a.a.a, mem) + } else { + z3.pmul(y.b.geta(), mem) } + + t0.copy(z0) + t0.Neg(mem) + t1.copy(z2) + t1.Neg(mem) + + F.b.Add(t0, mem) + + F.b.Add(t1, mem) + z3.Add(t1, mem) + z2.Add(t0, mem) + + t0.copy(F.a) + t0.Add(F.c, mem) + t0.norm() + z3.norm() 
+ t0.Mul(y.a, mem) + F.c.copy(z2) + F.c.Add(t0, mem) + + z3.times_i(mem) + F.a.copy(z0) + F.a.Add(z3, mem) } F.stype = FP_DENSE F.norm() } /* Special case of multiplication arises from special form of ATE pairing line function */ -func (F *FP48) smul(y *FP48) { - if SEXTIC_TWIST == D_TYPE { - w1 := NewFP8copy(F.a.geta()) - w2 := NewFP8copy(F.a.getb()) - var w3 *FP8 +func (F *FP48) smul(y *FP48, mem *arena.Arena) { + w1 := NewFP8copy(F.a.geta(), mem) + w2 := NewFP8copy(F.a.getb(), mem) + var w3 *FP8 - w1.Mul(y.a.geta()) - w2.Mul(y.a.getb()) + w1.Mul(y.a.geta(), mem) + w2.Mul(y.a.getb(), mem) - if y.stype == FP_SPARSEST || F.stype == FP_SPARSEST { - if y.stype == FP_SPARSEST && F.stype == FP_SPARSEST { - t := NewFPcopy(F.b.a.a.a.a) - t.Mul(y.b.a.a.a.a) - w3 = NewFP8fp(t) - } else { - if y.stype != FP_SPARSEST { - w3 = NewFP8copy(y.b.geta()) - w3.tmul(F.b.a.a.a.a) - } else { - w3 = NewFP8copy(F.b.geta()) - w3.tmul(y.b.a.a.a.a) - } - } + if y.stype == FP_SPARSEST || F.stype == FP_SPARSEST { + if y.stype == FP_SPARSEST && F.stype == FP_SPARSEST { + t := NewFPcopy(F.b.a.a.a.a, mem) + t.Mul(y.b.a.a.a.a, mem) + w3 = NewFP8fp(t, mem) } else { - w3 = NewFP8copy(F.b.geta()) - w3.Mul(y.b.geta()) + if y.stype != FP_SPARSEST { + w3 = NewFP8copy(y.b.geta(), mem) + w3.tmul(F.b.a.a.a.a, mem) + } else { + w3 = NewFP8copy(F.b.geta(), mem) + w3.tmul(y.b.a.a.a.a, mem) + } } - ta := NewFP8copy(F.a.geta()) - tb := NewFP8copy(y.a.geta()) - ta.Add(F.a.getb()) - ta.norm() - tb.Add(y.a.getb()) - tb.norm() - tc := NewFP8copy(ta) - tc.Mul(tb) - t := NewFP8copy(w1) - t.Add(w2) - t.Neg() - tc.Add(t) - - ta.copy(F.a.geta()) - ta.Add(F.b.geta()) - ta.norm() - tb.copy(y.a.geta()) - tb.Add(y.b.geta()) - tb.norm() - td := NewFP8copy(ta) - td.Mul(tb) - t.copy(w1) - t.Add(w3) - t.Neg() - td.Add(t) - - ta.copy(F.a.getb()) - ta.Add(F.b.geta()) - ta.norm() - tb.copy(y.a.getb()) - tb.Add(y.b.geta()) - tb.norm() - te := NewFP8copy(ta) - te.Mul(tb) - t.copy(w2) - t.Add(w3) - t.Neg() - te.Add(t) - - 
w2.times_i() - w1.Add(w2) - - F.a.geta().copy(w1) - F.a.getb().copy(tc) - F.b.geta().copy(td) - F.b.getb().copy(te) - F.c.geta().copy(w3) - F.c.getb().zero() - - F.a.norm() - F.b.norm() } else { - w1 := NewFP8copy(F.a.geta()) - w2 := NewFP8copy(F.a.getb()) - var w3 *FP8 - - w1.Mul(y.a.geta()) - w2.Mul(y.a.getb()) - - if y.stype == FP_SPARSEST || F.stype == FP_SPARSEST { - if y.stype == FP_SPARSEST && F.stype == FP_SPARSEST { - t := NewFPcopy(F.c.b.a.a.a) - t.Mul(y.c.b.a.a.a) - w3 = NewFP8fp(t) - } else { - if y.stype != FP_SPARSEST { - w3 = NewFP8copy(y.c.getb()) - w3.tmul(F.c.b.a.a.a) - } else { - w3 = NewFP8copy(F.c.getb()) - w3.tmul(y.c.b.a.a.a) - } - } - } else { - w3 = NewFP8copy(F.c.getb()) - w3.Mul(y.c.getb()) - } - - ta := NewFP8copy(F.a.geta()) - tb := NewFP8copy(y.a.geta()) - ta.Add(F.a.getb()) - ta.norm() - tb.Add(y.a.getb()) - tb.norm() - tc := NewFP8copy(ta) - tc.Mul(tb) - t := NewFP8copy(w1) - t.Add(w2) - t.Neg() - tc.Add(t) - - ta.copy(F.a.geta()) - ta.Add(F.c.getb()) - ta.norm() - tb.copy(y.a.geta()) - tb.Add(y.c.getb()) - tb.norm() - td := NewFP8copy(ta) - td.Mul(tb) - t.copy(w1) - t.Add(w3) - t.Neg() - td.Add(t) - - ta.copy(F.a.getb()) - ta.Add(F.c.getb()) - ta.norm() - tb.copy(y.a.getb()) - tb.Add(y.c.getb()) - tb.norm() - te := NewFP8copy(ta) - te.Mul(tb) - t.copy(w2) - t.Add(w3) - t.Neg() - te.Add(t) - - w2.times_i() - w1.Add(w2) - F.a.geta().copy(w1) - F.a.getb().copy(tc) - - w3.times_i() - w3.norm() - F.b.geta().zero() - F.b.getb().copy(w3) - - te.norm() - te.times_i() - F.c.geta().copy(te) - F.c.getb().copy(td) - - F.a.norm() - F.c.norm() - + w3 = NewFP8copy(F.b.geta(), mem) + w3.Mul(y.b.geta(), mem) } + ta := NewFP8copy(F.a.geta(), mem) + tb := NewFP8copy(y.a.geta(), mem) + ta.Add(F.a.getb(), mem) + ta.norm() + tb.Add(y.a.getb(), mem) + tb.norm() + tc := NewFP8copy(ta, mem) + tc.Mul(tb, mem) + t := NewFP8copy(w1, mem) + t.Add(w2, mem) + t.Neg(mem) + tc.Add(t, mem) + + ta.copy(F.a.geta()) + ta.Add(F.b.geta(), mem) + ta.norm() + 
tb.copy(y.a.geta()) + tb.Add(y.b.geta(), mem) + tb.norm() + td := NewFP8copy(ta, mem) + td.Mul(tb, mem) + t.copy(w1) + t.Add(w3, mem) + t.Neg(mem) + td.Add(t, mem) + + ta.copy(F.a.getb()) + ta.Add(F.b.geta(), mem) + ta.norm() + tb.copy(y.a.getb()) + tb.Add(y.b.geta(), mem) + tb.norm() + te := NewFP8copy(ta, mem) + te.Mul(tb, mem) + t.copy(w2) + t.Add(w3, mem) + t.Neg(mem) + te.Add(t, mem) + + w2.times_i(mem) + w1.Add(w2, mem) + + F.a.geta().copy(w1) + F.a.getb().copy(tc) + F.b.geta().copy(td) + F.b.getb().copy(te) + F.c.geta().copy(w3) + F.c.getb().zero() + + F.a.norm() + F.b.norm() F.stype = FP_SPARSE } /* this=1/this */ -func (F *FP48) Invert() { - f0 := NewFP16copy(F.a) - f1 := NewFP16copy(F.b) - f2 := NewFP16copy(F.a) - f3 := NewFP16() +func (F *FP48) Invert(mem *arena.Arena) { + f0 := NewFP16copy(F.a, mem) + f1 := NewFP16copy(F.b, mem) + f2 := NewFP16copy(F.a, mem) + f3 := NewFP16(mem) //F.norm() - f0.Sqr() - f1.Mul(F.c) - f1.times_i() - f0.Sub(f1) + f0.Sqr(mem) + f1.Mul(F.c, mem) + f1.times_i(mem) + f0.Sub(f1, mem) f0.norm() f1.copy(F.c) - f1.Sqr() - f1.times_i() - f2.Mul(F.b) - f1.Sub(f2) + f1.Sqr(mem) + f1.times_i(mem) + f2.Mul(F.b, mem) + f1.Sub(f2, mem) f1.norm() f2.copy(F.b) - f2.Sqr() + f2.Sqr(mem) f3.copy(F.a) - f3.Mul(F.c) - f2.Sub(f3) + f3.Mul(F.c, mem) + f2.Sub(f3, mem) f2.norm() f3.copy(F.b) - f3.Mul(f2) - f3.times_i() - F.a.Mul(f0) - f3.Add(F.a) - F.c.Mul(f1) - F.c.times_i() + f3.Mul(f2, mem) + f3.times_i(mem) + F.a.Mul(f0, mem) + f3.Add(F.a, mem) + F.c.Mul(f1, mem) + F.c.times_i(mem) - f3.Add(F.c) + f3.Add(F.c, mem) f3.norm() - f3.Invert() + f3.Invert(mem) F.a.copy(f0) - F.a.Mul(f3) + F.a.Mul(f3, mem) F.b.copy(f1) - F.b.Mul(f3) + F.b.Mul(f3, mem) F.c.copy(f2) - F.c.Mul(f3) + F.c.Mul(f3, mem) F.stype = FP_DENSE } /* this=this^p using Frobenius */ -func (F *FP48) frob(f *FP2, n int) { - f2 := NewFP2copy(f) - f3 := NewFP2copy(f) +func (F *FP48) frob(f *FP2, n int, mem *arena.Arena) { + f2 := NewFP2copy(f, mem) + f3 := NewFP2copy(f, mem) - f2.Sqr() - 
f3.Mul(f2) + f2.Sqr(mem) + f3.Mul(f2, mem) - f3.Mul_ip() + f3.Mul_ip(mem) f3.norm() - f3.Mul_ip() + f3.Mul_ip(mem) f3.norm() for i := 0; i < n; i++ { - F.a.frob(f3) - F.b.frob(f3) - F.c.frob(f3) + F.a.frob(f3, mem) + F.b.frob(f3, mem) + F.c.frob(f3, mem) - F.b.qmul(f) - F.b.times_i4() - F.b.times_i2() - F.c.qmul(f2) - F.c.times_i4() - F.c.times_i4() - F.c.times_i4() + F.b.qmul(f, mem) + F.b.times_i4(mem) + F.b.times_i2(mem) + F.c.qmul(f2, mem) + F.c.times_i4(mem) + F.c.times_i4(mem) + F.c.times_i4(mem) } F.stype = FP_DENSE } /* trace function */ -func (F *FP48) trace() *FP16 { - t := NewFP16() +func (F *FP48) trace(mem *arena.Arena) *FP16 { + t := NewFP16(mem) t.copy(F.a) - t.imul(3) - t.reduce() + t.imul(3, mem) + t.reduce(mem) return t } @@ -856,7 +717,7 @@ func FP48_fromBytes(w []byte) *FP48 { t[i] = w[i+2*MB] } a := FP16_fromBytes(t[:]) - return NewFP48fp16s(a, b, c) + return NewFP48fp16s(a, b, c, nil) } /* convert this to byte array */ @@ -883,48 +744,48 @@ func (F *FP48) ToString() string { } /* this=this^e */ -func (F *FP48) Pow(e *BIG) *FP48 { - sf := NewFP48copy(F) +func (F *FP48) Pow(e *BIG, mem *arena.Arena) *FP48 { + sf := NewFP48copy(F, mem) sf.norm() - e1 := NewBIGcopy(e) + e1 := NewBIGcopy(e, mem) e1.norm() - e3 := NewBIGcopy(e1) + e3 := NewBIGcopy(e1, mem) e3.pmul(3) e3.norm() - w := NewFP48copy(sf) + w := NewFP48copy(sf, mem) if e3.IsZero() { w.one() return w } nb := e3.nbits() for i := nb - 2; i >= 1; i-- { - w.uSqr() + w.uSqr(mem) bt := e3.bit(i) - e1.bit(i) if bt == 1 { - w.Mul(sf) + w.Mul(sf, mem) } if bt == -1 { - sf.conj() - w.Mul(sf) - sf.conj() + sf.conj(mem) + w.Mul(sf, mem) + sf.conj(mem) } } - w.reduce() + w.reduce(mem) return w } /* constant time powering by small integer of max length bts */ -func (F *FP48) pinpow(e int, bts int) { +func (F *FP48) pinpow(e int, bts int, mem *arena.Arena) { var R []*FP48 - R = append(R, NewFP48int(1)) - R = append(R, NewFP48copy(F)) + R = append(R, NewFP48int(1, mem)) + R = append(R, NewFP48copy(F, 
mem)) for i := bts - 1; i >= 0; i-- { b := (e >> uint(i)) & 1 - R[1-b].Mul(R[b]) - R[b].uSqr() + R[1-b].Mul(R[b], mem) + R[b].uSqr(mem) } F.Copy(R[0]) } @@ -985,79 +846,79 @@ func pow16(q []*FP48, u []*BIG) *FP48 { var w4 [NLEN*int(BASEBITS) + 1]int8 var s4 [NLEN*int(BASEBITS) + 1]int8 var t []*BIG - r := NewFP48() - p := NewFP48() - mt := NewBIGint(0) + r := NewFP48(nil) + p := NewFP48(nil) + mt := NewBIGint(0, nil) var bt int8 var k int for i := 0; i < 16; i++ { - t = append(t, NewBIGcopy(u[i])) + t = append(t, NewBIGcopy(u[i], nil)) } - g1 = append(g1, NewFP48copy(q[0])) // q[0] - g1 = append(g1, NewFP48copy(g1[0])) - g1[1].Mul(q[1]) // q[0].q[1] - g1 = append(g1, NewFP48copy(g1[0])) - g1[2].Mul(q[2]) // q[0].q[2] - g1 = append(g1, NewFP48copy(g1[1])) - g1[3].Mul(q[2]) // q[0].q[1].q[2] - g1 = append(g1, NewFP48copy(g1[0])) - g1[4].Mul(q[3]) // q[0].q[3] - g1 = append(g1, NewFP48copy(g1[1])) - g1[5].Mul(q[3]) // q[0].q[1].q[3] - g1 = append(g1, NewFP48copy(g1[2])) - g1[6].Mul(q[3]) // q[0].q[2].q[3] - g1 = append(g1, NewFP48copy(g1[3])) - g1[7].Mul(q[3]) // q[0].q[1].q[2].q[3] + g1 = append(g1, NewFP48copy(q[0], nil)) // q[0] + g1 = append(g1, NewFP48copy(g1[0], nil)) + g1[1].Mul(q[1], nil) // q[0].q[1] + g1 = append(g1, NewFP48copy(g1[0], nil)) + g1[2].Mul(q[2], nil) // q[0].q[2] + g1 = append(g1, NewFP48copy(g1[1], nil)) + g1[3].Mul(q[2], nil) // q[0].q[1].q[2] + g1 = append(g1, NewFP48copy(g1[0], nil)) + g1[4].Mul(q[3], nil) // q[0].q[3] + g1 = append(g1, NewFP48copy(g1[1], nil)) + g1[5].Mul(q[3], nil) // q[0].q[1].q[3] + g1 = append(g1, NewFP48copy(g1[2], nil)) + g1[6].Mul(q[3], nil) // q[0].q[2].q[3] + g1 = append(g1, NewFP48copy(g1[3], nil)) + g1[7].Mul(q[3], nil) // q[0].q[1].q[2].q[3] - g2 = append(g2, NewFP48copy(q[4])) // q[0] - g2 = append(g2, NewFP48copy(g2[0])) - g2[1].Mul(q[5]) // q[0].q[1] - g2 = append(g2, NewFP48copy(g2[0])) - g2[2].Mul(q[6]) // q[0].q[2] - g2 = append(g2, NewFP48copy(g2[1])) - g2[3].Mul(q[6]) // q[0].q[1].q[2] - g2 = append(g2, 
NewFP48copy(g2[0])) - g2[4].Mul(q[7]) // q[0].q[3] - g2 = append(g2, NewFP48copy(g2[1])) - g2[5].Mul(q[7]) // q[0].q[1].q[3] - g2 = append(g2, NewFP48copy(g2[2])) - g2[6].Mul(q[7]) // q[0].q[2].q[3] - g2 = append(g2, NewFP48copy(g2[3])) - g2[7].Mul(q[7]) // q[0].q[1].q[2].q[3] + g2 = append(g2, NewFP48copy(q[4], nil)) // q[0] + g2 = append(g2, NewFP48copy(g2[0], nil)) + g2[1].Mul(q[5], nil) // q[0].q[1] + g2 = append(g2, NewFP48copy(g2[0], nil)) + g2[2].Mul(q[6], nil) // q[0].q[2] + g2 = append(g2, NewFP48copy(g2[1], nil)) + g2[3].Mul(q[6], nil) // q[0].q[1].q[2] + g2 = append(g2, NewFP48copy(g2[0], nil)) + g2[4].Mul(q[7], nil) // q[0].q[3] + g2 = append(g2, NewFP48copy(g2[1], nil)) + g2[5].Mul(q[7], nil) // q[0].q[1].q[3] + g2 = append(g2, NewFP48copy(g2[2], nil)) + g2[6].Mul(q[7], nil) // q[0].q[2].q[3] + g2 = append(g2, NewFP48copy(g2[3], nil)) + g2[7].Mul(q[7], nil) // q[0].q[1].q[2].q[3] - g3 = append(g3, NewFP48copy(q[8])) // q[0] - g3 = append(g3, NewFP48copy(g3[0])) - g3[1].Mul(q[9]) // q[0].q[1] - g3 = append(g3, NewFP48copy(g3[0])) - g3[2].Mul(q[10]) // q[0].q[2] - g3 = append(g3, NewFP48copy(g3[1])) - g3[3].Mul(q[10]) // q[0].q[1].q[2] - g3 = append(g3, NewFP48copy(g3[0])) - g3[4].Mul(q[11]) // q[0].q[3] - g3 = append(g3, NewFP48copy(g3[1])) - g3[5].Mul(q[11]) // q[0].q[1].q[3] - g3 = append(g3, NewFP48copy(g3[2])) - g3[6].Mul(q[11]) // q[0].q[2].q[3] - g3 = append(g3, NewFP48copy(g3[3])) - g3[7].Mul(q[11]) // q[0].q[1].q[2].q[3] + g3 = append(g3, NewFP48copy(q[8], nil)) // q[0] + g3 = append(g3, NewFP48copy(g3[0], nil)) + g3[1].Mul(q[9], nil) // q[0].q[1] + g3 = append(g3, NewFP48copy(g3[0], nil)) + g3[2].Mul(q[10], nil) // q[0].q[2] + g3 = append(g3, NewFP48copy(g3[1], nil)) + g3[3].Mul(q[10], nil) // q[0].q[1].q[2] + g3 = append(g3, NewFP48copy(g3[0], nil)) + g3[4].Mul(q[11], nil) // q[0].q[3] + g3 = append(g3, NewFP48copy(g3[1], nil)) + g3[5].Mul(q[11], nil) // q[0].q[1].q[3] + g3 = append(g3, NewFP48copy(g3[2], nil)) + g3[6].Mul(q[11], nil) // 
q[0].q[2].q[3] + g3 = append(g3, NewFP48copy(g3[3], nil)) + g3[7].Mul(q[11], nil) // q[0].q[1].q[2].q[3] - g4 = append(g4, NewFP48copy(q[12])) // q[0] - g4 = append(g4, NewFP48copy(g4[0])) - g4[1].Mul(q[13]) // q[0].q[1] - g4 = append(g4, NewFP48copy(g4[0])) - g4[2].Mul(q[14]) // q[0].q[2] - g4 = append(g4, NewFP48copy(g4[1])) - g4[3].Mul(q[14]) // q[0].q[1].q[2] - g4 = append(g4, NewFP48copy(g4[0])) - g4[4].Mul(q[15]) // q[0].q[3] - g4 = append(g4, NewFP48copy(g4[1])) - g4[5].Mul(q[15]) // q[0].q[1].q[3] - g4 = append(g4, NewFP48copy(g4[2])) - g4[6].Mul(q[15]) // q[0].q[2].q[3] - g4 = append(g4, NewFP48copy(g4[3])) - g4[7].Mul(q[15]) // q[0].q[1].q[2].q[3] + g4 = append(g4, NewFP48copy(q[12], nil)) // q[0] + g4 = append(g4, NewFP48copy(g4[0], nil)) + g4[1].Mul(q[13], nil) // q[0].q[1] + g4 = append(g4, NewFP48copy(g4[0], nil)) + g4[2].Mul(q[14], nil) // q[0].q[2] + g4 = append(g4, NewFP48copy(g4[1], nil)) + g4[3].Mul(q[14], nil) // q[0].q[1].q[2] + g4 = append(g4, NewFP48copy(g4[0], nil)) + g4[4].Mul(q[15], nil) // q[0].q[3] + g4 = append(g4, NewFP48copy(g4[1], nil)) + g4[5].Mul(q[15], nil) // q[0].q[1].q[3] + g4 = append(g4, NewFP48copy(g4[2], nil)) + g4[6].Mul(q[15], nil) // q[0].q[2].q[3] + g4 = append(g4, NewFP48copy(g4[3], nil)) + g4[7].Mul(q[15], nil) // q[0].q[1].q[2].q[3] // Make them odd pb1 := 1 - t[0].parity() @@ -1149,41 +1010,41 @@ func pow16(q []*FP48, u []*BIG) *FP48 { // Main loop p.selector(g1, int32(2*w1[nb-1]+1)) r.selector(g2, int32(2*w2[nb-1]+1)) - p.Mul(r) + p.Mul(r, nil) r.selector(g3, int32(2*w3[nb-1]+1)) - p.Mul(r) + p.Mul(r, nil) r.selector(g4, int32(2*w4[nb-1]+1)) - p.Mul(r) + p.Mul(r, nil) for i := nb - 2; i >= 0; i-- { - p.uSqr() + p.uSqr(nil) r.selector(g1, int32(2*w1[i]+s1[i])) - p.Mul(r) + p.Mul(r, nil) r.selector(g2, int32(2*w2[i]+s2[i])) - p.Mul(r) + p.Mul(r, nil) r.selector(g3, int32(2*w3[i]+s3[i])) - p.Mul(r) + p.Mul(r, nil) r.selector(g4, int32(2*w4[i]+s4[i])) - p.Mul(r) + p.Mul(r, nil) } // apply correction r.Copy(q[0]) - 
r.conj() - r.Mul(p) + r.conj(nil) + r.Mul(p, nil) p.cmove(r, pb1) r.Copy(q[4]) - r.conj() - r.Mul(p) + r.conj(nil) + r.Mul(p, nil) p.cmove(r, pb2) r.Copy(q[8]) - r.conj() - r.Mul(p) + r.conj(nil) + r.Mul(p, nil) p.cmove(r, pb3) r.Copy(q[12]) - r.conj() - r.Mul(p) + r.conj(nil) + r.Mul(p, nil) p.cmove(r, pb4) - p.reduce() + p.reduce(nil) return p } diff --git a/nekryptology/pkg/core/curves/native/bls48581/fp8.go b/nekryptology/pkg/core/curves/native/bls48581/fp8.go index 4b94ff8..eed3355 100644 --- a/nekryptology/pkg/core/curves/native/bls48581/fp8.go +++ b/nekryptology/pkg/core/curves/native/bls48581/fp8.go @@ -23,7 +23,11 @@ package bls48581 -import "source.quilibrium.com/quilibrium/monorepo/nekryptology/pkg/core/curves/native/bls48581/ext" +import ( + "arena" + + "source.quilibrium.com/quilibrium/monorepo/nekryptology/pkg/core/curves/native/bls48581/ext" +) //import "fmt" @@ -32,66 +36,115 @@ type FP8 struct { b *FP4 } -func NewFP8() *FP8 { - F := new(FP8) - F.a = NewFP4() - F.b = NewFP4() - return F +func NewFP8(mem *arena.Arena) *FP8 { + if mem != nil { + F := arena.New[FP8](mem) + F.a = NewFP4(mem) + F.b = NewFP4(mem) + return F + } else { + F := new(FP8) + F.a = NewFP4(nil) + F.b = NewFP4(nil) + return F + } } /* Constructors */ -func NewFP8int(a int) *FP8 { - F := new(FP8) - F.a = NewFP4int(a) - F.b = NewFP4() - return F +func NewFP8int(a int, mem *arena.Arena) *FP8 { + if mem != nil { + F := arena.New[FP8](mem) + F.a = NewFP4int(a, mem) + F.b = NewFP4(mem) + return F + } else { + F := new(FP8) + F.a = NewFP4int(a, nil) + F.b = NewFP4(nil) + return F + } } /* Constructors */ -func NewFP8ints(a int, b int) *FP8 { - F := new(FP8) - F.a = NewFP4int(a) - F.b = NewFP4int(b) - return F +func NewFP8ints(a int, b int, mem *arena.Arena) *FP8 { + if mem != nil { + F := arena.New[FP8](mem) + F.a = NewFP4int(a, mem) + F.b = NewFP4int(b, mem) + return F + } else { + F := new(FP8) + F.a = NewFP4int(a, nil) + F.b = NewFP4int(b, nil) + return F + } } -func NewFP8copy(x 
*FP8) *FP8 { - F := new(FP8) - F.a = NewFP4copy(x.a) - F.b = NewFP4copy(x.b) - return F +func NewFP8copy(x *FP8, mem *arena.Arena) *FP8 { + if mem != nil { + F := arena.New[FP8](mem) + F.a = NewFP4copy(x.a, mem) + F.b = NewFP4copy(x.b, mem) + return F + } else { + F := new(FP8) + F.a = NewFP4copy(x.a, nil) + F.b = NewFP4copy(x.b, nil) + return F + } } -func NewFP8fp4s(c *FP4, d *FP4) *FP8 { - F := new(FP8) - F.a = NewFP4copy(c) - F.b = NewFP4copy(d) - return F +func NewFP8fp4s(c *FP4, d *FP4, mem *arena.Arena) *FP8 { + if mem != nil { + F := arena.New[FP8](mem) + F.a = NewFP4copy(c, mem) + F.b = NewFP4copy(d, mem) + return F + } else { + F := new(FP8) + F.a = NewFP4copy(c, nil) + F.b = NewFP4copy(d, nil) + return F + } } -func NewFP8fp4(c *FP4) *FP8 { - F := new(FP8) - F.a = NewFP4copy(c) - F.b = NewFP4() - return F +func NewFP8fp4(c *FP4, mem *arena.Arena) *FP8 { + if mem != nil { + F := arena.New[FP8](mem) + F.a = NewFP4copy(c, mem) + F.b = NewFP4(mem) + return F + } else { + F := new(FP8) + F.a = NewFP4copy(c, nil) + F.b = NewFP4(nil) + return F + } } -func NewFP8fp(c *FP) *FP8 { - F := new(FP8) - F.a = NewFP4fp(c) - F.b = NewFP4() - return F +func NewFP8fp(c *FP, mem *arena.Arena) *FP8 { + if mem != nil { + F := arena.New[FP8](mem) + F.a = NewFP4fp(c, mem) + F.b = NewFP4(mem) + return F + } else { + F := new(FP8) + F.a = NewFP4fp(c, nil) + F.b = NewFP4(nil) + return F + } } func NewFP8rand(rng *ext.RAND) *FP8 { - F := NewFP8fp4s(NewFP4rand(rng), NewFP4rand(rng)) + F := NewFP8fp4s(NewFP4rand(rng), NewFP4rand(rng), nil) return F } /* reduce all components of this mod Modulus */ -func (F *FP8) reduce() { - F.a.reduce() - F.b.reduce() +func (F *FP8) reduce(mem *arena.Arena) { + F.a.reduce(mem) + F.b.reduce(mem) } /* normalise all components of this mod Modulus */ @@ -101,12 +154,12 @@ func (F *FP8) norm() { } /* test this==0 ? 
*/ -func (F *FP8) IsZero() bool { - return F.a.IsZero() && F.b.IsZero() +func (F *FP8) IsZero(mem *arena.Arena) bool { + return F.a.IsZero(mem) && F.b.IsZero(mem) } func (F *FP8) islarger() int { - if F.IsZero() { + if F.IsZero(nil) { return 0 } cmp := F.b.islarger() @@ -140,7 +193,7 @@ func FP8_fromBytes(bf []byte) *FP8 { t[i] = bf[i+MB] } ta := FP4_fromBytes(t[:]) - return NewFP8fp4s(ta, tb) + return NewFP8fp4s(ta, tb, nil) } /* Conditional move */ @@ -151,13 +204,15 @@ func (F *FP8) cmove(g *FP8, d int) { /* test this==1 ? */ func (F *FP8) isunity() bool { - one := NewFP4int(1) - return F.a.Equals(one) && F.b.IsZero() + mem := arena.NewArena() + defer mem.Free() + one := NewFP4int(1, mem) + return F.a.Equals(one) && F.b.IsZero(mem) } /* test is w real? That is in a+ib test b is zero */ func (F *FP8) isreal() bool { - return F.b.IsZero() + return F.b.IsZero(nil) } /* extract real part a */ @@ -198,12 +253,12 @@ func (F *FP8) one() { } /* Return sign */ -func (F *FP8) sign() int { - p1 := F.a.sign() - p2 := F.b.sign() +func (F *FP8) sign(mem *arena.Arena) int { + p1 := F.a.sign(mem) + p2 := F.b.sign(mem) var u int if BIG_ENDIAN_SIGN { - if F.b.IsZero() { + if F.b.IsZero(mem) { u = 1 } else { u = 0 @@ -211,7 +266,7 @@ func (F *FP8) sign() int { p2 ^= (p1 ^ p2) & u return p2 } else { - if F.a.IsZero() { + if F.a.IsZero(mem) { u = 1 } else { u = 0 @@ -222,137 +277,137 @@ func (F *FP8) sign() int { } /* set this=-this */ -func (F *FP8) Neg() { +func (F *FP8) Neg(mem *arena.Arena) { F.norm() - m := NewFP4copy(F.a) - t := NewFP4() - m.Add(F.b) - m.Neg() + m := NewFP4copy(F.a, mem) + t := NewFP4(mem) + m.Add(F.b, mem) + m.Neg(mem) t.copy(m) - t.Add(F.b) + t.Add(F.b, mem) F.b.copy(m) - F.b.Add(F.a) + F.b.Add(F.a, mem) F.a.copy(t) F.norm() } /* this=conjugate(this) */ -func (F *FP8) conj() { - F.b.Neg() +func (F *FP8) conj(mem *arena.Arena) { + F.b.Neg(mem) F.norm() } /* this=-conjugate(this) */ -func (F *FP8) nconj() { - F.a.Neg() +func (F *FP8) nconj(mem *arena.Arena) { 
+ F.a.Neg(mem) F.norm() } /* this+=x */ -func (F *FP8) Add(x *FP8) { - F.a.Add(x.a) - F.b.Add(x.b) +func (F *FP8) Add(x *FP8, mem *arena.Arena) { + F.a.Add(x.a, mem) + F.b.Add(x.b, mem) } /* this-=x */ -func (F *FP8) Sub(x *FP8) { - m := NewFP8copy(x) - m.Neg() - F.Add(m) +func (F *FP8) Sub(x *FP8, mem *arena.Arena) { + m := NewFP8copy(x, mem) + m.Neg(mem) + F.Add(m, mem) } /* this-=x */ -func (F *FP8) rsub(x *FP8) { - F.Neg() - F.Add(x) +func (F *FP8) rsub(x *FP8, mem *arena.Arena) { + F.Neg(mem) + F.Add(x, mem) } /* this*=s where s is FP4 */ -func (F *FP8) pmul(s *FP4) { - F.a.Mul(s) - F.b.Mul(s) +func (F *FP8) pmul(s *FP4, mem *arena.Arena) { + F.a.Mul(s, mem) + F.b.Mul(s, mem) } /* this*=s where s is FP2 */ -func (F *FP8) qmul(s *FP2) { - F.a.pmul(s) - F.b.pmul(s) +func (F *FP8) qmul(s *FP2, mem *arena.Arena) { + F.a.pmul(s, mem) + F.b.pmul(s, mem) } /* this*=s where s is FP */ -func (F *FP8) tmul(s *FP) { - F.a.qmul(s) - F.b.qmul(s) +func (F *FP8) tmul(s *FP, mem *arena.Arena) { + F.a.qmul(s, mem) + F.b.qmul(s, mem) } /* this*=c where c is int */ -func (F *FP8) imul(c int) { - F.a.imul(c) - F.b.imul(c) +func (F *FP8) imul(c int, mem *arena.Arena) { + F.a.imul(c, mem) + F.b.imul(c, mem) } /* this*=this */ -func (F *FP8) Sqr() { - t1 := NewFP4copy(F.a) - t2 := NewFP4copy(F.b) - t3 := NewFP4copy(F.a) +func (F *FP8) Sqr(mem *arena.Arena) { + t1 := NewFP4copy(F.a, mem) + t2 := NewFP4copy(F.b, mem) + t3 := NewFP4copy(F.a, mem) - t3.Mul(F.b) - t1.Add(F.b) - t2.times_i() + t3.Mul(F.b, mem) + t1.Add(F.b, mem) + t2.times_i(mem) - t2.Add(F.a) + t2.Add(F.a, mem) t1.norm() t2.norm() F.a.copy(t1) - F.a.Mul(t2) + F.a.Mul(t2, mem) t2.copy(t3) - t2.times_i() - t2.Add(t3) + t2.times_i(mem) + t2.Add(t3, mem) t2.norm() - t2.Neg() - F.a.Add(t2) + t2.Neg(mem) + F.a.Add(t2, mem) F.b.copy(t3) - F.b.Add(t3) + F.b.Add(t3, mem) F.norm() } /* this*=y */ -func (F *FP8) Mul(y *FP8) { - t1 := NewFP4copy(F.a) - t2 := NewFP4copy(F.b) - t3 := NewFP4() - t4 := NewFP4copy(F.b) +func (F *FP8) 
Mul(y *FP8, mem *arena.Arena) { + t1 := NewFP4copy(F.a, mem) + t2 := NewFP4copy(F.b, mem) + t3 := NewFP4(mem) + t4 := NewFP4copy(F.b, mem) - t1.Mul(y.a) - t2.Mul(y.b) + t1.Mul(y.a, mem) + t2.Mul(y.b, mem) t3.copy(y.b) - t3.Add(y.a) - t4.Add(F.a) + t3.Add(y.a, mem) + t4.Add(F.a, mem) t3.norm() t4.norm() - t4.Mul(t3) + t4.Mul(t3, mem) t3.copy(t1) - t3.Neg() - t4.Add(t3) + t3.Neg(mem) + t4.Add(t3, mem) t4.norm() t3.copy(t2) - t3.Neg() + t3.Neg(mem) F.b.copy(t4) - F.b.Add(t3) + F.b.Add(t3, mem) - t2.times_i() + t2.times_i(mem) F.a.copy(t2) - F.a.Add(t1) + F.a.Add(t1, mem) F.norm() } @@ -363,55 +418,55 @@ func (F *FP8) toString() string { } /* this=1/this */ -func (F *FP8) Invert(h *FP) { - t1 := NewFP4copy(F.a) - t2 := NewFP4copy(F.b) +func (F *FP8) Invert(h *FP, mem *arena.Arena) { + t1 := NewFP4copy(F.a, mem) + t2 := NewFP4copy(F.b, mem) - t1.Sqr() - t2.Sqr() - t2.times_i() + t1.Sqr(mem) + t2.Sqr(mem) + t2.times_i(mem) t2.norm() - t1.Sub(t2) + t1.Sub(t2, mem) t1.norm() - t1.Invert(h) + t1.Invert(h, mem) - F.a.Mul(t1) - t1.Neg() + F.a.Mul(t1, mem) + t1.Neg(mem) t1.norm() - F.b.Mul(t1) + F.b.Mul(t1, mem) } /* this*=i where i = sqrt(sqrt(-1+sqrt(-1))) */ -func (F *FP8) times_i() { - s := NewFP4copy(F.b) - t := NewFP4copy(F.a) - s.times_i() +func (F *FP8) times_i(mem *arena.Arena) { + s := NewFP4copy(F.b, mem) + t := NewFP4copy(F.a, mem) + s.times_i(mem) F.a.copy(s) F.b.copy(t) F.norm() if TOWER == POSITOWER { - F.Neg() + F.Neg(mem) F.norm() } } -func (F *FP8) times_i2() { - F.a.times_i() - F.b.times_i() +func (F *FP8) times_i2(mem *arena.Arena) { + F.a.times_i(mem) + F.b.times_i(mem) } /* this=this^p using Frobenius */ -func (F *FP8) frob(f *FP2) { - ff := NewFP2copy(f) - ff.Sqr() - ff.Mul_ip() +func (F *FP8) frob(f *FP2, mem *arena.Arena) { + ff := NewFP2copy(f, mem) + ff.Sqr(mem) + ff.Mul_ip(mem) ff.norm() - F.a.frob(ff) - F.b.frob(ff) - F.b.pmul(f) - F.b.times_i() + F.a.frob(ff, mem) + F.b.frob(ff, mem) + F.b.pmul(f, mem) + F.b.times_i(mem) } /* this=this^e @@ 
-671,19 +726,19 @@ func (F *FP8) xtr_pow2(ck *FP8, ckml *FP8, ckm2l *FP8, a *BIG, b *BIG) *FP8 { } */ /* this/=2 */ -func (F *FP8) div2() { - F.a.div2() - F.b.div2() +func (F *FP8) div2(mem *arena.Arena) { + F.a.div2(mem) + F.b.div2(mem) } -func (F *FP8) div_i() { - u := NewFP4copy(F.a) - v := NewFP4copy(F.b) - u.div_i() +func (F *FP8) div_i(mem *arena.Arena) { + u := NewFP4copy(F.a, mem) + v := NewFP4copy(F.b, mem) + u.div_i(mem) F.a.copy(v) F.b.copy(u) if TOWER == POSITOWER { - F.Neg() + F.Neg(mem) F.norm() } } @@ -710,70 +765,72 @@ func (F *FP8) pow(b *BIG) { /* */ // Test for Quadratic Residue func (F *FP8) qr(h *FP) int { - c := NewFP8copy(F) - c.conj() - c.Mul(F) + mem := arena.NewArena() + defer mem.Free() + c := NewFP8copy(F, mem) + c.conj(mem) + c.Mul(F, mem) return c.a.qr(h) } // sqrt(a+ib) = sqrt(a+sqrt(a*a-n*b*b)/2)+ib/(2*sqrt(a+sqrt(a*a-n*b*b)/2)) -func (F *FP8) Sqrt(h *FP) { - if F.IsZero() { +func (F *FP8) Sqrt(h *FP, mem *arena.Arena) { + if F.IsZero(mem) { return } - a := NewFP4copy(F.a) - b := NewFP4() - s := NewFP4copy(F.b) - t := NewFP4copy(F.a) - hint := NewFP() + a := NewFP4copy(F.a, mem) + b := NewFP4(mem) + s := NewFP4copy(F.b, mem) + t := NewFP4copy(F.a, mem) + hint := NewFP(mem) - s.Sqr() - a.Sqr() - s.times_i() + s.Sqr(mem) + a.Sqr(mem) + s.times_i(mem) s.norm() - a.Sub(s) + a.Sub(s, mem) s.copy(a) s.norm() - s.Sqrt(h) + s.Sqrt(h, mem) a.copy(t) b.copy(t) - a.Add(s) + a.Add(s, mem) a.norm() - a.div2() + a.div2(mem) b.copy(F.b) - b.div2() + b.div2(mem) qr := a.qr(hint) // tweak hint - multiply old hint by Norm(1/Beta)^e where Beta is irreducible polynomial s.copy(a) - twk := NewFPbig(NewBIGints(TWK)) - twk.Mul(hint) - s.div_i() + twk := NewFPbig(NewBIGints(TWK, mem), mem) + twk.Mul(hint, mem) + s.div_i(mem) s.norm() a.cmove(s, 1-qr) hint.cmove(twk, 1-qr) F.a.copy(a) - F.a.Sqrt(hint) + F.a.Sqrt(hint, mem) s.copy(a) - s.Invert(hint) - s.Mul(F.a) + s.Invert(hint, mem) + s.Mul(F.a, mem) F.b.copy(s) - F.b.Mul(b) + F.b.Mul(b, mem) t.copy(F.a) 
F.a.cmove(F.b, 1-qr) F.b.cmove(t, 1-qr) - sgn := F.sign() - nr := NewFP8copy(F) - nr.Neg() + sgn := F.sign(mem) + nr := NewFP8copy(F, mem) + nr.Neg(mem) nr.norm() F.cmove(nr, sgn) } diff --git a/nekryptology/pkg/core/curves/native/bls48581/g1.go b/nekryptology/pkg/core/curves/native/bls48581/g1.go index 99227dc..9959b11 100644 --- a/nekryptology/pkg/core/curves/native/bls48581/g1.go +++ b/nekryptology/pkg/core/curves/native/bls48581/g1.go @@ -19,6 +19,8 @@ package bls48581 +import "arena" + //import "fmt" /* Elliptic Curve Point Structure */ @@ -29,54 +31,59 @@ type ECP struct { } /* Constructors */ -func NewECP() *ECP { - E := new(ECP) - E.x = NewFP() - E.y = NewFPint(1) - if CURVETYPE == EDWARDS { - E.z = NewFPint(1) +func NewECP(mem *arena.Arena) *ECP { + var E *ECP + if mem != nil { + E = arena.New[ECP](mem) } else { - E.z = NewFP() + E = new(ECP) } + E.x = NewFP(mem) + E.y = NewFPint(1, mem) + E.z = NewFP(mem) return E } /* set (x,y) from two BIGs */ -func NewECPbigs(ix *BIG, iy *BIG) *ECP { - E := new(ECP) - E.x = NewFPbig(ix) - E.y = NewFPbig(iy) - E.z = NewFPint(1) - E.x.norm() - rhs := RHS(E.x) - - if CURVETYPE == MONTGOMERY { - if rhs.qr(nil) != 1 { - E.inf() - } +func NewECPbigs(ix *BIG, iy *BIG, mem *arena.Arena) *ECP { + var E *ECP + if mem != nil { + E = arena.New[ECP](mem) } else { - y2 := NewFPcopy(E.y) - y2.Sqr() - if !y2.Equals(rhs) { - E.inf() - } + E = new(ECP) + } + E.x = NewFPbig(ix, mem) + E.y = NewFPbig(iy, mem) + E.z = NewFPint(1, mem) + E.x.norm() + rhs := RHS(E.x, mem) + + y2 := NewFPcopy(E.y, mem) + y2.Sqr(mem) + if !y2.Equals(rhs) { + E.inf() } return E } /* set (x,y) from BIG and a bit */ -func NewECPbigint(ix *BIG, s int) *ECP { - E := new(ECP) - E.x = NewFPbig(ix) - E.y = NewFP() +func NewECPbigint(ix *BIG, s int, mem *arena.Arena) *ECP { + var E *ECP + if mem != nil { + E = arena.New[ECP](mem) + } else { + E = new(ECP) + } + E.x = NewFPbig(ix, mem) + E.y = NewFP(mem) E.x.norm() - rhs := RHS(E.x) - E.z = NewFPint(1) - hint := NewFP() 
+ rhs := RHS(E.x, mem) + E.z = NewFPint(1, mem) + hint := NewFP(mem) if rhs.qr(hint) == 1 { - ny := rhs.Sqrt(hint) - if ny.sign() != s { - ny.Neg() + ny := rhs.Sqrt(hint, mem) + if ny.sign(mem) != s { + ny.Neg(mem) ny.norm() } E.y.copy(ny) @@ -87,18 +94,21 @@ func NewECPbigint(ix *BIG, s int) *ECP { } /* set from x - calculate y from curve equation */ -func NewECPbig(ix *BIG) *ECP { - E := new(ECP) - E.x = NewFPbig(ix) - E.y = NewFP() +func NewECPbig(ix *BIG, mem *arena.Arena) *ECP { + var E *ECP + if mem != nil { + E = arena.New[ECP](mem) + } else { + E = new(ECP) + } + E.x = NewFPbig(ix, mem) + E.y = NewFP(mem) E.x.norm() - rhs := RHS(E.x) - E.z = NewFPint(1) - hint := NewFP() + rhs := RHS(E.x, mem) + E.z = NewFPint(1, mem) + hint := NewFP(mem) if rhs.qr(hint) == 1 { - if CURVETYPE != MONTGOMERY { - E.y.copy(rhs.Sqrt(hint)) - } + E.y.copy(rhs.Sqrt(hint, mem)) } else { E.inf() } @@ -106,36 +116,23 @@ func NewECPbig(ix *BIG) *ECP { } /* test for O point-at-infinity */ -func (E *ECP) Is_infinity() bool { +func (E *ECP) Is_infinity(mem *arena.Arena) bool { // if E.INF {return true} - if CURVETYPE == EDWARDS { - return (E.x.IsZero() && E.y.Equals(E.z)) - } - if CURVETYPE == WEIERSTRASS { - return (E.x.IsZero() && E.z.IsZero()) - } - if CURVETYPE == MONTGOMERY { - return E.z.IsZero() - } - return true + return (E.x.IsZero(mem) && E.z.IsZero(mem)) } /* Conditional swap of P and Q dependant on d */ func (E *ECP) cswap(Q *ECP, d int) { E.x.cswap(Q.x, d) - if CURVETYPE != MONTGOMERY { - E.y.cswap(Q.y, d) - } + E.y.cswap(Q.y, d) E.z.cswap(Q.z, d) } /* Conditional move of Q to P dependant on d */ func (E *ECP) cmove(Q *ECP, d int) { E.x.cmove(Q.x, d) - if CURVETYPE != MONTGOMERY { - E.y.cmove(Q.y, d) - } + E.y.cmove(Q.y, d) E.z.cmove(Q.z, d) } @@ -149,28 +146,20 @@ func teq(b int32, c int32) int { /* this=P */ func (E *ECP) Copy(P *ECP) { E.x.copy(P.x) - if CURVETYPE != MONTGOMERY { - E.y.copy(P.y) - } + E.y.copy(P.y) E.z.copy(P.z) } /* this=-this */ -func (E *ECP) Neg() { - 
if CURVETYPE == WEIERSTRASS { - E.y.Neg() - E.y.norm() - } - if CURVETYPE == EDWARDS { - E.x.Neg() - E.x.norm() - } +func (E *ECP) Neg(mem *arena.Arena) { + E.y.Neg(mem) + E.y.norm() return } /* Constant time select from pre-computed table */ func (E *ECP) selector(W []*ECP, b int32) { - MP := NewECP() + MP := NewECP(nil) m := b >> 31 babs := (b ^ m) - m @@ -186,137 +175,106 @@ func (E *ECP) selector(W []*ECP, b int32) { E.cmove(W[7], teq(babs, 7)) MP.Copy(E) - MP.Neg() + MP.Neg(nil) E.cmove(MP, int(m&1)) } /* set this=O */ func (E *ECP) inf() { E.x.zero() - if CURVETYPE != MONTGOMERY { - E.y.one() - } - if CURVETYPE != EDWARDS { - E.z.zero() - } else { - E.z.one() - } + E.y.one() + E.z.zero() } /* Test P == Q */ func (E *ECP) Equals(Q *ECP) bool { - a := NewFP() - b := NewFP() + mem := arena.NewArena() + defer mem.Free() + a := NewFP(mem) + b := NewFP(mem) a.copy(E.x) - a.Mul(Q.z) - a.reduce() + a.Mul(Q.z, mem) + a.reduce(mem) b.copy(Q.x) - b.Mul(E.z) - b.reduce() + b.Mul(E.z, mem) + b.reduce(mem) if !a.Equals(b) { return false } - if CURVETYPE != MONTGOMERY { - a.copy(E.y) - a.Mul(Q.z) - a.reduce() - b.copy(Q.y) - b.Mul(E.z) - b.reduce() - if !a.Equals(b) { - return false - } + a.copy(E.y) + a.Mul(Q.z, mem) + a.reduce(mem) + b.copy(Q.y) + b.Mul(E.z, mem) + b.reduce(mem) + if !a.Equals(b) { + return false } return true } /* Calculate RHS of curve equation */ -func RHS(x *FP) *FP { - r := NewFPcopy(x) - r.Sqr() +func RHS(x *FP, mem *arena.Arena) *FP { + r := NewFPcopy(x, mem) + r.Sqr(mem) - if CURVETYPE == WEIERSTRASS { // x^3+Ax+B - b := NewFPbig(NewBIGints(CURVE_B)) - r.Mul(x) - if CURVE_A == -3 { - cx := NewFPcopy(x) - cx.imul(3) - cx.Neg() - cx.norm() - r.Add(cx) - } - r.Add(b) + // x^3+Ax+B + b := NewFPbig(NewBIGints(CURVE_B, mem), mem) + r.Mul(x, mem) + if CURVE_A == -3 { + cx := NewFPcopy(x, mem) + cx.imul(3, mem) + cx.Neg(mem) + cx.norm() + r.Add(cx, mem) } - if CURVETYPE == EDWARDS { // (Ax^2-1)/(Bx^2-1) - b := NewFPbig(NewBIGints(CURVE_B)) + r.Add(b, mem) 
- one := NewFPint(1) - b.Mul(r) - b.Sub(one) - b.norm() - if CURVE_A == -1 { - r.Neg() - } - r.Sub(one) - r.norm() - b.Invert(nil) - r.Mul(b) - } - if CURVETYPE == MONTGOMERY { // x^3+Ax^2+x - x3 := NewFP() - x3.copy(r) - x3.Mul(x) - r.imul(CURVE_A) - r.Add(x3) - r.Add(x) - } - r.reduce() + r.reduce(mem) return r } /* set to affine - from (x,y,z) to (x,y) */ -func (E *ECP) Affine() { - if E.Is_infinity() { +func (E *ECP) Affine(mem *arena.Arena) { + if E.Is_infinity(mem) { return } - one := NewFPint(1) + one := NewFPint(1, mem) if E.z.Equals(one) { return } - E.z.Invert(nil) - E.x.Mul(E.z) - E.x.reduce() + E.z.Invert(nil, mem) + E.x.Mul(E.z, mem) + E.x.reduce(mem) - if CURVETYPE != MONTGOMERY { - E.y.Mul(E.z) - E.y.reduce() - } + E.y.Mul(E.z, mem) + E.y.reduce(mem) E.z.copy(one) } /* extract x as a BIG */ -func (E *ECP) GetX() *BIG { - W := NewECP() +func (E *ECP) GetX(mem *arena.Arena) *BIG { + W := NewECP(mem) W.Copy(E) - W.Affine() - return W.x.Redc() + W.Affine(mem) + return W.x.Redc(mem) } /* extract y as a BIG */ -func (E *ECP) GetY() *BIG { - W := NewECP() +func (E *ECP) GetY(mem *arena.Arena) *BIG { + W := NewECP(mem) W.Copy(E) - W.Affine() - return W.y.Redc() + W.Affine(mem) + return W.y.Redc(mem) } /* get sign of Y */ -func (E *ECP) GetS() int { - W := NewECP() +func (E *ECP) GetS(mem *arena.Arena) int { + W := NewECP(mem) W.Copy(E) - W.Affine() - return W.y.sign() + W.Affine(mem) + return W.y.sign(mem) } /* extract x as an FP */ @@ -338,55 +296,25 @@ func (E *ECP) getz() *FP { func (E *ECP) ToBytes(b []byte, compress bool) { var t [int(MODBYTES)]byte MB := int(MODBYTES) - alt := false - W := NewECP() + W := NewECP(nil) W.Copy(E) - W.Affine() - W.x.Redc().ToBytes(t[:]) + W.Affine(nil) + W.x.Redc(nil).ToBytes(t[:]) - if CURVETYPE == MONTGOMERY { - for i := 0; i < MB; i++ { - b[i] = t[i] + for i := 0; i < MB; i++ { + b[i+1] = t[i] + } + if compress { + b[0] = 0x02 + if W.y.sign(nil) == 1 { + b[0] = 0x03 } - //b[0] = 0x06 return } - - if (MODBITS-1)%8 <= 4 
&& ALLOW_ALT_COMPRESS { - alt = true - } - - if alt { - for i := 0; i < MB; i++ { - b[i] = t[i] - } - if compress { - b[0] |= 0x80 - if W.y.islarger() == 1 { - b[0] |= 0x20 - } - } else { - W.y.Redc().ToBytes(t[:]) - for i := 0; i < MB; i++ { - b[i+MB] = t[i] - } - } - } else { - for i := 0; i < MB; i++ { - b[i+1] = t[i] - } - if compress { - b[0] = 0x02 - if W.y.sign() == 1 { - b[0] = 0x03 - } - return - } - b[0] = 0x04 - W.y.Redc().ToBytes(t[:]) - for i := 0; i < MB; i++ { - b[i+MB+1] = t[i] - } + b[0] = 0x04 + W.y.Redc(nil).ToBytes(t[:]) + for i := 0; i < MB; i++ { + b[i+MB+1] = t[i] } } @@ -394,616 +322,194 @@ func (E *ECP) ToBytes(b []byte, compress bool) { func ECP_fromBytes(b []byte) *ECP { var t [int(MODBYTES)]byte MB := int(MODBYTES) - p := NewBIGints(Modulus) - alt := false + p := NewBIGints(Modulus, nil) - if CURVETYPE == MONTGOMERY { - for i := 0; i < MB; i++ { - t[i] = b[i] - } - px := FromBytes(t[:]) - if Comp(px, p) >= 0 { - return NewECP() - } - return NewECPbig(px) + for i := 0; i < MB; i++ { + t[i] = b[i+1] + } + px := FromBytes(t[:]) + if Comp(px, p) >= 0 { + return NewECP(nil) } - if (MODBITS-1)%8 <= 4 && ALLOW_ALT_COMPRESS { - alt = true + if b[0] == 0x04 { + for i := 0; i < MB; i++ { + t[i] = b[i+MB+1] + } + py := FromBytes(t[:]) + if Comp(py, p) >= 0 { + return NewECP(nil) + } + return NewECPbigs(px, py, nil) } - if alt { - for i := 0; i < MB; i++ { - t[i] = b[i] - } - t[0] &= 0x1f - px := FromBytes(t[:]) - if (b[0] & 0x80) == 0 { - for i := 0; i < MB; i++ { - t[i] = b[i+MB] - } - py := FromBytes(t[:]) - return NewECPbigs(px, py) - } else { - sgn := (b[0] & 0x20) >> 5 - P := NewECPbigint(px, 0) - cmp := P.y.islarger() - if (sgn == 1 && cmp != 1) || (sgn == 0 && cmp == 1) { - P.Neg() - } - return P - } - } else { - for i := 0; i < MB; i++ { - t[i] = b[i+1] - } - px := FromBytes(t[:]) - if Comp(px, p) >= 0 { - return NewECP() - } - - if b[0] == 0x04 { - for i := 0; i < MB; i++ { - t[i] = b[i+MB+1] - } - py := FromBytes(t[:]) - if Comp(py, p) >= 
0 { - return NewECP() - } - return NewECPbigs(px, py) - } - - if b[0] == 0x02 || b[0] == 0x03 { - return NewECPbigint(px, int(b[0]&1)) - } + if b[0] == 0x02 || b[0] == 0x03 { + return NewECPbigint(px, int(b[0]&1), nil) } - return NewECP() + return NewECP(nil) } /* convert to hex string */ func (E *ECP) ToString() string { - W := NewECP() + W := NewECP(nil) W.Copy(E) - W.Affine() - if W.Is_infinity() { + W.Affine(nil) + if W.Is_infinity(nil) { return "infinity" } - if CURVETYPE == MONTGOMERY { - return "(" + W.x.Redc().ToString() + ")" - } else { - return "(" + W.x.Redc().ToString() + "," + W.y.Redc().ToString() + ")" - } + return "(" + W.x.Redc(nil).ToString() + "," + W.y.Redc(nil).ToString() + ")" } /* this*=2 */ -func (E *ECP) Dbl() { +func (E *ECP) Dbl(mem *arena.Arena) { + t0 := NewFPcopy(E.y, mem) + t0.Sqr(mem) + t1 := NewFPcopy(E.y, mem) + t1.Mul(E.z, mem) + t2 := NewFPcopy(E.z, mem) + t2.Sqr(mem) - if CURVETYPE == WEIERSTRASS { - if CURVE_A == 0 { - t0 := NewFPcopy(E.y) - t0.Sqr() - t1 := NewFPcopy(E.y) - t1.Mul(E.z) - t2 := NewFPcopy(E.z) - t2.Sqr() + E.z.copy(t0) + E.z.Add(t0, mem) + E.z.norm() + E.z.Add(E.z, mem) + E.z.Add(E.z, mem) + E.z.norm() + t2.imul(3*CURVE_B_I, mem) - E.z.copy(t0) - E.z.Add(t0) - E.z.norm() - E.z.Add(E.z) - E.z.Add(E.z) - E.z.norm() - t2.imul(3 * CURVE_B_I) + x3 := NewFPcopy(t2, mem) + x3.Mul(E.z, mem) - x3 := NewFPcopy(t2) - x3.Mul(E.z) + y3 := NewFPcopy(t0, mem) + y3.Add(t2, mem) + y3.norm() + E.z.Mul(t1, mem) + t1.copy(t2) + t1.Add(t2, mem) + t2.Add(t1, mem) + t0.Sub(t2, mem) + t0.norm() + y3.Mul(t0, mem) + y3.Add(x3, mem) + t1.copy(E.x) + t1.Mul(E.y, mem) + E.x.copy(t0) + E.x.norm() + E.x.Mul(t1, mem) + E.x.Add(E.x, mem) + E.x.norm() + E.y.copy(y3) + E.y.norm() - y3 := NewFPcopy(t0) - y3.Add(t2) - y3.norm() - E.z.Mul(t1) - t1.copy(t2) - t1.Add(t2) - t2.Add(t1) - t0.Sub(t2) - t0.norm() - y3.Mul(t0) - y3.Add(x3) - t1.copy(E.x) - t1.Mul(E.y) - E.x.copy(t0) - E.x.norm() - E.x.Mul(t1) - E.x.Add(E.x) - E.x.norm() - E.y.copy(y3) - 
E.y.norm() - } else { - t0 := NewFPcopy(E.x) - t1 := NewFPcopy(E.y) - t2 := NewFPcopy(E.z) - t3 := NewFPcopy(E.x) - z3 := NewFPcopy(E.z) - y3 := NewFP() - x3 := NewFP() - b := NewFP() - - if CURVE_B_I == 0 { - b.copy(NewFPbig(NewBIGints(CURVE_B))) - } - - t0.Sqr() //1 x^2 - t1.Sqr() //2 y^2 - t2.Sqr() //3 - - t3.Mul(E.y) //4 - t3.Add(t3) - t3.norm() //5 - z3.Mul(E.x) //6 - z3.Add(z3) - z3.norm() //7 - y3.copy(t2) - - if CURVE_B_I == 0 { - y3.Mul(b) - } else { - y3.imul(CURVE_B_I) - } - - y3.Sub(z3) //9 *** - x3.copy(y3) - x3.Add(y3) - x3.norm() //10 - - y3.Add(x3) //11 - x3.copy(t1) - x3.Sub(y3) - x3.norm() //12 - y3.Add(t1) - y3.norm() //13 - y3.Mul(x3) //14 - x3.Mul(t3) //15 - t3.copy(t2) - t3.Add(t2) //16 - t2.Add(t3) //17 - - if CURVE_B_I == 0 { - z3.Mul(b) - } else { - z3.imul(CURVE_B_I) - } - - z3.Sub(t2) //19 - z3.Sub(t0) - z3.norm() //20 *** - t3.copy(z3) - t3.Add(z3) //21 - - z3.Add(t3) - z3.norm() //22 - t3.copy(t0) - t3.Add(t0) //23 - t0.Add(t3) //24 - t0.Sub(t2) - t0.norm() //25 - - t0.Mul(z3) //26 - y3.Add(t0) //27 - t0.copy(E.y) - t0.Mul(E.z) //28 - t0.Add(t0) - t0.norm() //29 - z3.Mul(t0) //30 - x3.Sub(z3) //x3.norm();//31 - t0.Add(t0) - t0.norm() //32 - t1.Add(t1) - t1.norm() //33 - z3.copy(t0) - z3.Mul(t1) //34 - - E.x.copy(x3) - E.x.norm() - E.y.copy(y3) - E.y.norm() - E.z.copy(z3) - E.z.norm() - } - } - - if CURVETYPE == EDWARDS { - C := NewFPcopy(E.x) - D := NewFPcopy(E.y) - H := NewFPcopy(E.z) - J := NewFP() - - E.x.Mul(E.y) - E.x.Add(E.x) - E.x.norm() - C.Sqr() - D.Sqr() - if CURVE_A == -1 { - C.Neg() - } - E.y.copy(C) - E.y.Add(D) - E.y.norm() - - H.Sqr() - H.Add(H) - E.z.copy(E.y) - J.copy(E.y) - J.Sub(H) - J.norm() - E.x.Mul(J) - C.Sub(D) - C.norm() - E.y.Mul(C) - E.z.Mul(J) - - } - if CURVETYPE == MONTGOMERY { - A := NewFPcopy(E.x) - B := NewFPcopy(E.x) - AA := NewFP() - BB := NewFP() - C := NewFP() - - A.Add(E.z) - A.norm() - AA.copy(A) - AA.Sqr() - B.Sub(E.z) - B.norm() - BB.copy(B) - BB.Sqr() - C.copy(AA) - C.Sub(BB) - C.norm() - - 
E.x.copy(AA) - E.x.Mul(BB) - - A.copy(C) - A.imul((CURVE_A + 2) / 4) - - BB.Add(A) - BB.norm() - E.z.copy(BB) - E.z.Mul(C) - } return } /* this+=Q */ -func (E *ECP) Add(Q *ECP) { +func (E *ECP) Add(Q *ECP, mem *arena.Arena) { + b := 3 * CURVE_B_I + t0 := NewFPcopy(E.x, mem) + t0.Mul(Q.x, mem) + t1 := NewFPcopy(E.y, mem) + t1.Mul(Q.y, mem) + t2 := NewFPcopy(E.z, mem) + t2.Mul(Q.z, mem) + t3 := NewFPcopy(E.x, mem) + t3.Add(E.y, mem) + t3.norm() + t4 := NewFPcopy(Q.x, mem) + t4.Add(Q.y, mem) + t4.norm() + t3.Mul(t4, mem) + t4.copy(t0) + t4.Add(t1, mem) - if CURVETYPE == WEIERSTRASS { - if CURVE_A == 0 { - b := 3 * CURVE_B_I - t0 := NewFPcopy(E.x) - t0.Mul(Q.x) - t1 := NewFPcopy(E.y) - t1.Mul(Q.y) - t2 := NewFPcopy(E.z) - t2.Mul(Q.z) - t3 := NewFPcopy(E.x) - t3.Add(E.y) - t3.norm() - t4 := NewFPcopy(Q.x) - t4.Add(Q.y) - t4.norm() - t3.Mul(t4) - t4.copy(t0) - t4.Add(t1) + t3.Sub(t4, mem) + t3.norm() + t4.copy(E.y) + t4.Add(E.z, mem) + t4.norm() + x3 := NewFPcopy(Q.y, mem) + x3.Add(Q.z, mem) + x3.norm() - t3.Sub(t4) - t3.norm() - t4.copy(E.y) - t4.Add(E.z) - t4.norm() - x3 := NewFPcopy(Q.y) - x3.Add(Q.z) - x3.norm() + t4.Mul(x3, mem) + x3.copy(t1) + x3.Add(t2, mem) - t4.Mul(x3) - x3.copy(t1) - x3.Add(t2) + t4.Sub(x3, mem) + t4.norm() + x3.copy(E.x) + x3.Add(E.z, mem) + x3.norm() + y3 := NewFPcopy(Q.x, mem) + y3.Add(Q.z, mem) + y3.norm() + x3.Mul(y3, mem) + y3.copy(t0) + y3.Add(t2, mem) + y3.rsub(x3, mem) + y3.norm() + x3.copy(t0) + x3.Add(t0, mem) + t0.Add(x3, mem) + t0.norm() + t2.imul(b, mem) - t4.Sub(x3) - t4.norm() - x3.copy(E.x) - x3.Add(E.z) - x3.norm() - y3 := NewFPcopy(Q.x) - y3.Add(Q.z) - y3.norm() - x3.Mul(y3) - y3.copy(t0) - y3.Add(t2) - y3.rsub(x3) - y3.norm() - x3.copy(t0) - x3.Add(t0) - t0.Add(x3) - t0.norm() - t2.imul(b) + z3 := NewFPcopy(t1, mem) + z3.Add(t2, mem) + z3.norm() + t1.Sub(t2, mem) + t1.norm() + y3.imul(b, mem) - z3 := NewFPcopy(t1) - z3.Add(t2) - z3.norm() - t1.Sub(t2) - t1.norm() - y3.imul(b) + x3.copy(y3) + x3.Mul(t4, mem) + t2.copy(t3) + 
t2.Mul(t1, mem) + x3.rsub(t2, mem) + y3.Mul(t0, mem) + t1.Mul(z3, mem) + y3.Add(t1, mem) + t0.Mul(t3, mem) + z3.Mul(t4, mem) + z3.Add(t0, mem) - x3.copy(y3) - x3.Mul(t4) - t2.copy(t3) - t2.Mul(t1) - x3.rsub(t2) - y3.Mul(t0) - t1.Mul(z3) - y3.Add(t1) - t0.Mul(t3) - z3.Mul(t4) - z3.Add(t0) + E.x.copy(x3) + E.x.norm() + E.y.copy(y3) + E.y.norm() + E.z.copy(z3) + E.z.norm() - E.x.copy(x3) - E.x.norm() - E.y.copy(y3) - E.y.norm() - E.z.copy(z3) - E.z.norm() - } else { - - t0 := NewFPcopy(E.x) - t1 := NewFPcopy(E.y) - t2 := NewFPcopy(E.z) - t3 := NewFPcopy(E.x) - t4 := NewFPcopy(Q.x) - z3 := NewFP() - y3 := NewFPcopy(Q.x) - x3 := NewFPcopy(Q.y) - b := NewFP() - - if CURVE_B_I == 0 { - b.copy(NewFPbig(NewBIGints(CURVE_B))) - } - - t0.Mul(Q.x) //1 - t1.Mul(Q.y) //2 - t2.Mul(Q.z) //3 - - t3.Add(E.y) - t3.norm() //4 - t4.Add(Q.y) - t4.norm() //5 - t3.Mul(t4) //6 - t4.copy(t0) - t4.Add(t1) //7 - t3.Sub(t4) - t3.norm() //8 - t4.copy(E.y) - t4.Add(E.z) - t4.norm() //9 - x3.Add(Q.z) - x3.norm() //10 - t4.Mul(x3) //11 - x3.copy(t1) - x3.Add(t2) //12 - - t4.Sub(x3) - t4.norm() //13 - x3.copy(E.x) - x3.Add(E.z) - x3.norm() //14 - y3.Add(Q.z) - y3.norm() //15 - - x3.Mul(y3) //16 - y3.copy(t0) - y3.Add(t2) //17 - - y3.rsub(x3) - y3.norm() //18 - z3.copy(t2) - - if CURVE_B_I == 0 { - z3.Mul(b) - } else { - z3.imul(CURVE_B_I) - } - - x3.copy(y3) - x3.Sub(z3) - x3.norm() //20 - z3.copy(x3) - z3.Add(x3) //21 - - x3.Add(z3) //22 - z3.copy(t1) - z3.Sub(x3) - z3.norm() //23 - x3.Add(t1) - x3.norm() //24 - - if CURVE_B_I == 0 { - y3.Mul(b) - } else { - y3.imul(CURVE_B_I) - } - - t1.copy(t2) - t1.Add(t2) //26 - t2.Add(t1) //27 - - y3.Sub(t2) //28 - - y3.Sub(t0) - y3.norm() //29 - t1.copy(y3) - t1.Add(y3) //30 - y3.Add(t1) - y3.norm() //31 - - t1.copy(t0) - t1.Add(t0) //32 - t0.Add(t1) //33 - t0.Sub(t2) - t0.norm() //34 - t1.copy(t4) - t1.Mul(y3) //35 - t2.copy(t0) - t2.Mul(y3) //36 - y3.copy(x3) - y3.Mul(z3) //37 - y3.Add(t2) //38 - x3.Mul(t3) //39 - x3.Sub(t1) //40 - z3.Mul(t4) //41 - 
t1.copy(t3) - t1.Mul(t0) //42 - z3.Add(t1) - E.x.copy(x3) - E.x.norm() - E.y.copy(y3) - E.y.norm() - E.z.copy(z3) - E.z.norm() - - } - } - if CURVETYPE == EDWARDS { - b := NewFPbig(NewBIGints(CURVE_B)) - A := NewFPcopy(E.z) - B := NewFP() - C := NewFPcopy(E.x) - D := NewFPcopy(E.y) - EE := NewFP() - F := NewFP() - G := NewFP() - - A.Mul(Q.z) - B.copy(A) - B.Sqr() - C.Mul(Q.x) - D.Mul(Q.y) - - EE.copy(C) - EE.Mul(D) - EE.Mul(b) - F.copy(B) - F.Sub(EE) - G.copy(B) - G.Add(EE) - - if CURVE_A == 1 { - EE.copy(D) - EE.Sub(C) - } - C.Add(D) - - B.copy(E.x) - B.Add(E.y) - D.copy(Q.x) - D.Add(Q.y) - B.norm() - D.norm() - B.Mul(D) - B.Sub(C) - B.norm() - F.norm() - B.Mul(F) - E.x.copy(A) - E.x.Mul(B) - G.norm() - if CURVE_A == 1 { - EE.norm() - C.copy(EE) - C.Mul(G) - } - if CURVE_A == -1 { - C.norm() - C.Mul(G) - } - E.y.copy(A) - E.y.Mul(C) - E.z.copy(F) - E.z.Mul(G) - } return } -/* Differential Add for Montgomery curves. this+=Q where W is this-Q and is affine. */ -func (E *ECP) dAdd(Q *ECP, W *ECP) { - A := NewFPcopy(E.x) - B := NewFPcopy(E.x) - C := NewFPcopy(Q.x) - D := NewFPcopy(Q.x) - DA := NewFP() - CB := NewFP() - - A.Add(E.z) - B.Sub(E.z) - - C.Add(Q.z) - D.Sub(Q.z) - A.norm() - D.norm() - - DA.copy(D) - DA.Mul(A) - C.norm() - B.norm() - - CB.copy(C) - CB.Mul(B) - - A.copy(DA) - A.Add(CB) - A.norm() - A.Sqr() - B.copy(DA) - B.Sub(CB) - B.norm() - B.Sqr() - - E.x.copy(A) - E.z.copy(W.x) - E.z.Mul(B) - -} - /* this-=Q */ -func (E *ECP) Sub(Q *ECP) { - NQ := NewECP() +func (E *ECP) Sub(Q *ECP, mem *arena.Arena) { + NQ := NewECP(mem) NQ.Copy(Q) - NQ.Neg() - E.Add(NQ) + NQ.Neg(mem) + E.Add(NQ, mem) } /* constant time multiply by small integer of length bts - use lAdder */ -func (E *ECP) pinmul(e int32, bts int32) *ECP { - if CURVETYPE == MONTGOMERY { - return E.lmul(NewBIGint(int(e))) - } else { - P := NewECP() - R0 := NewECP() - R1 := NewECP() - R1.Copy(E) +func (E *ECP) pinmul(e int32, bts int32, mem *arena.Arena) *ECP { + P := NewECP(mem) + R0 := NewECP(mem) + R1 
:= NewECP(mem) + R1.Copy(E) - for i := bts - 1; i >= 0; i-- { - b := int((e >> uint32(i)) & 1) - P.Copy(R1) - P.Add(R0) - R0.cswap(R1, b) - R1.Copy(P) - R0.Dbl() - R0.cswap(R1, b) - } - P.Copy(R0) - return P + for i := bts - 1; i >= 0; i-- { + b := int((e >> uint32(i)) & 1) + P.Copy(R1) + P.Add(R0, mem) + R0.cswap(R1, b) + R1.Copy(P) + R0.Dbl(mem) + R0.cswap(R1, b) } + P.Copy(R0) + return P } // Point multiplication, multiplies a point P by a scalar e @@ -1016,120 +522,97 @@ func (E *ECP) pinmul(e int32, bts int32) *ECP { // The point multiplication methods used will process leading zeros correctly. // So this function leaks information about the length of e... -func (E *ECP) lmul(e *BIG) *ECP { - return E.clmul(e, e) +func (E *ECP) lmul(e *BIG, outer, mem *arena.Arena) *ECP { + return E.clmul(e, e, outer, mem) } // .. but this one does not (typically set maxe=r) // Set P=e*P /* return e.this */ -func (E *ECP) clmul(e *BIG, maxe *BIG) *ECP { - if e.IsZero() || E.Is_infinity() { - return NewECP() +func (E *ECP) clmul(e *BIG, maxe *BIG, outer, mem *arena.Arena) *ECP { + if e.IsZero() || E.Is_infinity(mem) { + return NewECP(outer) } - P := NewECP() - cm := NewBIGcopy(e) + P := NewECP(outer) + cm := NewBIGcopy(e, mem) cm.or(maxe) max := cm.nbits() - if CURVETYPE == MONTGOMERY { - /* use LAdder */ - D := NewECP() - R0 := NewECP() - R0.Copy(E) - R1 := NewECP() - R1.Copy(E) - R1.Dbl() - D.Copy(E) - D.Affine() - nb := max - for i := nb - 2; i >= 0; i-- { - b := int(e.bit(i)) - P.Copy(R1) - P.dAdd(R0, D) - R0.cswap(R1, b) - R1.Copy(P) - R0.Dbl() - R0.cswap(R1, b) - } - P.Copy(R0) - } else { - // fixed size windows - mt := NewBIG() - t := NewBIG() - Q := NewECP() - C := NewECP() + // fixed size windows + mt := NewBIG(mem) + t := NewBIG(mem) + Q := NewECP(mem) + C := NewECP(mem) - var W []*ECP - var w [1 + (NLEN*int(BASEBITS)+3)/4]int8 + var W []*ECP + var w [1 + (NLEN*int(BASEBITS)+3)/4]int8 - Q.Copy(E) - Q.Dbl() + Q.Copy(E) + Q.Dbl(mem) - W = append(W, NewECP()) - 
W[0].Copy(E) + W = append(W, NewECP(mem)) + W[0].Copy(E) - for i := 1; i < 8; i++ { - W = append(W, NewECP()) - W[i].Copy(W[i-1]) - W[i].Add(Q) - } - - // make exponent odd - Add 2P if even, P if odd - t.copy(e) - s := int(t.parity()) - t.inc(1) - t.norm() - ns := int(t.parity()) - mt.copy(t) - mt.inc(1) - mt.norm() - t.cmove(mt, s) - Q.cmove(E, ns) - C.Copy(Q) - - nb := 1 + (max+3)/4 - - // convert exponent to signed 4-bit window - for i := 0; i < nb; i++ { - w[i] = int8(t.lastbits(5) - 16) - t.dec(int(w[i])) - t.norm() - t.fshr(4) - } - w[nb] = int8(t.lastbits(5)) - - //P.Copy(W[(int(w[nb])-1)/2]) - P.selector(W, int32(w[nb])) - for i := nb - 1; i >= 0; i-- { - Q.selector(W, int32(w[i])) - P.Dbl() - P.Dbl() - P.Dbl() - P.Dbl() - P.Add(Q) - } - P.Sub(C) /* apply correction */ + for i := 1; i < 8; i++ { + W = append(W, NewECP(mem)) + W[i].Copy(W[i-1]) + W[i].Add(Q, mem) } + + // make exponent odd - Add 2P if even, P if odd + t.copy(e) + s := int(t.parity()) + t.inc(1) + t.norm() + ns := int(t.parity()) + mt.copy(t) + mt.inc(1) + mt.norm() + t.cmove(mt, s) + Q.cmove(E, ns) + C.Copy(Q) + + nb := 1 + (max+3)/4 + + // convert exponent to signed 4-bit window + for i := 0; i < nb; i++ { + w[i] = int8(t.lastbits(5) - 16) + t.dec(int(w[i])) + t.norm() + t.fshr(4) + } + w[nb] = int8(t.lastbits(5)) + + //P.Copy(W[(int(w[nb])-1)/2]) + P.selector(W, int32(w[nb])) + for i := nb - 1; i >= 0; i-- { + Q.selector(W, int32(w[i])) + P.Dbl(mem) + P.Dbl(mem) + P.Dbl(mem) + P.Dbl(mem) + P.Add(Q, mem) + } + P.Sub(C, mem) /* apply correction */ return P } /* Public version */ -func (E *ECP) Mul(e *BIG) *ECP { - return E.lmul(e) +func (E *ECP) Mul(e *BIG, outer, mem *arena.Arena) *ECP { + return E.lmul(e, outer, mem) } // Generic multi-multiplication, fixed 4-bit window, P=Sigma e_i*X_i -func ECP_muln(n int, X []*ECP, e []*BIG) *ECP { - P := NewECP() - R := NewECP() - S := NewECP() +func ECP_muln(n int, X []*ECP, e []*BIG, mem *arena.Arena) *ECP { + P := NewECP(nil) + R := NewECP(mem) + S 
:= NewECP(mem) var B []*ECP - t := NewBIG() + t := NewBIG(mem) for i := 0; i < 16; i++ { - B = append(B, NewECP()) + B = append(B, NewECP(mem)) } - mt := NewBIGcopy(e[0]) + mt := NewBIGcopy(e[0], mem) mt.norm() for i := 1; i < n; i++ { // find biggest t.copy(e[i]) @@ -1142,36 +625,42 @@ func ECP_muln(n int, X []*ECP, e []*BIG) *ECP { for j := 0; j < 16; j++ { B[j].inf() } + + inner := arena.NewArena() for j := 0; j < n; j++ { mt.copy(e[j]) mt.norm() mt.shr(uint(i * 4)) k := mt.lastbits(4) - B[k].Add(X[j]) + B[k].Add(X[j], inner) + if j%32 == 0 || j == n-1 { + inner.Free() + inner = arena.NewArena() + } } R.inf() S.inf() for j := 15; j >= 1; j-- { - R.Add(B[j]) - S.Add(R) + R.Add(B[j], mem) + S.Add(R, mem) } for j := 0; j < 4; j++ { - P.Dbl() + P.Dbl(mem) } - P.Add(S) + P.Add(S, mem) } return P } /* Return e.this+f.Q */ -func (E *ECP) Mul2(e *BIG, Q *ECP, f *BIG) *ECP { - te := NewBIG() - tf := NewBIG() - mt := NewBIG() - S := NewECP() - T := NewECP() - C := NewECP() +func (E *ECP) Mul2(e *BIG, Q *ECP, f *BIG, mem *arena.Arena) *ECP { + te := NewBIG(mem) + tf := NewBIG(mem) + mt := NewBIG(mem) + S := NewECP(mem) + T := NewECP(mem) + C := NewECP(mem) var W []*ECP var w [1 + (NLEN*int(BASEBITS)+1)/2]int8 @@ -1180,28 +669,28 @@ func (E *ECP) Mul2(e *BIG, Q *ECP, f *BIG) *ECP { // precompute table for i := 0; i < 8; i++ { - W = append(W, NewECP()) + W = append(W, NewECP(mem)) } W[1].Copy(E) - W[1].Sub(Q) + W[1].Sub(Q, mem) W[2].Copy(E) - W[2].Add(Q) + W[2].Add(Q, mem) S.Copy(Q) - S.Dbl() + S.Dbl(mem) W[0].Copy(W[1]) - W[0].Sub(S) + W[0].Sub(S, mem) W[3].Copy(W[2]) - W[3].Add(S) + W[3].Add(S, mem) T.Copy(E) - T.Dbl() + T.Dbl(mem) W[5].Copy(W[1]) - W[5].Add(T) + W[5].Add(T, mem) W[6].Copy(W[2]) - W[6].Add(T) + W[6].Add(T, mem) W[4].Copy(W[5]) - W[4].Sub(S) + W[4].Sub(S, mem) W[7].Copy(W[6]) - W[7].Add(S) + W[7].Add(S, mem) // if multiplier is odd, Add 2, else Add 1 to multiplier, and Add 2P or P to correction @@ -1225,7 +714,7 @@ func (E *ECP) Mul2(e *BIG, Q *ECP, f *BIG) 
*ECP { mt.norm() tf.cmove(mt, s) S.cmove(Q, ns) - C.Add(S) + C.Add(S, mem) mt.copy(te) mt.Add(tf) @@ -1249,48 +738,31 @@ func (E *ECP) Mul2(e *BIG, Q *ECP, f *BIG) *ECP { S.selector(W, int32(w[nb])) for i := nb - 1; i >= 0; i-- { T.selector(W, int32(w[i])) - S.Dbl() - S.Dbl() - S.Add(T) + S.Dbl(mem) + S.Dbl(mem) + S.Add(T, mem) } - S.Sub(C) /* apply correction */ + S.Sub(C, mem) /* apply correction */ return S } func (E *ECP) Cfp() { - cf := CURVE_Cof_I - if cf == 1 { - return - } - if cf == 4 { - E.Dbl() - E.Dbl() - return - } - if cf == 8 { - E.Dbl() - E.Dbl() - E.Dbl() - return - } - c := NewBIGints(CURVE_Cof) - E.Copy(E.lmul(c)) + mem := arena.NewArena() + defer mem.Free() + c := NewBIGints(CURVE_Cof, mem) + E.Copy(E.lmul(c, nil, mem)) } /* Hunt and Peck a BIG to a curve point */ -func ECP_hap2point(h *BIG) *ECP { +func ECP_hap2point(h *BIG, mem *arena.Arena) *ECP { var P *ECP - x := NewBIGcopy(h) + x := NewBIGcopy(h, mem) for true { - if CURVETYPE != MONTGOMERY { - P = NewECPbigint(x, 0) - } else { - P = NewECPbig(x) - } + P = NewECPbigint(x, 0, mem) x.inc(1) x.norm() - if !P.Is_infinity() { + if !P.Is_infinity(mem) { break } } @@ -1299,539 +771,102 @@ func ECP_hap2point(h *BIG) *ECP { /* Constant time Map to Point */ func ECP_map2point(h *FP) *ECP { - P := NewECP() + P := NewECP(nil) - if CURVETYPE == MONTGOMERY { - // Elligator 2 - X1 := NewFP() - X2 := NewFP() - w := NewFP() - one := NewFPint(1) - A := NewFPint(CURVE_A) - t := NewFPcopy(h) - N := NewFP() - D := NewFP() - hint := NewFP() + // swu method + A := NewFP(nil) + B := NewFP(nil) + X1 := NewFP(nil) + X2 := NewFP(nil) + X3 := NewFP(nil) + one := NewFPint(1, nil) + Y := NewFP(nil) + D := NewFP(nil) + t := NewFPcopy(h, nil) + w := NewFP(nil) + //Y3:=NewFP() + sgn := t.sign(nil) - t.Sqr() + // Shallue and van de Woestijne + // SQRTm3 not available, so preprocess this out + /* */ + Z := RIADZ + X1.copy(NewFPint(Z, nil)) + X3.copy(X1) + A.copy(RHS(X1, nil)) + B.copy(NewFPbig(NewBIGints(SQRTm3, nil), nil)) 
+ B.imul(Z, nil) - if PM1D2 == 2 { - t.Add(t) - } - if PM1D2 == 1 { - t.Neg() - } - if PM1D2 > 2 { - t.imul(QNRI) - } + t.Sqr(nil) + Y.copy(A) + Y.Mul(t, nil) + t.copy(one) + t.Add(Y, nil) + t.norm() + Y.rsub(one, nil) + Y.norm() + D.copy(t) + D.Mul(Y, nil) + D.Mul(B, nil) - t.norm() - D.copy(t) - D.Add(one) - D.norm() + w.copy(A) + FP_tpo(D, w) - X1.copy(A) - X1.Neg() - X1.norm() - X2.copy(X1) - X2.Mul(t) - - w.copy(X1) - w.Sqr() - N.copy(w) - N.Mul(X1) - w.Mul(A) - w.Mul(D) - N.Add(w) - t.copy(D) - t.Sqr() - t.Mul(X1) - N.Add(t) - N.norm() - - t.copy(N) - t.Mul(D) - qres := t.qr(hint) - w.copy(t) - w.Invert(hint) - D.copy(w) - D.Mul(N) - X1.Mul(D) - X2.Mul(D) - X1.cmove(X2, 1-qres) - - a := X1.Redc() - P.Copy(NewECPbig(a)) - } - if CURVETYPE == EDWARDS { - // Elligator 2 - map to Montgomery, place point, map back - X1 := NewFP() - X2 := NewFP() - t := NewFPcopy(h) - w := NewFP() - one := NewFPint(1) - A := NewFP() - w1 := NewFP() - w2 := NewFP() - B := NewFPbig(NewBIGints(CURVE_B)) - Y := NewFP() - K := NewFP() - D := NewFP() - hint := NewFP() - //Y3:=NewFP() - rfc := 0 - - if MODTYPE != GENERALISED_MERSENNE { - A.copy(B) - - if CURVE_A == 1 { - A.Add(one) - B.Sub(one) - } else { - A.Sub(one) - B.Add(one) - } - A.norm() - B.norm() - - A.div2() - B.div2() - B.div2() - - K.copy(B) - K.Neg() - K.norm() - //K.Invert(nil) - K.invsqrt(K, w1) - - rfc = RIADZ - if rfc == 1 { // RFC7748 - A.Mul(K) - K.Mul(w1) - //K=K.Sqrt(nil) - } else { - B.Sqr() - } - } else { - rfc = 1 - A.copy(NewFPint(156326)) - } - - t.Sqr() - qnr := 0 - if PM1D2 == 2 { - t.Add(t) - qnr = 2 - } - if PM1D2 == 1 { - t.Neg() - qnr = -1 - } - if PM1D2 > 2 { - t.imul(QNRI) - qnr = QNRI - } - t.norm() - - D.copy(t) - D.Add(one) - D.norm() - X1.copy(A) - X1.Neg() - X1.norm() - X2.copy(X1) - X2.Mul(t) - - // Figure out RHS of Montgomery curve in rational form gx1/d^3 - - w.copy(X1) - w.Sqr() - w1.copy(w) - w1.Mul(X1) - w.Mul(A) - w.Mul(D) - w1.Add(w) - w2.copy(D) - w2.Sqr() - - if rfc == 0 { - w.copy(X1) - 
w.Mul(B) - w2.Mul(w) - w1.Add(w2) - } else { - w2.Mul(X1) - w1.Add(w2) - } - w1.norm() - - B.copy(w1) - B.Mul(D) - qres := B.qr(hint) - w.copy(B) - w.Invert(hint) - D.copy(w) - D.Mul(w1) - X1.Mul(D) - X2.Mul(D) - D.Sqr() - - w1.copy(B) - w1.imul(qnr) - w.copy(NewFPbig(NewBIGints(CURVE_HTPC))) - w.Mul(hint) - w2.copy(D) - w2.Mul(h) - - X1.cmove(X2, 1-qres) - B.cmove(w1, 1-qres) - hint.cmove(w, 1-qres) - D.cmove(w2, 1-qres) - - Y.copy(B.Sqrt(hint)) - Y.Mul(D) - - /* - Y.copy(B.Sqrt(hint)) - Y.Mul(D) - - B.imul(qnr) - w.copy(NewFPbig(NewBIGints(CURVE_HTPC))) - hint.Mul(w) - - Y3.copy(B.Sqrt(hint)) - D.Mul(h) - Y3.Mul(D) - - X1.cmove(X2,1-qres) - Y.cmove(Y3,1-qres) - */ - w.copy(Y) - w.Neg() + w.Mul(B, nil) + if w.sign(nil) == 1 { + w.Neg(nil) w.norm() - Y.cmove(w, qres^Y.sign()) - - if rfc == 0 { - X1.Mul(K) - Y.Mul(K) - } - - if MODTYPE == GENERALISED_MERSENNE { - t.copy(X1) - t.Sqr() - w.copy(t) - w.Add(one) - w.norm() - t.Sub(one) - t.norm() - w1.copy(t) - w1.Mul(Y) - w1.Add(w1) - X2.copy(w1) - X2.Add(w1) - X2.norm() - t.Sqr() - Y.Sqr() - Y.Add(Y) - Y.Add(Y) - Y.norm() - B.copy(t) - B.Add(Y) - B.norm() - - w2.copy(Y) - w2.Sub(t) - w2.norm() - w2.Mul(X1) - t.Mul(X1) - Y.div2() - w1.copy(Y) - w1.Mul(w) - w1.rsub(t) - w1.norm() - - t.copy(X2) - t.Mul(w1) - P.x.copy(t) - t.copy(w2) - t.Mul(B) - P.y.copy(t) - t.copy(w1) - t.Mul(B) - P.z.copy(t) - - return P - } else { - w1.copy(X1) - w1.Add(one) - w1.norm() - w2.copy(X1) - w2.Sub(one) - w2.norm() - t.copy(w1) - t.Mul(Y) - X1.Mul(w1) - - if rfc == 1 { - X1.Mul(K) - } - Y.Mul(w2) - P.x.copy(X1) - P.y.copy(Y) - P.z.copy(t) - - return P - } } - if CURVETYPE == WEIERSTRASS { - // swu method - A := NewFP() - B := NewFP() - X1 := NewFP() - X2 := NewFP() - X3 := NewFP() - one := NewFPint(1) - Y := NewFP() - D := NewFP() - t := NewFPcopy(h) - w := NewFP() - D2 := NewFP() - hint := NewFP() - GX1 := NewFP() - //Y3:=NewFP() - sgn := t.sign() - if CURVE_A != 0 || HTC_ISO != 0 { - if HTC_ISO != 0 { - /* CAHCZS - 
A.copy(NewFPbig(NewBIGints(CURVE_Ad))) - B.copy(NewFPbig(NewBIGints(CURVE_Bd))) - CAHCZF */ - } else { - A.copy(NewFPint(CURVE_A)) - B.copy(NewFPbig(NewBIGints(CURVE_B))) - } - // SSWU method - t.Sqr() - t.imul(RIADZ) - w.copy(t) - w.Add(one) - w.norm() + w.Mul(B, nil) + w.Mul(h, nil) + w.Mul(Y, nil) + w.Mul(D, nil) - w.Mul(t) - D.copy(A) - D.Mul(w) + X1.Neg(nil) + X1.norm() + X1.div2(nil) + X2.copy(X1) + X1.Sub(w, nil) + X1.norm() + X2.Add(w, nil) + X2.norm() + A.Add(A, nil) + A.Add(A, nil) + A.norm() + t.Sqr(nil) + t.Mul(D, nil) + t.Sqr(nil) + A.Mul(t, nil) + X3.Add(A, nil) + X3.norm() - w.Add(one) - w.norm() - w.Mul(B) - w.Neg() - w.norm() + rhs := RHS(X2, nil) + X3.cmove(X2, rhs.qr(nil)) + rhs.copy(RHS(X1, nil)) + X3.cmove(X1, rhs.qr(nil)) + rhs.copy(RHS(X3, nil)) + Y.copy(rhs.Sqrt(nil, nil)) - X2.copy(w) - X3.copy(t) - X3.Mul(X2) + ne := Y.sign(nil) ^ sgn + w.copy(Y) + w.Neg(nil) + w.norm() + Y.cmove(w, ne) - // x^3+Ad^2x+Bd^3 - GX1.copy(X2) - GX1.Sqr() - D2.copy(D) - D2.Sqr() - w.copy(A) - w.Mul(D2) - GX1.Add(w) - GX1.norm() - GX1.Mul(X2) - D2.Mul(D) - w.copy(B) - w.Mul(D2) - GX1.Add(w) - GX1.norm() - - w.copy(GX1) - w.Mul(D) - qr := w.qr(hint) - D.copy(w) - D.Invert(hint) - D.Mul(GX1) - X2.Mul(D) - X3.Mul(D) - t.Mul(h) - D2.copy(D) - D2.Sqr() - - D.copy(D2) - D.Mul(t) - t.copy(w) - t.imul(RIADZ) - X1.copy(NewFPbig(NewBIGints(CURVE_HTPC))) - X1.Mul(hint) - - X2.cmove(X3, 1-qr) - D2.cmove(D, 1-qr) - w.cmove(t, 1-qr) - hint.cmove(X1, 1-qr) - - Y.copy(w.Sqrt(hint)) - Y.Mul(D2) - /* - Y.copy(w.Sqrt(hint)) - Y.Mul(D2) - - D2.Mul(t) - w.imul(RIADZ) - - X1.copy(NewFPbig(NewBIGints(CURVE_HTPC))) - hint.Mul(X1) - - Y3.copy(w.Sqrt(hint)) - Y3.Mul(D2) - - X2.cmove(X3,1-qr) - Y.cmove(Y3,1-qr) - */ - ne := Y.sign() ^ sgn - w.copy(Y) - w.Neg() - w.norm() - Y.cmove(w, ne) - - if HTC_ISO != 0 { - /* CAHCZS - k:=0 - isox:=HTC_ISO - isoy:=3*(isox-1)/2 - - //xnum - xnum:=NewFPbig(NewBIGints(PC[k])); k+=1 - for i:=0;i> 31 babs := (b ^ m) - m @@ -88,25 +99,26 @@ func (E *ECP8) 
selector(W []*ECP8, b int32) { E.cmove(W[7], teq(babs, 7)) MP.Copy(E) - MP.Neg() + MP.Neg(nil) E.cmove(MP, int(m&1)) } /* Test if P == Q */ func (E *ECP8) Equals(Q *ECP8) bool { - - a := NewFP8copy(E.x) - b := NewFP8copy(Q.x) - a.Mul(Q.z) - b.Mul(E.z) + mem := arena.NewArena() + defer mem.Free() + a := NewFP8copy(E.x, mem) + b := NewFP8copy(Q.x, mem) + a.Mul(Q.z, mem) + b.Mul(E.z, mem) if !a.Equals(b) { return false } a.copy(E.y) b.copy(Q.y) - a.Mul(Q.z) - b.Mul(E.z) + a.Mul(Q.z, mem) + b.Mul(E.z, mem) if !a.Equals(b) { return false } @@ -115,38 +127,38 @@ func (E *ECP8) Equals(Q *ECP8) bool { } /* set to Affine - (x,y,z) to (x,y) */ -func (E *ECP8) Affine() { - if E.Is_infinity() { +func (E *ECP8) Affine(mem *arena.Arena) { + if E.Is_infinity(mem) { return } - one := NewFP8int(1) + one := NewFP8int(1, mem) if E.z.Equals(one) { - E.x.reduce() - E.y.reduce() + E.x.reduce(mem) + E.y.reduce(mem) return } - E.z.Invert(nil) + E.z.Invert(nil, mem) - E.x.Mul(E.z) - E.x.reduce() - E.y.Mul(E.z) - E.y.reduce() + E.x.Mul(E.z, mem) + E.x.reduce(mem) + E.y.Mul(E.z, mem) + E.y.reduce(mem) E.z.copy(one) } /* extract affine x as FP2 */ -func (E *ECP8) GetX() *FP8 { - W := NewECP8() +func (E *ECP8) GetX(mem *arena.Arena) *FP8 { + W := NewECP8(mem) W.Copy(E) - W.Affine() + W.Affine(mem) return W.x } /* extract affine y as FP2 */ -func (E *ECP8) GetY() *FP8 { - W := NewECP8() +func (E *ECP8) GetY(mem *arena.Arena) *FP8 { + W := NewECP8(mem) W.Copy(E) - W.Affine() + W.Affine(mem) return W.y } @@ -169,47 +181,24 @@ func (E *ECP8) getz() *FP8 { func (E *ECP8) ToBytes(b []byte, compress bool) { var t [8 * int(MODBYTES)]byte MB := 8 * int(MODBYTES) - alt := false - W := NewECP8() + W := NewECP8(nil) W.Copy(E) - W.Affine() + W.Affine(nil) W.x.ToBytes(t[:]) - if (MODBITS-1)%8 <= 4 && ALLOW_ALT_COMPRESS { - alt = true + for i := 0; i < MB; i++ { + b[i+1] = t[i] } - - if alt { + if !compress { + b[0] = 0x04 + W.y.ToBytes(t[:]) for i := 0; i < MB; i++ { - b[i] = t[i] + b[i+MB+1] = t[i] } - if 
!compress { - W.y.ToBytes(t[:]) - for i := 0; i < MB; i++ { - b[i+MB] = t[i] - } - } else { - b[0] |= 0x80 - if W.y.islarger() == 1 { - b[0] |= 0x20 - } - } - } else { - for i := 0; i < MB; i++ { - b[i+1] = t[i] - } - if !compress { - b[0] = 0x04 - W.y.ToBytes(t[:]) - for i := 0; i < MB; i++ { - b[i+MB+1] = t[i] - } - } else { - b[0] = 0x02 - if W.y.sign() == 1 { - b[0] = 0x03 - } + b[0] = 0x02 + if W.y.sign(nil) == 1 { + b[0] = 0x03 } } } @@ -219,92 +208,64 @@ func ECP8_fromBytes(b []byte) *ECP8 { var t [8 * int(MODBYTES)]byte MB := 8 * int(MODBYTES) typ := int(b[0]) - alt := false - if (MODBITS-1)%8 <= 4 && ALLOW_ALT_COMPRESS { - alt = true + for i := 0; i < MB; i++ { + t[i] = b[i+1] } - - if alt { + rx := FP8_fromBytes(t[:]) + if typ == 0x04 { for i := 0; i < MB; i++ { - t[i] = b[i] - } - t[0] &= 0x1f - rx := FP8_fromBytes(t[:]) - if (b[0] & 0x80) == 0 { - for i := 0; i < MB; i++ { - t[i] = b[i+MB] - } - ry := FP8_fromBytes(t[:]) - return NewECP8fp8s(rx, ry) - } else { - sgn := (b[0] & 0x20) >> 5 - P := NewECP8fp8(rx, 0) - cmp := P.y.islarger() - if (sgn == 1 && cmp != 1) || (sgn == 0 && cmp == 1) { - P.Neg() - } - return P + t[i] = b[i+MB+1] } + ry := FP8_fromBytes(t[:]) + return NewECP8fp8s(rx, ry, nil) } else { - for i := 0; i < MB; i++ { - t[i] = b[i+1] - } - rx := FP8_fromBytes(t[:]) - if typ == 0x04 { - for i := 0; i < MB; i++ { - t[i] = b[i+MB+1] - } - ry := FP8_fromBytes(t[:]) - return NewECP8fp8s(rx, ry) - } else { - return NewECP8fp8(rx, typ&1) - } + return NewECP8fp8(rx, typ&1, nil) } } /* convert this to hex string */ func (E *ECP8) ToString() string { - W := NewECP8() + W := NewECP8(nil) W.Copy(E) - W.Affine() - if W.Is_infinity() { + W.Affine(nil) + if W.Is_infinity(nil) { return "infinity" } return "(" + W.x.toString() + "," + W.y.toString() + ")" } /* Calculate RHS of twisted curve equation x^3+B/i */ -func RHS8(x *FP8) *FP8 { - r := NewFP8copy(x) - r.Sqr() - b2 := NewFP2big(NewBIGints(CURVE_B)) - b4 := NewFP4fp2(b2) - b := NewFP8fp4(b4) +func 
RHS8(x *FP8, mem *arena.Arena) *FP8 { + r := NewFP8copy(x, mem) + r.Sqr(mem) + b2 := NewFP2big(NewBIGints(CURVE_B, mem), mem) + b4 := NewFP4fp2(b2, mem) + b := NewFP8fp4(b4, mem) - if SEXTIC_TWIST == D_TYPE { - b.div_i() - } - if SEXTIC_TWIST == M_TYPE { - b.times_i() - } - r.Mul(x) - r.Add(b) + b.div_i(mem) + r.Mul(x, mem) + r.Add(b, mem) - r.reduce() + r.reduce(mem) return r } /* construct this from (x,y) - but set to O if not on curve */ -func NewECP8fp8s(ix *FP8, iy *FP8) *ECP8 { - E := new(ECP8) - E.x = NewFP8copy(ix) - E.y = NewFP8copy(iy) - E.z = NewFP8int(1) +func NewECP8fp8s(ix *FP8, iy *FP8, mem *arena.Arena) *ECP8 { + var E *ECP8 + if mem != nil { + E = arena.New[ECP8](mem) + } else { + E = new(ECP8) + } + E.x = NewFP8copy(ix, mem) + E.y = NewFP8copy(iy, mem) + E.z = NewFP8int(1, mem) E.x.norm() - rhs := RHS8(E.x) - y2 := NewFP8copy(E.y) - y2.Sqr() + rhs := RHS8(E.x, mem) + y2 := NewFP8copy(E.y, mem) + y2.Sqr(mem) if !y2.Equals(rhs) { E.inf() } @@ -312,20 +273,25 @@ func NewECP8fp8s(ix *FP8, iy *FP8) *ECP8 { } /* construct this from x - but set to O if not on curve */ -func NewECP8fp8(ix *FP8, s int) *ECP8 { - E := new(ECP8) - h := NewFP() - E.x = NewFP8copy(ix) - E.y = NewFP8int(1) - E.z = NewFP8int(1) +func NewECP8fp8(ix *FP8, s int, mem *arena.Arena) *ECP8 { + var E *ECP8 + if mem != nil { + E = arena.New[ECP8](mem) + } else { + E = new(ECP8) + } + h := NewFP(mem) + E.x = NewFP8copy(ix, mem) + E.y = NewFP8int(1, mem) + E.z = NewFP8int(1, mem) E.x.norm() - rhs := RHS8(E.x) + rhs := RHS8(E.x, mem) if rhs.qr(h) == 1 { - rhs.Sqrt(h) - if rhs.sign() != s { - rhs.Neg() + rhs.Sqrt(h, mem) + if rhs.sign(mem) != s { + rhs.Neg(mem) } - rhs.reduce() + rhs.reduce(mem) E.y.copy(rhs) } else { @@ -335,55 +301,48 @@ func NewECP8fp8(ix *FP8, s int) *ECP8 { } /* this+=this */ -func (E *ECP8) Dbl() int { - iy := NewFP8copy(E.y) - if SEXTIC_TWIST == D_TYPE { - iy.times_i() - } +func (E *ECP8) Dbl(mem *arena.Arena) int { + iy := NewFP8copy(E.y, mem) + iy.times_i(mem) - t0 
:= NewFP8copy(E.y) - t0.Sqr() - if SEXTIC_TWIST == D_TYPE { - t0.times_i() - } - t1 := NewFP8copy(iy) - t1.Mul(E.z) - t2 := NewFP8copy(E.z) - t2.Sqr() + t0 := NewFP8copy(E.y, mem) + t0.Sqr(mem) + t0.times_i(mem) + t1 := NewFP8copy(iy, mem) + t1.Mul(E.z, mem) + t2 := NewFP8copy(E.z, mem) + t2.Sqr(mem) E.z.copy(t0) - E.z.Add(t0) + E.z.Add(t0, mem) E.z.norm() - E.z.Add(E.z) - E.z.Add(E.z) + E.z.Add(E.z, mem) + E.z.Add(E.z, mem) E.z.norm() - t2.imul(3 * CURVE_B_I) - if SEXTIC_TWIST == M_TYPE { - t2.times_i() - } - x3 := NewFP8copy(t2) - x3.Mul(E.z) + t2.imul(3*CURVE_B_I, mem) + x3 := NewFP8copy(t2, mem) + x3.Mul(E.z, mem) - y3 := NewFP8copy(t0) + y3 := NewFP8copy(t0, mem) - y3.Add(t2) + y3.Add(t2, mem) y3.norm() - E.z.Mul(t1) + E.z.Mul(t1, mem) t1.copy(t2) - t1.Add(t2) - t2.Add(t1) + t1.Add(t2, mem) + t2.Add(t1, mem) t2.norm() - t0.Sub(t2) + t0.Sub(t2, mem) t0.norm() //y^2-9bz^2 - y3.Mul(t0) - y3.Add(x3) //(y^2+3z*2)(y^2-9z^2)+3b.z^2.8y^2 + y3.Mul(t0, mem) + y3.Add(x3, mem) //(y^2+3z*2)(y^2-9z^2)+3b.z^2.8y^2 t1.copy(E.x) - t1.Mul(iy) // + t1.Mul(iy, mem) // E.x.copy(t0) E.x.norm() - E.x.Mul(t1) - E.x.Add(E.x) //(y^2-9bz^2)xy2 + E.x.Mul(t1, mem) + E.x.Add(E.x, mem) //(y^2-9bz^2)xy2 E.x.norm() E.y.copy(y3) @@ -393,90 +352,78 @@ func (E *ECP8) Dbl() int { } /* this+=Q - return 0 for Add, 1 for double, -1 for O */ -func (E *ECP8) Add(Q *ECP8) int { +func (E *ECP8) Add(Q *ECP8, mem *arena.Arena) int { b := 3 * CURVE_B_I - t0 := NewFP8copy(E.x) - t0.Mul(Q.x) // x.Q.x - t1 := NewFP8copy(E.y) - t1.Mul(Q.y) // y.Q.y + t0 := NewFP8copy(E.x, mem) + t0.Mul(Q.x, mem) // x.Q.x + t1 := NewFP8copy(E.y, mem) + t1.Mul(Q.y, mem) // y.Q.y - t2 := NewFP8copy(E.z) - t2.Mul(Q.z) - t3 := NewFP8copy(E.x) - t3.Add(E.y) + t2 := NewFP8copy(E.z, mem) + t2.Mul(Q.z, mem) + t3 := NewFP8copy(E.x, mem) + t3.Add(E.y, mem) t3.norm() //t3=X1+Y1 - t4 := NewFP8copy(Q.x) - t4.Add(Q.y) - t4.norm() //t4=X2+Y2 - t3.Mul(t4) //t3=(X1+Y1)(X2+Y2) + t4 := NewFP8copy(Q.x, mem) + t4.Add(Q.y, mem) + t4.norm() 
//t4=X2+Y2 + t3.Mul(t4, mem) //t3=(X1+Y1)(X2+Y2) t4.copy(t0) - t4.Add(t1) //t4=X1.X2+Y1.Y2 + t4.Add(t1, mem) //t4=X1.X2+Y1.Y2 - t3.Sub(t4) + t3.Sub(t4, mem) t3.norm() - if SEXTIC_TWIST == D_TYPE { - t3.times_i() //t3=(X1+Y1)(X2+Y2)-(X1.X2+Y1.Y2) = X1.Y2+X2.Y1 - } + t3.times_i(mem) //t3=(X1+Y1)(X2+Y2)-(X1.X2+Y1.Y2) = X1.Y2+X2.Y1 t4.copy(E.y) - t4.Add(E.z) + t4.Add(E.z, mem) t4.norm() //t4=Y1+Z1 - x3 := NewFP8copy(Q.y) - x3.Add(Q.z) + x3 := NewFP8copy(Q.y, mem) + x3.Add(Q.z, mem) x3.norm() //x3=Y2+Z2 - t4.Mul(x3) //t4=(Y1+Z1)(Y2+Z2) - x3.copy(t1) // - x3.Add(t2) //X3=Y1.Y2+Z1.Z2 + t4.Mul(x3, mem) //t4=(Y1+Z1)(Y2+Z2) + x3.copy(t1) // + x3.Add(t2, mem) //X3=Y1.Y2+Z1.Z2 - t4.Sub(x3) + t4.Sub(x3, mem) t4.norm() - if SEXTIC_TWIST == D_TYPE { - t4.times_i() //t4=(Y1+Z1)(Y2+Z2) - (Y1.Y2+Z1.Z2) = Y1.Z2+Y2.Z1 - } + t4.times_i(mem) //t4=(Y1+Z1)(Y2+Z2) - (Y1.Y2+Z1.Z2) = Y1.Z2+Y2.Z1 x3.copy(E.x) - x3.Add(E.z) + x3.Add(E.z, mem) x3.norm() // x3=X1+Z1 - y3 := NewFP8copy(Q.x) - y3.Add(Q.z) - y3.norm() // y3=X2+Z2 - x3.Mul(y3) // x3=(X1+Z1)(X2+Z2) + y3 := NewFP8copy(Q.x, mem) + y3.Add(Q.z, mem) + y3.norm() // y3=X2+Z2 + x3.Mul(y3, mem) // x3=(X1+Z1)(X2+Z2) y3.copy(t0) - y3.Add(t2) // y3=X1.X2+Z1+Z2 - y3.rsub(x3) + y3.Add(t2, mem) // y3=X1.X2+Z1+Z2 + y3.rsub(x3, mem) y3.norm() // y3=(X1+Z1)(X2+Z2) - (X1.X2+Z1.Z2) = X1.Z2+X2.Z1 - if SEXTIC_TWIST == D_TYPE { - t0.times_i() // x.Q.x - t1.times_i() // y.Q.y - } + t0.times_i(mem) // x.Q.x + t1.times_i(mem) // y.Q.y x3.copy(t0) - x3.Add(t0) - t0.Add(x3) + x3.Add(t0, mem) + t0.Add(x3, mem) t0.norm() - t2.imul(b) - if SEXTIC_TWIST == M_TYPE { - t2.times_i() - } - z3 := NewFP8copy(t1) - z3.Add(t2) + t2.imul(b, mem) + z3 := NewFP8copy(t1, mem) + z3.Add(t2, mem) z3.norm() - t1.Sub(t2) + t1.Sub(t2, mem) t1.norm() - y3.imul(b) - if SEXTIC_TWIST == M_TYPE { - y3.times_i() - } + y3.imul(b, mem) x3.copy(y3) - x3.Mul(t4) + x3.Mul(t4, mem) t2.copy(t3) - t2.Mul(t1) - x3.rsub(t2) - y3.Mul(t0) - t1.Mul(z3) - y3.Add(t1) - t0.Mul(t3) - z3.Mul(t4) - 
z3.Add(t0) + t2.Mul(t1, mem) + x3.rsub(t2, mem) + y3.Mul(t0, mem) + t1.Mul(z3, mem) + y3.Add(t1, mem) + t0.Mul(t3, mem) + z3.Mul(t4, mem) + z3.Add(t0, mem) E.x.copy(x3) E.x.norm() @@ -489,51 +436,42 @@ func (E *ECP8) Add(Q *ECP8) int { } /* set this-=Q */ -func (E *ECP8) Sub(Q *ECP8) int { - NQ := NewECP8() +func (E *ECP8) Sub(Q *ECP8, mem *arena.Arena) int { + NQ := NewECP8(mem) NQ.Copy(Q) - NQ.Neg() - D := E.Add(NQ) + NQ.Neg(mem) + D := E.Add(NQ, mem) return D } func ECP8_frob_constants() [3]*FP2 { - Fra := NewBIGints(Fra) - Frb := NewBIGints(Frb) - X := NewFP2bigs(Fra, Frb) + Fra := NewBIGints(Fra, nil) + Frb := NewBIGints(Frb, nil) + X := NewFP2bigs(Fra, Frb, nil) - F0 := NewFP2copy(X) - F0.Sqr() - F2 := NewFP2copy(F0) - F2.Mul_ip() + F0 := NewFP2copy(X, nil) + F0.Sqr(nil) + F2 := NewFP2copy(F0, nil) + F2.Mul_ip(nil) F2.norm() - F1 := NewFP2copy(F2) - F1.Sqr() - F2.Mul(F1) + F1 := NewFP2copy(F2, nil) + F1.Sqr(nil) + F2.Mul(F1, nil) - F2.Mul_ip() + F2.Mul_ip(nil) F2.norm() F1.copy(X) - if SEXTIC_TWIST == M_TYPE { - F1.Mul_ip() - F1.norm() - F1.Invert(nil) - F0.copy(F1) - F0.Sqr() - F1.Mul(F0) - } - if SEXTIC_TWIST == D_TYPE { - F0.copy(F1) - F0.Sqr() - F1.Mul(F0) - F0.Mul_ip() - F0.norm() - F1.Mul_ip() - F1.norm() - F1.Mul_ip() - F1.norm() - } + + F0.copy(F1) + F0.Sqr(nil) + F1.Mul(F0, nil) + F0.Mul_ip(nil) + F0.norm() + F1.Mul_ip(nil) + F1.norm() + F1.Mul_ip(nil) + F1.norm() F := [3]*FP2{F0, F1, F2} return F @@ -542,41 +480,27 @@ func ECP8_frob_constants() [3]*FP2 { /* set this*=q, where q is Modulus, using Frobenius */ func (E *ECP8) frob(F [3]*FP2, n int) { for i := 0; i < n; i++ { - E.x.frob(F[2]) - if SEXTIC_TWIST == M_TYPE { - E.x.qmul(F[0]) - E.x.times_i2() - } - if SEXTIC_TWIST == D_TYPE { - E.x.qmul(F[0]) - E.x.times_i2() - } - E.y.frob(F[2]) - if SEXTIC_TWIST == M_TYPE { - E.y.qmul(F[1]) - E.y.times_i2() - E.y.times_i() - } - if SEXTIC_TWIST == D_TYPE { - E.y.qmul(F[1]) - E.y.times_i() - } - - E.z.frob(F[2]) + E.x.frob(F[2], nil) + E.x.qmul(F[0], nil) 
+ E.x.times_i2(nil) + E.y.frob(F[2], nil) + E.y.qmul(F[1], nil) + E.y.times_i(nil) + E.z.frob(F[2], nil) } } /* P*=e */ -func (E *ECP8) mul(e *BIG) *ECP8 { +func (E *ECP8) mul(e *BIG, mem *arena.Arena) *ECP8 { /* fixed size windows */ - mt := NewBIG() - t := NewBIG() - P := NewECP8() - Q := NewECP8() - C := NewECP8() + mt := NewBIG(mem) + t := NewBIG(mem) + P := NewECP8(nil) + Q := NewECP8(mem) + C := NewECP8(mem) - if E.Is_infinity() { - return NewECP8() + if E.Is_infinity(mem) { + return NewECP8(mem) } var W []*ECP8 @@ -584,15 +508,15 @@ func (E *ECP8) mul(e *BIG) *ECP8 { /* precompute table */ Q.Copy(E) - Q.Dbl() + Q.Dbl(mem) - W = append(W, NewECP8()) + W = append(W, NewECP8(mem)) W[0].Copy(E) for i := 1; i < 8; i++ { - W = append(W, NewECP8()) + W = append(W, NewECP8(mem)) W[i].Copy(W[i-1]) - W[i].Add(Q) + W[i].Add(Q, mem) } /* make exponent odd - Add 2P if even, P if odd */ @@ -622,81 +546,80 @@ func (E *ECP8) mul(e *BIG) *ECP8 { P.selector(W, int32(w[nb])) for i := nb - 1; i >= 0; i-- { Q.selector(W, int32(w[i])) - P.Dbl() - P.Dbl() - P.Dbl() - P.Dbl() - P.Add(Q) + P.Dbl(mem) + P.Dbl(mem) + P.Dbl(mem) + P.Dbl(mem) + P.Add(Q, mem) } - P.Sub(C) - P.Affine() + P.Sub(C, mem) + P.Affine(mem) return P } /* Public version */ -func (E *ECP8) Mul(e *BIG) *ECP8 { - return E.mul(e) +func (E *ECP8) Mul(e *BIG, mem *arena.Arena) *ECP8 { + return E.mul(e, mem) } /* needed for SOK */ func (E *ECP8) Cfp() { F := ECP8_frob_constants() - x := NewBIGints(CURVE_Bnx) + x := NewBIGints(CURVE_Bnx, nil) - xQ := E.Mul(x) - x2Q := xQ.Mul(x) - x3Q := x2Q.Mul(x) - x4Q := x3Q.Mul(x) - x5Q := x4Q.Mul(x) - x6Q := x5Q.Mul(x) - x7Q := x6Q.Mul(x) - x8Q := x7Q.Mul(x) + xQ := E.Mul(x, nil) + x2Q := xQ.Mul(x, nil) + x3Q := x2Q.Mul(x, nil) + x4Q := x3Q.Mul(x, nil) + x5Q := x4Q.Mul(x, nil) + x6Q := x5Q.Mul(x, nil) + x7Q := x6Q.Mul(x, nil) + x8Q := x7Q.Mul(x, nil) - if SIGN_OF_X == NEGATIVEX { - xQ.Neg() - x3Q.Neg() - x5Q.Neg() - x7Q.Neg() - } - x8Q.Sub(x7Q) - x8Q.Sub(E) + xQ.Neg(nil) + 
x3Q.Neg(nil) + x5Q.Neg(nil) + x7Q.Neg(nil) - x7Q.Sub(x6Q) + x8Q.Sub(x7Q, nil) + x8Q.Sub(E, nil) + + x7Q.Sub(x6Q, nil) x7Q.frob(F, 1) - x6Q.Sub(x5Q) + x6Q.Sub(x5Q, nil) x6Q.frob(F, 2) - x5Q.Sub(x4Q) + x5Q.Sub(x4Q, nil) x5Q.frob(F, 3) - x4Q.Sub(x3Q) + x4Q.Sub(x3Q, nil) x4Q.frob(F, 4) - x3Q.Sub(x2Q) + x3Q.Sub(x2Q, nil) x3Q.frob(F, 5) - x2Q.Sub(xQ) + x2Q.Sub(xQ, nil) x2Q.frob(F, 6) - xQ.Sub(E) + xQ.Sub(E, nil) xQ.frob(F, 7) - E.Dbl() + E.Dbl(nil) E.frob(F, 8) - E.Add(x8Q) - E.Add(x7Q) - E.Add(x6Q) - E.Add(x5Q) + E.Add(x8Q, nil) + E.Add(x7Q, nil) + E.Add(x6Q, nil) + E.Add(x5Q, nil) - E.Add(x4Q) - E.Add(x3Q) - E.Add(x2Q) - E.Add(xQ) + E.Add(x4Q, nil) + E.Add(x3Q, nil) + E.Add(x2Q, nil) + E.Add(xQ, nil) - E.Affine() + E.Affine(nil) } func ECP8_generator() *ECP8 { @@ -704,34 +627,34 @@ func ECP8_generator() *ECP8 { G = NewECP8fp8s( NewFP8fp4s( NewFP4fp2s( - NewFP2bigs(NewBIGints(CURVE_Pxaaa), NewBIGints(CURVE_Pxaab)), - NewFP2bigs(NewBIGints(CURVE_Pxaba), NewBIGints(CURVE_Pxabb))), + NewFP2bigs(NewBIGints(CURVE_Pxaaa, nil), NewBIGints(CURVE_Pxaab, nil), nil), + NewFP2bigs(NewBIGints(CURVE_Pxaba, nil), NewBIGints(CURVE_Pxabb, nil), nil), nil), NewFP4fp2s( - NewFP2bigs(NewBIGints(CURVE_Pxbaa), NewBIGints(CURVE_Pxbab)), - NewFP2bigs(NewBIGints(CURVE_Pxbba), NewBIGints(CURVE_Pxbbb)))), + NewFP2bigs(NewBIGints(CURVE_Pxbaa, nil), NewBIGints(CURVE_Pxbab, nil), nil), + NewFP2bigs(NewBIGints(CURVE_Pxbba, nil), NewBIGints(CURVE_Pxbbb, nil), nil), nil), nil), NewFP8fp4s( NewFP4fp2s( - NewFP2bigs(NewBIGints(CURVE_Pyaaa), NewBIGints(CURVE_Pyaab)), - NewFP2bigs(NewBIGints(CURVE_Pyaba), NewBIGints(CURVE_Pyabb))), + NewFP2bigs(NewBIGints(CURVE_Pyaaa, nil), NewBIGints(CURVE_Pyaab, nil), nil), + NewFP2bigs(NewBIGints(CURVE_Pyaba, nil), NewBIGints(CURVE_Pyabb, nil), nil), nil), NewFP4fp2s( - NewFP2bigs(NewBIGints(CURVE_Pybaa), NewBIGints(CURVE_Pybab)), - NewFP2bigs(NewBIGints(CURVE_Pybba), NewBIGints(CURVE_Pybbb))))) + NewFP2bigs(NewBIGints(CURVE_Pybaa, nil), NewBIGints(CURVE_Pybab, nil), 
nil), + NewFP2bigs(NewBIGints(CURVE_Pybba, nil), NewBIGints(CURVE_Pybbb, nil), nil), nil), nil), nil) return G } func ECP8_hap2point(h *BIG) *ECP8 { - one := NewBIGint(1) - x := NewBIGcopy(h) + one := NewBIGint(1, nil) + x := NewBIGcopy(h, nil) var X2 *FP2 var X4 *FP4 var X8 *FP8 var Q *ECP8 for true { - X2 = NewFP2bigs(one, x) - X4 = NewFP4fp2(X2) - X8 = NewFP8fp4(X4) - Q = NewECP8fp8(X8, 0) - if !Q.Is_infinity() { + X2 = NewFP2bigs(one, x, nil) + X4 = NewFP4fp2(X2, nil) + X8 = NewFP8fp4(X4, nil) + Q = NewECP8fp8(X8, 0, nil) + if !Q.Is_infinity(nil) { break } x.inc(1) @@ -743,83 +666,83 @@ func ECP8_hap2point(h *BIG) *ECP8 { /* Deterministic mapping of Fp to point on curve */ func ECP8_map2point(H *FP8) *ECP8 { // Shallue and van de Woestijne - NY := NewFP8int(1) - T := NewFP8copy(H) - sgn := T.sign() + NY := NewFP8int(1, nil) + T := NewFP8copy(H, nil) + sgn := T.sign(nil) - Z := NewFPint(RIADZG2A) - X1 := NewFP8fp(Z) - X3 := NewFP8copy(X1) - A := RHS8(X1) - W := NewFP8copy(A) - W.Sqrt(nil) + Z := NewFPint(RIADZG2A, nil) + X1 := NewFP8fp(Z, nil) + X3 := NewFP8copy(X1, nil) + A := RHS8(X1, nil) + W := NewFP8copy(A, nil) + W.Sqrt(nil, nil) - s := NewFPbig(NewBIGints(SQRTm3)) - Z.Mul(s) + s := NewFPbig(NewBIGints(SQRTm3, nil), nil) + Z.Mul(s, nil) - T.Sqr() - Y := NewFP8copy(A) - Y.Mul(T) + T.Sqr(nil) + Y := NewFP8copy(A, nil) + Y.Mul(T, nil) T.copy(NY) - T.Add(Y) + T.Add(Y, nil) T.norm() - Y.rsub(NY) + Y.rsub(NY, nil) Y.norm() NY.copy(T) - NY.Mul(Y) + NY.Mul(Y, nil) - NY.tmul(Z) - NY.Invert(nil) + NY.tmul(Z, nil) + NY.Invert(nil, nil) - W.tmul(Z) - if W.sign() == 1 { - W.Neg() + W.tmul(Z, nil) + if W.sign(nil) == 1 { + W.Neg(nil) W.norm() } - W.tmul(Z) - W.Mul(H) - W.Mul(Y) - W.Mul(NY) + W.tmul(Z, nil) + W.Mul(H, nil) + W.Mul(Y, nil) + W.Mul(NY, nil) - X1.Neg() + X1.Neg(nil) X1.norm() - X1.div2() - X2 := NewFP8copy(X1) - X1.Sub(W) + X1.div2(nil) + X2 := NewFP8copy(X1, nil) + X1.Sub(W, nil) X1.norm() - X2.Add(W) + X2.Add(W, nil) X2.norm() - A.Add(A) - A.Add(A) + 
A.Add(A, nil) + A.Add(A, nil) A.norm() - T.Sqr() - T.Mul(NY) - T.Sqr() - A.Mul(T) - X3.Add(A) + T.Sqr(nil) + T.Mul(NY, nil) + T.Sqr(nil) + A.Mul(T, nil) + X3.Add(A, nil) X3.norm() - Y.copy(RHS8(X2)) + Y.copy(RHS8(X2, nil)) X3.cmove(X2, Y.qr(nil)) - Y.copy(RHS8(X1)) + Y.copy(RHS8(X1, nil)) X3.cmove(X1, Y.qr(nil)) - Y.copy(RHS8(X3)) - Y.Sqrt(nil) + Y.copy(RHS8(X3, nil)) + Y.Sqrt(nil, nil) - ne := Y.sign() ^ sgn + ne := Y.sign(nil) ^ sgn W.copy(Y) - W.Neg() + W.Neg(nil) W.norm() Y.cmove(W, ne) - return NewECP8fp8s(X3, Y) + return NewECP8fp8s(X3, Y, nil) } /* Map octet string to curve point */ func ECP8_mapit(h []byte) *ECP8 { - q := NewBIGints(Modulus) + q := NewBIGints(Modulus, nil) dx := DBIG_fromBytes(h) - x := dx.Mod(q) + x := dx.Mod(q, nil) Q := ECP8_hap2point(x) Q.Cfp() @@ -830,14 +753,14 @@ func ECP8_mapit(h []byte) *ECP8 { // Bos & Costello https://eprint.iacr.org/2013/458.pdf // Faz-Hernandez & Longa & Sanchez https://eprint.iacr.org/2013/158.pdf // Side channel attack secure -func Mul16(Q []*ECP8, u []*BIG) *ECP8 { - W := NewECP8() - P := NewECP8() +func Mul16(Q []*ECP8, u []*BIG, mem *arena.Arena) *ECP8 { + W := NewECP8(mem) + P := NewECP8(mem) var T1 []*ECP8 var T2 []*ECP8 var T3 []*ECP8 var T4 []*ECP8 - mt := NewBIG() + mt := NewBIG(mem) var t []*BIG var bt int8 var k int @@ -852,104 +775,104 @@ func Mul16(Q []*ECP8, u []*BIG) *ECP8 { var s4 [NLEN*int(BASEBITS) + 1]int8 for i := 0; i < 16; i++ { - t = append(t, NewBIGcopy(u[i])) + t = append(t, NewBIGcopy(u[i], mem)) } - T1 = append(T1, NewECP8()) + T1 = append(T1, NewECP8(mem)) T1[0].Copy(Q[0]) // Q[0] - T1 = append(T1, NewECP8()) + T1 = append(T1, NewECP8(mem)) T1[1].Copy(T1[0]) - T1[1].Add(Q[1]) // Q[0]+Q[1] - T1 = append(T1, NewECP8()) + T1[1].Add(Q[1], mem) // Q[0]+Q[1] + T1 = append(T1, NewECP8(mem)) T1[2].Copy(T1[0]) - T1[2].Add(Q[2]) // Q[0]+Q[2] - T1 = append(T1, NewECP8()) + T1[2].Add(Q[2], mem) // Q[0]+Q[2] + T1 = append(T1, NewECP8(mem)) T1[3].Copy(T1[1]) - T1[3].Add(Q[2]) // Q[0]+Q[1]+Q[2] - 
T1 = append(T1, NewECP8()) + T1[3].Add(Q[2], mem) // Q[0]+Q[1]+Q[2] + T1 = append(T1, NewECP8(mem)) T1[4].Copy(T1[0]) - T1[4].Add(Q[3]) // Q[0]+Q[3] - T1 = append(T1, NewECP8()) + T1[4].Add(Q[3], mem) // Q[0]+Q[3] + T1 = append(T1, NewECP8(mem)) T1[5].Copy(T1[1]) - T1[5].Add(Q[3]) // Q[0]+Q[1]+Q[3] - T1 = append(T1, NewECP8()) + T1[5].Add(Q[3], mem) // Q[0]+Q[1]+Q[3] + T1 = append(T1, NewECP8(mem)) T1[6].Copy(T1[2]) - T1[6].Add(Q[3]) // Q[0]+Q[2]+Q[3] - T1 = append(T1, NewECP8()) + T1[6].Add(Q[3], mem) // Q[0]+Q[2]+Q[3] + T1 = append(T1, NewECP8(mem)) T1[7].Copy(T1[3]) - T1[7].Add(Q[3]) // Q[0]+Q[1]+Q[2]+Q[3] + T1[7].Add(Q[3], mem) // Q[0]+Q[1]+Q[2]+Q[3] - T2 = append(T2, NewECP8()) + T2 = append(T2, NewECP8(mem)) T2[0].Copy(Q[4]) // Q[0] - T2 = append(T2, NewECP8()) + T2 = append(T2, NewECP8(mem)) T2[1].Copy(T2[0]) - T2[1].Add(Q[5]) // Q[0]+Q[1] - T2 = append(T2, NewECP8()) + T2[1].Add(Q[5], mem) // Q[0]+Q[1] + T2 = append(T2, NewECP8(mem)) T2[2].Copy(T2[0]) - T2[2].Add(Q[6]) // Q[0]+Q[2] - T2 = append(T2, NewECP8()) + T2[2].Add(Q[6], mem) // Q[0]+Q[2] + T2 = append(T2, NewECP8(mem)) T2[3].Copy(T2[1]) - T2[3].Add(Q[6]) // Q[0]+Q[1]+Q[2] - T2 = append(T2, NewECP8()) + T2[3].Add(Q[6], mem) // Q[0]+Q[1]+Q[2] + T2 = append(T2, NewECP8(mem)) T2[4].Copy(T2[0]) - T2[4].Add(Q[7]) // Q[0]+Q[3] - T2 = append(T2, NewECP8()) + T2[4].Add(Q[7], mem) // Q[0]+Q[3] + T2 = append(T2, NewECP8(mem)) T2[5].Copy(T2[1]) - T2[5].Add(Q[7]) // Q[0]+Q[1]+Q[3] - T2 = append(T2, NewECP8()) + T2[5].Add(Q[7], mem) // Q[0]+Q[1]+Q[3] + T2 = append(T2, NewECP8(mem)) T2[6].Copy(T2[2]) - T2[6].Add(Q[7]) // Q[0]+Q[2]+Q[3] - T2 = append(T2, NewECP8()) + T2[6].Add(Q[7], mem) // Q[0]+Q[2]+Q[3] + T2 = append(T2, NewECP8(mem)) T2[7].Copy(T2[3]) - T2[7].Add(Q[7]) // Q[0]+Q[1]+Q[2]+Q[3] + T2[7].Add(Q[7], mem) // Q[0]+Q[1]+Q[2]+Q[3] - T3 = append(T3, NewECP8()) + T3 = append(T3, NewECP8(mem)) T3[0].Copy(Q[8]) // Q[0] - T3 = append(T3, NewECP8()) + T3 = append(T3, NewECP8(mem)) T3[1].Copy(T3[0]) - 
T3[1].Add(Q[9]) // Q[0]+Q[1] - T3 = append(T3, NewECP8()) + T3[1].Add(Q[9], mem) // Q[0]+Q[1] + T3 = append(T3, NewECP8(mem)) T3[2].Copy(T3[0]) - T3[2].Add(Q[10]) // Q[0]+Q[2] - T3 = append(T3, NewECP8()) + T3[2].Add(Q[10], mem) // Q[0]+Q[2] + T3 = append(T3, NewECP8(mem)) T3[3].Copy(T3[1]) - T3[3].Add(Q[10]) // Q[0]+Q[1]+Q[2] - T3 = append(T3, NewECP8()) + T3[3].Add(Q[10], mem) // Q[0]+Q[1]+Q[2] + T3 = append(T3, NewECP8(mem)) T3[4].Copy(T3[0]) - T3[4].Add(Q[11]) // Q[0]+Q[3] - T3 = append(T3, NewECP8()) + T3[4].Add(Q[11], mem) // Q[0]+Q[3] + T3 = append(T3, NewECP8(mem)) T3[5].Copy(T3[1]) - T3[5].Add(Q[11]) // Q[0]+Q[1]+Q[3] - T3 = append(T3, NewECP8()) + T3[5].Add(Q[11], mem) // Q[0]+Q[1]+Q[3] + T3 = append(T3, NewECP8(mem)) T3[6].Copy(T3[2]) - T3[6].Add(Q[11]) // Q[0]+Q[2]+Q[3] - T3 = append(T3, NewECP8()) + T3[6].Add(Q[11], mem) // Q[0]+Q[2]+Q[3] + T3 = append(T3, NewECP8(mem)) T3[7].Copy(T3[3]) - T3[7].Add(Q[11]) // Q[0]+Q[1]+Q[2]+Q[3] + T3[7].Add(Q[11], mem) // Q[0]+Q[1]+Q[2]+Q[3] - T4 = append(T4, NewECP8()) + T4 = append(T4, NewECP8(mem)) T4[0].Copy(Q[12]) // Q[0] - T4 = append(T4, NewECP8()) + T4 = append(T4, NewECP8(mem)) T4[1].Copy(T4[0]) - T4[1].Add(Q[13]) // Q[0]+Q[1] - T4 = append(T4, NewECP8()) + T4[1].Add(Q[13], mem) // Q[0]+Q[1] + T4 = append(T4, NewECP8(mem)) T4[2].Copy(T4[0]) - T4[2].Add(Q[14]) // Q[0]+Q[2] - T4 = append(T4, NewECP8()) + T4[2].Add(Q[14], mem) // Q[0]+Q[2] + T4 = append(T4, NewECP8(mem)) T4[3].Copy(T4[1]) - T4[3].Add(Q[14]) // Q[0]+Q[1]+Q[2] - T4 = append(T4, NewECP8()) + T4[3].Add(Q[14], mem) // Q[0]+Q[1]+Q[2] + T4 = append(T4, NewECP8(mem)) T4[4].Copy(T4[0]) - T4[4].Add(Q[15]) // Q[0]+Q[3] - T4 = append(T4, NewECP8()) + T4[4].Add(Q[15], mem) // Q[0]+Q[3] + T4 = append(T4, NewECP8(mem)) T4[5].Copy(T4[1]) - T4[5].Add(Q[15]) // Q[0]+Q[1]+Q[3] - T4 = append(T4, NewECP8()) + T4[5].Add(Q[15], mem) // Q[0]+Q[1]+Q[3] + T4 = append(T4, NewECP8(mem)) T4[6].Copy(T4[2]) - T4[6].Add(Q[15]) // Q[0]+Q[2]+Q[3] - T4 = append(T4, NewECP8()) + 
T4[6].Add(Q[15], mem) // Q[0]+Q[2]+Q[3] + T4 = append(T4, NewECP8(mem)) T4[7].Copy(T4[3]) - T4[7].Add(Q[15]) // Q[0]+Q[1]+Q[2]+Q[3] + T4[7].Add(Q[15], mem) // Q[0]+Q[1]+Q[2]+Q[3] // Make them odd pb1 := 1 - t[0].parity() @@ -1037,38 +960,38 @@ func Mul16(Q []*ECP8, u []*BIG) *ECP8 { // Main loop P.selector(T1, int32(2*w1[nb-1]+1)) W.selector(T2, int32(2*w2[nb-1]+1)) - P.Add(W) + P.Add(W, mem) W.selector(T3, int32(2*w3[nb-1]+1)) - P.Add(W) + P.Add(W, mem) W.selector(T4, int32(2*w4[nb-1]+1)) - P.Add(W) + P.Add(W, mem) for i := nb - 2; i >= 0; i-- { - P.Dbl() + P.Dbl(mem) W.selector(T1, int32(2*w1[i]+s1[i])) - P.Add(W) + P.Add(W, mem) W.selector(T2, int32(2*w2[i]+s2[i])) - P.Add(W) + P.Add(W, mem) W.selector(T3, int32(2*w3[i]+s3[i])) - P.Add(W) + P.Add(W, mem) W.selector(T4, int32(2*w4[i]+s4[i])) - P.Add(W) + P.Add(W, mem) } // apply correction W.Copy(P) - W.Sub(Q[0]) + W.Sub(Q[0], mem) P.cmove(W, pb1) W.Copy(P) - W.Sub(Q[4]) + W.Sub(Q[4], mem) P.cmove(W, pb2) W.Copy(P) - W.Sub(Q[8]) + W.Sub(Q[8], mem) P.cmove(W, pb3) W.Copy(P) - W.Sub(Q[12]) + W.Sub(Q[12], mem) P.cmove(W, pb4) - P.Affine() + P.Affine(mem) return P } diff --git a/nekryptology/pkg/core/curves/native/bls48581/hpke.go b/nekryptology/pkg/core/curves/native/bls48581/hpke.go deleted file mode 100644 index eb80eb7..0000000 --- a/nekryptology/pkg/core/curves/native/bls48581/hpke.go +++ /dev/null @@ -1,328 +0,0 @@ -/* - * Copyright (c) 2012-2020 MIRACL UK Ltd. - * - * This file is part of MIRACL Core - * (see https://github.com/miracl/ext.. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* Hybrid Public Key Encryption */ - -/* Following https://datatracker.ietf.org/doc/draft-irtf-cfrg-hpke/?include_text=1 */ - -package bls48581 - -import "source.quilibrium.com/quilibrium/monorepo/nekryptology/pkg/core/curves/native/bls48581/ext" - -//import "fmt" - -func reverse(X []byte) { - lx := len(X) - for i := 0; i < lx/2; i++ { - ch := X[i] - X[i] = X[lx-i-1] - X[lx-i-1] = ch - } -} - -func labeledExtract(SALT []byte, SUITE_ID []byte, label string, IKM []byte) []byte { - rfc := "HPKE-v1" - RFC := []byte(rfc) - LABEL := []byte(label) - var LIKM []byte - for i := 0; i < len(RFC); i++ { - LIKM = append(LIKM, RFC[i]) - } - for i := 0; i < len(SUITE_ID); i++ { - LIKM = append(LIKM, SUITE_ID[i]) - } - for i := 0; i < len(LABEL); i++ { - LIKM = append(LIKM, LABEL[i]) - } - if IKM != nil { - for i := 0; i < len(IKM); i++ { - LIKM = append(LIKM, IKM[i]) - } - } - return ext.HKDF_Extract(ext.MC_SHA2, HASH_TYPE, SALT, LIKM) -} - -func labeledExpand(PRK []byte, SUITE_ID []byte, label string, INFO []byte, L int) []byte { - rfc := "HPKE-v1" - RFC := []byte(rfc) - LABEL := []byte(label) - AR := ext.InttoBytes(L, 2) - var LINFO []byte - for i := 0; i < len(AR); i++ { - LINFO = append(LINFO, AR[i]) - } - for i := 0; i < len(RFC); i++ { - LINFO = append(LINFO, RFC[i]) - } - for i := 0; i < len(SUITE_ID); i++ { - LINFO = append(LINFO, SUITE_ID[i]) - } - for i := 0; i < len(LABEL); i++ { - LINFO = append(LINFO, LABEL[i]) - } - if INFO != nil { - for i := 0; i < len(INFO); i++ { - LINFO = append(LINFO, INFO[i]) - } - } - - return ext.HKDF_Expand(ext.MC_SHA2, HASH_TYPE, L, PRK, LINFO) -} - -func extractAndExpand(config_id int, DH []byte, context []byte) []byte { - kem := config_id & 255 - txt := "KEM" - KEM_ID := ext.InttoBytes(kem, 2) - KEM := []byte(txt) - var SUITE_ID []byte - for i := 0; i < len(KEM); i++ { - SUITE_ID = append(SUITE_ID, KEM[i]) - } - SUITE_ID = 
append(SUITE_ID, KEM_ID[0]) - SUITE_ID = append(SUITE_ID, KEM_ID[1]) - - PRK := labeledExtract(nil, SUITE_ID, "eae_prk", DH) - return labeledExpand(PRK, SUITE_ID, "shared_secret", context, HASH_TYPE) -} - -func DeriveKeyPair(config_id int, SK []byte, PK []byte, SEED []byte) bool { - counter := 0 - kem := config_id & 255 - - txt := "KEM" - KEM_ID := ext.InttoBytes(kem, 2) - KEM := []byte(txt) - var SUITE_ID []byte - for i := 0; i < len(KEM); i++ { - SUITE_ID = append(SUITE_ID, KEM[i]) - } - SUITE_ID = append(SUITE_ID, KEM_ID[0]) - SUITE_ID = append(SUITE_ID, KEM_ID[1]) - - PRK := labeledExtract(nil, SUITE_ID, "dkp_prk", SEED) - var S []byte - if kem == 32 || kem == 33 { // RFC7748 - S = labeledExpand(PRK, SUITE_ID, "sk", nil, EGS) - reverse(S) - if kem == 32 { - S[EGS-1] &= 248 - S[0] &= 127 - S[0] |= 64 - } else { - S[EGS-1] &= 252 - S[0] |= 128 - } - } else { - bit_mask := 0xff - if kem == 18 { - bit_mask = 1 - } - for i := 0; i < EGS; i++ { - S = append(S, 0) - } - for !ECDH_IN_RANGE(S) && counter < 256 { - var INFO [1]byte - INFO[0] = byte(counter) - S = labeledExpand(PRK, SUITE_ID, "candidate", INFO[:], EGS) - S[0] &= byte(bit_mask) - counter++ - } - } - for i := 0; i < EGS; i++ { - SK[i] = S[i] - } - ECDH_KEY_PAIR_GENERATE(nil, SK, PK) - if kem == 32 || kem == 33 { - reverse(PK) - } - if counter < 256 { - return true - } - return false -} - -func Encap(config_id int, skE []byte, pkE []byte, pkR []byte) []byte { - DH := make([]byte, EFS) - var kemcontext []byte - kem := config_id & 255 - - if kem == 32 || kem == 33 { - reverse(pkR) - ECDH_ECPSVDP_DH(skE, pkR, DH[:], 0) - reverse(pkR) - reverse(DH[:]) - } else { - ECDH_ECPSVDP_DH(skE, pkR, DH[:], 0) - } - for i := 0; i < len(pkE); i++ { - kemcontext = append(kemcontext, pkE[i]) - } - for i := 0; i < len(pkR); i++ { - kemcontext = append(kemcontext, pkR[i]) - } - return extractAndExpand(config_id, DH[:], kemcontext) -} - -func Decap(config_id int, skR []byte, pkE []byte, pkR []byte) []byte { - DH := make([]byte, 
EFS) - var kemcontext []byte - kem := config_id & 255 - - if kem == 32 || kem == 33 { - reverse(pkE) - ECDH_ECPSVDP_DH(skR, pkE, DH[:], 0) - reverse(pkE) - reverse(DH[:]) - } else { - ECDH_ECPSVDP_DH(skR, pkE, DH[:], 0) - } - - for i := 0; i < len(pkE); i++ { - kemcontext = append(kemcontext, pkE[i]) - } - for i := 0; i < len(pkR); i++ { - kemcontext = append(kemcontext, pkR[i]) - } - return extractAndExpand(config_id, DH[:], kemcontext) -} - -func AuthEncap(config_id int, skE []byte, skS []byte, pkE []byte, pkR []byte, pkS []byte) []byte { - pklen := len(pkE) - DH := make([]byte, EFS) - DH1 := make([]byte, EFS) - - kemcontext := make([]byte, 3*pklen) - kem := config_id & 255 - - if kem == 32 || kem == 33 { - reverse(pkR) - ECDH_ECPSVDP_DH(skE, pkR, DH[:], 0) - ECDH_ECPSVDP_DH(skS, pkR, DH1[:], 0) - reverse(pkR) - reverse(DH[:]) - reverse(DH1[:]) - } else { - ECDH_ECPSVDP_DH(skE, pkR, DH[:], 0) - ECDH_ECPSVDP_DH(skS, pkR, DH1[:], 0) - } - ZZ := make([]byte, 2*EFS) - for i := 0; i < EFS; i++ { - ZZ[i] = DH[i] - ZZ[EFS+i] = DH1[i] - } - - for i := 0; i < pklen; i++ { - kemcontext[i] = pkE[i] - kemcontext[pklen+i] = pkR[i] - kemcontext[2*pklen+i] = pkS[i] - } - return extractAndExpand(config_id, ZZ[:], kemcontext) -} - -func AuthDecap(config_id int, skR []byte, pkE []byte, pkR []byte, pkS []byte) []byte { - pklen := len(pkE) - DH := make([]byte, EFS) - DH1 := make([]byte, EFS) - kemcontext := make([]byte, 3*pklen) - - kem := config_id & 255 - - if kem == 32 || kem == 33 { - reverse(pkE) - reverse(pkS) - ECDH_ECPSVDP_DH(skR[:], pkE, DH[:], 0) - ECDH_ECPSVDP_DH(skR[:], pkS, DH1[:], 0) - reverse(pkE) - reverse(pkS) - reverse(DH[:]) - reverse(DH1[:]) - } else { - ECDH_ECPSVDP_DH(skR[:], pkE, DH[:], 0) - ECDH_ECPSVDP_DH(skR[:], pkS, DH1[:], 0) - } - ZZ := make([]byte, 2*EFS) - for i := 0; i < EFS; i++ { - ZZ[i] = DH[i] - ZZ[EFS+i] = DH1[i] - } - - for i := 0; i < pklen; i++ { - kemcontext[i] = pkE[i] - kemcontext[pklen+i] = pkR[i] - kemcontext[2*pklen+i] = pkS[i] - } - 
return extractAndExpand(config_id, ZZ[:], kemcontext) -} - -/* -func printBinary(array []byte) { - for i := 0; i < len(array); i++ { - fmt.Printf("%02x", array[i]) - } - fmt.Printf("\n") -} -*/ - -func KeySchedule(config_id int, mode int, Z []byte, info []byte, psk []byte, pskID []byte) ([]byte, []byte, []byte) { - var context []byte - - kem := config_id & 255 - kdf := (config_id >> 8) & 3 - aead := (config_id >> 10) & 3 - - txt := "HPKE" - KEM := []byte(txt) - var SUITE_ID []byte - for i := 0; i < len(KEM); i++ { - SUITE_ID = append(SUITE_ID, KEM[i]) - } - num := ext.InttoBytes(kem, 2) - SUITE_ID = append(SUITE_ID, num[0]) - SUITE_ID = append(SUITE_ID, num[1]) - num = ext.InttoBytes(kdf, 2) - SUITE_ID = append(SUITE_ID, num[0]) - SUITE_ID = append(SUITE_ID, num[1]) - num = ext.InttoBytes(aead, 2) - SUITE_ID = append(SUITE_ID, num[0]) - SUITE_ID = append(SUITE_ID, num[1]) - - ar := ext.InttoBytes(mode, 1) - for i := 0; i < len(ar); i++ { - context = append(context, ar[i]) - } - - H := labeledExtract(nil, SUITE_ID, "psk_id_hash", pskID) - for i := 0; i < HASH_TYPE; i++ { - context = append(context, H[i]) - } - H = labeledExtract(nil, SUITE_ID, "info_hash", info) - for i := 0; i < HASH_TYPE; i++ { - context = append(context, H[i]) - } - //H=labeledExtract(nil,SUITE_ID,"psk_hash",psk) - //secret:=labeledExtract(H,SUITE_ID,"secret",Z) - - secret := labeledExtract(Z, SUITE_ID, "secret", psk) - - key := labeledExpand(secret, SUITE_ID, "key", context, AESKEY) - nonce := labeledExpand(secret, SUITE_ID, "base_nonce", context, 12) - exp_secret := labeledExpand(secret, SUITE_ID, "exp", context, HASH_TYPE) - - return key, nonce, exp_secret -} diff --git a/nekryptology/pkg/core/curves/native/bls48581/mpin256.go b/nekryptology/pkg/core/curves/native/bls48581/mpin256.go deleted file mode 100644 index dd1d970..0000000 --- a/nekryptology/pkg/core/curves/native/bls48581/mpin256.go +++ /dev/null @@ -1,202 +0,0 @@ -/* - * Copyright (c) 2012-2020 MIRACL UK Ltd. 
- * - * This file is part of MIRACL Core - * (see https://github.com/miracl/ext.. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* MPIN 256-bit API Functions */ - -package bls48581 - -import "source.quilibrium.com/quilibrium/monorepo/nekryptology/pkg/core/curves/native/bls48581/ext" - -//import "fmt" - -const MFS int = int(MODBYTES) -const MGS int = int(MODBYTES) -const BAD_PARAMS int = -11 -const INVALID_POINT int = -14 -const WRONG_ORDER int = -18 -const BAD_PIN int = -19 - -/* Configure your PIN here */ - -const MAXPIN int32 = 10000 /* PIN less than this */ -const PBLEN int32 = 14 /* Number of bits in PIN */ - -func MPIN_HASH_ID(sha int, ID []byte) []byte { - return ext.GPhashit(ext.MC_SHA2, sha, int(MODBYTES), 0, nil, -1, ID) - //return mhashit(sha, 0, ID) -} - -func roundup(a int, b int) int { - return (((a)-1)/(b) + 1) -} - -func MPIN_ENCODE_TO_CURVE(DST []byte, ID []byte, HCID []byte) { - q := NewBIGints(Modulus) - k := q.Nbits() - r := NewBIGints(CURVE_Order) - m := r.Nbits() - L := roundup(k+roundup(m, 2), 8) - var fd = make([]byte, L) - OKM := ext.XMD_Expand(ext.MC_SHA2, HASH_TYPE, L, DST, ID) - - for j := 0; j < L; j++ { - fd[j] = OKM[j] - } - dx := DBIG_fromBytes(fd) - u := NewFPbig(dx.Mod(q)) - P := ECP_map2point(u) - - P.Cfp() - P.Affine() - P.ToBytes(HCID, false) -} - -/* create random secret S */ -func MPIN_RANDOM_GENERATE(rng *ext.RAND, S []byte) int { - r := NewBIGints(CURVE_Order) - s := Randtrunc(r, 16*AESKEY, rng) - 
s.ToBytes(S) - return 0 -} - -func MPIN_EXTRACT_PIN(CID []byte, pin int, TOKEN []byte) int { - P := ECP_fromBytes(TOKEN) - if P.Is_infinity() { - return INVALID_POINT - } - R := ECP_fromBytes(CID) - if R.Is_infinity() { - return INVALID_POINT - } - R = R.pinmul(int32(pin)%MAXPIN, PBLEN) - P.Sub(R) - P.ToBytes(TOKEN, false) - return 0 -} - -/* Implement step 2 on client side of MPin protocol */ -func MPIN_CLIENT_2(X []byte, Y []byte, SEC []byte) int { - r := NewBIGints(CURVE_Order) - P := ECP_fromBytes(SEC) - if P.Is_infinity() { - return INVALID_POINT - } - - px := FromBytes(X) - py := FromBytes(Y) - px.Add(py) - px.Mod(r) - - P = G1mul(P, px) - P.Neg() - P.ToBytes(SEC, false) - return 0 -} - -func MPIN_GET_CLIENT_SECRET(S []byte, IDHTC []byte, CST []byte) int { - s := FromBytes(S) - P := ECP_fromBytes(IDHTC) - if P.Is_infinity() { - return INVALID_POINT - } - G1mul(P, s).ToBytes(CST, false) - return 0 -} - -/* Implement step 1 on client side of MPin protocol */ -func MPIN_CLIENT_1(CID []byte, rng *ext.RAND, X []byte, pin int, TOKEN []byte, SEC []byte, xID []byte) int { - r := NewBIGints(CURVE_Order) - var x *BIG - if rng != nil { - x = Randtrunc(r, 16*AESKEY, rng) - x.ToBytes(X) - } else { - x = FromBytes(X) - } - - P := ECP_fromBytes(CID) - if P.Is_infinity() { - return INVALID_POINT - } - - T := ECP_fromBytes(TOKEN) - if T.Is_infinity() { - return INVALID_POINT - } - - W := P.pinmul(int32(pin)%MAXPIN, PBLEN) - T.Add(W) - - P = G1mul(P, x) - P.ToBytes(xID, false) - - T.ToBytes(SEC, false) - return 0 -} - -/* Extract Server Secret SST=S*Q where Q is fixed generator in G2 and S is master secret */ -func MPIN_GET_SERVER_SECRET(S []byte, SST []byte) int { - Q := ECP8_generator() - s := FromBytes(S) - Q = G2mul(Q, s) - Q.ToBytes(SST, false) - return 0 -} - -/* Implement step 2 of MPin protocol on server side */ -func MPIN_SERVER(HID []byte, Y []byte, SST []byte, xID []byte, mSEC []byte) int { - Q := ECP8_generator() - - sQ := ECP8_fromBytes(SST) - if sQ.Is_infinity() 
{ - return INVALID_POINT - } - - if xID == nil { - return BAD_PARAMS - } - R := ECP_fromBytes(xID) - if R.Is_infinity() { - return INVALID_POINT - } - y := FromBytes(Y) - if HID == nil { - return BAD_PARAMS - } - P := ECP_fromBytes(HID) - if P.Is_infinity() { - return INVALID_POINT - } - - P = G1mul(P, y) - P.Add(R) - R = ECP_fromBytes(mSEC) - if R.Is_infinity() { - return INVALID_POINT - } - - var g *FP48 - g = Ate2(Q, R, sQ, P) - g = Fexp(g) - - if !g.Isunity() { - return BAD_PIN - } - return 0 -} diff --git a/nekryptology/pkg/core/curves/native/bls48581/pair8.go b/nekryptology/pkg/core/curves/native/bls48581/pair8.go index 123f253..39f988f 100644 --- a/nekryptology/pkg/core/curves/native/bls48581/pair8.go +++ b/nekryptology/pkg/core/curves/native/bls48581/pair8.go @@ -21,111 +21,98 @@ package bls48581 +import ( + "arena" +) + //import "fmt" // Point doubling for pairings -func dbl(A *ECP8, AA *FP8, BB *FP8, CC *FP8) { - CC.copy(A.getx()) //X - YY := NewFP8copy(A.gety()) //Y - BB.copy(A.getz()) //Z - AA.copy(YY) //Y - AA.Mul(BB) //YZ - CC.Sqr() //X^2 - YY.Sqr() //Y^2 - BB.Sqr() //Z^2 +func dbl(A *ECP8, AA *FP8, BB *FP8, CC *FP8, mem *arena.Arena) { + CC.copy(A.getx()) //X + YY := NewFP8copy(A.gety(), mem) //Y + BB.copy(A.getz()) //Z + AA.copy(YY) //Y + AA.Mul(BB, mem) //YZ + CC.Sqr(mem) //X^2 + YY.Sqr(mem) //Y^2 + BB.Sqr(mem) //Z^2 - AA.Add(AA) - AA.Neg() + AA.Add(AA, mem) + AA.Neg(mem) AA.norm() //-2AA - AA.times_i() + AA.times_i(mem) sb := 3 * CURVE_B_I - BB.imul(sb) - CC.imul(3) - if SEXTIC_TWIST == D_TYPE { - YY.times_i() - CC.times_i() - } - if SEXTIC_TWIST == M_TYPE { - BB.times_i() - } - BB.Sub(YY) + BB.imul(sb, mem) + CC.imul(3, mem) + YY.times_i(mem) + CC.times_i(mem) + BB.Sub(YY, mem) BB.norm() - A.Dbl() + A.Dbl(mem) } // Point addition for pairings -func add(A *ECP8, B *ECP8, AA *FP8, BB *FP8, CC *FP8) { - AA.copy(A.getx()) // X1 - CC.copy(A.gety()) // Y1 - T1 := NewFP8copy(A.getz()) // Z1 - BB.copy(A.getz()) // Z1 +func add(A *ECP8, B *ECP8, AA *FP8, 
BB *FP8, CC *FP8, mem *arena.Arena) { + AA.copy(A.getx()) // X1 + CC.copy(A.gety()) // Y1 + T1 := NewFP8copy(A.getz(), mem) // Z1 + BB.copy(A.getz()) // Z1 - T1.Mul(B.gety()) // T1=Z1.Y2 - BB.Mul(B.getx()) // T2=Z1.X2 + T1.Mul(B.gety(), mem) // T1=Z1.Y2 + BB.Mul(B.getx(), mem) // T2=Z1.X2 - AA.Sub(BB) + AA.Sub(BB, mem) AA.norm() // X1=X1-Z1.X2 - CC.Sub(T1) + CC.Sub(T1, mem) CC.norm() // Y1=Y1-Z1.Y2 T1.copy(AA) // T1=X1-Z1.X2 - if SEXTIC_TWIST == M_TYPE { - AA.times_i() - AA.norm() - } + T1.Mul(B.gety(), mem) // T1=(X1-Z1.X2).Y2 - T1.Mul(B.gety()) // T1=(X1-Z1.X2).Y2 - - BB.copy(CC) // T2=Y1-Z1.Y2 - BB.Mul(B.getx()) // T2=(Y1-Z1.Y2).X2 - BB.Sub(T1) + BB.copy(CC) // T2=Y1-Z1.Y2 + BB.Mul(B.getx(), mem) // T2=(Y1-Z1.Y2).X2 + BB.Sub(T1, mem) BB.norm() // T2=(Y1-Z1.Y2).X2 - (X1-Z1.X2).Y2 - CC.Neg() + CC.Neg(mem) CC.norm() // Y1=-(Y1-Z1.Y2).Xs - A.Add(B) + A.Add(B, mem) } -func line(A *ECP8, B *ECP8, Qx *FP, Qy *FP) *FP48 { - AA := NewFP8() - BB := NewFP8() - CC := NewFP8() +func line(A *ECP8, B *ECP8, Qx *FP, Qy *FP, mem *arena.Arena) *FP48 { + AA := NewFP8(mem) + BB := NewFP8(mem) + CC := NewFP8(mem) var a *FP16 var b *FP16 var c *FP16 if A == B { - dbl(A, AA, BB, CC) + dbl(A, AA, BB, CC, mem) } else { - add(A, B, AA, BB, CC) + add(A, B, AA, BB, CC, mem) } - CC.tmul(Qx) - AA.tmul(Qy) + CC.tmul(Qx, mem) + AA.tmul(Qy, mem) - a = NewFP16fp8s(AA, BB) + a = NewFP16fp8s(AA, BB, mem) - if SEXTIC_TWIST == D_TYPE { - b = NewFP16fp8(CC) // L(0,1) | L(0,0) | L(1,0) - c = NewFP16() - } - if SEXTIC_TWIST == M_TYPE { - b = NewFP16() - c = NewFP16fp8(CC) - c.times_i() - } + b = NewFP16fp8(CC, mem) // L(0,1) | L(0,0) | L(1,0) + c = NewFP16(mem) - r := NewFP48fp16s(a, b, c) + r := NewFP48fp16s(a, b, c, mem) r.stype = FP_SPARSER return r } /* prepare ate parameter, n=6u+2 (BN) or n=u (BLS), n3=3*n */ -func lbits(n3 *BIG, n *BIG) int { - n.copy(NewBIGints(CURVE_Bnx)) +func lbits(n3 *BIG, n *BIG, mem *arena.Arena) int { + n.copy(NewBIGints(CURVE_Bnx, mem)) n3.copy(n) n3.pmul(3) n3.norm() 
@@ -133,40 +120,38 @@ func lbits(n3 *BIG, n *BIG) int { } /* prepare for multi-pairing */ -func Initmp() []*FP48 { +func Initmp(mem *arena.Arena) []*FP48 { var r []*FP48 for i := ATE_BITS - 1; i >= 0; i-- { - r = append(r, NewFP48int(1)) + r = append(r, NewFP48int(1, mem)) } return r } /* basic Miller loop */ -func Miller(r []*FP48) *FP48 { - res := NewFP48int(1) +func Miller(r []*FP48, mem *arena.Arena) *FP48 { + res := NewFP48int(1, mem) for i := ATE_BITS - 1; i >= 1; i-- { - res.Sqr() - res.ssmul(r[i]) + res.Sqr(mem) + res.ssmul(r[i], mem) r[i].zero() } - if SIGN_OF_X == NEGATIVEX { - res.conj() - } - res.ssmul(r[0]) + res.conj(mem) + res.ssmul(r[0], mem) r[0].zero() return res } // Store precomputed line details in an FP8 func pack(AA *FP8, BB *FP8, CC *FP8) *FP16 { - i := NewFP8copy(CC) - i.Invert(nil) - a := NewFP8copy(AA) - a.Mul(i) - b := NewFP8copy(BB) - b.Mul(i) - return NewFP16fp8s(a, b) + i := NewFP8copy(CC, nil) + i.Invert(nil, nil) + a := NewFP8copy(AA, nil) + a.Mul(i, nil) + b := NewFP8copy(BB, nil) + b.Mul(i, nil) + return NewFP16fp8s(a, b, nil) } // Unpack G2 line function details and include G1 @@ -175,52 +160,45 @@ func unpack(T *FP16, Qx *FP, Qy *FP) *FP48 { var b *FP16 var c *FP16 - a = NewFP16copy(T) - a.geta().tmul(Qy) - t := NewFP8fp(Qx) - if SEXTIC_TWIST == D_TYPE { - b = NewFP16fp8(t) - c = NewFP16() - } - if SEXTIC_TWIST == M_TYPE { - b = NewFP16() - c = NewFP16fp8(t) - c.times_i() - } - v := NewFP48fp16s(a, b, c) + a = NewFP16copy(T, nil) + a.geta().tmul(Qy, nil) + t := NewFP8fp(Qx, nil) + b = NewFP16fp8(t, nil) + c = NewFP16(nil) + v := NewFP48fp16s(a, b, c, nil) v.stype = FP_SPARSEST return v } func precomp(GV *ECP8) []*FP16 { - n := NewBIG() - n3 := NewBIG() - AA := NewFP8() - BB := NewFP8() - CC := NewFP8() + n := NewBIG(nil) + n3 := NewBIG(nil) + AA := NewFP8(nil) + BB := NewFP8(nil) + CC := NewFP8(nil) var bt int - P := NewECP8() + P := NewECP8(nil) P.Copy(GV) - A := NewECP8() + A := NewECP8(nil) A.Copy(P) - MP := NewECP8() + MP := 
NewECP8(nil) MP.Copy(P) - MP.Neg() + MP.Neg(nil) - nb := lbits(n3, n) + nb := lbits(n3, n, nil) var T []*FP16 for i := nb - 2; i >= 1; i-- { - dbl(A, AA, BB, CC) + dbl(A, AA, BB, CC, nil) T = append(T, pack(AA, BB, CC)) bt = n3.bit(i) - n.bit(i) if bt == 1 { - add(A, P, AA, BB, CC) + add(A, P, AA, BB, CC, nil) T = append(T, pack(AA, BB, CC)) } if bt == -1 { - add(A, MP, AA, BB, CC) + add(A, MP, AA, BB, CC, nil) T = append(T, pack(AA, BB, CC)) } } @@ -228,22 +206,22 @@ func precomp(GV *ECP8) []*FP16 { } func Another_pc(r []*FP48, T []*FP16, QV *ECP) { - n := NewBIG() - n3 := NewBIG() + n := NewBIG(nil) + n3 := NewBIG(nil) var lv, lv2 *FP48 var bt, j int - if QV.Is_infinity() { + if QV.Is_infinity(nil) { return } - Q := NewECP() + Q := NewECP(nil) Q.Copy(QV) - Q.Affine() - Qx := NewFPcopy(Q.getx()) - Qy := NewFPcopy(Q.gety()) + Q.Affine(nil) + Qx := NewFPcopy(Q.getx(), nil) + Qy := NewFPcopy(Q.gety(), nil) - nb := lbits(n3, n) + nb := lbits(n3, n, nil) j = 0 for i := nb - 2; i >= 1; i-- { lv = unpack(T[j], Qx, Qy) @@ -252,625 +230,452 @@ func Another_pc(r []*FP48, T []*FP16, QV *ECP) { if bt == 1 { lv2 = unpack(T[j], Qx, Qy) j += 1 - lv.smul(lv2) + lv.smul(lv2, nil) } if bt == -1 { lv2 = unpack(T[j], Qx, Qy) j += 1 - lv.smul(lv2) + lv.smul(lv2, nil) } - r[i].ssmul(lv) + r[i].ssmul(lv, nil) } } /* Accumulate another set of line functions for n-pairing */ -func Another(r []*FP48, P1 *ECP8, Q1 *ECP) { - n := NewBIG() - n3 := NewBIG() +func Another(r []*FP48, P1 *ECP8, Q1 *ECP, mem *arena.Arena) { + n := NewBIG(mem) + n3 := NewBIG(mem) var lv, lv2 *FP48 - if Q1.Is_infinity() { + if Q1.Is_infinity(mem) { return } // P is needed in affine form for line function, Q for (Qx,Qy) extraction - P := NewECP8() + P := NewECP8(mem) P.Copy(P1) - Q := NewECP() + Q := NewECP(mem) Q.Copy(Q1) - P.Affine() - Q.Affine() + P.Affine(mem) + Q.Affine(mem) - Qx := NewFPcopy(Q.getx()) - Qy := NewFPcopy(Q.gety()) + Qx := NewFPcopy(Q.getx(), mem) + Qy := NewFPcopy(Q.gety(), mem) - A := NewECP8() 
+ A := NewECP8(mem) A.Copy(P) - MP := NewECP8() + MP := NewECP8(mem) MP.Copy(P) - MP.Neg() + MP.Neg(mem) - nb := lbits(n3, n) + nb := lbits(n3, n, mem) for i := nb - 2; i >= 1; i-- { - lv = line(A, A, Qx, Qy) + lv = line(A, A, Qx, Qy, mem) bt := n3.bit(i) - n.bit(i) if bt == 1 { - lv2 = line(A, P, Qx, Qy) - lv.smul(lv2) + lv2 = line(A, P, Qx, Qy, mem) + lv.smul(lv2, mem) } if bt == -1 { - lv2 = line(A, MP, Qx, Qy) - lv.smul(lv2) + lv2 = line(A, MP, Qx, Qy, mem) + lv.smul(lv2, mem) } - r[i].ssmul(lv) + r[i].ssmul(lv, mem) } } /* Optimal R-ate pairing */ func Ate(P1 *ECP8, Q1 *ECP) *FP48 { - n := NewBIG() - n3 := NewBIG() + n := NewBIG(nil) + n3 := NewBIG(nil) var lv, lv2 *FP48 - if Q1.Is_infinity() { - return NewFP48int(1) + if Q1.Is_infinity(nil) { + return NewFP48int(1, nil) } - P := NewECP8() + P := NewECP8(nil) P.Copy(P1) - P.Affine() - Q := NewECP() + P.Affine(nil) + Q := NewECP(nil) Q.Copy(Q1) - Q.Affine() + Q.Affine(nil) - Qx := NewFPcopy(Q.getx()) - Qy := NewFPcopy(Q.gety()) + Qx := NewFPcopy(Q.getx(), nil) + Qy := NewFPcopy(Q.gety(), nil) - A := NewECP8() - r := NewFP48int(1) + A := NewECP8(nil) + r := NewFP48int(1, nil) A.Copy(P) - NP := NewECP8() + NP := NewECP8(nil) NP.Copy(P) - NP.Neg() + NP.Neg(nil) - nb := lbits(n3, n) + nb := lbits(n3, n, nil) for i := nb - 2; i >= 1; i-- { - r.Sqr() - lv = line(A, A, Qx, Qy) + r.Sqr(nil) + lv = line(A, A, Qx, Qy, nil) bt := n3.bit(i) - n.bit(i) if bt == 1 { - lv2 = line(A, P, Qx, Qy) - lv.smul(lv2) + lv2 = line(A, P, Qx, Qy, nil) + lv.smul(lv2, nil) } if bt == -1 { - lv2 = line(A, NP, Qx, Qy) - lv.smul(lv2) + lv2 = line(A, NP, Qx, Qy, nil) + lv.smul(lv2, nil) } - r.ssmul(lv) + r.ssmul(lv, nil) } - if SIGN_OF_X == NEGATIVEX { - r.conj() - } + r.conj(nil) return r } /* Optimal R-ate double pairing e(P,Q).e(R,S) */ func Ate2(P1 *ECP8, Q1 *ECP, R1 *ECP8, S1 *ECP) *FP48 { - n := NewBIG() - n3 := NewBIG() + n := NewBIG(nil) + n3 := NewBIG(nil) var lv, lv2 *FP48 - if Q1.Is_infinity() { + if Q1.Is_infinity(nil) { return 
Ate(R1, S1) } - if S1.Is_infinity() { + if S1.Is_infinity(nil) { return Ate(P1, Q1) } - P := NewECP8() + P := NewECP8(nil) P.Copy(P1) - P.Affine() - Q := NewECP() + P.Affine(nil) + Q := NewECP(nil) Q.Copy(Q1) - Q.Affine() - R := NewECP8() + Q.Affine(nil) + R := NewECP8(nil) R.Copy(R1) - R.Affine() - S := NewECP() + R.Affine(nil) + S := NewECP(nil) S.Copy(S1) - S.Affine() + S.Affine(nil) - Qx := NewFPcopy(Q.getx()) - Qy := NewFPcopy(Q.gety()) - Sx := NewFPcopy(S.getx()) - Sy := NewFPcopy(S.gety()) + Qx := NewFPcopy(Q.getx(), nil) + Qy := NewFPcopy(Q.gety(), nil) + Sx := NewFPcopy(S.getx(), nil) + Sy := NewFPcopy(S.gety(), nil) - A := NewECP8() - B := NewECP8() - r := NewFP48int(1) + A := NewECP8(nil) + B := NewECP8(nil) + r := NewFP48int(1, nil) A.Copy(P) B.Copy(R) - NP := NewECP8() + NP := NewECP8(nil) NP.Copy(P) - NP.Neg() - NR := NewECP8() + NP.Neg(nil) + NR := NewECP8(nil) NR.Copy(R) - NR.Neg() + NR.Neg(nil) - nb := lbits(n3, n) + nb := lbits(n3, n, nil) for i := nb - 2; i >= 1; i-- { - r.Sqr() - lv = line(A, A, Qx, Qy) - lv2 = line(B, B, Sx, Sy) - lv.smul(lv2) - r.ssmul(lv) + r.Sqr(nil) + lv = line(A, A, Qx, Qy, nil) + lv2 = line(B, B, Sx, Sy, nil) + lv.smul(lv2, nil) + r.ssmul(lv, nil) bt := n3.bit(i) - n.bit(i) if bt == 1 { - lv = line(A, P, Qx, Qy) - lv2 = line(B, R, Sx, Sy) - lv.smul(lv2) - r.ssmul(lv) + lv = line(A, P, Qx, Qy, nil) + lv2 = line(B, R, Sx, Sy, nil) + lv.smul(lv2, nil) + r.ssmul(lv, nil) } if bt == -1 { - lv = line(A, NP, Qx, Qy) - lv2 = line(B, NR, Sx, Sy) - lv.smul(lv2) - r.ssmul(lv) + lv = line(A, NP, Qx, Qy, nil) + lv2 = line(B, NR, Sx, Sy, nil) + lv.smul(lv2, nil) + r.ssmul(lv, nil) } } - if SIGN_OF_X == NEGATIVEX { - r.conj() - } + r.conj(nil) return r } /* final exponentiation - keep separate for multi-pairings and to avoid thrashing stack */ func Fexp(m *FP48) *FP48 { - f := NewFP2bigs(NewBIGints(Fra), NewBIGints(Frb)) - x := NewBIGints(CURVE_Bnx) - r := NewFP48copy(m) + mem := arena.NewArena() + f := NewFP2bigs(NewBIGints(Fra, mem), 
NewBIGints(Frb, mem), mem) + x := NewBIGints(CURVE_Bnx, mem) + r := NewFP48copy(m, nil) // var t1, t2 *FP48 /* Easy part of final exp */ - lv := NewFP48copy(r) + lv := NewFP48copy(r, mem) - lv.Invert() - r.conj() + lv.Invert(mem) + r.conj(mem) - r.Mul(lv) + r.Mul(lv, mem) lv.Copy(r) - r.frob(f, 8) - r.Mul(lv) + r.frob(f, 8, mem) + r.Mul(lv, mem) /* Hard part of final exp */ // See https://eprint.iacr.org/2020/875.pdf - y1 := NewFP48copy(r) - y1.uSqr() - y1.Mul(r) // y1=r^3 + y1 := NewFP48copy(r, mem) + y1.uSqr(mem) + y1.Mul(r, mem) // y1=r^3 - y0 := NewFP48copy(r.Pow(x)) - if SIGN_OF_X == NEGATIVEX { - y0.conj() - } - t0 := NewFP48copy(r) - t0.conj() + y0 := NewFP48copy(r.Pow(x, mem), mem) + y0.conj(mem) + t0 := NewFP48copy(r, mem) + t0.conj(mem) r.Copy(y0) - r.Mul(t0) + r.Mul(t0, mem) - y0.Copy(r.Pow(x)) - if SIGN_OF_X == NEGATIVEX { - y0.conj() - } + y0.Copy(r.Pow(x, mem)) + y0.conj(mem) t0.Copy(r) - t0.conj() + t0.conj(mem) r.Copy(y0) - r.Mul(t0) + r.Mul(t0, mem) // ^(x+p) - y0.Copy(r.Pow(x)) - if SIGN_OF_X == NEGATIVEX { - y0.conj() - } + y0.Copy(r.Pow(x, mem)) + y0.conj(mem) t0.Copy(r) - t0.frob(f, 1) + t0.frob(f, 1, mem) r.Copy(y0) - r.Mul(t0) + r.Mul(t0, mem) // ^(x^2+p^2) - y0.Copy(r.Pow(x)) - y0.Copy(y0.Pow(x)) + y0.Copy(r.Pow(x, mem)) + y0.Copy(y0.Pow(x, mem)) t0.Copy(r) - t0.frob(f, 2) + t0.frob(f, 2, mem) r.Copy(y0) - r.Mul(t0) + r.Mul(t0, mem) // ^(x^4+p^4) - y0.Copy(r.Pow(x)) - y0.Copy(y0.Pow(x)) - y0.Copy(y0.Pow(x)) - y0.Copy(y0.Pow(x)) + y0.Copy(r.Pow(x, mem)) + y0.Copy(y0.Pow(x, mem)) + y0.Copy(y0.Pow(x, mem)) + y0.Copy(y0.Pow(x, mem)) t0.Copy(r) - t0.frob(f, 4) + t0.frob(f, 4, mem) r.Copy(y0) - r.Mul(t0) + r.Mul(t0, mem) // ^(x^8+p^8-1) - y0.Copy(r.Pow(x)) - y0.Copy(y0.Pow(x)) - y0.Copy(y0.Pow(x)) - y0.Copy(y0.Pow(x)) - y0.Copy(y0.Pow(x)) - y0.Copy(y0.Pow(x)) - y0.Copy(y0.Pow(x)) - y0.Copy(y0.Pow(x)) + y0.Copy(r.Pow(x, mem)) + y0.Copy(y0.Pow(x, mem)) + y0.Copy(y0.Pow(x, mem)) + y0.Copy(y0.Pow(x, mem)) + y0.Copy(y0.Pow(x, mem)) + y0.Copy(y0.Pow(x, 
mem)) + y0.Copy(y0.Pow(x, mem)) + y0.Copy(y0.Pow(x, mem)) t0.Copy(r) - t0.frob(f, 8) - y0.Mul(t0) + t0.frob(f, 8, mem) + y0.Mul(t0, mem) t0.Copy(r) - t0.conj() + t0.conj(mem) r.Copy(y0) - r.Mul(t0) + r.Mul(t0, mem) - r.Mul(y1) - r.reduce() + r.Mul(y1, mem) + r.reduce(mem) + mem.Free() - /* - // Ghamman & Fouotsa Method - - t7 := NewFP48copy(r) - t7.usqr() - - if x.parity() == 1 { - t2 = r.Pow(x) - t1 = NewFP48copy(t2) - t1.usqr() - t2 = t2.Pow(x) - } else { - t1 = t7.Pow(x) - x.fshr(1) - t2 = t1.Pow(x) - x.fshl(1) - } - - if SIGN_OF_X == NEGATIVEX { - t1.conj() - } - - t3 := NewFP48copy(t1) - t3.conj() - t2.Mul(t3) - t2.Mul(r) - - r.Mul(t7) - - t1 = t2.Pow(x) - if SIGN_OF_X == NEGATIVEX { - t1.conj() - } - t3.Copy(t1) - t3.frob(f, 14) - r.Mul(t3) - t1 = t1.Pow(x) - if SIGN_OF_X == NEGATIVEX { - t1.conj() - } - - t3.Copy(t1) - t3.frob(f, 13) - r.Mul(t3) - t1 = t1.Pow(x) - if SIGN_OF_X == NEGATIVEX { - t1.conj() - } - - t3.Copy(t1) - t3.frob(f, 12) - r.Mul(t3) - t1 = t1.Pow(x) - if SIGN_OF_X == NEGATIVEX { - t1.conj() - } - - t3.Copy(t1) - t3.frob(f, 11) - r.Mul(t3) - t1 = t1.Pow(x) - if SIGN_OF_X == NEGATIVEX { - t1.conj() - } - - t3.Copy(t1) - t3.frob(f, 10) - r.Mul(t3) - t1 = t1.Pow(x) - if SIGN_OF_X == NEGATIVEX { - t1.conj() - } - - t3.Copy(t1) - t3.frob(f, 9) - r.Mul(t3) - t1 = t1.Pow(x) - if SIGN_OF_X == NEGATIVEX { - t1.conj() - } - - t3.Copy(t1) - t3.frob(f, 8) - r.Mul(t3) - t1 = t1.Pow(x) - if SIGN_OF_X == NEGATIVEX { - t1.conj() - } - - t3.Copy(t2) - t3.conj() - t1.Mul(t3) - t3.Copy(t1) - t3.frob(f, 7) - r.Mul(t3) - t1 = t1.Pow(x) - if SIGN_OF_X == NEGATIVEX { - t1.conj() - } - - t3.Copy(t1) - t3.frob(f, 6) - r.Mul(t3) - t1 = t1.Pow(x) - if SIGN_OF_X == NEGATIVEX { - t1.conj() - } - - t3.Copy(t1) - t3.frob(f, 5) - r.Mul(t3) - t1 = t1.Pow(x) - if SIGN_OF_X == NEGATIVEX { - t1.conj() - } - - t3.Copy(t1) - t3.frob(f, 4) - r.Mul(t3) - t1 = t1.Pow(x) - if SIGN_OF_X == NEGATIVEX { - t1.conj() - } - - t3.Copy(t1) - t3.frob(f, 3) - r.Mul(t3) - t1 = t1.Pow(x) - if 
SIGN_OF_X == NEGATIVEX { - t1.conj() - } - - t3.Copy(t1) - t3.frob(f, 2) - r.Mul(t3) - t1 = t1.Pow(x) - if SIGN_OF_X == NEGATIVEX { - t1.conj() - } - - t3.Copy(t1) - t3.frob(f, 1) - r.Mul(t3) - t1 = t1.Pow(x) - if SIGN_OF_X == NEGATIVEX { - t1.conj() - } - - r.Mul(t1) - t2.frob(f, 15) - r.Mul(t2) - - r.reduce() - */ return r } /* GLV method */ -func glv(ee *BIG) []*BIG { +func glv(ee *BIG, mem *arena.Arena) []*BIG { var u []*BIG - q := NewBIGints(CURVE_Order) - x := NewBIGints(CURVE_Bnx) + q := NewBIGints(CURVE_Order, mem) + x := NewBIGints(CURVE_Bnx, mem) x2 := smul(x, x) x = smul(x2, x2) x2 = smul(x, x) bd := uint(q.nbits() - x2.nbits()) - u = append(u, NewBIGcopy(ee)) - u[0].ctmod(x2, bd) - u = append(u, NewBIGcopy(ee)) - u[1].ctdiv(x2, bd) + u = append(u, NewBIGcopy(ee, mem)) + u[0].ctmod(x2, bd, mem) + u = append(u, NewBIGcopy(ee, mem)) + u[1].ctdiv(x2, bd, mem) u[1].rsub(q) return u } /* Galbraith & Scott Method */ -func gs(ee *BIG) []*BIG { +func gs(ee *BIG, mem *arena.Arena) []*BIG { var u []*BIG - q := NewBIGints(CURVE_Order) - x := NewBIGints(CURVE_Bnx) + q := NewBIGints(CURVE_Order, mem) + x := NewBIGints(CURVE_Bnx, mem) bd := uint(q.nbits() - x.nbits()) - w := NewBIGcopy(ee) + w := NewBIGcopy(ee, mem) for i := 0; i < 15; i++ { - u = append(u, NewBIGcopy(w)) - u[i].ctmod(x, bd) - w.ctdiv(x, bd) - } - u = append(u, NewBIGcopy(w)) - if SIGN_OF_X == NEGATIVEX { - u[1].copy(Modneg(u[1], q)) - u[3].copy(Modneg(u[3], q)) - u[5].copy(Modneg(u[5], q)) - u[7].copy(Modneg(u[7], q)) - u[9].copy(Modneg(u[9], q)) - u[11].copy(Modneg(u[11], q)) - u[13].copy(Modneg(u[13], q)) - u[15].copy(Modneg(u[15], q)) + u = append(u, NewBIGcopy(w, mem)) + u[i].ctmod(x, bd, mem) + w.ctdiv(x, bd, mem) } + u = append(u, NewBIGcopy(w, mem)) + u[1].copy(Modneg(u[1], q, mem)) + u[3].copy(Modneg(u[3], q, mem)) + u[5].copy(Modneg(u[5], q, mem)) + u[7].copy(Modneg(u[7], q, mem)) + u[9].copy(Modneg(u[9], q, mem)) + u[11].copy(Modneg(u[11], q, mem)) + u[13].copy(Modneg(u[13], q, mem)) + 
u[15].copy(Modneg(u[15], q, mem)) return u } /* Multiply P by e in group G1 */ -func G1mul(P *ECP, e *BIG) *ECP { +func G1mul(P *ECP, e *BIG, mem *arena.Arena) *ECP { var R *ECP - q := NewBIGints(CURVE_Order) - ee := NewBIGcopy(e) - ee.Mod(q) - if USE_GLV { - R = NewECP() - R.Copy(P) - Q := NewECP() - Q.Copy(P) - Q.Affine() + q := NewBIGints(CURVE_Order, mem) + ee := NewBIGcopy(e, mem) + ee.Mod(q, mem) + R = NewECP(mem) + R.Copy(P) + Q := NewECP(mem) + Q.Copy(P) + Q.Affine(mem) - cru := NewFPbig(NewBIGints(CRu)) - t := NewBIGint(0) - u := glv(ee) - Q.getx().Mul(cru) + cru := NewFPbig(NewBIGints(CRu, mem), mem) + t := NewBIGint(0, mem) + u := glv(ee, mem) + Q.getx().Mul(cru, mem) - np := u[0].nbits() - t.copy(Modneg(u[0], q)) - nn := t.nbits() - if nn < np { - u[0].copy(t) - R.Neg() - } - - np = u[1].nbits() - t.copy(Modneg(u[1], q)) - nn = t.nbits() - if nn < np { - u[1].copy(t) - Q.Neg() - } - u[0].norm() - u[1].norm() - R = R.Mul2(u[0], Q, u[1]) - - } else { - R = P.clmul(e, q) + np := u[0].nbits() + t.copy(Modneg(u[0], q, mem)) + nn := t.nbits() + if nn < np { + u[0].copy(t) + R.Neg(mem) } + + np = u[1].nbits() + t.copy(Modneg(u[1], q, mem)) + nn = t.nbits() + if nn < np { + u[1].copy(t) + Q.Neg(mem) + } + u[0].norm() + u[1].norm() + R = R.Mul2(u[0], Q, u[1], mem) + return R } /* Multiply P by e in group G2 */ -func G2mul(P *ECP8, e *BIG) *ECP8 { +func G2mul(P *ECP8, e *BIG, mem *arena.Arena) *ECP8 { var R *ECP8 - q := NewBIGints(CURVE_Order) - ee := NewBIGcopy(e) - ee.Mod(q) - if USE_GS_G2 { - var Q []*ECP8 + q := NewBIGints(CURVE_Order, mem) + ee := NewBIGcopy(e, mem) + ee.Mod(q, mem) + var Q []*ECP8 - F := ECP8_frob_constants() - u := gs(ee) + F := ECP8_frob_constants() + u := gs(ee, mem) - t := NewBIGint(0) + t := NewBIGint(0, mem) - Q = append(Q, NewECP8()) - Q[0].Copy(P) - for i := 1; i < 16; i++ { - Q = append(Q, NewECP8()) - Q[i].Copy(Q[i-1]) - Q[i].frob(F, 1) - } - for i := 0; i < 16; i++ { - np := u[i].nbits() - t.copy(Modneg(u[i], q)) - nn := 
t.nbits() - if nn < np { - u[i].copy(t) - Q[i].Neg() - } - u[i].norm() - } - - R = Mul16(Q, u) - - } else { - R = P.Mul(e) + Q = append(Q, NewECP8(mem)) + Q[0].Copy(P) + for i := 1; i < 16; i++ { + Q = append(Q, NewECP8(mem)) + Q[i].Copy(Q[i-1]) + Q[i].frob(F, 1) } + for i := 0; i < 16; i++ { + np := u[i].nbits() + t.copy(Modneg(u[i], q, mem)) + nn := t.nbits() + if nn < np { + u[i].copy(t) + Q[i].Neg(mem) + } + u[i].norm() + } + + R = Mul16(Q, u, mem) return R } /* f=f^e */ /* Note that this method requires a lot of RAM! */ -func GTpow(d *FP48, e *BIG) *FP48 { - var r *FP48 - q := NewBIGints(CURVE_Order) - ee := NewBIGcopy(e) - ee.Mod(q) - if USE_GS_GT { - var g []*FP48 - f := NewFP2bigs(NewBIGints(Fra), NewBIGints(Frb)) - t := NewBIGint(0) +// func GTpow(d *FP48, e *BIG) *FP48 { +// var r *FP48 +// q := NewBIGints(CURVE_Order) +// ee := NewBIGcopy(e) +// ee.Mod(q) +// if USE_GS_GT { +// var g []*FP48 +// f := NewFP2bigs(NewBIGints(Fra), NewBIGints(Frb)) +// t := NewBIGint(0) - u := gs(ee) +// u := gs(ee) - g = append(g, NewFP48copy(d)) - for i := 1; i < 16; i++ { - g = append(g, NewFP48()) - g[i].Copy(g[i-1]) - g[i].frob(f, 1) - } - for i := 0; i < 16; i++ { - np := u[i].nbits() - t.copy(Modneg(u[i], q)) - nn := t.nbits() - if nn < np { - u[i].copy(t) - g[i].conj() - } - u[i].norm() - } - r = pow16(g, u) - } else { - r = d.Pow(ee) - } - return r -} +// g = append(g, NewFP48copy(d)) +// for i := 1; i < 16; i++ { +// g = append(g, NewFP48()) +// g[i].Copy(g[i-1]) +// g[i].frob(f, 1) +// } +// for i := 0; i < 16; i++ { +// np := u[i].nbits() +// t.copy(Modneg(u[i], q)) +// nn := t.nbits() +// if nn < np { +// u[i].copy(t) +// g[i].conj() +// } +// u[i].norm() +// } +// r = pow16(g, u) +// } else { +// r = d.Pow(ee) +// } +// return r +// } /* test G1 group membership */ -func G1member(P *ECP) bool { - if P.Is_infinity() { +func G1member(P *ECP, mem *arena.Arena) bool { + if P.Is_infinity(mem) { return false } - x := NewBIGints(CURVE_Bnx) - cru := 
NewFPbig(NewBIGints(CRu)) - W := NewECP() + x := NewBIGints(CURVE_Bnx, mem) + cru := NewFPbig(NewBIGints(CRu, mem), mem) + W := NewECP(mem) W.Copy(P) - W.getx().Mul(cru) - T := P.lmul(x) + W.getx().Mul(cru, mem) + T := P.lmul(x, mem, mem) if P.Equals(T) { return false } // P is of low order - T = T.Mul(x) - T = T.Mul(x) - T = T.Mul(x) - T = T.Mul(x) - T = T.Mul(x) - T = T.Mul(x) - T = T.Mul(x) - T.Neg() + T = T.Mul(x, mem, mem) + T = T.Mul(x, mem, mem) + T = T.Mul(x, mem, mem) + T = T.Mul(x, mem, mem) + T = T.Mul(x, mem, mem) + T = T.Mul(x, mem, mem) + T = T.Mul(x, mem, mem) + T.Neg(mem) if !W.Equals(T) { return false } @@ -889,19 +694,17 @@ func G1member(P *ECP) bool { } /* test G2 group membership */ -func G2member(P *ECP8) bool { - if P.Is_infinity() { +func G2member(P *ECP8, mem *arena.Arena) bool { + if P.Is_infinity(mem) { return false } F := ECP8_frob_constants() - x := NewBIGints(CURVE_Bnx) - W := NewECP8() + x := NewBIGints(CURVE_Bnx, mem) + W := NewECP8(mem) W.Copy(P) W.frob(F, 1) - T := P.Mul(x) - if SIGN_OF_X == NEGATIVEX { - T.Neg() - } + T := P.Mul(x, mem) + T.Neg(mem) /* R:=NewECP8(); R.Copy(W) R.frob(F,1) @@ -928,20 +731,20 @@ func GTcyclotomic(m *FP48) bool { if m.Isunity() { return false } - r := NewFP48copy(m) - r.conj() - r.Mul(m) + r := NewFP48copy(m, nil) + r.conj(nil) + r.Mul(m, nil) if !r.Isunity() { return false } - f := NewFP2bigs(NewBIGints(Fra), NewBIGints(Frb)) + f := NewFP2bigs(NewBIGints(Fra, nil), NewBIGints(Frb, nil), nil) r.Copy(m) - r.frob(f, 8) - w := NewFP48copy(r) - w.frob(f, 8) - w.Mul(m) + r.frob(f, 8, nil) + w := NewFP48copy(r, nil) + w.frob(f, 8, nil) + w.Mul(m, nil) if !w.Equals(r) { return false } @@ -953,16 +756,14 @@ func GTmember(m *FP48) bool { if !GTcyclotomic(m) { return false } - f := NewFP2bigs(NewBIGints(Fra), NewBIGints(Frb)) - x := NewBIGints(CURVE_Bnx) + f := NewFP2bigs(NewBIGints(Fra, nil), NewBIGints(Frb, nil), nil) + x := NewBIGints(CURVE_Bnx, nil) - r := NewFP48copy(m) - r.frob(f, 1) - t := m.Pow(x) + r := 
NewFP48copy(m, nil) + r.frob(f, 1, nil) + t := m.Pow(x, nil) - if SIGN_OF_X == NEGATIVEX { - t.conj() - } + t.conj(nil) if !r.Equals(t) { return false } diff --git a/nekryptology/pkg/core/curves/native/bls48581/rom.go b/nekryptology/pkg/core/curves/native/bls48581/rom_32.go similarity index 99% rename from nekryptology/pkg/core/curves/native/bls48581/rom.go rename to nekryptology/pkg/core/curves/native/bls48581/rom_32.go index 7e0400a..c6f7069 100644 --- a/nekryptology/pkg/core/curves/native/bls48581/rom.go +++ b/nekryptology/pkg/core/curves/native/bls48581/rom_32.go @@ -1,3 +1,5 @@ +//go:build js && wasm + /* * Copyright (c) 2012-2020 MIRACL UK Ltd. * diff --git a/nekryptology/pkg/core/curves/native/bls48581/rom_64.go b/nekryptology/pkg/core/curves/native/bls48581/rom_64.go new file mode 100644 index 0000000..e2adcbe --- /dev/null +++ b/nekryptology/pkg/core/curves/native/bls48581/rom_64.go @@ -0,0 +1,77 @@ +//go:build !js && !wasm + +/* + * Copyright (c) 2012-2020 MIRACL UK Ltd. + * + * This file is part of MIRACL Core + * (see https://github.com/miracl/core). + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* Fixed Data in ROM - Field and Curve parameters */ + +package bls48581 + +// Base Bits= 60 +var Modulus = [...]Chunk{0xEDC154E6565912B, 0x8FDF721A4A48AC3, 0x7A5513170EE0A57, 0x394F4736DAF6836, 0xAF6E082ACD9CD30, 0xF3975444A48AE43, 0x22131BB3BE6C0F1, 0x12A0056E84F8D1, 0x76F313824E31D47, 0x1280F73FF34} +var ROI = [...]Chunk{0xEDC154E6565912A, 0x8FDF721A4A48AC3, 0x7A5513170EE0A57, 0x394F4736DAF6836, 0xAF6E082ACD9CD30, 0xF3975444A48AE43, 0x22131BB3BE6C0F1, 0x12A0056E84F8D1, 0x76F313824E31D47, 0x1280F73FF34} +var R2modp = [...]Chunk{0x79868479F1B5833, 0xFB6EBA8FCB82D07, 0x9CC8A7F1FD84C7F, 0x402C51CF5CC3CBB, 0x3F3114F078502C, 0xFC90829BDC8336E, 0xC7BE91DE9CA8EED, 0xD4D273BB17BFADB, 0x6EC7C9A81E792CA, 0x1DC317A6E4} +var SQRTm3 = [...]Chunk{0x51EDFC2A1D65A0A, 0xD62DAA292D8CDBF, 0x24112478269D616, 0x6C25D3CABF8AD71, 0xC8E9B16B5D3E4CD, 0xF50A03B738960EE, 0x1A664376FED4343, 0xBFFD8FB8925AE06, 0x600908C6A28DEAA, 0x1280F73F9A7} + +const MConst Chunk = 0x148B81FC39D5A7D + +var Fra = [...]Chunk{0x62EB6CFE42AEB25, 0xDB41942760AD3F9, 0xA7DF2570715ECE4, 0x90377B51208AC0F, 0x6848493E1C8C418, 0xF496307E298187E, 0x58740E3CAFD6B62, 0xF6067D047983E78, 0x49FA75CD7E73E55, 0xFD30DB501} +var Frb = [...]Chunk{0x62EB6CFE42AEB25, 0xDB41942760AD3F9, 0xA7DF2570715ECE4, 0x90377B51208AC0F, 0x6848493E1C8C418, 0xF496307E298187E, 0x58740E3CAFD6B62, 0xF6067D047983E78, 0x49FA75CD7E73E55, 0xFD30DB501} +var TWK = [...]Chunk{0x7B433D25F426953, 0xACE45923B9863D, 0xC28BBDFA2D37E16, 0x62FFCC8AFB4BC18, 0x661B4392F002C4F, 0x2ED27E951A14781, 0x670A6683B853246, 0xAEB8C9BA138A075, 0xC10075769CDDD9E, 0x3A65A537B} + +//*** rom curve parameters ***** +// Base Bits= 60 +// Ate Bits= 33 +// G2 Table size= 36 + +const CURVE_Cof_I int = 0 + +var CURVE_Cof = [...]Chunk{0x140000382, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} + +const CURVE_B_I int = 1 + +var CURVE_B = [...]Chunk{0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +var CURVE_Order = [...]Chunk{0x8A5FE6FCD671C01, 0xBE599467C24DA11, 
0xC7CD0562303C4CC, 0x9D34C4C92016A85, 0xBC972C2E6E74196, 0x3F0B3CBE003FAD6, 0x615C0D6C635387A, 0xE2885E233A9CCC1, 0x2386F8A925, 0x0} +var CURVE_Gx = [...]Chunk{0xBCE8732315AF640, 0x74DA5D3A1E6D8C3, 0x57DB368B11786CB, 0x665D859236EBDBC, 0x46A9DF6F9645847, 0xEDFFB9F75445505, 0xE86868CF61ABDBA, 0x93F860DE3F257E0, 0x40F2BAF2B73DF1E, 0x2AF59B7AC3} +var CURVE_Gy = [...]Chunk{0xDBB5DE3E2587A70, 0xF37AEF7B926B576, 0xF77C2876D1B2E35, 0x78584C3EF22F487, 0xFFB98AEE53E80F6, 0xD41B720EF7BB7BE, 0xFEB8A52E991279D, 0xB398A488A553C9E, 0x31F91F86B3A2D1F, 0xCEFDA44F65} +var CURVE_HTPC = [...]Chunk{0x393F0BE031193EC, 0xC28896440758243, 0xDBE4AA8E70D4620, 0x6B27BD55EFD560E, 0x24A9624BEECD070, 0xE2626AD7C53B361, 0xDD845A98030C755, 0x29389B4E6A62C2D, 0x5AF94F05D8A9FD4, 0x92348CD5DC} + +var CURVE_Bnx = [...]Chunk{0x140000381, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0} +var CRu = [...]Chunk{0x4DE9AC5E1C79B90, 0x5CD8E3F88E5DE82, 0xAB21F74F7421A20, 0x6694B9B60DB5D62, 0x73422B5FB82F431, 0xFF46A846B5FA6AA, 0x83D66C1E5FCBED6, 0x2096384F2AFA565, 0x8B75055DD5D1F4E, 0x2C6} +var CURVE_Pxaaa = [...]Chunk{0x34FD0B4ACE8BFAB, 0xB79766322154DEC, 0x4D80491F510317, 0x3CA0612F4005030, 0xBAAD1A8C42281A6, 0x3A2EF156C46FF79, 0x344DBCCB7DE64DB, 0x2775DEBABBEFC70, 0x71E4A38237FA45A, 0x5D615D9A78} +var CURVE_Pxaab = [...]Chunk{0x669B36676B47C57, 0x5556A01AFA143F1, 0x7630D979630FFD7, 0x6AFFA62504F0C3C, 0xABFEDF16214A7, 0x12307F4E1C3943A, 0xE1623E9526F6DA, 0xBC07E8B22BB6D98, 0x258512069B0E86A, 0x7C4973ECE2} +var CURVE_Pxaba = [...]Chunk{0x488156CA55A3E6A, 0xEF4CDED6B3F0B46, 0xCBDFBB879D5FEA8, 0x66F0D2A6D55F028, 0xC1DBD19242FFAE7, 0xCCBAB5AB6860161, 0xAE237CA7A6D6957, 0xAD83BC73A8A6CA9, 0xF1334E1B2EA1853, 0x1FCCC70198} +var CURVE_Pxabb = [...]Chunk{0x9A7033CBB7FEAFE, 0x10B8CB4E80BC3F0, 0x1C5257C200CA523, 0x43B1B279B9468C3, 0x5F63E1C776E6EC1, 0x393F8BE0CC218A9, 0x62F3E5821B7B92A, 0x54D4BFE8F5985AC, 0xEB6185C78D80129, 0xBE2218C25C} +var CURVE_Pxbaa = [...]Chunk{0x39C3A1C53F8CCE5, 0x5B5F746C9D4CBB7, 
0xD55FC1889AA80C6, 0xEF492AE589274FA, 0x9E48199D5AC10B2, 0xC5805386699981F, 0xB1642B5675FF0E7, 0xA9DD63007C675D0, 0x35913A3C598E4CA, 0x38B91C600B} +var CURVE_Pxbab = [...]Chunk{0x2004D914A3C093A, 0x7960910FCE3370F, 0xA9F177612F097FC, 0x40B9C0B15DD7595, 0x3835D28997EB57B, 0x7BB037418181DF6, 0xEF0977A3D1A5867, 0xCDA088F7B8F35DC, 0x738603F1311E4E, 0xC96C7797EB} +var CURVE_Pxbba = [...]Chunk{0x41607E60750E057, 0x4B5B0E205C3354E, 0xCBE4324C22D6333, 0xAA5EFCF3432AAD1, 0xF293B13CED0FD0C, 0xA2C0B7A449CEF11, 0x9D13852B6DB908B, 0x8AEE660DEA41B3, 0x61EE3F0197A4989, 0xB9B7951C60} +var CURVE_Pxbbb = [...]Chunk{0xE19DA00FBC6AE34, 0x6AF2FC9E97C3F84, 0x9BD6AEBF9FC44E5, 0x90B7E2B0D458547, 0xA93F29CFF364A71, 0x719728A7F9F8CFC, 0xFAF47B5211CF741, 0x4AAA2B1E5D7A9DE, 0x2BDEC5282624C4F, 0x827D5C22FB} +var CURVE_Pyaaa = [...]Chunk{0x3EDD3FE4D2D7971, 0x45012AB12C0FF32, 0x9ABF77EEA6D6590, 0x336D8AE5163C159, 0x35AFA27748D90F7, 0xBFC435FAAB09062, 0x59A577E6F3B39E, 0x2F3024B918B4238, 0x75B5DFA49721645, 0xEB53356C3} +var CURVE_Pyaab = [...]Chunk{0x1471DB936CD5665, 0x8B423525FFC7B11, 0x2FA097D760E2E58, 0xD1892AB24E1DD21, 0x6B243B1F192C5C3, 0x64732FCBF3AFB09, 0xA325E6FBA01D729, 0x5FCADC2B75A422B, 0xE0FF144DA653181, 0x284DC75979} +var CURVE_Pyaba = [...]Chunk{0x8332A526A2A8474, 0xBC7C46FC3B8FDE6, 0x1D35D51A652269C, 0x36CA3295E5E2F0C, 0xC99D0E904115155, 0xD370514475F7D5, 0x216D5B119D3A48, 0x67669EF2C2FC503, 0x8523E421EFB703, 0xB36A201DD0} +var CURVE_Pyabb = [...]Chunk{0x6213DA92841589D, 0xB3D8B8A1E533731, 0x7BDA503EE5E578F, 0x817742770BA10D6, 0x224333FA40DCED2, 0x10E122D2742C89B, 0x60DCEE23DD8B0E7, 0x78762B1C2CDED33, 0xEDC0688223FBBD4, 0xAEC25A4621} +var CURVE_Pybaa = [...]Chunk{0x47831F982E50137, 0x857FDDDFCF7A43F, 0x30135945D137B08, 0xCA4E512B64F59F4, 0x7FA238CDCE8A1E2, 0x5F1129857ED85C7, 0xB43DD93B5A95980, 0x88325A2554DC541, 0xA9C46916503FA5A, 0xD209D5A223} +var CURVE_Pybab = [...]Chunk{0x4EEDC58CF90BEE4, 0xA59ED8226CF3A59, 0xFC198CAA72B679D, 0xF47C180D139E3AA, 0xE8C270841F6824, 
0x55AB7504FA8342, 0xB16722B589D82E2, 0xD537B90421AD66E, 0x36B7A513D339D5A, 0x7D0D037457} +var CURVE_Pybba = [...]Chunk{0xD41FAEAFEB23986, 0xE884017D9AA62B3, 0x40FA639F53DCCC9, 0xAB8C74B2618B5BB, 0x5AE3A2864F22C1F, 0xE4C819A6DF98F42, 0xC0841B064155F14, 0xD17AF8A006F364F, 0xE65EA25C2D05DFD, 0x896767811B} +var CURVE_Pybbb = [...]Chunk{0x667FFCB732718B6, 0x5AC66E84069C55D, 0xD8C4AB33F748E, 0x333EC7192054173, 0x8E69C31E97E1AD0, 0xEF8ECA9A9533A3F, 0x6BE8E50C87549B6, 0x4F981B5E068F140, 0x9029D393A5C07E8, 0x35E2524FF8} + +//var CURVE_W=[2][10]Chunk {{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0},{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0}} +//var CURVE_SB=[2][2][10]Chunk {{{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0},{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0}},{{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0},{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0}}} +//var CURVE_WB=[4][10]Chunk {{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0},{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0},{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0},{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0}} +//var CURVE_BB=[4][4][10]Chunk {{{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0},{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0},{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0},{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0}},{{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0},{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0},{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0},{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0}},{{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0},{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0},{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0},{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0}},{{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0},{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0},{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0},{0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0}}} diff --git a/nekryptology/pkg/vdf/vdf.go b/nekryptology/pkg/vdf/vdf.go index 286d563..a5536b4 100644 --- a/nekryptology/pkg/vdf/vdf.go +++ b/nekryptology/pkg/vdf/vdf.go @@ -18,7 +18,7 @@ type VDF struct { finished bool } -//size of long integers 
in quadratic function group +// size of long integers in quadratic function group const sizeInBits = 2048 // New create a new instance of VDF. @@ -53,12 +53,31 @@ func (vdf *VDF) Execute() { vdf.finished = true } +func (vdf *VDF) ExecuteIteration(x_blob []byte) { + vdf.finished = false + + yBuf, proofBuf := GenerateVDFIteration(vdf.input[:], x_blob, vdf.difficulty, sizeInBits) + + copy(vdf.output[:], yBuf) + copy(vdf.output[258:], proofBuf) + + go func() { + vdf.outputChan <- vdf.output + }() + + vdf.finished = true +} + // Verify runs the verification of generated proof // currently on i7-6700K, verification takes about 350 ms func (vdf *VDF) Verify(proof [516]byte) bool { return VerifyVDF(vdf.input[:], proof[:], vdf.difficulty, sizeInBits) } +func (vdf *VDF) VerifyIteration(x_blob [258]byte, proof [516]byte, iterations uint32) bool { + return VerifyVDFIteration(vdf.input[:], x_blob[:], proof[:], vdf.difficulty, sizeInBits) +} + // IsFinished returns whether the vdf execution is finished or not. func (vdf *VDF) IsFinished() bool { return vdf.finished diff --git a/nekryptology/pkg/vdf/wesolowski.go b/nekryptology/pkg/vdf/wesolowski.go index 139b233..2051cc3 100644 --- a/nekryptology/pkg/vdf/wesolowski.go +++ b/nekryptology/pkg/vdf/wesolowski.go @@ -16,8 +16,8 @@ import ( "source.quilibrium.com/quilibrium/monorepo/nekryptology/pkg/core/iqc" ) -//Creates L and k parameters from papers, based on how many iterations need to be -//performed, and how much memory should be used. +// Creates L and k parameters from papers, based on how many iterations need to be +// performed, and how much memory should be used. 
func approximateParameters(T uint32) (int, int, int) { //log_memory = math.log(10000000, 2) log_memory := math.Log(10000000) / math.Log(2) @@ -86,6 +86,20 @@ func GenerateVDFWithStopChan(seed []byte, iterations, int_size_bits uint32, stop } } +func GenerateVDFIteration(seed, x_blob []byte, iterations, int_size_bits uint32) ([]byte, []byte) { + int_size := (int_size_bits + 16) >> 4 + D := iqc.CreateDiscriminant(seed, int_size_bits) + x, _ := iqc.NewClassGroupFromBytesDiscriminant(x_blob[:(2*int_size)], D) + + y, proof := calculateVDF(D, x, iterations, int_size_bits, nil) + + if (y == nil) || (proof == nil) { + return nil, nil + } else { + return y.Serialize(), proof.Serialize() + } +} + func VerifyVDF(seed, proof_blob []byte, iterations, int_size_bits uint32) bool { int_size := (int_size_bits + 16) >> 4 @@ -97,6 +111,16 @@ func VerifyVDF(seed, proof_blob []byte, iterations, int_size_bits uint32) bool { return verifyProof(x, y, proof, iterations) } +func VerifyVDFIteration(seed, x_blob, proof_blob []byte, iterations, int_size_bits uint32) bool { + int_size := (int_size_bits + 16) >> 4 + D := iqc.CreateDiscriminant(seed, int_size_bits) + x, _ := iqc.NewClassGroupFromBytesDiscriminant(x_blob[:(2*int_size)], D) + y, _ := iqc.NewClassGroupFromBytesDiscriminant(proof_blob[:(2*int_size)], D) + proof, _ := iqc.NewClassGroupFromBytesDiscriminant(proof_blob[2*int_size:], D) + + return verifyProof(x, y, proof, iterations) +} + // Creates a random prime based on input x, y, T // Note – this differs from harmony-one's implementation, as the Fiat-Shamir // transform requires _all_ public parameters be input, or else there is the @@ -133,7 +157,7 @@ func getBlock(i, k, T int, B *big.Int) *big.Int { return iqc.FloorDivision(new(big.Int).Mul(p1, p2), B) } -//Optimized evalutation of h ^ (2^T // B) +// Optimized evalutation of h ^ (2^T // B) func evalOptimized(identity, h *iqc.ClassGroup, B *big.Int, T uint32, k, l int, C map[int]*iqc.ClassGroup) *iqc.ClassGroup { //k1 = k//2 var k1 
int = k / 2 @@ -219,7 +243,7 @@ func evalOptimized(identity, h *iqc.ClassGroup, B *big.Int, T uint32, k, l int, return x } -//generate y = x ^ (2 ^T) and pi +// generate y = x ^ (2 ^T) and pi func generateProof(identity, x, y *iqc.ClassGroup, T uint32, k, l int, powers map[int]*iqc.ClassGroup) *iqc.ClassGroup { //x_s = x.serialize() x_s := x.Serialize() @@ -236,10 +260,12 @@ func generateProof(identity, x, y *iqc.ClassGroup, T uint32, k, l int, powers ma func calculateVDF(discriminant *big.Int, x *iqc.ClassGroup, iterations, int_size_bits uint32, stop <-chan struct{}) (y, proof *iqc.ClassGroup) { L, k, _ := approximateParameters(iterations) - loopCount := int(math.Ceil(float64(iterations) / float64(k*L))) + // NB: Dusk needs to do the disjoint set arithmetic, marking this spot down + // as the insertion point powers_to_calculate := make([]int, loopCount+2) + // link into next for i := 0; i < loopCount+1; i++ { powers_to_calculate[i] = i * k * L } diff --git a/nekryptology/pkg/zkp/schnorr/schnorr_test.go b/nekryptology/pkg/zkp/schnorr/schnorr_test.go index 47de66c..d897126 100644 --- a/nekryptology/pkg/zkp/schnorr/schnorr_test.go +++ b/nekryptology/pkg/zkp/schnorr/schnorr_test.go @@ -25,13 +25,13 @@ func TestZKPOverMultipleCurves(t *testing.T) { } for i, curve := range curveInstances { uniqueSessionId := sha3.New256().Sum([]byte("random seed")) - prover := NewProver(curve, nil, uniqueSessionId) + prover := NewProver(curve, nil, sha3.New256(), uniqueSessionId) secret := curve.Scalar.Random(rand.Reader) proof, err := prover.Prove(secret) require.NoError(t, err, fmt.Sprintf("failed in curve %d", i)) - err = Verify(proof, curve, nil, uniqueSessionId) + err = Verify(proof, curve, nil, sha3.New256(), uniqueSessionId) require.NoError(t, err, fmt.Sprintf("failed in curve %d", i)) } } diff --git a/node/.vscode/settings.json b/node/.vscode/settings.json new file mode 100644 index 0000000..aee3509 --- /dev/null +++ b/node/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + 
"go.testEnvVars": { + "GOEXPERIMENT": "arenas" + } +} \ No newline at end of file diff --git a/node/app/db_console.go b/node/app/db_console.go index ed93749..bc8b354 100644 --- a/node/app/db_console.go +++ b/node/app/db_console.go @@ -23,6 +23,7 @@ import ( "google.golang.org/grpc/credentials/insecure" "source.quilibrium.com/quilibrium/monorepo/node/config" "source.quilibrium.com/quilibrium/monorepo/node/execution/ceremony/application" + "source.quilibrium.com/quilibrium/monorepo/node/p2p" "source.quilibrium.com/quilibrium/monorepo/node/protobufs" "source.quilibrium.com/quilibrium/monorepo/node/tries" ) @@ -431,7 +432,7 @@ func (m model) View() string { list := []string{} for i, item := range m.filters { - str := item[0:12] + ".." + item[52:] + str := item[0:12] + ".." + item[len(item)-12:] if m.selectedFilter == item { list = append(list, selectedListStyle.Render(str)) } else if i == m.cursor { @@ -584,7 +585,7 @@ func (m model) View() string { for _, active := range app.ActiveParticipants { explorerContent += "\t" + base64.StdEncoding.EncodeToString( - active.KeyValue, + active.PublicKeySignatureEd448.PublicKey.KeyValue, ) + "\n" } @@ -624,7 +625,7 @@ func (m model) View() string { for _, active := range app.ActiveParticipants { explorerContent += "\t" + base64.StdEncoding.EncodeToString( - active.KeyValue, + active.PublicKeySignatureEd448.PublicKey.KeyValue, ) + "\n" } @@ -656,8 +657,10 @@ func (m model) View() string { ) + "\n" } case application.CEREMONY_APPLICATION_STATE_VALIDATING: + explorerContent += fmt.Sprintf( + "G1 Powers: %d\n", len(app.UpdatedTranscript.G1Powers), + ) explorerContent += "Preferred Next Round Participants: \n" - for _, next := range app.NextRoundPreferredParticipants { explorerContent += "\t" + base64.StdEncoding.EncodeToString( next.KeyValue, @@ -727,7 +730,10 @@ func consoleModel( 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, }), - hex.EncodeToString(application.CEREMONY_ADDRESS), + 
hex.EncodeToString(append( + p2p.GetBloomFilter(application.CEREMONY_ADDRESS, 256, 3), + p2p.GetBloomFilterIndices(application.CEREMONY_ADDRESS, 65536, 24)..., + )), }, cursor: 0, conn: conn, diff --git a/node/app/wire.go b/node/app/wire.go index a3e09bb..588381f 100644 --- a/node/app/wire.go +++ b/node/app/wire.go @@ -8,7 +8,6 @@ import ( "go.uber.org/zap" "source.quilibrium.com/quilibrium/monorepo/node/config" "source.quilibrium.com/quilibrium/monorepo/node/consensus" - ceremonyConsensus "source.quilibrium.com/quilibrium/monorepo/node/consensus/ceremony" "source.quilibrium.com/quilibrium/monorepo/node/consensus/master" "source.quilibrium.com/quilibrium/monorepo/node/execution/ceremony" "source.quilibrium.com/quilibrium/monorepo/node/keys" @@ -38,6 +37,7 @@ var keyManagerSet = wire.NewSet( var storeSet = wire.NewSet( wire.FieldsOf(new(*config.Config), "DB"), store.NewPebbleDB, + wire.Bind(new(store.KVDB), new(*store.PebbleDB)), store.NewPebbleClockStore, store.NewPebbleKeyStore, store.NewPebbleDataProofStore, @@ -52,16 +52,8 @@ var pubSubSet = wire.NewSet( wire.Bind(new(p2p.PubSub), new(*p2p.BlossomSub)), ) -var dataConsensusSet = wire.NewSet( - wire.FieldsOf(new(*config.Config), "Engine"), - ceremonyConsensus.NewCeremonyDataClockConsensusEngine, - wire.Bind( - new(consensus.DataConsensusEngine), - new(*ceremonyConsensus.CeremonyDataClockConsensusEngine), - ), -) - var engineSet = wire.NewSet( + wire.FieldsOf(new(*config.Config), "Engine"), ceremony.NewCeremonyExecutionEngine, ) @@ -80,7 +72,6 @@ func NewNode(*config.Config) (*Node, error) { storeSet, pubSubSet, engineSet, - dataConsensusSet, consensusSet, newNode, )) diff --git a/node/app/wire_gen.go b/node/app/wire_gen.go index d391163..40fb5dc 100644 --- a/node/app/wire_gen.go +++ b/node/app/wire_gen.go @@ -11,9 +11,8 @@ import ( "go.uber.org/zap" "source.quilibrium.com/quilibrium/monorepo/node/config" "source.quilibrium.com/quilibrium/monorepo/node/consensus" - 
"source.quilibrium.com/quilibrium/monorepo/node/consensus/ceremony" "source.quilibrium.com/quilibrium/monorepo/node/consensus/master" - ceremony2 "source.quilibrium.com/quilibrium/monorepo/node/execution/ceremony" + "source.quilibrium.com/quilibrium/monorepo/node/execution/ceremony" "source.quilibrium.com/quilibrium/monorepo/node/keys" "source.quilibrium.com/quilibrium/monorepo/node/p2p" "source.quilibrium.com/quilibrium/monorepo/node/store" @@ -24,16 +23,15 @@ import ( func NewNode(configConfig *config.Config) (*Node, error) { zapLogger := logger() dbConfig := configConfig.DB - db := store.NewPebbleDB(dbConfig) - pebbleClockStore := store.NewPebbleClockStore(db, zapLogger) + pebbleDB := store.NewPebbleDB(dbConfig) + pebbleClockStore := store.NewPebbleClockStore(pebbleDB, zapLogger) keyConfig := configConfig.Key fileKeyManager := keys.NewFileKeyManager(keyConfig, zapLogger) p2PConfig := configConfig.P2P blossomSub := p2p.NewBlossomSub(p2PConfig, zapLogger) engineConfig := configConfig.Engine - pebbleKeyStore := store.NewPebbleKeyStore(db, zapLogger) - ceremonyDataClockConsensusEngine := ceremony.NewCeremonyDataClockConsensusEngine(engineConfig, zapLogger, fileKeyManager, pebbleClockStore, pebbleKeyStore, blossomSub) - ceremonyExecutionEngine := ceremony2.NewCeremonyExecutionEngine(zapLogger, ceremonyDataClockConsensusEngine, engineConfig, fileKeyManager, blossomSub, pebbleClockStore, pebbleKeyStore) + pebbleKeyStore := store.NewPebbleKeyStore(pebbleDB, zapLogger) + ceremonyExecutionEngine := ceremony.NewCeremonyExecutionEngine(zapLogger, engineConfig, fileKeyManager, blossomSub, pebbleClockStore, pebbleKeyStore) masterClockConsensusEngine := master.NewMasterClockConsensusEngine(engineConfig, zapLogger, pebbleClockStore, fileKeyManager, blossomSub) node, err := newNode(zapLogger, pebbleClockStore, fileKeyManager, blossomSub, ceremonyExecutionEngine, masterClockConsensusEngine) if err != nil { @@ -52,9 +50,9 @@ func NewDBConsole(configConfig *config.Config) 
(*DBConsole, error) { func NewClockStore(configConfig *config.Config) (store.ClockStore, error) { dbConfig := configConfig.DB - db := store.NewPebbleDB(dbConfig) + pebbleDB := store.NewPebbleDB(dbConfig) zapLogger := logger() - pebbleClockStore := store.NewPebbleClockStore(db, zapLogger) + pebbleClockStore := store.NewPebbleClockStore(pebbleDB, zapLogger) return pebbleClockStore, nil } @@ -75,17 +73,11 @@ var loggerSet = wire.NewSet( var keyManagerSet = wire.NewSet(wire.FieldsOf(new(*config.Config), "Key"), keys.NewFileKeyManager, wire.Bind(new(keys.KeyManager), new(*keys.FileKeyManager))) -var storeSet = wire.NewSet(wire.FieldsOf(new(*config.Config), "DB"), store.NewPebbleDB, store.NewPebbleClockStore, store.NewPebbleKeyStore, store.NewPebbleDataProofStore, wire.Bind(new(store.ClockStore), new(*store.PebbleClockStore)), wire.Bind(new(store.KeyStore), new(*store.PebbleKeyStore)), wire.Bind(new(store.DataProofStore), new(*store.PebbleDataProofStore))) +var storeSet = wire.NewSet(wire.FieldsOf(new(*config.Config), "DB"), store.NewPebbleDB, wire.Bind(new(store.KVDB), new(*store.PebbleDB)), store.NewPebbleClockStore, store.NewPebbleKeyStore, store.NewPebbleDataProofStore, wire.Bind(new(store.ClockStore), new(*store.PebbleClockStore)), wire.Bind(new(store.KeyStore), new(*store.PebbleKeyStore)), wire.Bind(new(store.DataProofStore), new(*store.PebbleDataProofStore))) var pubSubSet = wire.NewSet(wire.FieldsOf(new(*config.Config), "P2P"), p2p.NewBlossomSub, wire.Bind(new(p2p.PubSub), new(*p2p.BlossomSub))) -var dataConsensusSet = wire.NewSet(wire.FieldsOf(new(*config.Config), "Engine"), ceremony.NewCeremonyDataClockConsensusEngine, wire.Bind( - new(consensus.DataConsensusEngine), - new(*ceremony.CeremonyDataClockConsensusEngine), -), -) - -var engineSet = wire.NewSet(ceremony2.NewCeremonyExecutionEngine) +var engineSet = wire.NewSet(wire.FieldsOf(new(*config.Config), "Engine"), ceremony.NewCeremonyExecutionEngine) var consensusSet = 
wire.NewSet(master.NewMasterClockConsensusEngine, wire.Bind( new(consensus.ConsensusEngine), diff --git a/node/config/engine.go b/node/config/engine.go index 7fce2ac..b2f5f22 100644 --- a/node/config/engine.go +++ b/node/config/engine.go @@ -7,4 +7,8 @@ type EngineConfig struct { MaxFrames int64 `yaml:"maxFrames"` PendingCommitWorkers int64 `yaml:"pendingCommitWorkers"` MinimumPeersRequired int `yaml:"minimumPeersRequired"` + + // Values used only for testing – do not override these in production, your + // node will get kicked out + Difficulty uint32 `yaml:"difficulty"` } diff --git a/node/consensus/ceremony/broadcast_messaging.go b/node/consensus/ceremony/broadcast_messaging.go index b79c4a0..b9014db 100644 --- a/node/consensus/ceremony/broadcast_messaging.go +++ b/node/consensus/ceremony/broadcast_messaging.go @@ -2,8 +2,6 @@ package ceremony import ( "bytes" - "crypto" - "crypto/rand" "encoding/binary" "strings" "time" @@ -19,7 +17,6 @@ import ( "google.golang.org/protobuf/types/known/anypb" "source.quilibrium.com/quilibrium/monorepo/go-libp2p-blossomsub/pb" "source.quilibrium.com/quilibrium/monorepo/nekryptology/pkg/core/curves" - "source.quilibrium.com/quilibrium/monorepo/nekryptology/pkg/zkp/schnorr" "source.quilibrium.com/quilibrium/monorepo/node/consensus" qcrypto "source.quilibrium.com/quilibrium/monorepo/node/crypto" "source.quilibrium.com/quilibrium/monorepo/node/keys" @@ -111,22 +108,6 @@ func (e *CeremonyDataClockConsensusEngine) handleMessage( ); err != nil { return errors.Wrap(err, "handle message") } - case protobufs.ProvingKeyRequestType: - if err := e.handleProvingKeyRequest( - message.From, - msg.Address, - any, - ); err != nil { - return errors.Wrap(err, "handle message") - } - case protobufs.ProvingKeyAnnouncementType: - if err := e.handleProvingKey(message.From, msg.Address, any); err != nil { - return errors.Wrap(err, "handle message") - } - case protobufs.KeyBundleAnnouncementType: - if err := e.handleKeyBundle(message.From, msg.Address, 
any); err != nil { - return errors.Wrap(err, "handle message") - } case protobufs.CeremonyPeerListAnnounceType: if err := e.handleCeremonyPeerListAnnounce( message.From, @@ -304,177 +285,6 @@ func (e *CeremonyDataClockConsensusEngine) handleCeremonyLobbyStateTransition( return nil } -func (e *CeremonyDataClockConsensusEngine) handleKeyBundle( - peerID []byte, - address []byte, - any *anypb.Any, -) error { - e.logger.Debug("received key bundle") - keyBundleAnnouncement := &protobufs.KeyBundleAnnouncement{} - if err := any.UnmarshalTo(keyBundleAnnouncement); err != nil { - return errors.Wrap(err, "handle key bundle") - } - - if len(keyBundleAnnouncement.ProvingKeyBytes) == 0 { - return errors.Wrap(errors.New("proving key is nil"), "handle key bundle") - } - - k, err := e.keyStore.GetLatestKeyBundle(keyBundleAnnouncement.ProvingKeyBytes) - if err != nil && !errors.Is(err, store.ErrNotFound) { - return errors.Wrap(err, "handle key bundle") - } - - if k != nil { - latestAnnouncement := &protobufs.KeyBundleAnnouncement{} - err := proto.Unmarshal(k.Data, latestAnnouncement) - if err != nil { - return errors.Wrap(err, "handle key bundle") - } - - if bytes.Equal( - latestAnnouncement.IdentityKey.Challenge, - keyBundleAnnouncement.IdentityKey.Challenge, - ) && bytes.Equal( - latestAnnouncement.IdentityKey.Response, - keyBundleAnnouncement.IdentityKey.Response, - ) && bytes.Equal( - latestAnnouncement.IdentityKey.Statement, - keyBundleAnnouncement.IdentityKey.Statement, - ) && bytes.Equal( - latestAnnouncement.SignedPreKey.Challenge, - keyBundleAnnouncement.SignedPreKey.Challenge, - ) && bytes.Equal( - latestAnnouncement.SignedPreKey.Response, - keyBundleAnnouncement.SignedPreKey.Response, - ) && bytes.Equal( - latestAnnouncement.SignedPreKey.Statement, - keyBundleAnnouncement.SignedPreKey.Statement, - ) { - // This has already been proven, ignore - return nil - } - } - - var provingKey *protobufs.ProvingKeyAnnouncement - inclusion, err := e.keyStore.GetProvingKey( - 
keyBundleAnnouncement.ProvingKeyBytes, - ) - if err != nil { - if !errors.Is(err, store.ErrNotFound) { - return errors.Wrap(err, "handle key bundle") - } - - provingKey, err = e.keyStore.GetStagedProvingKey( - keyBundleAnnouncement.ProvingKeyBytes, - ) - if err != nil && !errors.Is(err, store.ErrNotFound) { - return errors.Wrap(err, "handle key bundle") - } - } else { - err := proto.Unmarshal(inclusion.Data, provingKey) - if err != nil { - return errors.Wrap(err, "handle key bundle") - } - } - - // We have a matching proving key, we can set this up to be committed. - if provingKey != nil { - e.logger.Debug("verifying key bundle announcement") - if err := keyBundleAnnouncement.Verify(provingKey); err != nil { - e.logger.Debug( - "could not verify key bundle announcement", - zap.Error(err), - ) - return nil - } - - go func() { - e.logger.Debug("adding key bundle announcement to pending commits") - - e.pendingCommits <- any - }() - - return nil - } else { - e.logger.Debug("proving key not found, requesting from peers") - - if err = e.publishMessage(e.filter, &protobufs.ProvingKeyRequest{ - ProvingKeyBytes: keyBundleAnnouncement.ProvingKeyBytes, - }); err != nil { - return errors.Wrap(err, "handle key bundle") - } - - e.dependencyMapMx.Lock() - e.dependencyMap[string(keyBundleAnnouncement.ProvingKeyBytes)] = any - e.dependencyMapMx.Unlock() - } - - return nil -} - -func (e *CeremonyDataClockConsensusEngine) handleProvingKey( - peerID []byte, - address []byte, - any *anypb.Any, -) error { - e.logger.Debug("received proving key") - - provingKeyAnnouncement := &protobufs.ProvingKeyAnnouncement{} - if err := any.UnmarshalTo(provingKeyAnnouncement); err != nil { - return errors.Wrap(err, "handle proving key") - } - - if err := provingKeyAnnouncement.Verify(); err != nil { - return errors.Wrap(err, "handle proving key") - } - - if err := e.keyStore.StageProvingKey(provingKeyAnnouncement); err != nil { - return errors.Wrap(err, "handle proving key") - } - - provingKey := 
provingKeyAnnouncement.PublicKey() - - e.logger.Debug( - "proving key staged", - zap.Binary("proving_key", provingKey), - ) - - go func() { - e.dependencyMapMx.Lock() - if e.dependencyMap[string(provingKey)] != nil { - keyBundleAnnouncement := &protobufs.KeyBundleAnnouncement{} - if err := proto.Unmarshal( - e.dependencyMap[string(provingKey)].Value, - keyBundleAnnouncement, - ); err != nil { - e.logger.Error( - "could not unmarshal key bundle announcement", - zap.Error(err), - ) - e.dependencyMapMx.Unlock() - return - } - if err := keyBundleAnnouncement.Verify( - provingKeyAnnouncement, - ); err != nil { - e.logger.Error( - "could not verify key bundle announcement", - zap.Error(err), - ) - e.dependencyMapMx.Unlock() - return - } - - e.pendingCommits <- e.dependencyMap[string(provingKey)] - - delete(e.dependencyMap, string(provingKey)) - } - e.dependencyMapMx.Unlock() - }() - - return nil -} - func (e *CeremonyDataClockConsensusEngine) handleClockFrameData( peerID []byte, address []byte, @@ -694,16 +504,30 @@ func (e *CeremonyDataClockConsensusEngine) handleClockFrameData( zap.Binary("filter", frame.Filter), zap.Uint64("frame_number", frame.FrameNumber), ) + masterFrame, err := e.clockStore.GetMasterClockFrame( + []byte{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + }, + frame.FrameNumber-1, + ) + if err != nil { + e.logger.Info("received frame with no known master, needs sync") + return nil + } - parentSelector, selector, distance, err := - frame.GetParentSelectorAndDistance() + discriminator, err := masterFrame.GetSelector() + if err != nil { + return errors.Wrap(err, "handle clock frame data") + } + + parentSelector, distance, selector, err := + frame.GetParentSelectorAndDistance(discriminator) if err != nil { return errors.Wrap(err, "handle clock frame data") } - e.logger.Debug( - "difference between 
selector/discriminator", - zap.Binary("difference", distance.Bytes()), - ) if _, err := e.clockStore.GetParentDataClockFrame( frame.Filter, @@ -713,7 +537,7 @@ func (e *CeremonyDataClockConsensusEngine) handleClockFrameData( // If this is a frame number higher than what we're already caught up to, // push a request to fill the gap, unless we're syncing or it's in step, // then just lazily seek. - from := e.frame + from := e.frame.FrameNumber if from >= frame.FrameNumber-1 { from = frame.FrameNumber - 1 } @@ -737,9 +561,9 @@ func (e *CeremonyDataClockConsensusEngine) handleClockFrameData( } if err := e.clockStore.PutCandidateDataClockFrame( - parentSelector.Bytes(), - distance.Bytes(), - selector.Bytes(), + parentSelector.FillBytes(make([]byte, 32)), + distance.FillBytes(make([]byte, 32)), + selector.FillBytes(make([]byte, 32)), frame, txn, ); err != nil { @@ -752,7 +576,7 @@ func (e *CeremonyDataClockConsensusEngine) handleClockFrameData( return errors.Wrap(err, "handle clock frame data") } - if e.frame < frame.FrameNumber { + if e.frame.FrameNumber < frame.FrameNumber { e.latestFrameReceived = frame.FrameNumber e.lastFrameReceivedAt = time.Now().UTC() } @@ -819,12 +643,11 @@ func (e *CeremonyDataClockConsensusEngine) publishMessage( return e.pubSub.PublishToBitmask(filter, data) } -func (e *CeremonyDataClockConsensusEngine) announceKeyBundle() error { - e.logger.Debug("announcing key bundle") - idk, err := e.keyManager.GetAgreementKey("q-ratchet-idk") +func (e *CeremonyDataClockConsensusEngine) createCommunicationKeys() error { + _, err := e.keyManager.GetAgreementKey("q-ratchet-idk") if err != nil { if errors.Is(err, keys.KeyNotFoundErr) { - idk, err = e.keyManager.CreateAgreementKey( + _, err = e.keyManager.CreateAgreementKey( "q-ratchet-idk", keys.KeyTypeX448, ) @@ -836,10 +659,10 @@ func (e *CeremonyDataClockConsensusEngine) announceKeyBundle() error { } } - spk, err := e.keyManager.GetAgreementKey("q-ratchet-spk") + _, err = 
e.keyManager.GetAgreementKey("q-ratchet-spk") if err != nil { if errors.Is(err, keys.KeyNotFoundErr) { - spk, err = e.keyManager.CreateAgreementKey( + _, err = e.keyManager.CreateAgreementKey( "q-ratchet-spk", keys.KeyTypeX448, ) @@ -851,110 +674,5 @@ func (e *CeremonyDataClockConsensusEngine) announceKeyBundle() error { } } - idkPoint := curves.ED448().NewGeneratorPoint().Mul(idk) - idkProver := schnorr.NewProver( - curves.ED448(), - curves.ED448().NewGeneratorPoint(), - sha3.New256(), - []byte{}, - ) - - spkPoint := curves.ED448().NewGeneratorPoint().Mul(spk) - spkProver := schnorr.NewProver( - curves.ED448(), - curves.ED448().NewGeneratorPoint(), - sha3.New256(), - []byte{}, - ) - - idkProof, idkCommitment, err := idkProver.ProveCommit(idk) - if err != nil { - return errors.Wrap(err, "announce key bundle") - } - - spkProof, spkCommitment, err := spkProver.ProveCommit(spk) - if err != nil { - return errors.Wrap(err, "announce key bundle") - } - - msg := append( - append([]byte{}, idkCommitment...), - spkCommitment..., - ) - - signature, err := e.provingKey.Sign(rand.Reader, msg, crypto.Hash(0)) - if err != nil { - return errors.Wrap(err, "announce key bundle") - } - - signatureProto := &protobufs.ProvingKeyAnnouncement_ProvingKeySignatureEd448{ - ProvingKeySignatureEd448: &protobufs.Ed448Signature{ - PublicKey: &protobufs.Ed448PublicKey{ - KeyValue: e.provingKeyBytes, - }, - Signature: signature, - }, - } - provingKeyAnnouncement := &protobufs.ProvingKeyAnnouncement{ - IdentityCommitment: idkCommitment, - PrekeyCommitment: spkCommitment, - ProvingKeySignature: signatureProto, - } - - if err := e.publishMessage(e.filter, provingKeyAnnouncement); err != nil { - return errors.Wrap(err, "announce key bundle") - } - - idkSignature, err := e.provingKey.Sign( - rand.Reader, - idkPoint.ToAffineCompressed(), - crypto.Hash(0), - ) - if err != nil { - return errors.Wrap(err, "announce key bundle") - } - - spkSignature, err := e.provingKey.Sign( - rand.Reader, - 
spkPoint.ToAffineCompressed(), - crypto.Hash(0), - ) - if err != nil { - return errors.Wrap(err, "announce key bundle") - } - - keyBundleAnnouncement := &protobufs.KeyBundleAnnouncement{ - ProvingKeyBytes: e.provingKeyBytes, - IdentityKey: &protobufs.IdentityKey{ - Challenge: idkProof.C.Bytes(), - Response: idkProof.S.Bytes(), - Statement: idkProof.Statement.ToAffineCompressed(), - IdentityKeySignature: &protobufs.IdentityKey_PublicKeySignatureEd448{ - PublicKeySignatureEd448: &protobufs.Ed448Signature{ - PublicKey: &protobufs.Ed448PublicKey{ - KeyValue: idkPoint.ToAffineCompressed(), - }, - Signature: idkSignature, - }, - }, - }, - SignedPreKey: &protobufs.SignedPreKey{ - Challenge: spkProof.C.Bytes(), - Response: spkProof.S.Bytes(), - Statement: spkProof.Statement.ToAffineCompressed(), - SignedPreKeySignature: &protobufs.SignedPreKey_PublicKeySignatureEd448{ - PublicKeySignatureEd448: &protobufs.Ed448Signature{ - PublicKey: &protobufs.Ed448PublicKey{ - KeyValue: spkPoint.ToAffineCompressed(), - }, - Signature: spkSignature, - }, - }, - }, - } - - return errors.Wrap( - e.publishMessage(e.filter, keyBundleAnnouncement), - "announce key bundle", - ) + return nil } diff --git a/node/consensus/ceremony/ceremony_data_clock_consensus_engine.go b/node/consensus/ceremony/ceremony_data_clock_consensus_engine.go index 5ce1f30..fa40990 100644 --- a/node/consensus/ceremony/ceremony_data_clock_consensus_engine.go +++ b/node/consensus/ceremony/ceremony_data_clock_consensus_engine.go @@ -53,8 +53,7 @@ type ChannelServer = protobufs.CeremonyService_GetPublicChannelServer type CeremonyDataClockConsensusEngine struct { protobufs.UnimplementedCeremonyServiceServer - frame uint64 - activeFrame *protobufs.ClockFrame + frame *protobufs.ClockFrame difficulty uint32 logger *zap.Logger state consensus.EngineState @@ -113,6 +112,8 @@ func NewCeremonyDataClockConsensusEngine( clockStore store.ClockStore, keyStore store.KeyStore, pubSub p2p.PubSub, + filter []byte, + seed []byte, ) 
*CeremonyDataClockConsensusEngine { if logger == nil { panic(errors.New("logger is nil")) @@ -143,9 +144,14 @@ func NewCeremonyDataClockConsensusEngine( minimumPeersRequired = 3 } + difficulty := engineConfig.Difficulty + if difficulty == 0 { + difficulty = 10000 + } + e := &CeremonyDataClockConsensusEngine{ - frame: 0, - difficulty: 10000, + frame: nil, + difficulty: difficulty, logger: logger, state: consensus.EngineStateStopped, clockStore: clockStore, @@ -182,6 +188,8 @@ func NewCeremonyDataClockConsensusEngine( engineConfig, ) + e.filter = filter + e.input = seed e.provingKey = signer e.provingKeyType = keyType e.provingKeyBytes = bytes @@ -190,16 +198,10 @@ func NewCeremonyDataClockConsensusEngine( return e } -func (e *CeremonyDataClockConsensusEngine) Start( - filter []byte, - seed []byte, -) <-chan error { +func (e *CeremonyDataClockConsensusEngine) Start() <-chan error { e.logger.Info("starting ceremony consensus engine") e.state = consensus.EngineStateStarting errChan := make(chan error) - - e.filter = filter - e.input = seed e.state = consensus.EngineStateLoading e.logger.Info("loading last seen state") @@ -214,16 +216,16 @@ func (e *CeremonyDataClockConsensusEngine) Start( if latestFrame != nil { e.setFrame(latestFrame) } else { - latestFrame = e.createGenesisFrame() + latestFrame = e.CreateGenesisFrame(nil) + } + + err = e.createCommunicationKeys() + if err != nil { + panic(err) } e.logger.Info("subscribing to pubsub messages") e.pubSub.Subscribe(e.filter, e.handleMessage, true) - e.pubSub.Subscribe( - append(append([]byte{}, e.filter...), e.pubSub.GetPeerID()...), - e.handleSync, - true, - ) go func() { server := grpc.NewServer( @@ -240,8 +242,6 @@ func (e *CeremonyDataClockConsensusEngine) Start( } }() - latestFrame = e.performSanityCheck(latestFrame) - e.state = consensus.EngineStateCollecting for i := int64(0); i < e.pendingCommitWorkers; i++ { @@ -257,7 +257,7 @@ func (e *CeremonyDataClockConsensusEngine) Start( } timestamp := 
time.Now().UnixMilli() - msg := binary.BigEndian.AppendUint64([]byte{}, e.frame) + msg := binary.BigEndian.AppendUint64([]byte{}, e.frame.FrameNumber) msg = append(msg, consensus.GetVersion()...) msg = binary.BigEndian.AppendUint64(msg, uint64(timestamp)) sig, err := e.pubSub.SignMessage(msg) @@ -269,7 +269,7 @@ func (e *CeremonyDataClockConsensusEngine) Start( e.peerMap[string(e.pubSub.GetPeerID())] = &peerInfo{ peerId: e.pubSub.GetPeerID(), multiaddr: "", - maxFrame: e.frame, + maxFrame: e.frame.FrameNumber, version: consensus.GetVersion(), signature: sig, publicKey: e.pubSub.GetPublicKey(), @@ -307,38 +307,8 @@ func (e *CeremonyDataClockConsensusEngine) Start( }() go func() { - latest := latestFrame - for { - time.Sleep(30 * time.Second) - peerCount := e.pubSub.GetNetworkPeersCount() - if peerCount >= e.minimumPeersRequired { - e.logger.Info("selecting leader") - if e.frame > latest.FrameNumber && e.frame-latest.FrameNumber > 16 && - e.syncingTarget == nil { - e.logger.Info("rewinding sync head due to large delta") - latest, _, err = e.clockStore.GetDataClockFrame( - e.filter, - 0, - ) - if err != nil { - panic(err) - } - } - latest, err = e.commitLongestPath(latest) - if err != nil { - e.logger.Error("could not collect longest path", zap.Error(err)) - latest, _, err = e.clockStore.GetDataClockFrame(e.filter, 0) - if err != nil { - panic(err) - } - } - - latest = e.performSanityCheck(latest) - } - } - }() - - go func() { + e.logger.Info("waiting for peer list mappings") + time.Sleep(30 * time.Second) for e.state < consensus.EngineStateStopping { peerCount := e.pubSub.GetNetworkPeersCount() if peerCount < e.minimumPeersRequired { @@ -350,22 +320,23 @@ func (e *CeremonyDataClockConsensusEngine) Start( } else { switch e.state { case consensus.EngineStateCollecting: + currentFrame := latestFrame if latestFrame, err = e.collect(latestFrame); err != nil { e.logger.Error("could not collect", zap.Error(err)) e.state = consensus.EngineStateCollecting - errChan <- err + 
latestFrame = currentFrame } case consensus.EngineStateProving: + currentFrame := latestFrame if latestFrame, err = e.prove(latestFrame); err != nil { e.logger.Error("could not prove", zap.Error(err)) e.state = consensus.EngineStateCollecting - errChan <- err + latestFrame = currentFrame } case consensus.EngineStatePublishing: if err = e.publishProof(latestFrame); err != nil { e.logger.Error("could not publish", zap.Error(err)) e.state = consensus.EngineStateCollecting - errChan <- err } } } @@ -389,7 +360,7 @@ func (e *CeremonyDataClockConsensusEngine) Stop(force bool) <-chan error { for name := range e.executionEngines { name := name go func(name string) { - err := <-e.UnregisterExecutor(name, e.frame, force) + err := <-e.UnregisterExecutor(name, e.frame.FrameNumber, force) if err != nil { errChan <- err } @@ -463,7 +434,7 @@ func (e *CeremonyDataClockConsensusEngine) performSanityCheck( panic(err) } - parentSelector, _, _, err := disc.GetParentSelectorAndDistance() + parentSelector, _, _, err := disc.GetParentSelectorAndDistance(nil) if err != nil { panic(err) } @@ -536,7 +507,7 @@ func (e *CeremonyDataClockConsensusEngine) GetDifficulty() uint32 { return e.difficulty } -func (e *CeremonyDataClockConsensusEngine) GetFrame() uint64 { +func (e *CeremonyDataClockConsensusEngine) GetFrame() *protobufs.ClockFrame { return e.frame } @@ -550,12 +521,6 @@ func ( return e.frameChan } -func ( - e *CeremonyDataClockConsensusEngine, -) GetActiveFrame() *protobufs.ClockFrame { - return e.activeFrame -} - func ( e *CeremonyDataClockConsensusEngine, ) GetPeerInfo() *protobufs.PeerInfoResponse { diff --git a/node/consensus/ceremony/consensus_frames.go b/node/consensus/ceremony/consensus_frames.go index db8aabc..a1f29b3 100644 --- a/node/consensus/ceremony/consensus_frames.go +++ b/node/consensus/ceremony/consensus_frames.go @@ -3,16 +3,18 @@ package ceremony import ( "bytes" "context" + "encoding/base64" "encoding/binary" "encoding/hex" + "encoding/json" "fmt" "io" "math/big" + 
"os" "strings" "github.com/iden3/go-iden3-crypto/ff" "github.com/iden3/go-iden3-crypto/poseidon" - "github.com/libp2p/go-libp2p/core/peer" "github.com/pkg/errors" "go.uber.org/zap" "golang.org/x/crypto/sha3" @@ -25,7 +27,6 @@ import ( "source.quilibrium.com/quilibrium/monorepo/node/execution/ceremony/application" "source.quilibrium.com/quilibrium/monorepo/node/p2p" "source.quilibrium.com/quilibrium/monorepo/node/protobufs" - "source.quilibrium.com/quilibrium/monorepo/node/store" "source.quilibrium.com/quilibrium/monorepo/node/tries" ) @@ -322,9 +323,8 @@ func (e *CeremonyDataClockConsensusEngine) setFrame( } e.logger.Debug("set frame", zap.Uint64("frame_number", frame.FrameNumber)) e.currentDistance = distance - e.frame = frame.FrameNumber + e.frame = frame e.parentSelector = parent.Bytes() - e.activeFrame = frame go func() { e.frameChan <- frame }() @@ -332,7 +332,7 @@ func (e *CeremonyDataClockConsensusEngine) setFrame( func ( e *CeremonyDataClockConsensusEngine, -) createGenesisFrame() *protobufs.ClockFrame { +) CreateGenesisFrame(testProverKeys [][]byte) *protobufs.ClockFrame { e.logger.Info("creating genesis frame") for _, l := range strings.Split(string(e.input), "\n") { e.logger.Info(l) @@ -376,7 +376,7 @@ func ( transcript.RunningG2_256Powers = append( transcript.RunningG2_256Powers, &protobufs.BLS48581G2PublicKey{ - KeyValue: qcrypto.CeremonyPotPubKeys[len(qcrypto.CeremonyPotPubKeys)-1]. + KeyValue: qcrypto.CeremonyBLS48581G2[len(qcrypto.CeremonyBLS48581G2)-1]. 
ToAffineCompressed(), }, ) @@ -408,6 +408,44 @@ func ( rewardTrie.Add(addrBytes, 0, 50) } + // 2024-01-03: 1.2.0 + d, err := os.ReadFile("./retroactive_peers.json") + if err != nil { + panic(err) + } + + type peerData struct { + PeerId string `json:"peer_id"` + TokenBalance uint64 `json:"token_balance"` + } + type rewards struct { + Rewards []peerData `json:"rewards"` + } + + retroEntries := &rewards{} + err = json.Unmarshal(d, retroEntries) + if err != nil { + panic(err) + } + + e.logger.Info("adding retroactive peer reward info") + for _, s := range retroEntries.Rewards { + peerId := s.PeerId + peerBytes, err := base64.StdEncoding.DecodeString(peerId) + if err != nil { + panic(err) + } + + addr, err := poseidon.HashBytes(peerBytes) + if err != nil { + panic(err) + } + + addrBytes := addr.Bytes() + addrBytes = append(make([]byte, 32-len(addrBytes)), addrBytes...) + rewardTrie.Add(addrBytes, 0, s.TokenBalance) + } + trieBytes, err := rewardTrie.Serialize() if err != nil { panic(err) @@ -521,25 +559,42 @@ func ( // first phase: e.logger.Info("encoding signatories to prover trie") - for _, s := range qcrypto.CeremonySignatories { - pubkey := s.ToAffineCompressed() - e.logger.Info("0x" + hex.EncodeToString(pubkey)) + if len(testProverKeys) != 0 { + e.logger.Warn( + "TEST PROVER ENTRIES BEING ADDED, YOUR NODE WILL BE KICKED IF IN" + + " PRODUCTION", + ) + for _, s := range testProverKeys { + addr, err := poseidon.HashBytes(s) + if err != nil { + panic(err) + } - addr, err := poseidon.HashBytes(pubkey) - if err != nil { - panic(err) + addrBytes := addr.Bytes() + addrBytes = append(make([]byte, 32-len(addrBytes)), addrBytes...) + e.frameProverTrie.Add(addrBytes, 0) } + } else { + for _, s := range qcrypto.CeremonySignatories { + pubkey := s.ToAffineCompressed() + e.logger.Info("0x" + hex.EncodeToString(pubkey)) - addrBytes := addr.Bytes() - addrBytes = append(make([]byte, 32-len(addrBytes)), addrBytes...) 
- e.frameProverTrie.Add(addrBytes, 0) + addr, err := poseidon.HashBytes(pubkey) + if err != nil { + panic(err) + } + + addrBytes := addr.Bytes() + addrBytes = append(make([]byte, 32-len(addrBytes)), addrBytes...) + e.frameProverTrie.Add(addrBytes, 0) + } } e.logger.Info("proving genesis frame") input := []byte{} input = append(input, e.filter...) - input = binary.BigEndian.AppendUint64(input, e.frame) - input = binary.BigEndian.AppendUint64(input, uint64(0)) + input = binary.BigEndian.AppendUint64(input, 0) + input = binary.BigEndian.AppendUint64(input, 0) input = binary.BigEndian.AppendUint32(input, e.difficulty) input = append(input, e.input...) @@ -551,7 +606,7 @@ func ( frame := &protobufs.ClockFrame{ Filter: e.filter, - FrameNumber: e.frame, + FrameNumber: 0, Timestamp: 0, Difficulty: e.difficulty, Input: inputMessage, @@ -563,7 +618,7 @@ func ( PublicKeySignature: nil, } - parent, distance, selector, err := frame.GetParentSelectorAndDistance() + parent, _, selector, err := frame.GetParentSelectorAndDistance(nil) if err != nil { panic(err) } @@ -574,9 +629,9 @@ func ( } if err := e.clockStore.PutCandidateDataClockFrame( - parent.Bytes(), - distance.Bytes(), - selector.Bytes(), + parent.FillBytes(make([]byte, 32)), + big.NewInt(0).FillBytes(make([]byte, 32)), + selector.FillBytes(make([]byte, 32)), frame, txn, ); err != nil { @@ -643,13 +698,23 @@ func (e *CeremonyDataClockConsensusEngine) commitLongestPath( return nil, errors.Wrap(err, "commit longest path") } - selectorBytes := selector.Bytes() - selectorBytes = append( - make([]byte, 32-len(selectorBytes)), - selectorBytes..., - ) + masterFrame, err := e.clockStore.GetMasterClockFrame([]byte{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + }, s[currentDepth].GetFrameNumber()) + if err != nil { + return nil, errors.Wrap(err, "commit longest path") + } + + 
proverSelector, err := masterFrame.GetSelector() + if err != nil { + return nil, errors.Wrap(err, "commit longest path") + } + nearest := e.frameProverTrie.FindNearest( - selectorBytes, + proverSelector.FillBytes(make([]byte, 32)), ) addr, err := value.GetAddress() @@ -786,37 +851,6 @@ func (e *CeremonyDataClockConsensusEngine) commitLongestPath( ) return nil, errors.Wrap(err, "commit longest path") } - case protobufs.KeyBundleAnnouncementType: - bundle := &protobufs.KeyBundleAnnouncement{} - if err := proto.Unmarshal(c.Data, bundle); err != nil { - e.logger.Error( - "could not commit candidate", - zap.Error(err), - zap.Uint64("frame_number", s.FrameNumber), - zap.Binary("commitment", c.Commitment), - ) - return nil, errors.Wrap(err, "commit longest path") - } - - e.logger.Debug( - "committing key bundle", - zap.Uint64("frame_number", s.FrameNumber), - zap.Binary("commitment", c.Commitment), - ) - - if err := e.keyStore.PutKeyBundle( - bundle.ProvingKeyBytes, - c, - txn, - ); err != nil { - e.logger.Error( - "could not commit candidate", - zap.Error(err), - zap.Uint64("frame_number", s.FrameNumber), - zap.Binary("output", s.Output), - ) - return nil, errors.Wrap(err, "commit longest path") - } } } } @@ -851,6 +885,22 @@ func (e *CeremonyDataClockConsensusEngine) commitLongestPath( } } + if current.FrameNumber != latest.FrameNumber { + to := current.FrameNumber + if to-16 > to { // underflow + to = 1 + } else { + to = to - 16 + } + + if 1 < to { + err := e.clockStore.DeleteCandidateDataClockFrameRange(e.filter, 1, to) + if err != nil { + e.logger.Error("error while purging candidate frames", zap.Error(err)) + } + } + } + return current, nil } @@ -860,7 +910,7 @@ func (e *CeremonyDataClockConsensusEngine) GetMostAheadPeer() ( error, ) { e.peerMapMx.Lock() - max := e.frame + max := e.frame.FrameNumber var peer []byte = nil for _, v := range e.peerMap { if v.maxFrame > max { @@ -882,190 +932,6 @@ func (e *CeremonyDataClockConsensusEngine) GetMostAheadPeer() ( return 
peer, max, nil } -func (e *CeremonyDataClockConsensusEngine) reverseOptimisticSync( - currentLatest *protobufs.ClockFrame, - maxFrame uint64, - peerId []byte, -) (*protobufs.ClockFrame, error) { - latest := currentLatest - cc, err := e.pubSub.GetDirectChannel(peerId) - if err != nil { - e.logger.Error( - "could not establish direct channel", - zap.Error(err), - ) - e.peerMapMx.Lock() - if _, ok := e.peerMap[string(peerId)]; ok { - e.uncooperativePeersMap[string(peerId)] = e.peerMap[string(peerId)] - delete(e.peerMap, string(peerId)) - } - e.peerMapMx.Unlock() - e.syncingTarget = nil - return latest, errors.Wrap(err, "reverse optimistic sync") - } - - client := protobufs.NewCeremonyServiceClient(cc) - - from := latest.FrameNumber - if from <= 1 { - from = 2 - } - - if maxFrame-from > 32 { - // divergence is high, ask them for the latest frame and if they - // respond with a valid answer, optimistically continue from this - // frame, if we hit a fault we'll mark them as uncooperative and move - // on - from = 2 - s, err := client.GetCompressedSyncFrames( - context.Background(), - &protobufs.ClockFramesRequest{ - Filter: e.filter, - FromFrameNumber: maxFrame - 32, - }, - grpc.MaxCallRecvMsgSize(600*1024*1024), - ) - if err != nil { - e.logger.Error( - "received error from peer", - zap.Error(err), - ) - e.peerMapMx.Lock() - if _, ok := e.peerMap[string(peerId)]; ok { - e.uncooperativePeersMap[string(peerId)] = e.peerMap[string(peerId)] - delete(e.peerMap, string(peerId)) - } - e.peerMapMx.Unlock() - e.syncingTarget = nil - return latest, errors.Wrap(err, "reverse optimistic sync") - } - var syncMsg *protobufs.CeremonyCompressedSync - for syncMsg, err = s.Recv(); err == nil; syncMsg, err = s.Recv() { - e.logger.Info( - "received compressed sync frame", - zap.Uint64("from", syncMsg.FromFrameNumber), - zap.Uint64("to", syncMsg.ToFrameNumber), - zap.Int("frames", len(syncMsg.TruncatedClockFrames)), - zap.Int("proofs", len(syncMsg.Proofs)), - ) - var next 
*protobufs.ClockFrame - if next, err = e.decompressAndStoreCandidates( - peerId, - syncMsg, - e.logger.Info, - ); err != nil && !errors.Is(err, ErrNoNewFrames) { - e.logger.Error( - "could not decompress and store candidate", - zap.Error(err), - ) - e.peerMapMx.Lock() - if _, ok := e.peerMap[string(peerId)]; ok { - e.uncooperativePeersMap[string(peerId)] = e.peerMap[string(peerId)] - delete(e.peerMap, string(peerId)) - } - e.peerMapMx.Unlock() - - if err := cc.Close(); err != nil { - e.logger.Error("error while closing connection", zap.Error(err)) - } - - e.syncingTarget = nil - e.syncingStatus = SyncStatusFailed - return currentLatest, errors.Wrap(err, "reverse optimistic sync") - } - if next != nil { - latest = next - } - } - if err != nil && err != io.EOF && !errors.Is(err, ErrNoNewFrames) { - if err := cc.Close(); err != nil { - e.logger.Error("error while closing connection", zap.Error(err)) - } - e.logger.Error("error while receiving sync", zap.Error(err)) - e.syncingTarget = nil - e.syncingStatus = SyncStatusFailed - return latest, errors.Wrap(err, "reverse optimistic sync") - } - } - - go func() { - defer func() { e.syncingTarget = nil }() - e.logger.Info("continuing sync in background") - s, err := client.GetCompressedSyncFrames( - context.Background(), - &protobufs.ClockFramesRequest{ - Filter: e.filter, - FromFrameNumber: from - 1, - ToFrameNumber: maxFrame, - }, - grpc.MaxCallRecvMsgSize(600*1024*1024), - ) - if err != nil { - e.logger.Error( - "error while retrieving sync", - zap.Error(err), - ) - e.peerMapMx.Lock() - if _, ok := e.peerMap[string(peerId)]; ok { - e.uncooperativePeersMap[string(peerId)] = e.peerMap[string(peerId)] - delete(e.peerMap, string(peerId)) - } - e.peerMapMx.Unlock() - e.syncingStatus = SyncStatusFailed - - if err := cc.Close(); err != nil { - e.logger.Error("error while closing connection", zap.Error(err)) - } - return - } else { - var syncMsg *protobufs.CeremonyCompressedSync - for syncMsg, err = s.Recv(); err == nil; 
syncMsg, err = s.Recv() { - e.logger.Debug( - "received compressed sync frame", - zap.Uint64("from", syncMsg.FromFrameNumber), - zap.Uint64("to", syncMsg.ToFrameNumber), - zap.Int("frames", len(syncMsg.TruncatedClockFrames)), - zap.Int("proofs", len(syncMsg.Proofs)), - ) - if _, err = e.decompressAndStoreCandidates( - peerId, - syncMsg, - e.logger.Debug, - ); err != nil && !errors.Is(err, ErrNoNewFrames) { - e.logger.Error( - "could not decompress and store candidate", - zap.Error(err), - ) - e.syncingTarget = nil - e.syncingStatus = SyncStatusFailed - if err := cc.Close(); err != nil { - e.logger.Error("error while closing connection", zap.Error(err)) - } - return - } - } - if err != nil && err != io.EOF && !errors.Is(err, ErrNoNewFrames) { - e.syncingTarget = nil - e.syncingStatus = SyncStatusFailed - e.logger.Error("error while receiving sync", zap.Error(err)) - if err := cc.Close(); err != nil { - e.logger.Error("error while closing connection", zap.Error(err)) - } - return - } - } - - if err := cc.Close(); err != nil { - e.logger.Error("error while closing connection", zap.Error(err)) - } - - e.syncingTarget = nil - e.syncingStatus = SyncStatusNotSyncing - }() - - return latest, nil -} - func (e *CeremonyDataClockConsensusEngine) sync( currentLatest *protobufs.ClockFrame, maxFrame uint64, @@ -1095,18 +961,48 @@ func (e *CeremonyDataClockConsensusEngine) sync( from = 1 } - if maxFrame > from { - s, err := client.GetCompressedSyncFrames( - context.Background(), - &protobufs.ClockFramesRequest{ - Filter: e.filter, - FromFrameNumber: maxFrame - 16, - }, - grpc.MaxCallRecvMsgSize(600*1024*1024), + if maxFrame > from && maxFrame > 3 { + from = maxFrame - 2 + } + + s, err := client.GetCompressedSyncFrames( + context.Background(), + &protobufs.ClockFramesRequest{ + Filter: e.filter, + FromFrameNumber: from, + }, + grpc.MaxCallRecvMsgSize(600*1024*1024), + ) + if err != nil { + e.logger.Error( + "received error from peer", + zap.Error(err), ) - if err != nil { + 
e.peerMapMx.Lock() + if _, ok := e.peerMap[string(peerId)]; ok { + e.uncooperativePeersMap[string(peerId)] = e.peerMap[string(peerId)] + delete(e.peerMap, string(peerId)) + } + e.peerMapMx.Unlock() + return latest, errors.Wrap(err, "reverse optimistic sync") + } + var syncMsg *protobufs.CeremonyCompressedSync + for syncMsg, err = s.Recv(); err == nil; syncMsg, err = s.Recv() { + e.logger.Info( + "received compressed sync frame", + zap.Uint64("from", syncMsg.FromFrameNumber), + zap.Uint64("to", syncMsg.ToFrameNumber), + zap.Int("frames", len(syncMsg.TruncatedClockFrames)), + zap.Int("proofs", len(syncMsg.Proofs)), + ) + var next *protobufs.ClockFrame + if next, err = e.decompressAndStoreCandidates( + peerId, + syncMsg, + e.logger.Info, + ); err != nil && !errors.Is(err, ErrNoNewFrames) { e.logger.Error( - "received error from peer", + "could not decompress and store candidate", zap.Error(err), ) e.peerMapMx.Lock() @@ -1115,56 +1011,31 @@ func (e *CeremonyDataClockConsensusEngine) sync( delete(e.peerMap, string(peerId)) } e.peerMapMx.Unlock() - return latest, errors.Wrap(err, "reverse optimistic sync") - } - var syncMsg *protobufs.CeremonyCompressedSync - for syncMsg, err = s.Recv(); err == nil; syncMsg, err = s.Recv() { - e.logger.Info( - "received compressed sync frame", - zap.Uint64("from", syncMsg.FromFrameNumber), - zap.Uint64("to", syncMsg.ToFrameNumber), - zap.Int("frames", len(syncMsg.TruncatedClockFrames)), - zap.Int("proofs", len(syncMsg.Proofs)), - ) - var next *protobufs.ClockFrame - if next, err = e.decompressAndStoreCandidates( - peerId, - syncMsg, - e.logger.Info, - ); err != nil && !errors.Is(err, ErrNoNewFrames) { - e.logger.Error( - "could not decompress and store candidate", - zap.Error(err), - ) - e.peerMapMx.Lock() - if _, ok := e.peerMap[string(peerId)]; ok { - e.uncooperativePeersMap[string(peerId)] = e.peerMap[string(peerId)] - delete(e.peerMap, string(peerId)) - } - e.peerMapMx.Unlock() - if err := cc.Close(); err != nil { - 
e.logger.Error("error while closing connection", zap.Error(err)) - } - - return currentLatest, errors.Wrap(err, "reverse optimistic sync") - } - if next != nil { - latest = next - } - } - if err != nil && err != io.EOF && !errors.Is(err, ErrNoNewFrames) { if err := cc.Close(); err != nil { e.logger.Error("error while closing connection", zap.Error(err)) } - e.logger.Error("error while receiving sync", zap.Error(err)) - return latest, errors.Wrap(err, "reverse optimistic sync") - } - e.logger.Info("received new leading frame", zap.Uint64("frame_number", latest.FrameNumber)) + return currentLatest, errors.Wrap(err, "reverse optimistic sync") + } + if next != nil { + latest = next + } + } + if err != nil && err != io.EOF && !errors.Is(err, ErrNoNewFrames) { if err := cc.Close(); err != nil { e.logger.Error("error while closing connection", zap.Error(err)) } + e.logger.Error("error while receiving sync", zap.Error(err)) + return latest, errors.Wrap(err, "reverse optimistic sync") + } + + e.logger.Info( + "received new leading frame", + zap.Uint64("frame_number", latest.FrameNumber), + ) + if err := cc.Close(); err != nil { + e.logger.Error("error while closing connection", zap.Error(err)) } return latest, nil @@ -1181,43 +1052,31 @@ func (e *CeremonyDataClockConsensusEngine) collect( latest = e.previousHead e.syncingStatus = SyncStatusNotSyncing } - maxFrame := uint64(0) - var peerId []byte peerId, maxFrame, err := e.GetMostAheadPeer() if err != nil { e.logger.Warn("no peers available, skipping sync") } else if peerId == nil { e.logger.Info("currently up to date, skipping sync") - } else if e.syncingTarget == nil { - e.syncingStatus = SyncStatusAwaitingResponse - e.logger.Info( - "setting syncing target", - zap.String("peer_id", peer.ID(peerId).String()), - ) - - e.syncingTarget = peerId - e.previousHead = latest - latest, err = e.reverseOptimisticSync(latest, maxFrame, peerId) - } else if maxFrame > latest.FrameNumber { + } else if maxFrame-2 > latest.FrameNumber { 
latest, err = e.sync(latest, maxFrame, peerId) } - go func() { - _, err = e.keyStore.GetProvingKey(e.provingKeyBytes) - if errors.Is(err, store.ErrNotFound) && - latest.FrameNumber-e.lastKeyBundleAnnouncementFrame > 6 { - if err = e.announceKeyBundle(); err != nil { - panic(err) - } - e.lastKeyBundleAnnouncementFrame = latest.FrameNumber - } - }() - e.logger.Info( "returning leader frame", zap.Uint64("frame_number", latest.FrameNumber), ) + e.logger.Info("selecting leader") + + latest, err = e.commitLongestPath(latest) + if err != nil { + e.logger.Error("could not collect longest path", zap.Error(err)) + latest, _, err = e.clockStore.GetDataClockFrame(e.filter, 0) + if err != nil { + panic(err) + } + } + e.setFrame(latest) e.state = consensus.EngineStateProving return latest, nil diff --git a/node/consensus/ceremony/execution_registration.go b/node/consensus/ceremony/execution_registration.go index 7e89a08..b781d1a 100644 --- a/node/consensus/ceremony/execution_registration.go +++ b/node/consensus/ceremony/execution_registration.go @@ -17,11 +17,11 @@ func (e *CeremonyDataClockConsensusEngine) RegisterExecutor( for { logger.Info( "awaiting frame", - zap.Uint64("current_frame", e.frame), + zap.Uint64("current_frame", e.frame.FrameNumber), zap.Uint64("target_frame", frame), ) - newFrame := e.frame + newFrame := e.frame.FrameNumber if newFrame >= frame { logger.Info( "injecting execution engine at frame", @@ -54,11 +54,11 @@ func (e *CeremonyDataClockConsensusEngine) UnregisterExecutor( for { logger.Info( "awaiting frame", - zap.Uint64("current_frame", e.frame), + zap.Uint64("current_frame", e.frame.FrameNumber), zap.Uint64("target_frame", frame), ) - newFrame := e.frame + newFrame := e.frame.FrameNumber if newFrame >= frame { logger.Info( "removing execution engine at frame", diff --git a/node/consensus/ceremony/peer_messaging.go b/node/consensus/ceremony/peer_messaging.go index 8f4c41b..07f1a5a 100644 --- a/node/consensus/ceremony/peer_messaging.go +++ 
b/node/consensus/ceremony/peer_messaging.go @@ -11,7 +11,6 @@ import ( "google.golang.org/grpc" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" - "source.quilibrium.com/quilibrium/monorepo/go-libp2p-blossomsub/pb" "source.quilibrium.com/quilibrium/monorepo/node/execution/ceremony/application" "source.quilibrium.com/quilibrium/monorepo/node/p2p" "source.quilibrium.com/quilibrium/monorepo/node/protobufs" @@ -20,52 +19,6 @@ import ( var ErrNoNewFrames = errors.New("peer reported no frames") -func (e *CeremonyDataClockConsensusEngine) handleSync( - message *pb.Message, -) error { - e.logger.Debug( - "received message", - zap.Binary("data", message.Data), - zap.Binary("from", message.From), - zap.Binary("signature", message.Signature), - ) - if bytes.Equal(message.From, e.pubSub.GetPeerID()) { - return nil - } - - msg := &protobufs.Message{} - - if err := proto.Unmarshal(message.Data, msg); err != nil { - return errors.Wrap(err, "handle sync") - } - - any := &anypb.Any{} - if err := proto.Unmarshal(msg.Payload, any); err != nil { - return errors.Wrap(err, "handle sync") - } - - switch any.TypeUrl { - case protobufs.ProvingKeyAnnouncementType: - if err := e.handleProvingKey( - message.From, - msg.Address, - any, - ); err != nil { - return errors.Wrap(err, "handle sync") - } - case protobufs.KeyBundleAnnouncementType: - if err := e.handleKeyBundle( - message.From, - msg.Address, - any, - ); err != nil { - return errors.Wrap(err, "handle sync") - } - } - - return nil -} - // GetCompressedSyncFrames implements protobufs.CeremonyServiceServer. 
func (e *CeremonyDataClockConsensusEngine) GetCompressedSyncFrames( request *protobufs.ClockFramesRequest, @@ -153,7 +106,7 @@ func (e *CeremonyDataClockConsensusEngine) GetCompressedSyncFrames( } } - max := e.frame + max := e.frame.FrameNumber to := request.ToFrameNumber // We need to slightly rewind, to compensate for unconfirmed frame heads on a @@ -469,93 +422,3 @@ func (e *CeremonyDataClockConsensusEngine) GetPublicChannel( ) error { return errors.New("not supported") } - -func (e *CeremonyDataClockConsensusEngine) handleProvingKeyRequest( - peerID []byte, - address []byte, - any *anypb.Any, -) error { - if bytes.Equal(peerID, e.pubSub.GetPeerID()) { - return nil - } - - request := &protobufs.ProvingKeyRequest{} - if err := any.UnmarshalTo(request); err != nil { - return nil - } - - if len(request.ProvingKeyBytes) == 0 { - e.logger.Debug( - "received proving key request for empty key", - zap.Binary("peer_id", peerID), - zap.Binary("address", address), - ) - return nil - } - - e.pubSub.Subscribe( - append(append([]byte{}, e.filter...), peerID...), - e.handleSync, - true, - ) - - e.logger.Debug( - "received proving key request", - zap.Binary("peer_id", peerID), - zap.Binary("address", address), - zap.Binary("proving_key", request.ProvingKeyBytes), - ) - - var provingKey *protobufs.ProvingKeyAnnouncement - inclusion, err := e.keyStore.GetProvingKey(request.ProvingKeyBytes) - if err != nil { - if !errors.Is(err, store.ErrNotFound) { - e.logger.Debug( - "peer asked for proving key that returned error", - zap.Binary("peer_id", peerID), - zap.Binary("address", address), - zap.Binary("proving_key", request.ProvingKeyBytes), - ) - return nil - } - - provingKey, err = e.keyStore.GetStagedProvingKey(request.ProvingKeyBytes) - if !errors.Is(err, store.ErrNotFound) { - e.logger.Debug( - "peer asked for proving key that returned error", - zap.Binary("peer_id", peerID), - zap.Binary("address", address), - zap.Binary("proving_key", request.ProvingKeyBytes), - ) - return nil - 
} else if err != nil { - e.logger.Debug( - "peer asked for unknown proving key", - zap.Binary("peer_id", peerID), - zap.Binary("address", address), - zap.Binary("proving_key", request.ProvingKeyBytes), - ) - return nil - } - } else { - err := proto.Unmarshal(inclusion.Data, provingKey) - if err != nil { - e.logger.Debug( - "inclusion commitment could not be deserialized", - zap.Binary("peer_id", peerID), - zap.Binary("address", address), - zap.Binary("proving_key", request.ProvingKeyBytes), - ) - return nil - } - } - - if err := e.publishMessage( - append(append([]byte{}, e.filter...), peerID...), - provingKey, - ); err != nil { - return nil - } - - return nil -} diff --git a/node/consensus/consensus_engine.go b/node/consensus/consensus_engine.go index d9f0fb4..d3b8eca 100644 --- a/node/consensus/consensus_engine.go +++ b/node/consensus/consensus_engine.go @@ -28,22 +28,21 @@ type ConsensusEngine interface { Stop(force bool) <-chan error RegisterExecutor(exec execution.ExecutionEngine, frame uint64) <-chan error UnregisterExecutor(name string, frame uint64, force bool) <-chan error - GetFrame() uint64 - GetDifficulty() uint32 - GetState() EngineState - GetFrameChannel() <-chan uint64 -} - -type DataConsensusEngine interface { - Start(filter []byte, seed []byte) <-chan error - Stop(force bool) <-chan error - RegisterExecutor(exec execution.ExecutionEngine, frame uint64) <-chan error - UnregisterExecutor(name string, frame uint64, force bool) <-chan error - GetFrame() uint64 + GetFrame() *protobufs.ClockFrame + GetDifficulty() uint32 + GetState() EngineState + GetFrameChannel() <-chan *protobufs.ClockFrame +} + +type DataConsensusEngine interface { + Start() <-chan error + Stop(force bool) <-chan error + RegisterExecutor(exec execution.ExecutionEngine, frame uint64) <-chan error + UnregisterExecutor(name string, frame uint64, force bool) <-chan error + GetFrame() *protobufs.ClockFrame GetDifficulty() uint32 GetState() EngineState GetFrameChannel() <-chan 
*protobufs.ClockFrame - GetActiveFrame() *protobufs.ClockFrame GetProvingKey( engineConfig *config.EngineConfig, ) (crypto.Signer, keys.KeyType, []byte, []byte) @@ -52,13 +51,13 @@ type DataConsensusEngine interface { } func GetMinimumVersionCutoff() time.Time { - return time.Date(2023, time.December, 2, 7, 0, 0, 0, time.UTC) + return time.Date(2024, time.January, 3, 7, 0, 0, 0, time.UTC) } func GetMinimumVersion() []byte { - return []byte{0x01, 0x01, 0x08} + return []byte{0x01, 0x02, 0x00} } func GetVersion() []byte { - return []byte{0x01, 0x01, 0x08} + return []byte{0x01, 0x02, 0x00} } diff --git a/node/consensus/master/broadcast_messaging.go b/node/consensus/master/broadcast_messaging.go index ba59f07..a956a3e 100644 --- a/node/consensus/master/broadcast_messaging.go +++ b/node/consensus/master/broadcast_messaging.go @@ -36,7 +36,6 @@ func (e *MasterClockConsensusEngine) handleMessage(message *pb.Message) error { eg := errgroup.Group{} eg.SetLimit(len(e.executionEngines)) - for name := range e.executionEngines { name := name eg.Go(func() error { @@ -52,7 +51,6 @@ func (e *MasterClockConsensusEngine) handleMessage(message *pb.Message) error { ) return errors.Wrap(err, "handle message") } - for _, m := range messages { m := m if err := e.publishMessage(m.Address, m); err != nil { @@ -64,11 +62,9 @@ func (e *MasterClockConsensusEngine) handleMessage(message *pb.Message) error { return errors.Wrap(err, "handle message") } } - return nil }) } - if err := eg.Wait(); err != nil { e.logger.Error("rejecting invalid message", zap.Error(err)) return errors.Wrap(err, "execution failed") @@ -96,7 +92,7 @@ func (e *MasterClockConsensusEngine) handleClockFrameData( return errors.Wrap(err, "handle clock frame data") } - if e.frame > frame.FrameNumber { + if e.frame.FrameNumber > frame.FrameNumber { e.logger.Debug( "received anachronistic frame", zap.Binary("sender", peerID), @@ -131,7 +127,7 @@ func (e *MasterClockConsensusEngine) handleClockFrameData( return errors.Wrap(err, 
"handle clock frame data") } - if e.frame < frame.FrameNumber { + if e.frame.FrameNumber < frame.FrameNumber { if err := e.enqueueSeenFrame(frame); err != nil { e.logger.Error("could not enqueue seen clock frame", zap.Error(err)) return errors.Wrap(err, "handle clock frame data") diff --git a/node/consensus/master/consensus_frames.go b/node/consensus/master/consensus_frames.go index 53693a2..04bd699 100644 --- a/node/consensus/master/consensus_frames.go +++ b/node/consensus/master/consensus_frames.go @@ -43,8 +43,7 @@ func (e *MasterClockConsensusEngine) setFrame(frame *protobufs.ClockFrame) { copy(previousSelectorBytes[:], frame.Output[:516]) e.logger.Debug("set frame", zap.Uint64("frame_number", frame.FrameNumber)) - e.frame = frame.FrameNumber - e.latestFrame = frame + e.frame = frame go func() { e.frameChan <- e.frame @@ -53,7 +52,7 @@ func (e *MasterClockConsensusEngine) setFrame(frame *protobufs.ClockFrame) { func ( e *MasterClockConsensusEngine, -) createGenesisFrame() *protobufs.ClockFrame { +) CreateGenesisFrame() *protobufs.ClockFrame { e.logger.Debug("creating genesis frame") b := sha3.Sum256(e.input) v := vdf.New(e.difficulty, b) @@ -65,7 +64,7 @@ func ( e.logger.Debug("proving genesis frame") input := []byte{} input = append(input, e.filter...) 
- input = binary.BigEndian.AppendUint64(input, e.frame) + input = binary.BigEndian.AppendUint64(input, 0) input = binary.BigEndian.AppendUint32(input, e.difficulty) if bytes.Equal(e.input, []byte{0x00}) { value := [516]byte{} @@ -82,7 +81,7 @@ func ( frame := &protobufs.ClockFrame{ Filter: e.filter, - FrameNumber: e.frame, + FrameNumber: 0, Timestamp: 0, Difficulty: e.difficulty, Input: inputMessage, @@ -107,13 +106,13 @@ func (e *MasterClockConsensusEngine) collect( if e.state == consensus.EngineStateCollecting { e.logger.Debug("collecting vdf proofs") - latest := e.latestFrame + latest := e.frame if e.syncingStatus == SyncStatusNotSyncing { peer, err := e.pubSub.GetRandomPeer(e.filter) if err != nil { if errors.Is(err, p2p.ErrNoPeersAvailable) { - e.logger.Warn("no peers available, skipping sync") + e.logger.Debug("no peers available, skipping sync") } else { e.logger.Error("error while fetching random peer", zap.Error(err)) } @@ -200,10 +199,10 @@ func ( }) if len(e.seenFrames) == 0 { - return e.latestFrame, nil + return e.frame, nil } - prev := e.latestFrame + prev := e.frame committedSet := []*protobufs.ClockFrame{} for len(e.seenFrames) > 0 { diff --git a/node/consensus/master/execution_registration.go b/node/consensus/master/execution_registration.go index 69657cb..e5dde0c 100644 --- a/node/consensus/master/execution_registration.go +++ b/node/consensus/master/execution_registration.go @@ -17,7 +17,7 @@ func (e *MasterClockConsensusEngine) RegisterExecutor( go func() { logger.Info( "starting execution engine at frame", - zap.Uint64("current_frame", e.frame), + zap.Uint64("current_frame", e.frame.FrameNumber), ) err := <-exec.Start() if err != nil { @@ -29,11 +29,11 @@ func (e *MasterClockConsensusEngine) RegisterExecutor( for { logger.Info( "awaiting frame", - zap.Uint64("current_frame", e.frame), + zap.Uint64("current_frame", e.frame.FrameNumber), zap.Uint64("target_frame", frame), ) - newFrame := e.frame + newFrame := e.frame.FrameNumber if newFrame >= 
frame { logger.Info( "injecting execution engine at frame", @@ -76,11 +76,11 @@ func (e *MasterClockConsensusEngine) UnregisterExecutor( for { logger.Info( "awaiting frame", - zap.Uint64("current_frame", e.frame), + zap.Uint64("current_frame", e.frame.FrameNumber), zap.Uint64("target_frame", frame), ) - newFrame := e.frame + newFrame := e.frame.FrameNumber if newFrame >= frame { logger.Info( "removing execution engine at frame", diff --git a/node/consensus/master/master_clock_consensus_engine.go b/node/consensus/master/master_clock_consensus_engine.go index 130149a..e8f6c22 100644 --- a/node/consensus/master/master_clock_consensus_engine.go +++ b/node/consensus/master/master_clock_consensus_engine.go @@ -25,16 +25,15 @@ const ( ) type MasterClockConsensusEngine struct { - frame uint64 + frame *protobufs.ClockFrame difficulty uint32 logger *zap.Logger state consensus.EngineState pubSub p2p.PubSub keyManager keys.KeyManager lastFrameReceivedAt time.Time - latestFrame *protobufs.ClockFrame - frameChan chan uint64 + frameChan chan *protobufs.ClockFrame executionEngines map[string]execution.ExecutionEngine filter []byte input []byte @@ -79,20 +78,29 @@ func NewMasterClockConsensusEngine( } e := &MasterClockConsensusEngine{ - frame: 0, + frame: nil, difficulty: 10000, logger: logger, state: consensus.EngineStateStopped, keyManager: keyManager, pubSub: pubSub, - frameChan: make(chan uint64), executionEngines: map[string]execution.ExecutionEngine{}, + frameChan: make(chan *protobufs.ClockFrame), input: seed, lastFrameReceivedAt: time.Time{}, syncingStatus: SyncStatusNotSyncing, clockStore: clockStore, } + latestFrame, err := e.clockStore.GetLatestMasterClockFrame(e.filter) + if err != nil && !errors.Is(err, store.ErrNotFound) { + panic(err) + } + + if latestFrame != nil { + e.frame = latestFrame + } + if e.filter, err = hex.DecodeString(engineConfig.Filter); err != nil { panic(errors.Wrap(err, "could not parse filter value")) } @@ -103,7 +111,7 @@ func 
NewMasterClockConsensusEngine( } func (e *MasterClockConsensusEngine) Start() <-chan error { - e.logger.Info("starting consensus engine") + e.logger.Info("starting master consensus engine") e.state = consensus.EngineStateStarting errChan := make(chan error) @@ -112,7 +120,7 @@ func (e *MasterClockConsensusEngine) Start() <-chan error { latestFrame, err := e.clockStore.GetLatestMasterClockFrame(e.filter) if err != nil && errors.Is(err, store.ErrNotFound) { - latestFrame = e.createGenesisFrame() + latestFrame = e.CreateGenesisFrame() txn, err := e.clockStore.NewTransaction() if err != nil { panic(err) @@ -131,11 +139,111 @@ func (e *MasterClockConsensusEngine) Start() <-chan error { e.setFrame(latestFrame) } + e.buildHistoricFrameCache(latestFrame) + + e.logger.Info("subscribing to pubsub messages") + e.pubSub.Subscribe(e.filter, e.handleMessage, true) + e.pubSub.Subscribe(e.pubSub.GetPeerID(), e.handleSync, true) + + e.state = consensus.EngineStateCollecting + + go func() { + for { + e.logger.Info( + "peers in store", + zap.Int("peer_store_count", e.pubSub.GetPeerstoreCount()), + zap.Int("network_peer_count", e.pubSub.GetNetworkPeersCount()), + ) + time.Sleep(10 * time.Second) + } + }() + + go func() { + for e.state < consensus.EngineStateStopping { + var err error + switch e.state { + case consensus.EngineStateCollecting: + currentFrame := latestFrame + if latestFrame, err = e.collect(latestFrame); err != nil { + e.logger.Error("could not collect", zap.Error(err)) + latestFrame = currentFrame + } + case consensus.EngineStateProving: + currentFrame := latestFrame + if latestFrame, err = e.prove(latestFrame); err != nil { + e.logger.Error("could not prove", zap.Error(err)) + latestFrame = currentFrame + } + case consensus.EngineStatePublishing: + if err = e.publishProof(latestFrame); err != nil { + e.logger.Error("could not publish", zap.Error(err)) + } + } + } + }() + + go func() { + errChan <- nil + }() + + return errChan +} + +func (e *MasterClockConsensusEngine) 
Stop(force bool) <-chan error { + e.logger.Info("stopping consensus engine") + e.state = consensus.EngineStateStopping + errChan := make(chan error) + + wg := sync.WaitGroup{} + wg.Add(len(e.executionEngines)) + for name := range e.executionEngines { + name := name + go func(name string) { + err := <-e.UnregisterExecutor(name, e.frame.FrameNumber, force) + if err != nil { + errChan <- err + } + wg.Done() + }(name) + } + + e.logger.Info("waiting for execution engines to stop") + wg.Wait() + e.logger.Info("execution engines stopped") + + e.state = consensus.EngineStateStopped + go func() { + errChan <- nil + }() + return errChan +} + +func (e *MasterClockConsensusEngine) GetDifficulty() uint32 { + return e.difficulty +} + +func (e *MasterClockConsensusEngine) GetFrame() *protobufs.ClockFrame { + return e.frame +} + +func (e *MasterClockConsensusEngine) GetState() consensus.EngineState { + return e.state +} + +func ( + e *MasterClockConsensusEngine, +) GetFrameChannel() <-chan *protobufs.ClockFrame { + return e.frameChan +} + +func (e *MasterClockConsensusEngine) buildHistoricFrameCache( + latestFrame *protobufs.ClockFrame, +) { e.historicFrames = []*protobufs.ClockFrame{} if latestFrame.FrameNumber != 0 { min := uint64(0) - if latestFrame.FrameNumber-255 > min { + if latestFrame.FrameNumber-255 > min && latestFrame.FrameNumber > 255 { min = latestFrame.FrameNumber - 255 } @@ -163,98 +271,4 @@ func (e *MasterClockConsensusEngine) Start() <-chan error { } e.historicFrames = append(e.historicFrames, latestFrame) - - e.logger.Info("subscribing to pubsub messages") - e.pubSub.Subscribe(e.filter, e.handleMessage, true) - e.pubSub.Subscribe(e.pubSub.GetPeerID(), e.handleSync, true) - - e.state = consensus.EngineStateCollecting - - go func() { - for { - e.logger.Info( - "peers in store", - zap.Int("peer_store_count", e.pubSub.GetPeerstoreCount()), - zap.Int("network_peer_count", e.pubSub.GetNetworkPeersCount()), - ) - time.Sleep(10 * time.Second) - } - }() - - go func() { - 
for e.state < consensus.EngineStateStopping { - var err error - switch e.state { - case consensus.EngineStateCollecting: - if latestFrame, err = e.collect(latestFrame); err != nil { - e.logger.Error("could not collect", zap.Error(err)) - errChan <- err - } - case consensus.EngineStateProving: - if latestFrame, err = e.prove(latestFrame); err != nil { - e.logger.Error("could not prove", zap.Error(err)) - errChan <- err - } - case consensus.EngineStatePublishing: - if err = e.publishProof(latestFrame); err != nil { - e.logger.Error("could not publish", zap.Error(err)) - errChan <- err - } - } - } - }() - - go func() { - errChan <- nil - }() - - return errChan -} - -func (e *MasterClockConsensusEngine) Stop(force bool) <-chan error { - e.logger.Info("stopping consensus engine") - e.state = consensus.EngineStateStopping - errChan := make(chan error) - - wg := sync.WaitGroup{} - wg.Add(len(e.executionEngines)) - for name := range e.executionEngines { - name := name - go func(name string) { - err := <-e.UnregisterExecutor(name, e.frame, force) - if err != nil { - errChan <- err - } - wg.Done() - }(name) - } - - e.logger.Info("waiting for execution engines to stop") - wg.Wait() - e.logger.Info("execution engines stopped") - - e.state = consensus.EngineStateStopped - - e.engineMx.Lock() - defer e.engineMx.Unlock() - go func() { - errChan <- nil - }() - return errChan -} - -func (e *MasterClockConsensusEngine) GetDifficulty() uint32 { - return e.difficulty -} - -func (e *MasterClockConsensusEngine) GetFrame() uint64 { - return e.frame -} - -func (e *MasterClockConsensusEngine) GetState() consensus.EngineState { - return e.state -} - -func (e *MasterClockConsensusEngine) GetFrameChannel() <-chan uint64 { - return e.frameChan } diff --git a/node/consensus/master/peer_messaging.go b/node/consensus/master/peer_messaging.go index 5afedbb..9d64c6b 100644 --- a/node/consensus/master/peer_messaging.go +++ b/node/consensus/master/peer_messaging.go @@ -5,7 +5,6 @@ import ( 
"github.com/pkg/errors" "go.uber.org/zap" - "golang.org/x/sync/errgroup" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" "source.quilibrium.com/quilibrium/monorepo/go-libp2p-blossomsub/pb" @@ -30,46 +29,6 @@ func (e *MasterClockConsensusEngine) handleSync(message *pb.Message) error { return errors.Wrap(err, "handle sync") } - eg := errgroup.Group{} - eg.SetLimit(len(e.executionEngines)) - - for name := range e.executionEngines { - name := name - eg.Go(func() error { - messages, err := e.executionEngines[name].ProcessMessage( - msg.Address, - msg, - ) - if err != nil { - e.logger.Error( - "could not process message for engine", - zap.Error(err), - zap.String("engine_name", name), - ) - return errors.Wrap(err, "handle message") - } - - for _, m := range messages { - m := m - if err := e.publishMessage(e.filter, m); err != nil { - e.logger.Error( - "could not publish message for engine", - zap.Error(err), - zap.String("engine_name", name), - ) - return errors.Wrap(err, "handle message") - } - } - - return nil - }) - } - - if err := eg.Wait(); err != nil { - e.logger.Error("rejecting invalid message", zap.Error(err)) - return errors.Wrap(err, "handle sync") - } - switch any.TypeUrl { case protobufs.ClockFramesResponseType: if err := e.handleClockFramesResponse( @@ -149,7 +108,7 @@ func (e *MasterClockConsensusEngine) handleClockFramesResponse( zap.Uint64("frame_number", frame.FrameNumber), ) - if e.frame < frame.FrameNumber { + if e.frame.FrameNumber < frame.FrameNumber { if err := e.enqueueSeenFrame(frame); err != nil { e.logger.Error("could not enqueue seen clock frame", zap.Error(err)) return errors.Wrap(err, "handle clock frame response") @@ -186,7 +145,7 @@ func (e *MasterClockConsensusEngine) handleClockFramesRequest( from := request.FromFrameNumber - if e.frame < from || len(e.historicFrames) == 0 { + if e.frame.FrameNumber < from || len(e.historicFrames) == 0 { e.logger.Debug( "peer asked for undiscovered frame", 
zap.Binary("peer_id", peerID), @@ -210,8 +169,8 @@ func (e *MasterClockConsensusEngine) handleClockFramesRequest( to = request.FromFrameNumber + 127 } - if int(to) > int(e.latestFrame.FrameNumber) { - to = e.latestFrame.FrameNumber + if int(to) > int(e.frame.FrameNumber) { + to = e.frame.FrameNumber } e.logger.Debug( diff --git a/node/crypto/kzg.go b/node/crypto/kzg.go index ffbf9a6..1c0488a 100644 --- a/node/crypto/kzg.go +++ b/node/crypto/kzg.go @@ -75,6 +75,221 @@ var CeremonyPotPubKeys []curves.PairingPoint var CeremonySignatories []curves.Point var FFTBLS48581 map[uint64][]curves.PairingPoint = make(map[uint64][]curves.PairingPoint) +func TestInit(file string) { + // start with phase 1 ceremony: + csBytes, err := os.ReadFile(file) + if err != nil { + panic(err) + } + + bls48581.Init() + + cs := &CeremonyState{} + if err := json.Unmarshal(csBytes, cs); err != nil { + panic(err) + } + + g1s := make([]curves.PairingPoint, 1024) + g2s := make([]curves.PairingPoint, 257) + g1ffts := make([]curves.PairingPoint, 1024) + wg := sync.WaitGroup{} + wg.Add(1024) + + for i := 0; i < 1024; i++ { + i := i + go func() { + b, err := hex.DecodeString(cs.PowersOfTau.G1Affines[i][2:]) + if err != nil { + panic(err) + } + g1, err := curves.BLS48581G1().NewGeneratorPoint().FromAffineCompressed(b) + if err != nil { + panic(err) + } + g1s[i] = g1.(curves.PairingPoint) + + f, err := hex.DecodeString(cs.PowersOfTau.G1FFT[i][2:]) + if err != nil { + panic(err) + } + g1fft, err := curves.BLS48581G1().NewGeneratorPoint().FromAffineCompressed(f) + if err != nil { + panic(err) + } + g1ffts[i] = g1fft.(curves.PairingPoint) + + if i < 257 { + b, err := hex.DecodeString(cs.PowersOfTau.G2Affines[i][2:]) + if err != nil { + panic(err) + } + g2, err := curves.BLS48581G2().NewGeneratorPoint().FromAffineCompressed( + b, + ) + if err != nil { + panic(err) + } + g2s[i] = g2.(curves.PairingPoint) + } + wg.Done() + }() + } + + wg.Wait() + + wg.Add(len(cs.Witness.RunningProducts)) + 
CeremonyRunningProducts = make([]curves.PairingPoint, len(cs.Witness.RunningProducts)) + for i, s := range cs.Witness.RunningProducts { + i, s := i, s + go func() { + b, err := hex.DecodeString(s[2:]) + if err != nil { + panic(err) + } + + g1, err := curves.BLS48581G1().NewGeneratorPoint().FromAffineCompressed(b) + if err != nil { + panic(err) + } + CeremonyRunningProducts[i] = g1.(curves.PairingPoint) + wg.Done() + }() + } + wg.Wait() + + wg.Add(len(cs.Witness.PotPubKeys)) + CeremonyPotPubKeys = make([]curves.PairingPoint, len(cs.Witness.PotPubKeys)) + for i, s := range cs.Witness.PotPubKeys { + i, s := i, s + go func() { + b, err := hex.DecodeString(s[2:]) + if err != nil { + panic(err) + } + + g2, err := curves.BLS48581G2().NewGeneratorPoint().FromAffineCompressed(b) + if err != nil { + panic(err) + } + CeremonyPotPubKeys[i] = g2.(curves.PairingPoint) + wg.Done() + }() + } + wg.Wait() + + wg.Add(len(cs.VoucherPubKeys)) + CeremonySignatories = make([]curves.Point, len(cs.VoucherPubKeys)) + for i, s := range cs.VoucherPubKeys { + i, s := i, s + go func() { + b, err := hex.DecodeString(s[2:]) + if err != nil { + panic(err) + } + + CeremonySignatories[i], err = curves.ED448().Point.FromAffineCompressed(b) + if err != nil { + panic(err) + } + wg.Done() + }() + } + wg.Wait() + + CeremonyBLS48581G1 = g1s + CeremonyBLS48581G2 = g2s + + // Post-ceremony, precompute everything and put it in the finalized ceremony + // state + modulus := make([]byte, 73) + bls48581.NewBIGints(bls48581.CURVE_Order, nil).ToBytes(modulus) + q := new(big.Int).SetBytes(modulus) + sizes := []int64{16, 128, 1024} + + wg.Add(len(sizes)) + root := make([]curves.PairingScalar, 3) + roots := make([][]curves.PairingScalar, 3) + reverseRoots := make([][]curves.PairingScalar, 3) + ffts := make([][]curves.PairingPoint, 3) + + for idx, i := range sizes { + i := i + idx := idx + go func() { + exp := new(big.Int).Quo( + new(big.Int).Sub(q, big.NewInt(1)), + big.NewInt(i), + ) + rootOfUnity := 
new(big.Int).Exp(big.NewInt(int64(37)), exp, q) + roots[idx] = make([]curves.PairingScalar, i+1) + reverseRoots[idx] = make([]curves.PairingScalar, i+1) + wg2 := sync.WaitGroup{} + wg2.Add(int(i)) + for j := int64(0); j < i; j++ { + j := j + go func() { + rev := big.NewInt(int64(j)) + r := new(big.Int).Exp( + rootOfUnity, + rev, + q, + ) + scalar, _ := (&curves.ScalarBls48581{}).SetBigInt(r) + + if rev.Cmp(big.NewInt(1)) == 0 { + root[idx] = scalar.(curves.PairingScalar) + } + + roots[idx][j] = scalar.(curves.PairingScalar) + reverseRoots[idx][i-j] = roots[idx][j] + wg2.Done() + }() + } + wg2.Wait() + roots[idx][i] = roots[idx][0] + reverseRoots[idx][0] = reverseRoots[idx][i] + wg.Done() + }() + } + wg.Wait() + + wg.Add(len(sizes)) + for i := range root { + i := i + RootOfUnityBLS48581[uint64(sizes[i])] = root[i] + RootsOfUnityBLS48581[uint64(sizes[i])] = roots[i] + ReverseRootsOfUnityBLS48581[uint64(sizes[i])] = reverseRoots[i] + + go func() { + // We precomputed 65536, others are cheap and will be fully precomputed + // post-ceremony + if sizes[i] < 65536 { + fftG1, err := FFTG1( + CeremonyBLS48581G1[:sizes[i]], + *curves.BLS48581( + curves.BLS48581G1().NewGeneratorPoint(), + ), + uint64(sizes[i]), + true, + ) + if err != nil { + panic(err) + } + + ffts[i] = fftG1 + } else { + ffts[i] = g1ffts + } + wg.Done() + }() + } + wg.Wait() + + for i := range root { + FFTBLS48581[uint64(sizes[i])] = ffts[i] + } +} + func Init() { // start with phase 1 ceremony: csBytes, err := os.ReadFile("./ceremony.json") @@ -202,7 +417,7 @@ func Init() { // Post-ceremony, precompute everything and put it in the finalized ceremony // state modulus := make([]byte, 73) - bls48581.NewBIGints(bls48581.CURVE_Order).ToBytes(modulus) + bls48581.NewBIGints(bls48581.CURVE_Order, nil).ToBytes(modulus) q := new(big.Int).SetBytes(modulus) sizes := []int64{16, 128, 1024, 65536} @@ -310,7 +525,7 @@ func NewKZGProver( func DefaultKZGProver() *KZGProver { modulus := make([]byte, 73) - 
bls48581.NewBIGints(bls48581.CURVE_Order).ToBytes(modulus) + bls48581.NewBIGints(bls48581.CURVE_Order, nil).ToBytes(modulus) q := new(big.Int).SetBytes(modulus) return NewKZGProver( curves.BLS48581(curves.BLS48581G1().Point), @@ -426,7 +641,7 @@ func (p *KZGProver) EvaluateLagrangeForm( xBI := x.BigInt() modulus := make([]byte, 73) - bls48581.NewBIGints(bls48581.CURVE_Order).ToBytes(modulus) + bls48581.NewBIGints(bls48581.CURVE_Order, nil).ToBytes(modulus) q := new(big.Int).SetBytes(modulus) xBI.Exp(xBI, width.BigInt(), q) xBI.Sub(xBI, big.NewInt(1)) diff --git a/node/crypto/kzg_test.go b/node/crypto/kzg_test.go index 87c4668..7f684af 100644 --- a/node/crypto/kzg_test.go +++ b/node/crypto/kzg_test.go @@ -81,7 +81,7 @@ func TestMain(m *testing.M) { // Post-ceremony, precompute everything and put it in the finalized ceremony // state modulus := make([]byte, 73) - bls48581.NewBIGints(bls48581.CURVE_Order).ToBytes(modulus) + bls48581.NewBIGints(bls48581.CURVE_Order, nil).ToBytes(modulus) q := new(big.Int).SetBytes(modulus) sizes := []int64{16} @@ -173,7 +173,7 @@ func TestMain(m *testing.M) { func TestKzgBytesToPoly(t *testing.T) { modulus := make([]byte, 73) - bls48581.NewBIGints(bls48581.CURVE_Order).ToBytes(modulus) + bls48581.NewBIGints(bls48581.CURVE_Order, nil).ToBytes(modulus) q := new(big.Int).SetBytes(modulus) p := crypto.NewKZGProver(curves.BLS48581(curves.BLS48581G1().Point), sha3.New256, q) @@ -215,7 +215,7 @@ func TestKzgBytesToPoly(t *testing.T) { func TestPolynomialCommitment(t *testing.T) { modulus := make([]byte, 73) - bls48581.NewBIGints(bls48581.CURVE_Order).ToBytes(modulus) + bls48581.NewBIGints(bls48581.CURVE_Order, nil).ToBytes(modulus) q := new(big.Int).SetBytes(modulus) p := crypto.NewKZGProver(curves.BLS48581(curves.BLS48581G1().Point), sha3.New256, q) @@ -263,7 +263,7 @@ func TestPolynomialCommitment(t *testing.T) { func TestKZGProof(t *testing.T) { modulus := make([]byte, 73) - bls48581.NewBIGints(bls48581.CURVE_Order).ToBytes(modulus) + 
bls48581.NewBIGints(bls48581.CURVE_Order, nil).ToBytes(modulus) q := new(big.Int).SetBytes(modulus) p := crypto.NewKZGProver(curves.BLS48581(curves.BLS48581G1().Point), sha3.New256, q) @@ -290,27 +290,51 @@ func TestKZGProof(t *testing.T) { curves.BLS48581G1().NewGeneratorPoint(), ), 16, - false, + true, ) require.NoError(t, err) - commit, err := p.Commit(evalPoly) + commit, err := p.Commit(poly) require.NoError(t, err) - z, err := (&curves.ScalarBls48581{}).SetBigInt(big.NewInt(2)) + z := crypto.RootsOfUnityBLS48581[16][2] require.NoError(t, err) - checky := poly[len(poly)-1] - for i := len(poly) - 2; i >= 0; i-- { - checky = checky.Mul(z).Add(poly[i]).(curves.PairingScalar) + checky := evalPoly[len(poly)-1] + for i := len(evalPoly) - 2; i >= 0; i-- { + checky = checky.Mul(z).Add(evalPoly[i]).(curves.PairingScalar) } - y, err := p.EvaluateLagrangeForm(evalPoly, z.(curves.PairingScalar), 16, 0) - require.NoError(t, err) - require.Equal(t, y.Cmp(checky), 0) + fmt.Printf("%+x\n", checky.Bytes()) - proof, err := p.Prove(evalPoly, commit, z.(curves.PairingScalar)) + divisors := make([]curves.PairingScalar, 2) + divisors[0] = (&curves.ScalarBls48581{}).Zero().Sub(z).(*curves.ScalarBls48581) + divisors[1] = (&curves.ScalarBls48581{}).One().(*curves.ScalarBls48581) + + a := make([]curves.PairingScalar, len(evalPoly)) + for i := 0; i < len(a); i++ { + a[i] = evalPoly[i].Clone().(*curves.ScalarBls48581) + } + + // Adapted from Feist's amortized proofs: + aPos := len(a) - 1 + bPos := len(divisors) - 1 + diff := aPos - bPos + out := make([]curves.PairingScalar, diff+1, diff+1) + for diff >= 0 { + out[diff] = a[aPos].Div(divisors[bPos]).(*curves.ScalarBls48581) + for i := bPos; i >= 0; i-- { + a[diff+i] = a[diff+i].Sub( + out[diff].Mul(divisors[i]), + ).(*curves.ScalarBls48581) + } + aPos -= 1 + diff -= 1 + } + + proof, err := p.PointLinearCombination(crypto.CeremonyBLS48581G1[:15], out) + // proof, err := p.Prove(evalPoly, commit, z.(curves.PairingScalar)) require.NoError(t, 
err) - require.True(t, p.Verify(commit, z.(curves.PairingScalar), y, proof)) + require.True(t, p.Verify(commit, z, checky, proof)) commitments, err := p.CommitAggregate( [][]curves.PairingScalar{evalPoly}, diff --git a/node/execution/ceremony/application/ceremony_application.go b/node/execution/ceremony/application/ceremony_application.go index 0819a39..85a09ea 100644 --- a/node/execution/ceremony/application/ceremony_application.go +++ b/node/execution/ceremony/application/ceremony_application.go @@ -14,8 +14,6 @@ var ErrInvalidStateTransition = errors.New("invalid state transition") type CeremonyApplicationState int -const V118_CUTOFF = uint64(45000) - var CEREMONY_ADDRESS = []byte{ // SHA3-256("q_kzg_ceremony") 0x34, 0x00, 0x1b, 0xe7, 0x43, 0x2c, 0x2e, 0x66, @@ -50,7 +48,7 @@ type CeremonyApplication struct { StateCount uint64 RoundCount uint64 LobbyState CeremonyApplicationState - ActiveParticipants []*protobufs.Ed448PublicKey + ActiveParticipants []*protobufs.CeremonyLobbyJoin NextRoundPreferredParticipants []*protobufs.Ed448PublicKey LatestSeenProverAttestations []*protobufs.CeremonySeenProverAttestation DroppedParticipantAttestations []*protobufs.CeremonyDroppedProverAttestation @@ -82,8 +80,22 @@ func (a *CeremonyApplication) Equals(b *CeremonyApplication) bool { for i := range a.ActiveParticipants { if !bytes.Equal( - a.ActiveParticipants[i].KeyValue, - b.ActiveParticipants[i].KeyValue, + a.ActiveParticipants[i].PublicKeySignatureEd448.PublicKey.KeyValue, + b.ActiveParticipants[i].PublicKeySignatureEd448.PublicKey.KeyValue, + ) { + return false + } + + if !bytes.Equal( + a.ActiveParticipants[i].IdentityKey.KeyValue, + b.ActiveParticipants[i].IdentityKey.KeyValue, + ) { + return false + } + + if !bytes.Equal( + a.ActiveParticipants[i].SignedPreKey.KeyValue, + b.ActiveParticipants[i].SignedPreKey.KeyValue, ) { return false } @@ -856,7 +868,7 @@ func (a *CeremonyApplication) ApplyTransition( } } - if currentFrameNumber > V118_CUTOFF && a.StateCount > 100 { + 
if a.StateCount > 10 { shouldReset = true } @@ -866,17 +878,19 @@ func (a *CeremonyApplication) ApplyTransition( a.RoundCount = 0 for _, p := range a.ActiveParticipants { p := p - if _, ok := droppedProversMap[string(p.KeyValue)]; !ok { + if _, ok := droppedProversMap[string( + p.PublicKeySignatureEd448.PublicKey.KeyValue, + )]; !ok { a.NextRoundPreferredParticipants = append( append( []*protobufs.Ed448PublicKey{}, - p, + p.PublicKeySignatureEd448.PublicKey, ), a.NextRoundPreferredParticipants..., ) } } - a.ActiveParticipants = []*protobufs.Ed448PublicKey{} + a.ActiveParticipants = []*protobufs.CeremonyLobbyJoin{} a.DroppedParticipantAttestations = []*protobufs.CeremonyDroppedProverAttestation{} a.LatestSeenProverAttestations = @@ -958,7 +972,7 @@ func (a *CeremonyApplication) ApplyTransition( } a.LobbyState = CEREMONY_APPLICATION_STATE_VALIDATING - a.ActiveParticipants = []*protobufs.Ed448PublicKey{} + a.ActiveParticipants = []*protobufs.CeremonyLobbyJoin{} a.DroppedParticipantAttestations = []*protobufs.CeremonyDroppedProverAttestation{} a.LatestSeenProverAttestations = @@ -984,7 +998,7 @@ func (a *CeremonyApplication) ApplyTransition( } } - if currentFrameNumber > V118_CUTOFF && a.StateCount > 100 { + if a.StateCount > 10 { shouldReset = true } @@ -994,17 +1008,19 @@ func (a *CeremonyApplication) ApplyTransition( a.RoundCount = 0 for _, p := range a.ActiveParticipants { p := p - if _, ok := droppedProversMap[string(p.KeyValue)]; !ok { + if _, ok := droppedProversMap[string( + p.PublicKeySignatureEd448.PublicKey.KeyValue, + )]; !ok { a.NextRoundPreferredParticipants = append( append( []*protobufs.Ed448PublicKey{}, - p, + p.PublicKeySignatureEd448.PublicKey, ), a.NextRoundPreferredParticipants..., ) } } - a.ActiveParticipants = []*protobufs.Ed448PublicKey{} + a.ActiveParticipants = []*protobufs.CeremonyLobbyJoin{} a.DroppedParticipantAttestations = []*protobufs.CeremonyDroppedProverAttestation{} a.LatestSeenProverAttestations = @@ -1036,7 +1052,25 @@ func (a 
*CeremonyApplication) ApplyTransition( } } - if a.UpdatedTranscript == nil { + shouldReset := false + if a.StateCount > 100 { + shouldReset = true + } + + if shouldReset { + a.LobbyState = CEREMONY_APPLICATION_STATE_OPEN + a.StateCount = 0 + a.RoundCount = 0 + a.ActiveParticipants = []*protobufs.CeremonyLobbyJoin{} + a.DroppedParticipantAttestations = + []*protobufs.CeremonyDroppedProverAttestation{} + a.LatestSeenProverAttestations = + []*protobufs.CeremonySeenProverAttestation{} + a.TranscriptRoundAdvanceCommits = + []*protobufs.CeremonyAdvanceRound{} + a.TranscriptShares = + []*protobufs.CeremonyTranscriptShare{} + } else if a.UpdatedTranscript == nil { rewardMultiplier := uint64(1) for i := 0; i < len(a.FinalCommits)-1; i++ { rewardMultiplier = rewardMultiplier << 1 @@ -1064,7 +1098,7 @@ func (a *CeremonyApplication) ApplyTransition( a.LobbyState = CEREMONY_APPLICATION_STATE_OPEN a.StateCount = 0 a.RoundCount = 0 - a.ActiveParticipants = []*protobufs.Ed448PublicKey{} + a.ActiveParticipants = []*protobufs.CeremonyLobbyJoin{} a.DroppedParticipantAttestations = []*protobufs.CeremonyDroppedProverAttestation{} a.LatestSeenProverAttestations = diff --git a/node/execution/ceremony/application/ceremony_application_in_progress.go b/node/execution/ceremony/application/ceremony_application_in_progress.go index abcc6d7..b61e5f3 100644 --- a/node/execution/ceremony/application/ceremony_application_in_progress.go +++ b/node/execution/ceremony/application/ceremony_application_in_progress.go @@ -22,7 +22,10 @@ func (a *CeremonyApplication) applySeenProverAttestation( inParticipantList := false for _, p := range a.ActiveParticipants { - if bytes.Equal(p.KeyValue, seenProverAttestation.SeenProverKey.KeyValue) { + if bytes.Equal( + p.PublicKeySignatureEd448.PublicKey.KeyValue, + seenProverAttestation.SeenProverKey.KeyValue, + ) { inParticipantList = true break } @@ -93,7 +96,7 @@ func (a *CeremonyApplication) applyDroppedProverAttestation( inParticipantList := false for _, p := 
range a.ActiveParticipants { if bytes.Equal( - p.KeyValue, + p.PublicKeySignatureEd448.PublicKey.KeyValue, droppedProverAttestation.DroppedProverKey.KeyValue, ) { inParticipantList = true @@ -189,7 +192,7 @@ func (a *CeremonyApplication) applyTranscriptCommit( inParticipantList := false for _, p := range a.ActiveParticipants { if bytes.Equal( - p.KeyValue, + p.PublicKeySignatureEd448.PublicKey.KeyValue, transcriptCommit.ProverSignature.PublicKey.KeyValue, ) { inParticipantList = true diff --git a/node/execution/ceremony/application/ceremony_application_open.go b/node/execution/ceremony/application/ceremony_application_open.go index ab13d50..6f5b02d 100644 --- a/node/execution/ceremony/application/ceremony_application_open.go +++ b/node/execution/ceremony/application/ceremony_application_open.go @@ -89,11 +89,11 @@ func (a *CeremonyApplication) finalizeParticipantSet() error { power = power >> 1 } - a.ActiveParticipants = []*protobufs.Ed448PublicKey{} + a.ActiveParticipants = []*protobufs.CeremonyLobbyJoin{} for i := 0; i < int(power); i++ { a.ActiveParticipants = append( a.ActiveParticipants, - a.LobbyJoins[i].PublicKeySignatureEd448.PublicKey, + a.LobbyJoins[i], ) } diff --git a/node/execution/ceremony/application/ceremony_application_test.go b/node/execution/ceremony/application/ceremony_application_test.go index be3f033..cfcec43 100644 --- a/node/execution/ceremony/application/ceremony_application_test.go +++ b/node/execution/ceremony/application/ceremony_application_test.go @@ -122,7 +122,10 @@ func TestCeremonyTransitions(t *testing.T) { }) require.NoError(t, err) require.Equal(t, a.LobbyState, CEREMONY_APPLICATION_STATE_IN_PROGRESS) - require.True(t, bytes.Equal(a.ActiveParticipants[0].KeyValue, proverPubKey)) + require.True(t, bytes.Equal( + a.ActiveParticipants[0].PublicKeySignatureEd448.PublicKey.KeyValue, + proverPubKey, + )) tau := curves.BLS48581G1().Scalar.Random(rand.Reader) tau2 := tau.Mul(tau) diff --git 
a/node/execution/ceremony/application/ceremony_application_validating.go b/node/execution/ceremony/application/ceremony_application_validating.go index 4b50fac..d1d36e1 100644 --- a/node/execution/ceremony/application/ceremony_application_validating.go +++ b/node/execution/ceremony/application/ceremony_application_validating.go @@ -2,9 +2,9 @@ package application import ( "bytes" + "crypto/rand" "github.com/pkg/errors" - "golang.org/x/sync/errgroup" "source.quilibrium.com/quilibrium/monorepo/nekryptology/pkg/core/curves" "source.quilibrium.com/quilibrium/monorepo/node/protobufs" ) @@ -37,59 +37,47 @@ func (a *CeremonyApplication) applyTranscript( ) } - g1s := make([]*curves.PointBls48581G1, len(a.UpdatedTranscript.G1Powers)) - eg := errgroup.Group{} - eg.SetLimit(100) + g1s := make([]curves.Point, len(a.UpdatedTranscript.G1Powers)) for i := range a.UpdatedTranscript.G1Powers { i := i - eg.Go(func() error { - if !bytes.Equal( - a.UpdatedTranscript.G1Powers[i].KeyValue, - transcript.G1Powers[i].KeyValue, - ) { - return errors.Wrap(errors.New("invalid g1s"), "apply transcript") - } + if !bytes.Equal( + a.UpdatedTranscript.G1Powers[i].KeyValue, + transcript.G1Powers[i].KeyValue, + ) { + return errors.Wrap(errors.New("invalid g1s"), "apply transcript") + } - g1 := &curves.PointBls48581G1{} - x, err := g1.FromAffineCompressed(a.UpdatedTranscript.G1Powers[i].KeyValue) - if err != nil { - return errors.Wrap(err, "apply transcript") - } - g1, _ = x.(*curves.PointBls48581G1) + g1 := &curves.PointBls48581G1{} + x, err := g1.FromAffineCompressed( + a.UpdatedTranscript.G1Powers[i].KeyValue, + ) + if err != nil { + return errors.Wrap(err, "apply transcript") + } - g1s[i] = g1 - - return nil - }) + g1s[i] = x } - g2s := make([]*curves.PointBls48581G2, len(a.UpdatedTranscript.G2Powers)) + g2s := make([]curves.Point, len(a.UpdatedTranscript.G2Powers)) for i := range a.UpdatedTranscript.G2Powers { i := i - eg.Go(func() error { - if !bytes.Equal( - 
a.UpdatedTranscript.G2Powers[i].KeyValue, - transcript.G2Powers[i].KeyValue, - ) { - return errors.Wrap(errors.New("invalid g2s"), "apply transcript") - } + if !bytes.Equal( + a.UpdatedTranscript.G2Powers[i].KeyValue, + transcript.G2Powers[i].KeyValue, + ) { + return errors.Wrap(errors.New("invalid g2s"), "apply transcript") + } - g2 := &curves.PointBls48581G2{} - x, err := g2.FromAffineCompressed(a.UpdatedTranscript.G2Powers[i].KeyValue) - if err != nil { - return errors.Wrap(err, "apply transcript") - } - g2, _ = x.(*curves.PointBls48581G2) + g2 := &curves.PointBls48581G2{} + x, err := g2.FromAffineCompressed( + a.UpdatedTranscript.G2Powers[i].KeyValue, + ) + if err != nil { + return errors.Wrap(err, "apply transcript") + } - g2s[i] = g2 - - return nil - }) - } - - if err := eg.Wait(); err != nil { - return err + g2s[i] = x } g1Witnesses := []*curves.PointBls48581G1{} @@ -168,52 +156,70 @@ func (a *CeremonyApplication) applyTranscript( } } - mp := []curves.PairingPoint{} mpg2 := curves.BLS48581G2().Point.Generator().(curves.PairingPoint) mpg2n := g2s[1].Neg().(curves.PairingPoint) - for i := 0; i < len(g1s)-1; i++ { - mp = append(mp, g1s[i]) - mp = append(mp, mpg2n) - mp = append(mp, g1s[i+1]) - mp = append(mp, mpg2) - } - - mp2 := []curves.PairingPoint{} mpg1 := curves.BLS48581G1().Point.Generator().(curves.PairingPoint) mpg1n := g1s[1].Neg().(curves.PairingPoint) - for i := 0; i < len(g2s)-1; i++ { - mp2 = append(mp2, mpg1n) - mp2 = append(mp2, g2s[i]) - mp2 = append(mp2, mpg1) - mp2 = append(mp2, g2s[i+1]) + + randoms := []curves.Scalar{} + sum := curves.BLS48581G1().Scalar.Zero() + + for i := 0; i < len(g1s)-1; i++ { + randoms = append(randoms, curves.BLS48581G1().Scalar.Random(rand.Reader)) + sum = sum.Add(randoms[i]) } - l := g1s[0].MultiPairing(mp...) 
- if !l.IsOne() { + g1CheckR := g1s[0].SumOfProducts(g1s[1:], randoms) + g1CheckL := g1s[0].SumOfProducts(g1s[:len(g1s)-1], randoms) + + if !mpg2.MultiPairing( + g1CheckL.(curves.PairingPoint), + mpg2n.Mul(sum).(curves.PairingPoint), + g1CheckR.(curves.PairingPoint), + mpg2.Mul(sum).(curves.PairingPoint), + ).IsOne() { return errors.Wrap( errors.New("pairing check failed for g1s"), "apply transcript", ) } - l = g1s[0].MultiPairing(mp2...) - if !l.IsOne() { + var g2CheckL, g2CheckR curves.Point + g2Sum := curves.BLS48581G1().Scalar.Zero() + for i := 0; i < len(g2s)-1; i++ { + g2Sum = g2Sum.Add(randoms[i]) + if g2CheckL == nil { + g2CheckL = g2s[0].Mul(randoms[0]) + g2CheckR = g2s[1].Mul(randoms[0]) + } else { + g2CheckL = g2CheckL.Add(g2s[i].Mul(randoms[i])) + g2CheckR = g2CheckR.Add(g2s[i+1].Mul(randoms[i])) + } + } + + if !mpg2.MultiPairing( + mpg1n.Mul(g2Sum).(curves.PairingPoint), + g2CheckL.(curves.PairingPoint), + mpg1.Mul(g2Sum).(curves.PairingPoint), + g2CheckR.(curves.PairingPoint), + ).IsOne() { return errors.Wrap( errors.New("pairing check failed for g2s"), "apply transcript", ) } - mp3 := []curves.PairingPoint{} + mp3 := make([]curves.PairingPoint, (len(g2Powers)-1)*4) for i := 0; i < len(g2Powers)-1; i++ { - mp3 = append(mp3, g1Witnesses[i+1].Neg().(curves.PairingPoint)) - mp3 = append(mp3, g2Powers[i]) - mp3 = append(mp3, mpg1) - mp3 = append(mp3, g2Powers[i+1]) + i := i + mp3[i*4+0] = g1Witnesses[i+1].Neg().(curves.PairingPoint) + mp3[i*4+1] = g2Powers[i] + mp3[i*4+2] = mpg1 + mp3[i*4+3] = g2Powers[i+1] } - l = g1s[0].MultiPairing(mp3...) + l := mp3[0].MultiPairing(mp3...) 
if !l.IsOne() { return errors.Wrap( errors.New("pairing check failed for witnesses"), diff --git a/node/execution/ceremony/application/ceremony_application_validating_test.go b/node/execution/ceremony/application/ceremony_application_validating_test.go index 7f6c196..3840e32 100644 --- a/node/execution/ceremony/application/ceremony_application_validating_test.go +++ b/node/execution/ceremony/application/ceremony_application_validating_test.go @@ -3,7 +3,9 @@ package application import ( "crypto" "crypto/rand" + "fmt" "testing" + "time" "github.com/cloudflare/circl/sign/ed448" "github.com/stretchr/testify/require" @@ -12,6 +14,166 @@ import ( "source.quilibrium.com/quilibrium/monorepo/node/protobufs" ) +// This does a full test of the 65536 powers, run this if you want to wait a +// long time +func TestApplyTranscript_Slow(t *testing.T) { + old := curves.BLS48581G1().Scalar.Random(rand.Reader) + olds := []*curves.ScalarBls48581{ + curves.BLS48581G1().Scalar.One().(*curves.ScalarBls48581), + } + tau := curves.BLS48581G1().Scalar.Random(rand.Reader) + taus := []*curves.ScalarBls48581{ + curves.BLS48581G1().Scalar.One().(*curves.ScalarBls48581), + } + fmt.Println(time.Now().Unix()) + fmt.Println("generate taus") + for i := 0; i < 65536; i++ { + olds = append(olds, olds[i].Mul(old).(*curves.ScalarBls48581)) + taus = append(taus, taus[i].Mul(tau).(*curves.ScalarBls48581)) + } + tauPubG2 := curves.BLS48581G2().Point.Generator().Mul(tau) + + fmt.Println(time.Now().Unix()) + fmt.Println("taus generated") + proverPubKey, proverKey, err := ed448.GenerateKey(rand.Reader) + require.NoError(t, err) + proverSig, err := proverKey.Sign( + rand.Reader, + tauPubG2.ToAffineCompressed(), + crypto.Hash(0), + ) + require.NoError(t, err) + + fmt.Println(time.Now().Unix()) + fmt.Println("prover signature generated") + blsSignature := make([]byte, int(bls48581.MODBYTES)+1) + key := tau.Bytes() + + for i, j := 0, len(key)-1; i < j; i, j = i+1, j-1 { + key[i], key[j] = key[j], key[i] + } + + 
if bls48581.Core_Sign(blsSignature, proverKey, key) != bls48581.BLS_OK { + require.Fail(t, "could not sign") + } + + fmt.Println(time.Now().Unix()) + fmt.Println("bls signature generated") + + blsSig := blsSignature[:] + oldTranscript := &protobufs.CeremonyTranscript{ + G1Powers: []*protobufs.BLS48581G1PublicKey{}, + G2Powers: []*protobufs.BLS48581G2PublicKey{}, + RunningG1_256Witnesses: []*protobufs.BLS48581G1PublicKey{ + { + KeyValue: curves.BLS48581G1().Point.Generator().ToAffineCompressed(), + }, + }, + RunningG2_256Powers: []*protobufs.BLS48581G2PublicKey{ + { + KeyValue: curves.BLS48581G2().Point.Generator().Mul( + olds[256], + ).ToAffineCompressed(), + }, + }, + } + updatedTranscript := &protobufs.CeremonyTranscript{ + G1Powers: []*protobufs.BLS48581G1PublicKey{}, + G2Powers: []*protobufs.BLS48581G2PublicKey{}, + RunningG1_256Witnesses: []*protobufs.BLS48581G1PublicKey{ + { + KeyValue: curves.BLS48581G1().Point.Generator().ToAffineCompressed(), + }, + { + KeyValue: curves.BLS48581G1().Point.Generator().Mul( + taus[256], + ).ToAffineCompressed(), + }, + }, + RunningG2_256Powers: []*protobufs.BLS48581G2PublicKey{ + { + KeyValue: curves.BLS48581G2().Point.Generator().Mul( + olds[256], + ).ToAffineCompressed(), + }, + { + KeyValue: curves.BLS48581G2().Point.Generator().Mul( + olds[256], + ).Mul(taus[256]).ToAffineCompressed(), + }, + }, + } + + for i, o := range olds { + oldTranscript.G1Powers = append( + oldTranscript.G1Powers, + &protobufs.BLS48581G1PublicKey{ + KeyValue: curves.BLS48581G1().Point.Generator().Mul( + o, + ).ToAffineCompressed(), + }, + ) + + updatedTranscript.G1Powers = append( + updatedTranscript.G1Powers, + &protobufs.BLS48581G1PublicKey{ + KeyValue: curves.BLS48581G1().Point.Generator().Mul( + o, + ).Mul(taus[i]).ToAffineCompressed(), + }, + ) + + if i < 257 { + oldTranscript.G2Powers = append( + oldTranscript.G2Powers, + &protobufs.BLS48581G2PublicKey{ + KeyValue: curves.BLS48581G2().Point.Generator().Mul( + o, + ).ToAffineCompressed(), + 
}, + ) + + updatedTranscript.G2Powers = append( + updatedTranscript.G2Powers, + &protobufs.BLS48581G2PublicKey{ + KeyValue: curves.BLS48581G2().Point.Generator().Mul( + o, + ).Mul(taus[i]).ToAffineCompressed(), + }, + ) + } + } + + fmt.Println(time.Now().Unix()) + fmt.Println("transcripts generated") + a := &CeremonyApplication{ + StateCount: 0, + RoundCount: 0, + LobbyState: CEREMONY_APPLICATION_STATE_VALIDATING, + FinalCommits: []*protobufs.CeremonyTranscriptCommit{ + { + ProverSignature: &protobufs.Ed448Signature{ + Signature: proverSig, + PublicKey: &protobufs.Ed448PublicKey{ + KeyValue: proverPubKey, + }, + }, + ContributionSignature: &protobufs.BLS48581Signature{ + Signature: blsSig, + PublicKey: &protobufs.BLS48581G2PublicKey{ + KeyValue: tauPubG2.ToAffineCompressed(), + }, + }, + }, + }, + LatestTranscript: oldTranscript, + UpdatedTranscript: updatedTranscript, + } + + err = a.applyTranscript(updatedTranscript) + require.NoError(t, err) +} + func TestApplyTranscript(t *testing.T) { old := curves.BLS48581G1().Scalar.Random(rand.Reader) old2 := old.Mul(old) @@ -322,5 +484,5 @@ func TestApplyRewritingTranscriptFails(t *testing.T) { } err = a.applyTranscript(updatedTranscript) - require.NoError(t, err) + require.Error(t, err) } diff --git a/node/execution/ceremony/ceremony_execution_engine.go b/node/execution/ceremony/ceremony_execution_engine.go index d122a09..8855ac8 100644 --- a/node/execution/ceremony/ceremony_execution_engine.go +++ b/node/execution/ceremony/ceremony_execution_engine.go @@ -37,6 +37,7 @@ type CeremonyExecutionEngine struct { keyManager keys.KeyManager engineConfig *config.EngineConfig pubSub p2p.PubSub + peerIdHash []byte provingKey crypto.Signer proverPublicKey []byte provingKeyAddress []byte @@ -48,11 +49,11 @@ type CeremonyExecutionEngine struct { alreadyPublishedTranscript bool seenMessageMap map[string]bool seenMessageMx sync.Mutex + intrinsicFilter []byte } func NewCeremonyExecutionEngine( logger *zap.Logger, - clock 
*ceremony.CeremonyDataClockConsensusEngine, engineConfig *config.EngineConfig, keyManager keys.KeyManager, pubSub p2p.PubSub, @@ -63,6 +64,27 @@ func NewCeremonyExecutionEngine( panic(errors.New("logger is nil")) } + seed, err := hex.DecodeString(engineConfig.GenesisSeed) + if err != nil { + panic(err) + } + + intrinsicFilter := append( + p2p.GetBloomFilter(application.CEREMONY_ADDRESS, 256, 3), + p2p.GetBloomFilterIndices(application.CEREMONY_ADDRESS, 65536, 24)..., + ) + + clock := ceremony.NewCeremonyDataClockConsensusEngine( + engineConfig, + logger, + keyManager, + clockStore, + keyStore, + pubSub, + intrinsicFilter, + seed, + ) + e := &CeremonyExecutionEngine{ logger: logger, clock: clock, @@ -76,8 +98,18 @@ func NewCeremonyExecutionEngine( alreadyPublishedShare: false, seenMessageMx: sync.Mutex{}, seenMessageMap: map[string]bool{}, + intrinsicFilter: intrinsicFilter, } + peerId := e.pubSub.GetPeerID() + addr, err := poseidon.HashBytes(peerId) + if err != nil { + panic(err) + } + + addrBytes := addr.Bytes() + addrBytes = append(make([]byte, 32-len(addrBytes)), addrBytes...) 
+ e.peerIdHash = addrBytes provingKey, _, publicKeyBytes, provingKeyAddress := e.clock.GetProvingKey( engineConfig, ) @@ -117,15 +149,7 @@ func (e *CeremonyExecutionEngine) Start() <-chan error { )) go func() { - seed, err := hex.DecodeString(e.engineConfig.GenesisSeed) - if err != nil { - panic(err) - } - - err = <-e.clock.Start( - application.CEREMONY_ADDRESS, - seed, - ) + err := <-e.clock.Start() if err != nil { panic(err) } @@ -175,7 +199,7 @@ func (e *CeremonyExecutionEngine) ProcessMessage( return nil, errors.Wrap(err, "process message") } - if frame.FrameNumber < e.clock.GetFrame() { + if frame.FrameNumber < e.clock.GetFrame().FrameNumber { return nil, nil } @@ -270,7 +294,7 @@ func (e *CeremonyExecutionEngine) RunWorker() { frameChan := e.clock.GetFrameChannel() for { frameFromBuffer := <-frameChan - frame := e.clock.GetActiveFrame() + frame := e.clock.GetFrame() e.activeClockFrame = frame e.logger.Info( "evaluating next frame", @@ -289,9 +313,10 @@ func (e *CeremonyExecutionEngine) RunWorker() { } _, _, reward := app.RewardTrie.Get(e.provingKeyAddress) + _, _, retro := app.RewardTrie.Get(e.peerIdHash) e.logger.Info( "current application state", - zap.Uint64("my_balance", reward), + zap.Uint64("my_balance", reward+retro), zap.String("lobby_state", app.LobbyState.String()), ) @@ -313,7 +338,10 @@ func (e *CeremonyExecutionEngine) RunWorker() { e.logger.Info( "lobby open for joins", zap.Int("joined_participants", len(app.LobbyJoins)), - zap.Int("preferred_participants", len(app.NextRoundPreferredParticipants)), + zap.Int( + "preferred_participants", + len(app.NextRoundPreferredParticipants), + ), zap.Bool("in_lobby", alreadyJoined), zap.Uint64("state_count", app.StateCount), ) @@ -337,7 +365,10 @@ func (e *CeremonyExecutionEngine) RunWorker() { case application.CEREMONY_APPLICATION_STATE_IN_PROGRESS: inRound := false for _, p := range app.ActiveParticipants { - if bytes.Equal(p.KeyValue, e.proverPublicKey) { + if bytes.Equal( + 
p.PublicKeySignatureEd448.PublicKey.KeyValue, + e.proverPublicKey, + ) { inRound = true break } @@ -353,7 +384,10 @@ func (e *CeremonyExecutionEngine) RunWorker() { e.logger.Info( "round in progress", zap.Any("participants", app.ActiveParticipants), - zap.Any("current_seen_attestations", len(app.LatestSeenProverAttestations)), + zap.Any( + "current_seen_attestations", + len(app.LatestSeenProverAttestations), + ), zap.Any( "current_dropped_attestations", len(app.DroppedParticipantAttestations), @@ -371,7 +405,10 @@ func (e *CeremonyExecutionEngine) RunWorker() { if len(e.peerChannels) == 0 && app.RoundCount == 1 && len(app.ActiveParticipants) > 1 { for i, p := range app.ActiveParticipants { - if bytes.Equal(p.KeyValue, e.proverPublicKey) { + if bytes.Equal( + p.PublicKeySignatureEd448.PublicKey.KeyValue, + e.proverPublicKey, + ) { shouldConnect = true position = i break @@ -418,7 +455,10 @@ func (e *CeremonyExecutionEngine) RunWorker() { } } } else if len(app.ActiveParticipants) == 1 && - bytes.Equal(app.ActiveParticipants[0].KeyValue, e.proverPublicKey) { + bytes.Equal( + app.ActiveParticipants[0].PublicKeySignatureEd448.PublicKey.KeyValue, + e.proverPublicKey, + ) { if err = e.commitRound(e.activeSecrets); err != nil { e.logger.Error("error while participating in round", zap.Error(err)) } @@ -427,7 +467,10 @@ func (e *CeremonyExecutionEngine) RunWorker() { e.logger.Info( "round contribution finalizing", zap.Any("participants", len(app.ActiveParticipants)), - zap.Any("current_seen_attestations", len(app.LatestSeenProverAttestations)), + zap.Any( + "current_seen_attestations", + len(app.LatestSeenProverAttestations), + ), zap.Any( "current_dropped_attestations", len(app.DroppedParticipantAttestations), @@ -450,7 +493,10 @@ func (e *CeremonyExecutionEngine) RunWorker() { shouldPublish := false for _, p := range app.ActiveParticipants { - if bytes.Equal(p.KeyValue, e.proverPublicKey) { + if bytes.Equal( + p.PublicKeySignatureEd448.PublicKey.KeyValue, + 
e.proverPublicKey, + ) { shouldPublish = true break } @@ -587,7 +633,7 @@ func (e *CeremonyExecutionEngine) announceJoin( return errors.Wrap( e.publishMessage( - application.CEREMONY_ADDRESS, + e.intrinsicFilter, join, ), "announce join", @@ -607,34 +653,20 @@ func (e *CeremonyExecutionEngine) connectToActivePeers( return errors.Wrap(err, "connect to active peers") } - for i, p := range app.ActiveParticipants { - if !bytes.Equal(p.KeyValue, e.proverPublicKey) { - ic, err := e.keyStore.GetLatestKeyBundle(p.KeyValue) - if err != nil { - return errors.Wrap(err, "connect to active peers") - } - - var kba *protobufs.KeyBundleAnnouncement - switch ic.TypeUrl { - case protobufs.KeyBundleAnnouncementType: - kba = &protobufs.KeyBundleAnnouncement{} - if err := proto.Unmarshal( - ic.Data, - kba, - ); err != nil { - return errors.Wrap(err, "connect to active peers") - } - } - + for i, p := range app.LobbyJoins { + if !bytes.Equal( + p.PublicKeySignatureEd448.PublicKey.KeyValue, + e.proverPublicKey, + ) { receiverIdk, err := curves.ED448().Point.FromAffineCompressed( - kba.IdentityKey.GetPublicKeySignatureEd448().PublicKey.KeyValue, + p.IdentityKey.KeyValue, ) if err != nil { return errors.Wrap(err, "connect to active peers") } receiverSpk, err := curves.ED448().Point.FromAffineCompressed( - kba.SignedPreKey.GetPublicKeySignatureEd448().PublicKey.KeyValue, + p.SignedPreKey.KeyValue, ) if err != nil { return errors.Wrap(err, "connect to active peers") @@ -642,19 +674,24 @@ func (e *CeremonyExecutionEngine) connectToActivePeers( client, err := e.clock.GetPublicChannelForProvingKey( i > position, - p.KeyValue, + p.PublicKeySignatureEd448.PublicKey.KeyValue, ) if err != nil { e.logger.Error( "peer does not support direct public channels", - zap.Binary("proving_key", p.KeyValue), + zap.Binary( + "proving_key", + p.PublicKeySignatureEd448.PublicKey.KeyValue, + ), zap.Error(err), ) } - e.peerChannels[string(p.KeyValue)], err = p2p.NewPublicP2PChannel( + e.peerChannels[string( + 
p.PublicKeySignatureEd448.PublicKey.KeyValue, + )], err = p2p.NewPublicP2PChannel( client, e.proverPublicKey, - p.KeyValue, + p.PublicKeySignatureEd448.PublicKey.KeyValue, i > position, idk, spk, @@ -690,8 +727,13 @@ func (e *CeremonyExecutionEngine) participateRound( idks := []curves.Point{} initiator := false for _, p := range app.ActiveParticipants { - if !bytes.Equal(p.KeyValue, e.proverPublicKey) { - ic, err := e.keyStore.GetLatestKeyBundle(p.KeyValue) + if !bytes.Equal( + p.PublicKeySignatureEd448.PublicKey.KeyValue, + e.proverPublicKey, + ) { + ic, err := e.keyStore.GetLatestKeyBundle( + p.PublicKeySignatureEd448.PublicKey.KeyValue, + ) if err != nil { return errors.Wrap(err, "participate round") } @@ -722,22 +764,29 @@ func (e *CeremonyExecutionEngine) participateRound( return errors.Wrap(err, "participate round") } - if _, ok := e.peerChannels[string(p.KeyValue)]; !ok { + if _, ok := e.peerChannels[string( + p.PublicKeySignatureEd448.PublicKey.KeyValue, + )]; !ok { client, err := e.clock.GetPublicChannelForProvingKey( initiator, - p.KeyValue, + p.PublicKeySignatureEd448.PublicKey.KeyValue, ) if err != nil { e.logger.Error( "peer does not support direct public channels", - zap.Binary("proving_key", p.KeyValue), + zap.Binary( + "proving_key", + p.PublicKeySignatureEd448.PublicKey.KeyValue, + ), zap.Error(err), ) } - e.peerChannels[string(p.KeyValue)], err = p2p.NewPublicP2PChannel( + e.peerChannels[string( + p.PublicKeySignatureEd448.PublicKey.KeyValue, + )], err = p2p.NewPublicP2PChannel( client, e.proverPublicKey, - p.KeyValue, + p.PublicKeySignatureEd448.PublicKey.KeyValue, initiator, idk, spk, @@ -761,7 +810,10 @@ func (e *CeremonyExecutionEngine) participateRound( pubKeys := [][]byte{} for _, p := range app.ActiveParticipants { - pubKeys = append(pubKeys, p.KeyValue) + pubKeys = append( + pubKeys, + p.PublicKeySignatureEd448.PublicKey.KeyValue, + ) } newSecrets, err := application.ProcessRound( @@ -834,7 +886,7 @@ func (e *CeremonyExecutionEngine) 
commitRound(secrets []curves.Scalar) error { } if err := e.publishMessage( - application.CEREMONY_ADDRESS, + e.intrinsicFilter, advance, ); err != nil { return errors.Wrap(err, "commit round") @@ -849,7 +901,7 @@ func (e *CeremonyExecutionEngine) commitRound(secrets []curves.Scalar) error { func (e *CeremonyExecutionEngine) publishDroppedParticipant( participant []byte, ) { - frameNumber := e.clock.GetFrame() + frameNumber := e.clock.GetFrame().FrameNumber b := binary.BigEndian.AppendUint64([]byte("dropped"), frameNumber) b = append(b, participant...) @@ -876,7 +928,7 @@ func (e *CeremonyExecutionEngine) publishDroppedParticipant( } err = e.publishMessage( - application.CEREMONY_ADDRESS, + e.intrinsicFilter, dropped, ) if err != nil { @@ -893,7 +945,7 @@ func (e *CeremonyExecutionEngine) publishDroppedParticipant( func (e *CeremonyExecutionEngine) publishLastSeenParticipant( participant []byte, ) { - frameNumber := e.clock.GetFrame() + frameNumber := e.clock.GetFrame().FrameNumber b := binary.BigEndian.AppendUint64([]byte("lastseen"), frameNumber) b = append(b, participant...) 
@@ -919,7 +971,7 @@ func (e *CeremonyExecutionEngine) publishLastSeenParticipant( }, } err = e.publishMessage( - application.CEREMONY_ADDRESS, + e.intrinsicFilter, seen, ) if err != nil { @@ -1019,7 +1071,7 @@ func (e *CeremonyExecutionEngine) publishTranscriptShare( err = errors.Wrap( e.publishMessage( - application.CEREMONY_ADDRESS, + e.intrinsicFilter, transcriptShare, ), "publish transcript share", @@ -1035,7 +1087,7 @@ func (e *CeremonyExecutionEngine) publishTranscriptShare( func (e *CeremonyExecutionEngine) VerifyExecution( frame *protobufs.ClockFrame, ) error { - if e.clock.GetFrame() != frame.FrameNumber-1 { + if e.clock.GetFrame().FrameNumber != frame.FrameNumber-1 { return nil } @@ -1102,7 +1154,7 @@ func (e *CeremonyExecutionEngine) publishTranscript( e.alreadyPublishedTranscript = true err := errors.Wrap( e.publishMessage( - application.CEREMONY_ADDRESS, + e.intrinsicFilter, app.UpdatedTranscript, ), "publish transcript share", diff --git a/node/go.mod b/node/go.mod index ffaeba4..3cd7cfc 100644 --- a/node/go.mod +++ b/node/go.mod @@ -11,9 +11,11 @@ replace github.com/libp2p/go-libp2p-gostream => ../go-libp2p-gostream replace source.quilibrium.com/quilibrium/monorepo/go-libp2p-blossomsub => ../go-libp2p-blossomsub +replace github.com/cockroachdb/pebble => ../pebble + require ( filippo.io/edwards25519 v1.0.0-rc.1 - github.com/cockroachdb/pebble v0.0.0-20231025190044-422dce910055 + github.com/cockroachdb/pebble v0.0.0-20231210175920-b4d301aeb46a github.com/libp2p/go-libp2p v0.31.0 github.com/libp2p/go-libp2p-gostream v0.6.0 github.com/libp2p/go-libp2p-kad-dht v0.23.0 @@ -57,11 +59,9 @@ require ( github.com/quic-go/qtls-go1-19 v0.3.3 // indirect github.com/quic-go/qtls-go1-20 v0.2.3 // indirect github.com/rivo/uniseg v0.2.0 // indirect - golang.org/x/term v0.14.0 // indirect - google.golang.org/genproto v0.0.0-20230803162519-f966b187b2e5 // indirect + golang.org/x/term v0.14.0 google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d // 
indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d // indirect - google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.3.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) @@ -126,7 +126,7 @@ require ( github.com/mikioh/tcpinfo v0.0.0-20190314235526-30a79bb1804b // indirect github.com/mikioh/tcpopt v0.0.0-20190314235656-172688c1accc // indirect github.com/minio/sha256-simd v1.0.1 // indirect - github.com/mr-tron/base58 v1.2.0 // indirect + github.com/mr-tron/base58 v1.2.0 github.com/multiformats/go-base32 v0.1.0 // indirect github.com/multiformats/go-base36 v0.2.0 // indirect github.com/multiformats/go-multiaddr-dns v0.3.1 // indirect diff --git a/node/go.sum b/node/go.sum index f1e8d1a..72df8bd 100644 --- a/node/go.sum +++ b/node/go.sum @@ -9,22 +9,13 @@ dmitri.shuralyov.com/state v0.0.0-20180228185332-28bcc343414c/go.mod h1:0PRwlb0D filippo.io/edwards25519 v1.0.0-rc.1 h1:m0VOOB23frXZvAOK44usCgLWvtsxIoMCTBGJZlpmGfU= filippo.io/edwards25519 v1.0.0-rc.1/go.mod h1:N1IkdkCkiLB6tki+MYJoSx2JTY9NUlxZE7eHn5EwJns= git.apache.org/thrift.git v0.0.0-20180902110319-2566ecd5d999/go.mod h1:fPE2ZNJGynbRyZ4dJvy6G277gSllfV2HJqblrnkyeyg= -github.com/AndreasBriese/bbloom v0.0.0-20190306092124-e2d15f34fcf9/go.mod h1:bOvUY6CB00SOBii9/FifXqc0awNKxLFCL/+pkDPuyl8= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/CloudyKit/fastprinter v0.0.0-20170127035650-74b38d55f37a/go.mod h1:EFZQ978U7x8IRnstaskI3IysnWY5Ao3QgZUKOXlsAdw= -github.com/CloudyKit/jet v2.1.3-0.20180809161101-62edd43e4f88+incompatible/go.mod h1:HPYO+50pSWkPoj9Q/eq0aRGByCL6ScRlUmiEX5Zgm+w= github.com/DataDog/zstd v1.4.5 h1:EndNeuB0l9syBZhut0wns3gV1hL8zX8LIu6ZiVHWLIQ= github.com/DataDog/zstd v1.4.5/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo= -github.com/Joker/hpp v1.0.0/go.mod h1:8x5n+M1Hp5hC0g8okX3sR3vFQwynaX/UgSOM9MeBKzY= -github.com/Joker/jade v1.0.1-0.20190614124447-d475f43051e7/go.mod h1:6E6s8o2AE4KhCrqr6GRJjdC/gNfTdxkIXvuGZZda2VM= 
-github.com/Shopify/goreferrer v0.0.0-20181106222321-ec9c9a553398/go.mod h1:a1uqRtAwp2Xwc6WNPJEufxJ7fx3npB4UV/JOLmbu5I0= github.com/aead/siphash v1.0.1/go.mod h1:Nywa3cDsYNNK3gaciGTWPwHt0wlpNV15vwmswBAUSII= -github.com/ajg/form v1.5.1/go.mod h1:uL1WgH+h2mgNtvBq0339dVnzXdBETtL2LeUXaIv25UY= github.com/anmitsu/go-shlex v0.0.0-20161002113705-648efa622239/go.mod h1:2FmKhYUyUczH0OGQWaF5ceTx0UBShxjsH6f8oGKYe2c= -github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k= github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8= -github.com/aymerick/raymond v2.0.3-0.20180322193309-b565731e1464+incompatible/go.mod h1:osfaiScAUVup+UC9Nfq76eWqDhXlp+4UYaA8uhTBO6g= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/benbjohnson/clock v1.3.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/benbjohnson/clock v1.3.5 h1:VvXlSJBzZpA/zum6Sj74hxwYI2DIxRWuNIoXAzHZz5o= @@ -61,30 +52,16 @@ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDk github.com/cloudflare/circl v1.3.3 h1:fE/Qz0QdIGqeWfnwq0RE0R7MI51s0M2E4Ga9kq5AEMs= github.com/cloudflare/circl v1.3.3/go.mod h1:5XYMA4rFBvNIrhs50XuiBJ15vF2pZn4nnUKZrLbUZFA= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/cockroachdb/datadriven v1.0.0/go.mod h1:5Ib8Meh+jk1RlHIXej6Pzevx/NLlNvQB9pmSBZErGA4= github.com/cockroachdb/datadriven v1.0.3-0.20230413201302-be42291fc80f h1:otljaYPt5hWxV3MUfO5dFPFiOXg9CyG5/kCfayTqsJ4= -github.com/cockroachdb/errors v1.6.1/go.mod h1:tm6FTP5G81vwJ5lC0SizQo374JNCOPrHyXGitRJoDqM= -github.com/cockroachdb/errors v1.8.1 h1:A5+txlVZfOqFBDa4mGz2bUWSp0aHElvHX2bKkdbQu+Y= -github.com/cockroachdb/errors v1.8.1/go.mod h1:qGwQn6JmZ+oMjuLwjWzUNqblqk0xl4CVV3SQbGwK7Ac= 
github.com/cockroachdb/errors v1.11.1 h1:xSEW75zKaKCWzR3OfxXUxgrk/NtT4G1MiOv5lWZazG8= github.com/cockroachdb/errors v1.11.1/go.mod h1:8MUxA3Gi6b25tYlFEBGLf+D8aISL+M4MIpiWMSNRfxw= -github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f h1:o/kfcElHqOiXqcou5a3rIlMc7oJbMQkeLk0VQJ7zgqY= -github.com/cockroachdb/logtags v0.0.0-20190617123548-eb05cc24525f/go.mod h1:i/u985jwjWRlyHXQbwatDASoW0RMlZ/3i9yJHE2xLkI= github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b h1:r6VH0faHjZeQy818SGhaone5OnYfxFR/+AzdY3sf5aE= github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b/go.mod h1:Vz9DsVWQQhf3vs21MhPMZpMGSht7O/2vFW2xusFUVOs= -github.com/cockroachdb/pebble v0.0.0-20230527012508-ac69476c46ff h1:/F1VgP7wxZCRj8PzresPo2NbAdgPwmU7pi+CgZ8sBZw= -github.com/cockroachdb/pebble v0.0.0-20230527012508-ac69476c46ff/go.mod h1:TkdVsGYRqtULUppt2RbC+YaKtTHnHoWa2apfFrSKABw= -github.com/cockroachdb/pebble v0.0.0-20231025190044-422dce910055 h1:EigfnVX/iY/WTi3F+f4ezhAxJO+BePglQkEAKycNhqo= -github.com/cockroachdb/pebble v0.0.0-20231025190044-422dce910055/go.mod h1:sEHm5NOXxyiAoKWhoFxT8xMgd/f3RA6qUqQ1BXKrh2E= -github.com/cockroachdb/redact v1.0.8 h1:8QG/764wK+vmEYoOlfobpe12EQcS81ukx/a4hdVMxNw= -github.com/cockroachdb/redact v1.0.8/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg= +github.com/cockroachdb/metamorphic v0.0.0-20231108215700-4ba948b56895 h1:XANOgPYtvELQ/h4IrmPAohXqe2pWA8Bwhejr3VQoZsA= github.com/cockroachdb/redact v1.1.5 h1:u1PMllDkdFfPWaNGMyLD1+so+aq3uUItthCFqzwPJ30= github.com/cockroachdb/redact v1.1.5/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg= -github.com/cockroachdb/sentry-go v0.6.1-cockroachdb.2 h1:IKgmqgMQlVJIZj19CdocBeSfSaiCbEBZGKODaixqtHM= -github.com/cockroachdb/sentry-go v0.6.1-cockroachdb.2/go.mod h1:8BT+cPK6xvFOcRlk0R8eg+OTkcqI6baNH4xAkpiYVvQ= github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 h1:zuQyyAKVxetITBuuhv3BI9cMrmStnpT18zmgmTxunpo= github.com/cockroachdb/tokenbucket 
v0.0.0-20230807174530-cc333fc44b06/go.mod h1:7nc4anLGjupUW/PeY5qiNYsdNXj7zopG+eqsS7To5IQ= -github.com/codegangsta/inject v0.0.0-20150114235600-33e0aa1cb7c0/go.mod h1:4Zcjuz89kmFXt9morQgcfYZAYZ5n8WHjt81YYWIwtTM= github.com/consensys/bavard v0.1.8-0.20210915155054-088da2f7f54a/go.mod h1:9ItSMtA/dXMAiL7BG6bqW2m3NdSEObYWoH223nGHukI= github.com/consensys/gnark-crypto v0.5.3 h1:4xLFGZR3NWEH2zy+YzvzHicpToQR8FXFbfLNvpGB+rE= github.com/consensys/gnark-crypto v0.5.3/go.mod h1:hOdPlWQV1gDLp7faZVeg8Y0iEPFaOUnCc4XeCCk96p0= @@ -93,14 +70,10 @@ github.com/containerd/cgroups v1.1.0 h1:v8rEWFl6EoqHB+swVNjVoCJE8o3jX7e8nqBGPLaD github.com/containerd/cgroups v1.1.0/go.mod h1:6ppBcbh/NOOUU+dMKrykgaBnK9lCIBxHqJDGwsa1mIw= github.com/containerd/console v1.0.4-0.20230313162750-1ae8d489ac81 h1:q2hJAaP1k2wIvVRd/hEHD7lacgqrCPS+k8g1MndzfWY= github.com/containerd/console v1.0.4-0.20230313162750-1ae8d489ac81/go.mod h1:YynlIjWYF8myEu6sdkwKIvGQq+cOckRm6So2avqoYAk= -github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= -github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= -github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= github.com/coreos/go-systemd v0.0.0-20181012123002-c6f51f82210d/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/go-systemd/v22 v22.1.0/go.mod h1:xO0FLkIi5MaZafQlIrOotqXZ90ih+1atmu1JpKERPPk= github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= github.com/creack/pty v1.1.9/go.mod 
h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= @@ -114,14 +87,10 @@ github.com/decred/dcrd/crypto/blake256 v1.0.1 h1:7PltbUIQB7u/FfZ39+DGa/ShuMyJ5il github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0 h1:8UrgZ3GkP4i/CLijOJx79Yu+etlyjdBU4sfcs2WYQMs= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.2.0/go.mod h1:v57UDF4pDQJcEfFUCRop3lJL149eHGSe9Jvczhzjo/0= github.com/decred/dcrd/lru v1.0.0/go.mod h1:mxKOwFd7lFjN2GZYsiz/ecgqR6kkYAl+0pz0tEMk218= -github.com/dgraph-io/badger v1.6.0/go.mod h1:zwt7syl517jmP8s94KqSxTlM6IMsdhYy6psNgSztDR4= -github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= -github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= -github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM= github.com/elastic/gosigar v0.12.0/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs= github.com/elastic/gosigar v0.14.2 h1:Dg80n8cr90OZ7x+bAax/QjoW/XqTI11RmA79ZwIm9/4= github.com/elastic/gosigar v0.14.2/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs= @@ -129,10 +98,6 @@ github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymF github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= -github.com/etcd-io/bbolt v1.3.3/go.mod 
h1:ZF2nL25h33cCyBtcyWeZ2/I3HQOfTP+0PIEvHjkjCrw= -github.com/fasthttp-contrib/websocket v0.0.0-20160511215533-1f3b11f56072/go.mod h1:duJ4Jxv5lDcvg4QuQr0oowTf7dz4/CR8NtyCooz9HL8= -github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= -github.com/flosch/pongo2 v0.0.0-20190707114632-bbf5a6c351f4/go.mod h1:T9YF2M40nIgbVgp3rreNmTged+9HrbNTIQf1PsaIiTA= github.com/flynn/go-shlex v0.0.0-20150515145356-3f9db97f8568/go.mod h1:xEzjJPgXI435gkrCt3MPfRiAkVrwSbHsst4LCFVfpJc= github.com/flynn/noise v1.0.0 h1:DlTHqmzmvcEiKj+4RYo/imoswx/4r6iBlCMfVtrMXpQ= github.com/flynn/noise v1.0.0/go.mod h1:xbMo+0i6+IGbYdJhF31t2eR1BIU0CYc12+BNAKwUTag= @@ -140,43 +105,30 @@ github.com/francoispqt/gojay v1.2.13 h1:d2m3sFjloqoIUQU3TsHBgj6qg/BVGlTBeHDUmyJn github.com/francoispqt/gojay v1.2.13/go.mod h1:ehT5mTG4ua4581f1++1WLG0vPdaA9HaiDsoyrBGkyDY= github.com/frankban/quicktest v1.14.4 h1:g2rn0vABPOOXmZUj+vbmUp0lPoXEMuhTpIluN0XL9UY= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= -github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= -github.com/gavv/httpexpect v2.0.0+incompatible/go.mod h1:x+9tiU1YnrOvnB725RkpoLv1M62hOWzwo5OXotisrKc= github.com/getsentry/sentry-go v0.18.0 h1:MtBW5H9QgdcJabtZcuJG80BMOwaBpkRDZkxRkNC1sN0= github.com/getsentry/sentry-go v0.18.0/go.mod h1:Kgon4Mby+FJ7ZWHFUAZgVaIa8sxHtnRJRLTXZr51aKQ= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= -github.com/gin-contrib/sse v0.0.0-20190301062529-5545eab6dad3/go.mod h1:VJ0WA2NBN22VlZ2dKZQPAPnyWw5XTlK1KymzLKsr59s= -github.com/gin-gonic/gin v1.4.0/go.mod h1:OW2EZn3DO8Ln9oIKOvM++LBO+5UPHJJDH72/q/3rZdM= github.com/gliderlabs/ssh v0.1.1/go.mod h1:U7qILu1NlMHj9FlMhZLlkCdDnU1DBEAqr0aevW3Awn0= -github.com/go-check/check v0.0.0-20180628173108-788fd7840127/go.mod h1:9ES+weclKsC9YodN5RgxqK/VD9HM9JsCSh7rNhMZE98= -github.com/go-errors/errors v1.0.1 h1:LUHzmkK3GUKUrL/1gfBUxAHzcev3apQlezX/+O7ma6w= 
github.com/go-errors/errors v1.0.1/go.mod h1:f4zRHt4oKfwPJE5k8C9vpYG+aDHdBFUsgrm6/TyX73Q= +github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= github.com/go-logr/logr v1.2.4/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= -github.com/go-martini/martini v0.0.0-20170121215854-22fa46961aab/go.mod h1:/P9AEU963A2AYjv4d1V5eVL1CQbEJq6aCNHDDjibzu8= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 h1:tfuBGBXKqDEevZMzYi5KSi8KkcZtzBcTgAUUtapy0OI= github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572/go.mod h1:9Pwr4B2jHnOSGXyyzV8ROjYa2ojvAY6HCGYYfMoC3Ls= github.com/go-yaml/yaml v2.1.0+incompatible/go.mod h1:w2MrLa16VYP0jy6N7M5kHaCkaLENm+P+Tv+MfurjSw0= -github.com/gobwas/httphead v0.0.0-20180130184737-2c6c146eadee/go.mod h1:L0fX3K22YWvt/FAX9NnzrNzcI4wNYi9Yku4O0LKYflo= -github.com/gobwas/pool v0.2.0/go.mod h1:q8bcK0KcYlCgd9e7WYLm9LpyS+YeLd8JVDW6WezmKEw= -github.com/gobwas/ws v1.0.2/go.mod h1:szmBTxLgaFppYjEmNtny/v3w89xOydFnnZMcgRRu/EM= github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk= github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/gogo/googleapis v0.0.0-20180223154316-0cd9801be74a/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= -github.com/gogo/protobuf v1.2.0/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf 
v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= -github.com/gogo/status v1.1.0/go.mod h1:BFv9nrluPLmrS0EmGVvLaPNmRosr9KapBYd5/hpY1WM= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/glog v1.1.0 h1:/d3pCKDPWNnvIWe0vVUpNP32qc8U3PDVxySP/y360qE= -github.com/golang/glog v1.1.0/go.mod h1:pfYeQZ3JWZoXTV5sFc986z3HTpwQs9At6P4ImfuP3NQ= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= github.com/golang/lint v0.0.0-20180702182130-06c8688daad7/go.mod h1:tluoj9z5200jBnyusfRPU2LqT6J+DAorxEvtC7LHB+E= @@ -187,21 +139,18 @@ github.com/golang/mock v1.6.0/go.mod h1:p6yTPP+5HYm5mzsMV8JkE6ZKdX+/wYM6Hr+Licev github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= github.com/golang/protobuf v1.4.1/go.mod 
h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= -github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/gomodule/redigo v1.7.1-0.20190724094224-574c33c3df38/go.mod h1:B4C85qUVwatsJoIUNIfCRsp7qO0iAmpGFZ4EELWSbC4= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= @@ -231,7 +180,6 @@ github.com/googleapis/gax-go v2.0.0+incompatible/go.mod h1:SFVmujtThgffbyetf+mdk github.com/googleapis/gax-go/v2 v2.0.3/go.mod h1:LLvjysVCY1JZeum8Z6l8qUty8fiNwE08qbEPm1M08qg= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/gopherjs/gopherjs v0.0.0-20190430165422-3e4dfb77656c h1:7lF+Vz0LqiRidnzC1Oq86fpX1q/iEv2KJdrCtttYjT4= -github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc= github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= @@ -245,20 +193,15 @@ github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY 
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= -github.com/hashicorp/go-version v1.2.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+lJfyTc= github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= github.com/hashicorp/golang-lru/v2 v2.0.2 h1:Dwmkdr5Nc/oBiXgJS3CDHNhJtIHkuZ3DZF5twqnfBdU= github.com/hashicorp/golang-lru/v2 v2.0.2/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= -github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/huin/goupnp v1.2.0 h1:uOKW26NG1hsSSbXIZ1IR7XP9Gjd1U8pnLaCMgntmkmY= github.com/huin/goupnp v1.2.0/go.mod h1:gnGPsThkYa7bFi/KWmEysQRf48l2dvR5bxr2OFckNX8= -github.com/hydrogen18/memlistener v0.0.0-20141126152155-54553eb933fb/go.mod h1:qEIFzExnS6016fRpRfxrExeVn2gbClQA99gQhnIcdhE= github.com/iden3/go-iden3-crypto v0.0.15 h1:4MJYlrot1l31Fzlo2sF56u7EVFeHHJkxGXXZCtESgK4= github.com/iden3/go-iden3-crypto v0.0.15/go.mod h1:dLpM4vEPJ3nDHzhWFXDjzkn1qHoBeOT/3UEhXsEsP3E= -github.com/imkira/go-interpol v1.1.0/go.mod h1:z0h2/2T3XF8kyEPpRgJ3kmNv+C43p+I/CoI+jC3w2iA= -github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= github.com/ipfs/boxo v0.8.0 h1:UdjAJmHzQHo/j3g3b1bAcAXCj/GM6iTwvSlBDvPBNBs= github.com/ipfs/boxo v0.8.0/go.mod h1:RIsi4CnTyQ7AUsNn5gXljJYZlQrHBMnJp94p73liFiA= github.com/ipfs/go-cid v0.4.1 h1:A/T3qGvxi4kpKWWcPC/PgbvDA2bjVLO7n4UeVwnbs/s= @@ -276,10 +219,6 @@ github.com/ipfs/go-log/v2 v2.5.1 h1:1XdUzF7048prq4aBjDQQ4SL5RxftpRGdXhNRwKSAlcY= github.com/ipfs/go-log/v2 v2.5.1/go.mod 
h1:prSpmC1Gpllc9UYWxDiZDreBYw7zp4Iqp1kOLU9U5UI= github.com/ipld/go-ipld-prime v0.20.0 h1:Ud3VwE9ClxpO2LkCYP7vWPc0Fo+dYdYzgxUJZ3uRG4g= github.com/ipld/go-ipld-prime v0.20.0/go.mod h1:PzqZ/ZR981eKbgdr3y2DJYeD/8bgMawdGVlJDE8kK+M= -github.com/iris-contrib/blackfriday v2.0.0+incompatible/go.mod h1:UzZ2bDEoaSGPbkg6SAB4att1aAwTmVIx/5gCVqeyUdI= -github.com/iris-contrib/go.uuid v2.0.0+incompatible/go.mod h1:iz2lgM/1UnEf1kP0L/+fafWORmlnuysV2EMP8MW+qe0= -github.com/iris-contrib/i18n v0.0.0-20171121225848-987a633949d0/go.mod h1:pMCz62A0xJL6I+umB2YTlFRwWXaDFA0jy+5HzGiJjqI= -github.com/iris-contrib/schema v0.0.1/go.mod h1:urYA3uvUNG1TIIjOSCzHr9/LmbQo8LrOcOqfqxa4hXw= github.com/jackpal/go-nat-pmp v1.0.2 h1:KzKSgb7qkJvOUTqYl9/Hg/me3pWgBmERKrTGD7BdWus= github.com/jackpal/go-nat-pmp v1.0.2/go.mod h1:QPH045xvCAeXUZOxsnwmrtiCoxIr9eob+4orBN1SBKc= github.com/jbenet/go-cienv v0.1.0/go.mod h1:TqNnHUmJgXau0nCzC7kXWeotg3J9W34CUv5Djy1+FlA= @@ -295,23 +234,12 @@ github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCV github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= -github.com/juju/errors v0.0.0-20181118221551-089d3ea4e4d5/go.mod h1:W54LbzXuIE0boCoNJfwqpmkKJ1O4TCTZMetAt6jGk7Q= -github.com/juju/loggo v0.0.0-20180524022052-584905176618/go.mod h1:vgyd7OREkbtVEN/8IXZe5Ooef3LQePvuBm9UWj6ZL8U= -github.com/juju/testing v0.0.0-20180920084828-472a3e8b2073/go.mod h1:63prj8cnj0tU0S9OHjGJn+b1h0ZghCndfnbQolrYTwA= -github.com/k0kubun/colorstring v0.0.0-20150214042306-9440f1994b88/go.mod h1:3w7q1U84EfirKl04SVQ/s7nPm1ZPhiXd34z40TNz36k= -github.com/kataras/golog v0.0.9/go.mod h1:12HJgwBIZFNGL0EJnMRhmvGA0PQGx8VFwrZtM4CqbAk= -github.com/kataras/iris/v12 v12.0.1/go.mod h1:udK4vLQKkdDqMGJJVd/msuMtN6hpYJhg/lSzuxjhO+U= 
-github.com/kataras/neffos v0.0.10/go.mod h1:ZYmJC07hQPW67eKuzlfY7SO3bC0mw83A3j6im82hfqw= -github.com/kataras/pio v0.0.0-20190103105442-ea782b38602d/go.mod h1:NV88laa9UiiDuX9AhMbDPkGYSPugBOV6yTZB1l2K9Z0= github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/kkdai/bstream v0.0.0-20161212061736-f391b8402d23/go.mod h1:J+Gs4SYgM6CZQHDETBtE9HaSEkGmuNXF86RwHhHUvq4= -github.com/klauspost/compress v1.8.2/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= -github.com/klauspost/compress v1.9.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= -github.com/klauspost/cpuid v1.2.1/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg= github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= github.com/koron/go-ssdp v0.0.4 h1:1IDwrghSKYM7yLf7XCzbByg2sJ/JcNOZRXS2jczTwz0= @@ -325,8 +253,6 @@ github.com/kr/pty v1.1.3/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/labstack/echo/v4 v4.1.11/go.mod h1:i541M3Fj6f76NZtHSj7TXnyM8n2gaodfvfxNnFqi74g= -github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k= github.com/leanovate/gopter v0.2.9 h1:fQjYxZaynp97ozCzfOyOuAGOU4aU/z37zf/tOujFk7c= github.com/leanovate/gopter v0.2.9/go.mod h1:U2L/78B+KVFIx2VmW6onHJQzXtFb+p5y3y2Sh+Jxxv8= 
github.com/libp2p/go-buffer-pool v0.1.0 h1:oK4mSFcQz7cTQIfqbe4MIj9gLW+mnanjyFtc6cdF0Y8= @@ -357,14 +283,9 @@ github.com/libp2p/go-yamux/v4 v4.0.1/go.mod h1:NWjl8ZTLOGlozrXSOZ/HlfG++39iKNnM5 github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY= github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= github.com/lunixbochs/vtclean v1.0.0/go.mod h1:pHhQNgMf3btfWnGBVipUOjRYhoOsdGqdm/+2c2E2WMI= -github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= github.com/mailru/easyjson v0.0.0-20190312143242-1de009706dbe/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= github.com/marten-seemann/tcp v0.0.0-20210406111302-dfbc87cc63fd h1:br0buuQ854V8u83wA0rVZ8ttrq5CpaPZdvrK0LP2lOk= github.com/marten-seemann/tcp v0.0.0-20210406111302-dfbc87cc63fd/go.mod h1:QuCEs1Nt24+FYQEqAAncTDPJIuGs+LxK1MCiFL25pMU= -github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= -github.com/mattn/go-isatty v0.0.7/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= -github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= -github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= @@ -373,14 +294,10 @@ github.com/mattn/go-localereader v0.0.1/go.mod h1:8fBrzywKY7BI3czFoHkuzRoWE9C+Ei github.com/mattn/go-runewidth v0.0.12/go.mod h1:RAqKPSqVFrSLVXbA8x7dzmKdmGzieGRCM46jaSJTDAk= github.com/mattn/go-runewidth v0.0.15 h1:UNAjwbU9l54TA3KzvqLGxwWjHmMgBUVhBiTjelZgg3U= github.com/mattn/go-runewidth v0.0.15/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= -github.com/mattn/goveralls v0.0.2/go.mod 
h1:8d1ZMHsd7fW6IRPKQh46F2WRpyib5/X4FOpevwGNQEw= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= github.com/matttproud/golang_protobuf_extensions v1.0.4/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= -github.com/mediocregopher/mediocre-go-lib v0.0.0-20181029021733-cb65787f37ed/go.mod h1:dSsfyI2zABAdhcbvkXqgxOxrCsbYeHCPgrZkku60dSg= -github.com/mediocregopher/radix/v3 v3.3.0/go.mod h1:EmfVyvspXz1uZEyPBMyGK+kjWiKQGvsUt6O3Pj+LDCQ= github.com/microcosm-cc/bluemonday v1.0.1/go.mod h1:hsXNsILzKxV+sX77C5b8FSuKF00vh2OMYv+xgHpAMF4= -github.com/microcosm-cc/bluemonday v1.0.2/go.mod h1:iVP4YcDBq+n/5fb23BhYFvIMq/leAFZyRl6bYmGDlGc= github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI= github.com/miekg/dns v1.1.55 h1:GoQ4hpsj0nFLYe+bWiCToyrBEJXkQfOOIvFGFy0lEgo= github.com/miekg/dns v1.1.55/go.mod h1:uInx36IzPl7FYnDcMeVWxj9byh7DutNykX4G9Sj60FY= @@ -396,11 +313,8 @@ github.com/minio/blake2b-simd v0.0.0-20160723061019-3f5f724cb5b1/go.mod h1:pD8Rv github.com/minio/sha256-simd v0.1.1-0.20190913151208-6de447530771/go.mod h1:B5e1o+1/KgNmWrSQK08Y6Z1Vb5pwIktudl0J58iy0KM= github.com/minio/sha256-simd v1.0.1 h1:6kaan5IFmwTNynnKKpDHe6FWHohJOHhCPchzK49dzMM= github.com/minio/sha256-simd v1.0.1/go.mod h1:Pz6AKMiUdngCLpeTL/RJY1M9rUuPMYujV5xJjtbRSN8= -github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= -github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= -github.com/moul/http2curl v1.0.0/go.mod h1:8UbvGypXm98wA/IqH45anm5Y2Z6ep6O31QGOAZ3H0fQ= github.com/mr-tron/base58 v1.1.2/go.mod 
h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= github.com/mr-tron/base58 v1.1.3/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o= @@ -439,22 +353,14 @@ github.com/multiformats/go-varint v0.0.1/go.mod h1:3Ls8CIEsrijN6+B7PbrXRPxHRPuXS github.com/multiformats/go-varint v0.0.5/go.mod h1:3Ls8CIEsrijN6+B7PbrXRPxHRPuXSrVKRY101jdMZYE= github.com/multiformats/go-varint v0.0.7 h1:sWSGR+f/eu5ABZA2ZpYKBILXTTs9JWpdEM/nEGOHFS8= github.com/multiformats/go-varint v0.0.7/go.mod h1:r8PUYw/fD/SjBCiKOoDlGF6QawOELpZAu9eioSos/OU= -github.com/nats-io/nats.go v1.8.1/go.mod h1:BrFz9vVn0fU3AcH9Vn4Kd7W0NpJ651tD5omQ3M8LwxM= -github.com/nats-io/nkeys v0.0.2/go.mod h1:dab7URMsZm6Z/jp9Z5UGa87Uutgc2mVpXLC4B7TDb/4= -github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= github.com/neelance/astrewrite v0.0.0-20160511093645-99348263ae86/go.mod h1:kHJEU3ofeGjhHklVoIGuVj85JJwZ6kWPaJwCIxgnFmo= github.com/neelance/sourcemap v0.0.0-20151028013722-8c68805598ab/go.mod h1:Qr6/a/Q4r9LP1IltGz7tA7iOK1WonHEYhu1HRBA7ZiM= -github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/ginkgo v1.7.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= -github.com/onsi/ginkgo v1.13.0/go.mod h1:+REjRxOmWfHCjfv9TTWB1jD1Frx4XydAD3zm1lskyM0= github.com/onsi/ginkgo/v2 v2.11.0 h1:WgqUCUt/lT6yXoQ8Wef0fsNn5cAuMK7+KT9UFRz2tcU= github.com/onsi/ginkgo/v2 v2.11.0/go.mod h1:ZhrRA5XmEE3x3rhlzamx/JJvujdZoJ2uvgI7kR0iZvM= github.com/onsi/gomega v1.4.1/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= github.com/onsi/gomega v1.4.3/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= -github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= -github.com/onsi/gomega 
v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= github.com/onsi/gomega v1.27.8 h1:gegWiwZjBsf2DgiSbf5hpokZ98JVDMcWkUiigk6/KXc= github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-spec v1.1.0 h1:HHUyrt9mwHUjtasSbXSMvs4cyFxh+Bll4AjJ9odEGpg= @@ -464,9 +370,7 @@ github.com/opentracing/opentracing-go v1.2.0/go.mod h1:GxEUsuufX4nBwe+T+Wl9TAgYr github.com/openzipkin/zipkin-go v0.1.1/go.mod h1:NtoC/o8u3JlF1lSlyPNswIbeQH9bJTmOf0Erfk+hxe8= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y= -github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/pingcap/errors v0.11.4 h1:lFuQV/oaUMGcD2tqt+01ROSmJs75VG1ToEOkZIZ4nE4= -github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= @@ -494,8 +398,6 @@ github.com/quic-go/qtls-go1-19 v0.3.3 h1:wznEHvJwd+2X3PqftRha0SUKmGsnb6dfArMhy9P github.com/quic-go/qtls-go1-19 v0.3.3/go.mod h1:ySOI96ew8lnoKPtSqx2BlI5wCpUVPT05RMAlajtnyOI= github.com/quic-go/qtls-go1-20 v0.2.3 h1:m575dovXn1y2ATOb1XrRFcrv0F+EQmlowTkoraNkDPI= github.com/quic-go/qtls-go1-20 v0.2.3/go.mod h1:JKtK6mjbAVcUTN/9jZpvLbGxvdWIKS8uT7EiStoU1SM= -github.com/quic-go/qtls-go1-20 v0.3.2 h1:rRgN3WfnKbyik4dBV8A6girlJVxGand/d+jVKbQq5GI= -github.com/quic-go/qtls-go1-20 v0.3.2/go.mod h1:X9Nh97ZL80Z+bX/gUXMbipO6OxdiDi58b/fMC9mAL+k= github.com/quic-go/quic-go v0.36.3 h1:f+yOqeGhMoRX7/M3wmEw/djhzKWr15FtQysox85/834= github.com/quic-go/quic-go v0.36.3/go.mod 
h1:qxQumdeKw5GmWs1OsTZZnOxzSI+RJWuhf1O8FN35L2o= github.com/quic-go/webtransport-go v0.5.3 h1:5XMlzemqB4qmOlgIus5zB45AcZ2kCgCy2EptUrfOPWU= @@ -511,10 +413,7 @@ github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjR github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/russross/blackfriday v1.5.2/go.mod h1:JO/DiYxRf+HjHt06OyowR9PTA263kcR/rfWxYHBV53g= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/ryanuber/columnize v2.1.0+incompatible/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= -github.com/sclevine/agouti v3.0.0+incompatible/go.mod h1:b4WX9W9L1sfQKXeJf1mUTLZKJ48R1S7H23Ji7oFO5Bw= github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= -github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM= github.com/shurcooL/component v0.0.0-20170202220835-f88ec8f54cc4/go.mod h1:XhFIlyj5a1fBNx5aJTbKoIq0mNaPvOagO+HjB3EtxrY= github.com/shurcooL/events v0.0.0-20181021180414-410e4ca65f48/go.mod h1:5u70Mqkb5O5cxEA8nxTsgrgLehJeAw6Oc4Ab1c/P1HM= github.com/shurcooL/github_flavored_markdown v0.0.0-20181002035957-2122de532470/go.mod h1:2dOwnU2uBioM+SGy2aZoq1f/Sd1l9OkAeAUvjSyvgU0= @@ -539,22 +438,14 @@ github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeV github.com/shurcooL/users v0.0.0-20180125191416-49c67e49c537/go.mod h1:QJTqeLYEDaXHZDBsXlPCDqdhQuJkuw4NOtaxYe3xii4= github.com/shurcooL/webdavfs v0.0.0-20170829043945-18c3829fa133/go.mod h1:hKmq5kWdCj2z2KEozexVbfEZIWiTjhE0+UjmZgPqehw= github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= -github.com/smartystreets/assertions v0.0.0-20180927180507-b2de0cb4f26d/go.mod h1:OnSkiWE9lh6wB0YB77sQom3nweQdgAjqCqsofrRNTgc= github.com/smartystreets/assertions v1.2.0 h1:42S6lae5dvLc7BrLu/0ugRtcFVjoJNMC/N3yZFZkDFs= github.com/smartystreets/assertions v1.2.0/go.mod 
h1:tcbTF8ujkAEcZ8TElKY+i30BzYlVhC/LOxJk7iOWnoo= -github.com/smartystreets/goconvey v1.6.4/go.mod h1:syvi0/a8iFYH4r/RixwvyeAJjdLS9QV7WQ/tjFTllLA= github.com/smartystreets/goconvey v1.7.2 h1:9RBaZCeXEQ3UselpuwUQHltGVXvdwm6cv1hgR6gDIPg= github.com/smartystreets/goconvey v1.7.2/go.mod h1:Vw0tHAZW6lzCRk3xgdin6fKYcG+G3Pg9vgXWeJpQFMM= github.com/sourcegraph/annotate v0.0.0-20160123013949-f4cad6c6324d/go.mod h1:UdhH50NIW0fCiwBSr0co2m7BnFLdv4fQTgdqdJTHFeE= github.com/sourcegraph/syntaxhighlight v0.0.0-20170531221838-bd320f5d308e/go.mod h1:HuIsMU8RRBOtsCgI77wP899iHVBQpCmg4ErYMZB+2IA= github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= -github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= -github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= -github.com/spf13/cobra v0.0.5/go.mod h1:3K3wKZymM7VvHMDS9+Akkh4K60UwM26emMESw8tLCHU= -github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= -github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= -github.com/spf13/viper v1.3.2/go.mod h1:ZiWeW+zYFKm7srdB9IoDzzZXaJaI5eL9QjNiN/DMA2s= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= @@ -569,29 +460,14 @@ github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/tarm/serial v0.0.0-20180830185346-98f6abe2eb07/go.mod h1:kDXzergiv9cbyO7IOYJZWg1U88JhDg3PB6klq9Hg2pA= -github.com/ugorji/go v1.1.4/go.mod 
h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= -github.com/ugorji/go/codec v0.0.0-20181204163529-d75b2dcb6bc8/go.mod h1:VFNgLljTbGfSG7qAOspJ7OScBnGdDN/yBr0sguwnwf0= github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= github.com/urfave/cli v1.22.10/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= -github.com/urfave/negroni v1.0.0/go.mod h1:Meg73S6kFm/4PpbYdq35yYWoCZ9mS/YSx+lKnmiohz4= -github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= -github.com/valyala/fasthttp v1.6.0/go.mod h1:FstJa9V+Pj9vQ7OJie2qMHdwemEDaDiSdBnvPM1Su9w= -github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= -github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio= github.com/viant/assertly v0.4.8/go.mod h1:aGifi++jvCrUaklKEKT0BU95igDNaqkvz+49uaYMPRU= github.com/viant/toolbox v0.24.0/go.mod h1:OxMCG57V0PXuIP2HNQrtJf2CjqdmbrOx5EkMILuUhzM= github.com/warpfork/go-wish v0.0.0-20220906213052-39a1cc7a02d0 h1:GDDkbFiaK8jsSDJfjId/PEGEShv6ugrt4kYsC5UIDaQ= github.com/warpfork/go-wish v0.0.0-20220906213052-39a1cc7a02d0/go.mod h1:x6AKhvSSexNrVSrViXSHUEbICjmGXhtgABaHIySUSGw= github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1 h1:EKhdznlJHPMoKr0XTrX+IlJs1LH3lyx2nfr1dOlZ79k= github.com/whyrusleeping/go-keyspace v0.0.0-20160322163242-5b898ac5add1/go.mod h1:8UvriyWtv5Q5EOgjHaSseUEdkQfvwFv1I/In/O2M9gc= -github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= -github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415/go.mod h1:GwrjFmJcFw6At/Gs6z4yjiIwzuJ1/+UwLxMQDVQXShQ= -github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= -github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= -github.com/yalp/jsonpath 
v0.0.0-20180802001716-5cc68e5049a0/go.mod h1:/LWChgwKmvncFJFHJ7Gvn9wZArjbV5/FppcK2fKk/tI= -github.com/yudai/gojsondiff v1.0.0/go.mod h1:AY32+k2cwILAkW1fbgxQ5mUmMiZFgLIV+FBNExI05xg= -github.com/yudai/golcs v0.0.0-20170316035057-ecda9a501e82/go.mod h1:lgjkn3NuSvDfVJdfcVVdX+jpBxNmX4rDAzaS45IcYoM= -github.com/yudai/pp v2.0.1+incompatible/go.mod h1:PuxR/8QJ7cyCkFp/aUDS+JY727OFEZkTdatxwunjIkc= github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= @@ -624,20 +500,16 @@ go4.org v0.0.0-20180809161055-417644f6feb5/go.mod h1:MkTOUMDaeVYJUOUsaDXIhWPZYa1 golang.org/x/build v0.0.0-20190111050920-041ab4dc3f9d/go.mod h1:OWs+y06UdEOHN4y+MfF/py+xQ/tYqIWW03b70/CG9Rw= golang.org/x/crypto v0.0.0-20170930174604-9419663f5a44/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20181030102418-4d3f4d9ffa16/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= -golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190313024323-a1f597ede03a/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200115085410-6d4e4cb37c7d/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto 
v0.0.0-20200510223506-06a226fb4e37/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200602180216-279210d13fed/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210322153248-0c34fe9e7dc2/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= -golang.org/x/crypto v0.12.0 h1:tFM/ta59kqch6LlvYnPa0yx5a83cL2nHflFhYKvv9Yk= -golang.org/x/crypto v0.12.0/go.mod h1:NF0Gs7EO5K4qLn+Ylc+fih8BSTeIjAP05siRnAh98yw= golang.org/x/crypto v0.13.0 h1:mvySKfSWJ+UKUii46M40LOvyWfN0s2U+46/jDd0e6Ck= golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= @@ -662,25 +534,18 @@ golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181029044818-c44066c5c816/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181106065722-10aee1819953/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190313220215-9f648a60d977/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190327091125-710a502c58a2/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod 
h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= -golang.org/x/net v0.14.0 h1:BONx9s002vGdD9umnlX1Po8vOZmrgH34qlHcD1MfK14= -golang.org/x/net v0.14.0/go.mod h1:PpSgVXXLK0OxS0F31C1/tv6XNguvCrnXIDrFMspZIUI= golang.org/x/net v0.15.0 h1:ugBLEUaxABaB5AJqW9enI0ACdci2RUd4eP51NTBvuJ8= golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -702,20 +567,11 @@ golang.org/x/sys v0.0.0-20180810173357-98c5dad5d1a0/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181029174526-d69651ed3497/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys 
v0.0.0-20181205085412-a5c9d58dba9a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190316082340-a2f829d7f35f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200602225109-6fdc65e7d980/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -727,23 +583,14 @@ golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM= -golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o= -golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.14.0 h1:Vz7Qs629MkJkGyHxUlRHizWJRG2j8fbQKjELVSNhy7Q= golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.12.0 h1:/ZfYdc3zq+q02Rv9vGqTeSItdzZTSNDmfTi0mBAuidU= -golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU= golang.org/x/term v0.14.0 h1:LGK9IlZ8T9jvdy6cTdfKUCltatMFOehAQo9SRC46UQ8= golang.org/x/term v0.14.0/go.mod h1:TySc+nGkYR6qt8km8wUhuFRTVSMIX3XPR58y2lC8vww= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.12.0 h1:k+n5B8goJNdU7hSvEtMUz3d1Q6D/XW4COJSJR6fN0mc= -golang.org/x/text v0.12.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/text v0.13.0 h1:ablQoSUd0tRdKxZewP80B+BaqeKJuVhuRxj/dkrun3k= golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -752,11 +599,9 @@ golang.org/x/tools v0.0.0-20180828015842-6cd1fcedba52/go.mod h1:n7NCudcB/nEzxVGm golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod 
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20181030000716-a0a13e073c7b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20181221001348-537d06c36207/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= -golang.org/x/tools v0.0.0-20190327201419-c70d86f8b7cf/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190328211700-ab21143f2384/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190422233926-fe54fb35175b/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= @@ -784,7 +629,6 @@ google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9Ywl google.golang.org/appengine v1.2.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.3.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= -google.golang.org/genproto v0.0.0-20180518175338-11a468237815/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20180831171423-11092d34479b/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20181029155118-b69ba1387ce2/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= @@ -793,14 +637,10 
@@ google.golang.org/genproto v0.0.0-20190306203927-b5d61aea6440/go.mod h1:VzzqZJRn google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= google.golang.org/genproto v0.0.0-20230803162519-f966b187b2e5 h1:L6iMMGrtzgHsWofoFcihmDEMYeDR9KN/ThbPWGrh++g= -google.golang.org/genproto v0.0.0-20230803162519-f966b187b2e5/go.mod h1:oH/ZOT02u4kWEp7oYBGYFFkCdKS/uYR9Z7+0/xuuFp8= google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d h1:DoPTO70H+bcDXcd39vOqb2viZxgqeBeSGtZ55yZU4/Q= google.golang.org/genproto/googleapis/api v0.0.0-20230822172742-b8732ec3820d/go.mod h1:KjSP20unUpOx5kyQUFa7k4OJg0qeJ7DEZflGDu2p6Bk= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98 h1:bVf09lpb+OJbByTj913DRJioFFAjf/ZGxEz7MajTp2U= -google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98/go.mod h1:TUfxEVdsvPg18p6AslUXFoLdpED4oBnGwyqk3dV1XzM= google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d h1:uvYuEyMHKNt+lT4K3bN6fGswmK8qSvcreM3BwjDh+y4= google.golang.org/genproto/googleapis/rpc v0.0.0-20230822172742-b8732ec3820d/go.mod h1:+Bk1OCOj40wS2hwAMA+aCW9ypzm63QTBBHp6lQ3p+9M= -google.golang.org/grpc v1.12.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.14.0/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.16.0/go.mod h1:0JHn/cJsOMiMfNA9+DeHDlAU7KAAB5GDlYFpa9MZMio= google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs= @@ -808,12 +648,9 @@ google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZi google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.27.0/go.mod 
h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= -google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= google.golang.org/grpc v1.58.2 h1:SXUpjxeVF3FKrTYQI4f4KvbGD5u2xccdYdurwowix5I= google.golang.org/grpc v1.58.2/go.mod h1:tgX3ZQDlNJGU96V6yHh1T/JeoBQ2TXdr43YbYSsCJk0= -google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.3.0 h1:rNBFJjBCOgVr9pWD7rs/knKL4FRTKgpZmsRfV214zcA= -google.golang.org/grpc/cmd/protoc-gen-go-grpc v1.3.0/go.mod h1:Dk1tviKTvMCz5tvh7t+fh94dhmQVHuCt2OzJB3CTW9Y= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -829,21 +666,15 @@ google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= -gopkg.in/go-playground/assert.v1 v1.2.1/go.mod h1:9RXL0bg/zibRAgZUYszZSwO/z8Y/a8bDuhia5mkpMnE= -gopkg.in/go-playground/validator.v8 v8.18.2/go.mod 
h1:RX2a/7Ha8BgOhfk7j780h4/u/RRjR0eouCJSH80/M2Y= gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= -gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/node/keys/inmem.go b/node/keys/inmem.go new file mode 100644 index 0000000..a59a201 --- /dev/null +++ b/node/keys/inmem.go @@ -0,0 +1,197 @@ +package keys + +import ( + "crypto" + "crypto/rand" + + "github.com/cloudflare/circl/sign/ed448" + "github.com/pkg/errors" + "source.quilibrium.com/quilibrium/monorepo/nekryptology/pkg/core/curves" +) + +type InMemoryKeyManager struct { + key ByteString + store map[string]Key +} + +func NewInMemoryKeyManager() *InMemoryKeyManager { + store := make(map[string]Key) + + return &InMemoryKeyManager{ + store: store, + } +} + +// CreateSigningKey implements KeyManager +func (f *InMemoryKeyManager) CreateSigningKey( + id string, + keyType KeyType, +) (crypto.Signer, error) { + switch keyType { + case KeyTypeEd448: + pubkey, privkey, err := ed448.GenerateKey(rand.Reader) + if err != nil { + return nil, errors.Wrap(err, "could not generate key") + } + + if err = f.save( + id, + Key{ + Id: id, + Type: keyType, + PublicKey: ByteString(pubkey), + 
PrivateKey: ByteString(privkey), + }, + ); err != nil { + return nil, errors.Wrap(err, "could not save") + } + + return privkey, nil + // case KeyTypePCAS: + // _, privkey, err := addressing.GenerateKey(rand.Reader) + // if err != nil { + // return nil, errors.Wrap(err, "could not generate key") + // } + + // if err = f.save(id, privkey); err != nil { + // return nil, errors.Wrap(err, "could not save") + // } + + // return privkey, nil + } + + return nil, UnsupportedKeyTypeErr +} + +// CreateAgreementKey implements KeyManager +func (f *InMemoryKeyManager) CreateAgreementKey( + id string, + keyType KeyType, +) (curves.Scalar, error) { + switch keyType { + case KeyTypeX448: + privkey := curves.ED448().Scalar.Random(rand.Reader) + pubkey := curves.ED448().NewGeneratorPoint().Mul(privkey) + + if err := f.save( + id, + Key{ + Id: id, + Type: KeyTypeX448, + PublicKey: pubkey.ToAffineCompressed(), + PrivateKey: privkey.Bytes(), + }, + ); err != nil { + return nil, errors.Wrap(err, "could not save") + } + + return privkey, nil + } + + return nil, UnsupportedKeyTypeErr +} + +// GetAgreementKey implements KeyManager +func (f *InMemoryKeyManager) GetAgreementKey(id string) (curves.Scalar, error) { + key, err := f.read(id) + if err != nil { + return nil, err + } + + switch key.Type { + case KeyTypeX448: + privkey, err := curves.ED448().NewScalar().SetBytes(key.PrivateKey) + return privkey, err + } + + return nil, UnsupportedKeyTypeErr +} + +// GetRawKey implements KeyManager +func (f *InMemoryKeyManager) GetRawKey(id string) (*Key, error) { + key, err := f.read(id) + return &key, err +} + +// GetSigningKey implements KeyManager +func (f *InMemoryKeyManager) GetSigningKey(id string) (crypto.Signer, error) { + key, err := f.read(id) + if err != nil { + return nil, err + } + + switch key.Type { + case KeyTypeEd448: + privkey := (ed448.PrivateKey)(key.PrivateKey) + return privkey, err + // case KeyTypePCAS: + // privkey := (addressing.PCAS)(key.PrivateKey) + // return privkey, err 
+ } + + return nil, UnsupportedKeyTypeErr +} + +// PutRawKey implements KeyManager +func (f *InMemoryKeyManager) PutRawKey(key *Key) error { + return f.save(key.Id, *key) +} + +// DeleteKey implements KeyManager +func (f *InMemoryKeyManager) DeleteKey(id string) error { + delete(f.store, id) + + return nil +} + +// GetKey implements KeyManager +func (f *InMemoryKeyManager) GetKey(id string) (key *Key, err error) { + storeKey, err := f.read(id) + if err != nil { + return nil, err + } + + return &storeKey, nil +} + +// ListKeys implements KeyManager +func (f *InMemoryKeyManager) ListKeys() ([]*Key, error) { + keys := []*Key{} + + for k := range f.store { + storeKey, err := f.read(k) + if err != nil { + return nil, err + } + keys = append(keys, &storeKey) + } + + return keys, nil +} + +var _ KeyManager = (*InMemoryKeyManager)(nil) + +func (f *InMemoryKeyManager) save(id string, key Key) error { + f.store[id] = Key{ + Id: key.Id, + Type: key.Type, + PublicKey: key.PublicKey, + PrivateKey: key.PrivateKey, + } + + return nil +} + +func (f *InMemoryKeyManager) read(id string) (Key, error) { + k, ok := f.store[id] + if !ok { + return Key{}, KeyNotFoundErr + } + + return Key{ + Id: k.Id, + Type: k.Type, + PublicKey: k.PublicKey, + PrivateKey: k.PrivateKey, + }, nil +} diff --git a/node/main.go b/node/main.go index 7e7769e..81a3101 100644 --- a/node/main.go +++ b/node/main.go @@ -25,7 +25,7 @@ import ( var ( configDirectory = flag.String( "config", - "./.config/", + filepath.Join(".", ".config"), "the configuration directory", ) importPrivKey = flag.String( @@ -233,5 +233,5 @@ func printLogo() { func printVersion() { fmt.Println(" ") - fmt.Println(" Quilibrium Node - v1.1.8 – Dawn") + fmt.Println(" Quilibrium Node - v1.2.0 – Dawn") } diff --git a/node/p2p/bloom_utils.go b/node/p2p/bloom_utils.go index aa2963a..101e9c9 100644 --- a/node/p2p/bloom_utils.go +++ b/node/p2p/bloom_utils.go @@ -3,6 +3,7 @@ package p2p import ( "fmt" "math/big" + "sort" "golang.org/x/crypto/sha3" ) 
@@ -64,10 +65,10 @@ func generateBitSlices( return nil } -// getBloomFilterIndices returns a bloom filter index based on the data, however +// GetBloomFilter returns a bloom filter based on the data, however // it assumes bitLength is a multiple of 32. If the filter size is not // conformant, this will generate biased indices. -func getBloomFilterIndices(data []byte, bitLength int, k int) []byte { +func GetBloomFilter(data []byte, bitLength int, k int) []byte { size := big.NewInt(int64(bitLength)).BitLen() - 1 digest := sha3.Sum256(data) output := make([]byte, bitLength/8) @@ -75,7 +76,7 @@ func getBloomFilterIndices(data []byte, bitLength int, k int) []byte { digestBI := new(big.Int).SetBytes(digest[:]) for i := 0; i < k; i++ { position := uint(0) - for j := size*(i+1) - 1; j >= size*i; j-- { + for j := size * i; j < size*(i+1); j++ { position = position<<1 | (digestBI.Bit(j)) } if outputBI.Bit(int(position)) != 1 { @@ -96,3 +97,51 @@ func getBloomFilterIndices(data []byte, bitLength int, k int) []byte { outputBI.FillBytes(output) return output } + +// GetBloomFilterIndices returns the indices of a bloom filter, in increasing +// order, assuming bitLength is a multiple of 32 as in GetBloomFilter. +func GetBloomFilterIndices(data []byte, bitLength int, k int) []byte { + size := big.NewInt(int64(bitLength)).BitLen() - 1 + h := sha3.NewShake256() + _, err := h.Write(data) + if err != nil { + panic(err) + } + + digest := make([]byte, size*k/8) + _, err = h.Read(digest) + if err != nil { + panic(err) + } + + indices := []string{} + for i := 0; i < k; i++ { + position := make([]byte, size/8) + for j := (size / 8) * i; j < (size/8)*(i+1); j++ { + position[j%(size/8)] = digest[j] + } + found := false + for _, ext := range indices { + if ext == string(position) { + k++ + found = true + break + } + } + if !found { + p := sort.SearchStrings(indices, string(position)) + if len(indices) > p { + indices = append(indices[:p+1], indices[p:]...) 
+ indices[p] = string(position) + } else { + indices = append(indices, string(position)) + } + } + } + + output := "" + for _, idx := range indices { + output += idx + } + return []byte(output) +} diff --git a/node/p2p/bloom_utils_test.go b/node/p2p/bloom_utils_test.go new file mode 100644 index 0000000..2fa7698 --- /dev/null +++ b/node/p2p/bloom_utils_test.go @@ -0,0 +1,91 @@ +package p2p_test + +import ( + "bytes" + "testing" + + "github.com/stretchr/testify/assert" + "source.quilibrium.com/quilibrium/monorepo/node/p2p" +) + +func TestGetBloomFilter(t *testing.T) { + fourByteThreeKTest := p2p.GetBloomFilter( + []byte{0x00, 0x00, 0x00, 0x00}, + 256, + 3, + ) + assert.ElementsMatch(t, fourByteThreeKTest, []byte{ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x20, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }) + + sixtyByteThreeKTest := p2p.GetBloomFilter( + bytes.Repeat([]byte{0x00}, 60), + 256, + 3, + ) + assert.ElementsMatch(t, sixtyByteThreeKTest, []byte{ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x10, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }) + + fourByteSixteenKTest := p2p.GetBloomFilter( + []byte{0x00, 0x00, 0x00, 0x00}, + 65536, + 16, + ) + assert.ElementsMatch(t, fourByteSixteenKTest, []byte{ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }) + + sixtyByteSixteenKTest := p2p.GetBloomFilter( + bytes.Repeat([]byte{0x00}, 60), + 65536, + 16, + ) + assert.ElementsMatch(t, sixtyByteSixteenKTest, []byte{ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, + }) +} + +func TestGetBloomFilterIndices(t *testing.T) { + fourByteThreeKTest := p2p.GetBloomFilterIndices( + []byte{0x00, 0x00, 0x00, 0x00}, + 256, + 3, + ) + assert.ElementsMatch(t, fourByteThreeKTest, []byte{0x1e, 0xa2, 0xb4}) + + sixtyByteThreeKTest := p2p.GetBloomFilterIndices( + bytes.Repeat([]byte{0x00}, 60), + 256, + 3, + ) + assert.ElementsMatch(t, sixtyByteThreeKTest, []byte{0x0a, 0x72, 0x80}) + + fourByteSixteenKTest := p2p.GetBloomFilterIndices( + []byte{0x00, 0x00, 0x00, 0x00}, + 65536, + 16, + ) + assert.ElementsMatch(t, fourByteSixteenKTest, []byte{ + 0x10, 0x23, 0x1e, 0x79, 0x39, 0xbe, 0x50, 0xe9, 0x64, 0x68, 0x73, 0x4f, + 0x7e, 0xd5, 0x8b, 0x4d, 0x8d, 0x15, 0x95, 0xd6, 0xb1, 0x25, 0xb3, 0x1a, + 0xb4, 0xa2, 0xbd, 0x3c, 0xea, 0x31, 0xee, 0x7e, + }) + + sixtyByteSixteenKTest := p2p.GetBloomFilterIndices( + bytes.Repeat([]byte{0x00}, 60), + 65536, + 16, + ) + assert.ElementsMatch(t, sixtyByteSixteenKTest, []byte{ + 0x10, 0x34, 0x16, 0x18, 0x27, 0xe7, 0x4b, 0xfc, 0x72, 0x0a, 0x80, 0x38, + 0x81, 0x12, 0x93, 0xec, 0xa1, 0xf8, 0xa2, 0x37, 0xa9, 0x1a, 0xc1, 0x55, + 0xc4, 0x16, 0xd1, 0x7e, 0xd5, 0xcd, 0xf0, 0x6c, + }) +} diff --git a/node/p2p/blossomsub.go b/node/p2p/blossomsub.go index f5dc4c4..58e8526 100644 --- a/node/p2p/blossomsub.go +++ b/node/p2p/blossomsub.go @@ -17,6 +17,7 @@ import ( libp2pconfig "github.com/libp2p/go-libp2p/config" "github.com/libp2p/go-libp2p/core/crypto" "github.com/libp2p/go-libp2p/core/host" + "github.com/libp2p/go-libp2p/core/network" "github.com/libp2p/go-libp2p/core/peer" "github.com/libp2p/go-libp2p/core/protocol" "github.com/libp2p/go-libp2p/p2p/discovery/routing" @@ -47,8 +48,6 @@ type BlossomSub struct { var _ PubSub = (*BlossomSub)(nil) var ErrNoPeersAvailable = errors.New("no peers available") -// Crucial note, bitmask lengths should always be a power of two so as to reduce -// index bias with hash functions var BITMASK_ALL = []byte{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, 0xff, 0xff, @@ -180,7 +179,8 @@ func (b *BlossomSub) PublishToBitmask(bitmask []byte, data []byte) error { } func (b *BlossomSub) Publish(data []byte) error { - bitmask := getBloomFilterIndices(data, 256, 3) + bitmask := GetBloomFilter(data, 256, 3) + bitmask = append(bitmask, GetBloomFilterIndices(data, 65536, 24)...) return b.PublishToBitmask(bitmask, data) } @@ -509,7 +509,8 @@ func discoverPeers( for peer := range peerChan { peer := peer - if peer.ID == h.ID() { + if peer.ID == h.ID() || + h.Network().Connectedness(peer.ID) == network.Connected { continue } @@ -535,10 +536,7 @@ func discoverPeers( go func() { for { time.Sleep(30 * time.Second) - if len(h.Network().Peers()) == 0 { - logger.Info("reinitiating discovery") - discover() - } + discover() } }() diff --git a/node/poor_mans_cd.sh b/node/poor_mans_cd.sh new file mode 100755 index 0000000..386cbc4 --- /dev/null +++ b/node/poor_mans_cd.sh @@ -0,0 +1,41 @@ +#!/bin/bash + +start_process() { + go run ./... & + process_pid=$! + child_process_pid=$(pgrep -P $process_pid) +} + +is_process_running() { + ps -p $process_pid > /dev/null 2>&1 + return $? +} + +kill_process() { + kill $process_pid + kill $child_process_pid +} + +start_process + +while true; do + if ! is_process_running; then + echo "Process crashed or stopped. Restarting..." 
+ start_process + fi + + git fetch + + local_head=$(git rev-parse HEAD) + remote_head=$(git rev-parse @{u}) + + if [ "$local_head" != "$remote_head" ]; then + kill_process + + git pull + + start_process + fi + + sleep 60 +done diff --git a/node/protobufs/ceremony.pb.go b/node/protobufs/ceremony.pb.go index 4e8398b..0c2af75 100644 --- a/node/protobufs/ceremony.pb.go +++ b/node/protobufs/ceremony.pb.go @@ -748,7 +748,7 @@ type CeremonyInProgressState struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - ActiveParticipants []*Ed448PublicKey `protobuf:"bytes,1,rep,name=active_participants,json=activeParticipants,proto3" json:"active_participants,omitempty"` + ActiveParticipants []*CeremonyLobbyJoin `protobuf:"bytes,1,rep,name=active_participants,json=activeParticipants,proto3" json:"active_participants,omitempty"` LatestSeenProverAttestations []*CeremonySeenProverAttestation `protobuf:"bytes,2,rep,name=latest_seen_prover_attestations,json=latestSeenProverAttestations,proto3" json:"latest_seen_prover_attestations,omitempty"` DroppedParticipantAttestations []*CeremonyDroppedProverAttestation `protobuf:"bytes,3,rep,name=dropped_participant_attestations,json=droppedParticipantAttestations,proto3" json:"dropped_participant_attestations,omitempty"` TranscriptRoundAdvanceCommits []*CeremonyAdvanceRound `protobuf:"bytes,4,rep,name=transcript_round_advance_commits,json=transcriptRoundAdvanceCommits,proto3" json:"transcript_round_advance_commits,omitempty"` @@ -787,7 +787,7 @@ func (*CeremonyInProgressState) Descriptor() ([]byte, []int) { return file_ceremony_proto_rawDescGZIP(), []int{10} } -func (x *CeremonyInProgressState) GetActiveParticipants() []*Ed448PublicKey { +func (x *CeremonyInProgressState) GetActiveParticipants() []*CeremonyLobbyJoin { if x != nil { return x.ActiveParticipants } @@ -827,7 +827,7 @@ type CeremonyFinalizingState struct { sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - ActiveParticipants []*Ed448PublicKey 
`protobuf:"bytes,1,rep,name=active_participants,json=activeParticipants,proto3" json:"active_participants,omitempty"` + ActiveParticipants []*CeremonyLobbyJoin `protobuf:"bytes,1,rep,name=active_participants,json=activeParticipants,proto3" json:"active_participants,omitempty"` LatestSeenProverAttestations []*CeremonySeenProverAttestation `protobuf:"bytes,2,rep,name=latest_seen_prover_attestations,json=latestSeenProverAttestations,proto3" json:"latest_seen_prover_attestations,omitempty"` DroppedParticipantAttestations []*CeremonyDroppedProverAttestation `protobuf:"bytes,3,rep,name=dropped_participant_attestations,json=droppedParticipantAttestations,proto3" json:"dropped_participant_attestations,omitempty"` Commits []*CeremonyTranscriptCommit `protobuf:"bytes,4,rep,name=commits,proto3" json:"commits,omitempty"` @@ -867,7 +867,7 @@ func (*CeremonyFinalizingState) Descriptor() ([]byte, []int) { return file_ceremony_proto_rawDescGZIP(), []int{11} } -func (x *CeremonyFinalizingState) GetActiveParticipants() []*Ed448PublicKey { +func (x *CeremonyFinalizingState) GetActiveParticipants() []*CeremonyLobbyJoin { if x != nil { return x.ActiveParticipants } @@ -1567,189 +1567,190 @@ var file_ceremony_proto_rawDesc = []byte{ 0x6f, 0x64, 0x65, 0x2e, 0x6b, 0x65, 0x79, 0x73, 0x2e, 0x70, 0x62, 0x2e, 0x45, 0x64, 0x34, 0x34, 0x38, 0x50, 0x75, 0x62, 0x6c, 0x69, 0x63, 0x4b, 0x65, 0x79, 0x52, 0x15, 0x70, 0x72, 0x65, 0x66, 0x65, 0x72, 0x72, 0x65, 0x64, 0x50, 0x61, 0x72, 0x74, 0x69, 0x63, 0x69, 0x70, 0x61, 0x6e, 0x74, - 0x73, 0x22, 0xde, 0x04, 0x0a, 0x17, 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x49, 0x6e, - 0x50, 0x72, 0x6f, 0x67, 0x72, 0x65, 0x73, 0x73, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x58, 0x0a, + 0x73, 0x22, 0xe5, 0x04, 0x0a, 0x17, 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x49, 0x6e, + 0x50, 0x72, 0x6f, 0x67, 0x72, 0x65, 0x73, 0x73, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x5f, 0x0a, 0x13, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x61, 0x72, 0x74, 0x69, 0x63, 
0x69, 0x70, - 0x61, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x71, 0x75, 0x69, - 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x6b, 0x65, 0x79, - 0x73, 0x2e, 0x70, 0x62, 0x2e, 0x45, 0x64, 0x34, 0x34, 0x38, 0x50, 0x75, 0x62, 0x6c, 0x69, 0x63, - 0x4b, 0x65, 0x79, 0x52, 0x12, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x50, 0x61, 0x72, 0x74, 0x69, - 0x63, 0x69, 0x70, 0x61, 0x6e, 0x74, 0x73, 0x12, 0x81, 0x01, 0x0a, 0x1f, 0x6c, 0x61, 0x74, 0x65, - 0x73, 0x74, 0x5f, 0x73, 0x65, 0x65, 0x6e, 0x5f, 0x70, 0x72, 0x6f, 0x76, 0x65, 0x72, 0x5f, 0x61, - 0x74, 0x74, 0x65, 0x73, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x3a, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, - 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x2e, 0x70, 0x62, 0x2e, - 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x53, 0x65, 0x65, 0x6e, 0x50, 0x72, 0x6f, 0x76, - 0x65, 0x72, 0x41, 0x74, 0x74, 0x65, 0x73, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x1c, 0x6c, - 0x61, 0x74, 0x65, 0x73, 0x74, 0x53, 0x65, 0x65, 0x6e, 0x50, 0x72, 0x6f, 0x76, 0x65, 0x72, 0x41, - 0x74, 0x74, 0x65, 0x73, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x87, 0x01, 0x0a, 0x20, - 0x64, 0x72, 0x6f, 0x70, 0x70, 0x65, 0x64, 0x5f, 0x70, 0x61, 0x72, 0x74, 0x69, 0x63, 0x69, 0x70, - 0x61, 0x6e, 0x74, 0x5f, 0x61, 0x74, 0x74, 0x65, 0x73, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, - 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x3d, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, + 0x61, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2e, 0x2e, 0x71, 0x75, 0x69, + 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x65, 0x72, + 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x2e, 0x70, 0x62, 0x2e, 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, + 0x79, 0x4c, 0x6f, 0x62, 0x62, 0x79, 0x4a, 0x6f, 0x69, 0x6e, 0x52, 0x12, 0x61, 0x63, 0x74, 0x69, + 0x76, 0x65, 0x50, 0x61, 
0x72, 0x74, 0x69, 0x63, 0x69, 0x70, 0x61, 0x6e, 0x74, 0x73, 0x12, 0x81, + 0x01, 0x0a, 0x1f, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x65, 0x6e, 0x5f, 0x70, + 0x72, 0x6f, 0x76, 0x65, 0x72, 0x5f, 0x61, 0x74, 0x74, 0x65, 0x73, 0x74, 0x61, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x3a, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, + 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x65, 0x72, 0x65, 0x6d, + 0x6f, 0x6e, 0x79, 0x2e, 0x70, 0x62, 0x2e, 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x53, + 0x65, 0x65, 0x6e, 0x50, 0x72, 0x6f, 0x76, 0x65, 0x72, 0x41, 0x74, 0x74, 0x65, 0x73, 0x74, 0x61, + 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x1c, 0x6c, 0x61, 0x74, 0x65, 0x73, 0x74, 0x53, 0x65, 0x65, 0x6e, + 0x50, 0x72, 0x6f, 0x76, 0x65, 0x72, 0x41, 0x74, 0x74, 0x65, 0x73, 0x74, 0x61, 0x74, 0x69, 0x6f, + 0x6e, 0x73, 0x12, 0x87, 0x01, 0x0a, 0x20, 0x64, 0x72, 0x6f, 0x70, 0x70, 0x65, 0x64, 0x5f, 0x70, + 0x61, 0x72, 0x74, 0x69, 0x63, 0x69, 0x70, 0x61, 0x6e, 0x74, 0x5f, 0x61, 0x74, 0x74, 0x65, 0x73, + 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x3d, 0x2e, + 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, + 0x63, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x2e, 0x70, 0x62, 0x2e, 0x43, 0x65, 0x72, 0x65, + 0x6d, 0x6f, 0x6e, 0x79, 0x44, 0x72, 0x6f, 0x70, 0x70, 0x65, 0x64, 0x50, 0x72, 0x6f, 0x76, 0x65, + 0x72, 0x41, 0x74, 0x74, 0x65, 0x73, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x1e, 0x64, 0x72, + 0x6f, 0x70, 0x70, 0x65, 0x64, 0x50, 0x61, 0x72, 0x74, 0x69, 0x63, 0x69, 0x70, 0x61, 0x6e, 0x74, + 0x41, 0x74, 0x74, 0x65, 0x73, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x7a, 0x0a, 0x20, + 0x74, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x5f, 0x72, 0x6f, 0x75, 0x6e, 0x64, + 0x5f, 0x61, 0x64, 0x76, 0x61, 0x6e, 0x63, 0x65, 0x5f, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x73, + 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x31, 0x2e, 0x71, 
0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, - 0x79, 0x2e, 0x70, 0x62, 0x2e, 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x44, 0x72, 0x6f, - 0x70, 0x70, 0x65, 0x64, 0x50, 0x72, 0x6f, 0x76, 0x65, 0x72, 0x41, 0x74, 0x74, 0x65, 0x73, 0x74, - 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x1e, 0x64, 0x72, 0x6f, 0x70, 0x70, 0x65, 0x64, 0x50, 0x61, - 0x72, 0x74, 0x69, 0x63, 0x69, 0x70, 0x61, 0x6e, 0x74, 0x41, 0x74, 0x74, 0x65, 0x73, 0x74, 0x61, - 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x7a, 0x0a, 0x20, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, - 0x69, 0x70, 0x74, 0x5f, 0x72, 0x6f, 0x75, 0x6e, 0x64, 0x5f, 0x61, 0x64, 0x76, 0x61, 0x6e, 0x63, - 0x65, 0x5f, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, - 0x31, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, - 0x65, 0x2e, 0x63, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x2e, 0x70, 0x62, 0x2e, 0x43, 0x65, - 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x41, 0x64, 0x76, 0x61, 0x6e, 0x63, 0x65, 0x52, 0x6f, 0x75, - 0x6e, 0x64, 0x52, 0x1d, 0x74, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x6f, - 0x75, 0x6e, 0x64, 0x41, 0x64, 0x76, 0x61, 0x6e, 0x63, 0x65, 0x43, 0x6f, 0x6d, 0x6d, 0x69, 0x74, - 0x73, 0x12, 0x5f, 0x0a, 0x17, 0x6e, 0x65, 0x78, 0x74, 0x5f, 0x72, 0x6f, 0x75, 0x6e, 0x64, 0x5f, - 0x70, 0x61, 0x72, 0x74, 0x69, 0x63, 0x69, 0x70, 0x61, 0x6e, 0x74, 0x73, 0x18, 0x05, 0x20, 0x03, - 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, - 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x6b, 0x65, 0x79, 0x73, 0x2e, 0x70, 0x62, 0x2e, 0x45, 0x64, 0x34, - 0x34, 0x38, 0x50, 0x75, 0x62, 0x6c, 0x69, 0x63, 0x4b, 0x65, 0x79, 0x52, 0x15, 0x6e, 0x65, 0x78, - 0x74, 0x52, 0x6f, 0x75, 0x6e, 0x64, 0x50, 0x61, 0x72, 0x74, 0x69, 0x63, 0x69, 0x70, 0x61, 0x6e, - 0x74, 0x73, 0x22, 0x81, 0x05, 0x0a, 0x17, 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x46, - 
0x69, 0x6e, 0x61, 0x6c, 0x69, 0x7a, 0x69, 0x6e, 0x67, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x58, - 0x0a, 0x13, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x5f, 0x70, 0x61, 0x72, 0x74, 0x69, 0x63, 0x69, - 0x70, 0x61, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x71, 0x75, - 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x6b, 0x65, - 0x79, 0x73, 0x2e, 0x70, 0x62, 0x2e, 0x45, 0x64, 0x34, 0x34, 0x38, 0x50, 0x75, 0x62, 0x6c, 0x69, - 0x63, 0x4b, 0x65, 0x79, 0x52, 0x12, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x50, 0x61, 0x72, 0x74, - 0x69, 0x63, 0x69, 0x70, 0x61, 0x6e, 0x74, 0x73, 0x12, 0x81, 0x01, 0x0a, 0x1f, 0x6c, 0x61, 0x74, - 0x65, 0x73, 0x74, 0x5f, 0x73, 0x65, 0x65, 0x6e, 0x5f, 0x70, 0x72, 0x6f, 0x76, 0x65, 0x72, 0x5f, - 0x61, 0x74, 0x74, 0x65, 0x73, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x03, - 0x28, 0x0b, 0x32, 0x3a, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, + 0x79, 0x2e, 0x70, 0x62, 0x2e, 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x41, 0x64, 0x76, + 0x61, 0x6e, 0x63, 0x65, 0x52, 0x6f, 0x75, 0x6e, 0x64, 0x52, 0x1d, 0x74, 0x72, 0x61, 0x6e, 0x73, + 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x6f, 0x75, 0x6e, 0x64, 0x41, 0x64, 0x76, 0x61, 0x6e, 0x63, + 0x65, 0x43, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x73, 0x12, 0x5f, 0x0a, 0x17, 0x6e, 0x65, 0x78, 0x74, + 0x5f, 0x72, 0x6f, 0x75, 0x6e, 0x64, 0x5f, 0x70, 0x61, 0x72, 0x74, 0x69, 0x63, 0x69, 0x70, 0x61, + 0x6e, 0x74, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x71, 0x75, 0x69, 0x6c, + 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x6b, 0x65, 0x79, 0x73, + 0x2e, 0x70, 0x62, 0x2e, 0x45, 0x64, 0x34, 0x34, 0x38, 0x50, 0x75, 0x62, 0x6c, 0x69, 0x63, 0x4b, + 0x65, 0x79, 0x52, 0x15, 0x6e, 0x65, 0x78, 0x74, 0x52, 0x6f, 0x75, 0x6e, 0x64, 0x50, 0x61, 0x72, + 0x74, 0x69, 0x63, 0x69, 0x70, 0x61, 0x6e, 0x74, 0x73, 0x22, 0x88, 0x05, 0x0a, 0x17, 0x43, 0x65, + 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 
0x46, 0x69, 0x6e, 0x61, 0x6c, 0x69, 0x7a, 0x69, 0x6e, 0x67, + 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x5f, 0x0a, 0x13, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x5f, + 0x70, 0x61, 0x72, 0x74, 0x69, 0x63, 0x69, 0x70, 0x61, 0x6e, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, + 0x28, 0x0b, 0x32, 0x2e, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x2e, 0x70, 0x62, - 0x2e, 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x53, 0x65, 0x65, 0x6e, 0x50, 0x72, 0x6f, - 0x76, 0x65, 0x72, 0x41, 0x74, 0x74, 0x65, 0x73, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x1c, - 0x6c, 0x61, 0x74, 0x65, 0x73, 0x74, 0x53, 0x65, 0x65, 0x6e, 0x50, 0x72, 0x6f, 0x76, 0x65, 0x72, - 0x41, 0x74, 0x74, 0x65, 0x73, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x87, 0x01, 0x0a, - 0x20, 0x64, 0x72, 0x6f, 0x70, 0x70, 0x65, 0x64, 0x5f, 0x70, 0x61, 0x72, 0x74, 0x69, 0x63, 0x69, - 0x70, 0x61, 0x6e, 0x74, 0x5f, 0x61, 0x74, 0x74, 0x65, 0x73, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, - 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x3d, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, - 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x65, 0x72, 0x65, 0x6d, 0x6f, - 0x6e, 0x79, 0x2e, 0x70, 0x62, 0x2e, 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x44, 0x72, - 0x6f, 0x70, 0x70, 0x65, 0x64, 0x50, 0x72, 0x6f, 0x76, 0x65, 0x72, 0x41, 0x74, 0x74, 0x65, 0x73, - 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x1e, 0x64, 0x72, 0x6f, 0x70, 0x70, 0x65, 0x64, 0x50, - 0x61, 0x72, 0x74, 0x69, 0x63, 0x69, 0x70, 0x61, 0x6e, 0x74, 0x41, 0x74, 0x74, 0x65, 0x73, 0x74, - 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x4f, 0x0a, 0x07, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, - 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x35, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, - 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x65, 0x72, 0x65, 0x6d, 0x6f, - 0x6e, 0x79, 0x2e, 0x70, 0x62, 0x2e, 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 
0x79, 0x54, 0x72, - 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x43, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x52, 0x07, - 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x73, 0x12, 0x4c, 0x0a, 0x06, 0x73, 0x68, 0x61, 0x72, 0x65, - 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x34, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, - 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x65, 0x72, 0x65, 0x6d, 0x6f, - 0x6e, 0x79, 0x2e, 0x70, 0x62, 0x2e, 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x54, 0x72, - 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x68, 0x61, 0x72, 0x65, 0x52, 0x06, 0x73, - 0x68, 0x61, 0x72, 0x65, 0x73, 0x12, 0x5f, 0x0a, 0x17, 0x6e, 0x65, 0x78, 0x74, 0x5f, 0x72, 0x6f, - 0x75, 0x6e, 0x64, 0x5f, 0x70, 0x61, 0x72, 0x74, 0x69, 0x63, 0x69, 0x70, 0x61, 0x6e, 0x74, 0x73, - 0x18, 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, - 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x6b, 0x65, 0x79, 0x73, 0x2e, 0x70, 0x62, - 0x2e, 0x45, 0x64, 0x34, 0x34, 0x38, 0x50, 0x75, 0x62, 0x6c, 0x69, 0x63, 0x4b, 0x65, 0x79, 0x52, - 0x15, 0x6e, 0x65, 0x78, 0x74, 0x52, 0x6f, 0x75, 0x6e, 0x64, 0x50, 0x61, 0x72, 0x74, 0x69, 0x63, - 0x69, 0x70, 0x61, 0x6e, 0x74, 0x73, 0x22, 0xab, 0x02, 0x0a, 0x17, 0x43, 0x65, 0x72, 0x65, 0x6d, - 0x6f, 0x6e, 0x79, 0x56, 0x61, 0x6c, 0x69, 0x64, 0x61, 0x74, 0x69, 0x6e, 0x67, 0x53, 0x74, 0x61, - 0x74, 0x65, 0x12, 0x4f, 0x0a, 0x07, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x73, 0x18, 0x01, 0x20, - 0x03, 0x28, 0x0b, 0x32, 0x35, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, - 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x2e, 0x70, - 0x62, 0x2e, 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, - 0x72, 0x69, 0x70, 0x74, 0x43, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x52, 0x07, 0x63, 0x6f, 0x6d, 0x6d, - 0x69, 0x74, 0x73, 0x12, 0x5e, 0x0a, 0x12, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x74, - 0x72, 0x61, 0x6e, 
0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, - 0x2f, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, - 0x65, 0x2e, 0x63, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x2e, 0x70, 0x62, 0x2e, 0x43, 0x65, - 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, - 0x52, 0x11, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, - 0x69, 0x70, 0x74, 0x12, 0x5f, 0x0a, 0x17, 0x6e, 0x65, 0x78, 0x74, 0x5f, 0x72, 0x6f, 0x75, 0x6e, - 0x64, 0x5f, 0x70, 0x61, 0x72, 0x74, 0x69, 0x63, 0x69, 0x70, 0x61, 0x6e, 0x74, 0x73, 0x18, 0x03, + 0x2e, 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x4c, 0x6f, 0x62, 0x62, 0x79, 0x4a, 0x6f, + 0x69, 0x6e, 0x52, 0x12, 0x61, 0x63, 0x74, 0x69, 0x76, 0x65, 0x50, 0x61, 0x72, 0x74, 0x69, 0x63, + 0x69, 0x70, 0x61, 0x6e, 0x74, 0x73, 0x12, 0x81, 0x01, 0x0a, 0x1f, 0x6c, 0x61, 0x74, 0x65, 0x73, + 0x74, 0x5f, 0x73, 0x65, 0x65, 0x6e, 0x5f, 0x70, 0x72, 0x6f, 0x76, 0x65, 0x72, 0x5f, 0x61, 0x74, + 0x74, 0x65, 0x73, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x0b, + 0x32, 0x3a, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, + 0x64, 0x65, 0x2e, 0x63, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x2e, 0x70, 0x62, 0x2e, 0x43, + 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x53, 0x65, 0x65, 0x6e, 0x50, 0x72, 0x6f, 0x76, 0x65, + 0x72, 0x41, 0x74, 0x74, 0x65, 0x73, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x1c, 0x6c, 0x61, + 0x74, 0x65, 0x73, 0x74, 0x53, 0x65, 0x65, 0x6e, 0x50, 0x72, 0x6f, 0x76, 0x65, 0x72, 0x41, 0x74, + 0x74, 0x65, 0x73, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x87, 0x01, 0x0a, 0x20, 0x64, + 0x72, 0x6f, 0x70, 0x70, 0x65, 0x64, 0x5f, 0x70, 0x61, 0x72, 0x74, 0x69, 0x63, 0x69, 0x70, 0x61, + 0x6e, 0x74, 0x5f, 0x61, 0x74, 0x74, 0x65, 0x73, 0x74, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, + 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x3d, 0x2e, 0x71, 
0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, + 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, + 0x2e, 0x70, 0x62, 0x2e, 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x44, 0x72, 0x6f, 0x70, + 0x70, 0x65, 0x64, 0x50, 0x72, 0x6f, 0x76, 0x65, 0x72, 0x41, 0x74, 0x74, 0x65, 0x73, 0x74, 0x61, + 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x1e, 0x64, 0x72, 0x6f, 0x70, 0x70, 0x65, 0x64, 0x50, 0x61, 0x72, + 0x74, 0x69, 0x63, 0x69, 0x70, 0x61, 0x6e, 0x74, 0x41, 0x74, 0x74, 0x65, 0x73, 0x74, 0x61, 0x74, + 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x4f, 0x0a, 0x07, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x73, 0x18, + 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x35, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, + 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, + 0x2e, 0x70, 0x62, 0x2e, 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x54, 0x72, 0x61, 0x6e, + 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x43, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x52, 0x07, 0x63, 0x6f, + 0x6d, 0x6d, 0x69, 0x74, 0x73, 0x12, 0x4c, 0x0a, 0x06, 0x73, 0x68, 0x61, 0x72, 0x65, 0x73, 0x18, + 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x34, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, + 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, + 0x2e, 0x70, 0x62, 0x2e, 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x54, 0x72, 0x61, 0x6e, + 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x53, 0x68, 0x61, 0x72, 0x65, 0x52, 0x06, 0x73, 0x68, 0x61, + 0x72, 0x65, 0x73, 0x12, 0x5f, 0x0a, 0x17, 0x6e, 0x65, 0x78, 0x74, 0x5f, 0x72, 0x6f, 0x75, 0x6e, + 0x64, 0x5f, 0x70, 0x61, 0x72, 0x74, 0x69, 0x63, 0x69, 0x70, 0x61, 0x6e, 0x74, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x6b, 0x65, 0x79, 0x73, 0x2e, 0x70, 0x62, 0x2e, 0x45, 0x64, 0x34, 0x34, 0x38, 0x50, 0x75, 0x62, 0x6c, 0x69, 0x63, 0x4b, 0x65, 0x79, 0x52, 0x15, 0x6e, 
0x65, 0x78, 0x74, 0x52, 0x6f, 0x75, 0x6e, 0x64, 0x50, 0x61, 0x72, 0x74, 0x69, 0x63, 0x69, 0x70, - 0x61, 0x6e, 0x74, 0x73, 0x22, 0x62, 0x0a, 0x18, 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, - 0x50, 0x65, 0x65, 0x72, 0x4c, 0x69, 0x73, 0x74, 0x41, 0x6e, 0x6e, 0x6f, 0x75, 0x6e, 0x63, 0x65, - 0x12, 0x46, 0x0a, 0x09, 0x70, 0x65, 0x65, 0x72, 0x5f, 0x6c, 0x69, 0x73, 0x74, 0x18, 0x01, 0x20, - 0x03, 0x28, 0x0b, 0x32, 0x29, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, - 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x2e, 0x70, - 0x62, 0x2e, 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x50, 0x65, 0x65, 0x72, 0x52, 0x08, - 0x70, 0x65, 0x65, 0x72, 0x4c, 0x69, 0x73, 0x74, 0x22, 0xd7, 0x01, 0x0a, 0x0c, 0x43, 0x65, 0x72, - 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x50, 0x65, 0x65, 0x72, 0x12, 0x17, 0x0a, 0x07, 0x70, 0x65, 0x65, - 0x72, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x06, 0x70, 0x65, 0x65, 0x72, - 0x49, 0x64, 0x12, 0x1c, 0x0a, 0x09, 0x6d, 0x75, 0x6c, 0x74, 0x69, 0x61, 0x64, 0x64, 0x72, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x6d, 0x75, 0x6c, 0x74, 0x69, 0x61, 0x64, 0x64, 0x72, - 0x12, 0x1b, 0x0a, 0x09, 0x6d, 0x61, 0x78, 0x5f, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x18, 0x03, 0x20, - 0x01, 0x28, 0x04, 0x52, 0x08, 0x6d, 0x61, 0x78, 0x46, 0x72, 0x61, 0x6d, 0x65, 0x12, 0x1c, 0x0a, - 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, - 0x52, 0x09, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x12, 0x18, 0x0a, 0x07, 0x76, - 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x07, 0x76, 0x65, - 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x1c, 0x0a, 0x09, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x75, - 0x72, 0x65, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x09, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x74, - 0x75, 0x72, 0x65, 0x12, 0x1d, 0x0a, 0x0a, 0x70, 0x75, 0x62, 0x6c, 0x69, 0x63, 0x5f, 0x6b, 0x65, - 0x79, 0x18, 0x07, 0x20, 0x01, 0x28, 
0x0c, 0x52, 0x09, 0x70, 0x75, 0x62, 0x6c, 0x69, 0x63, 0x4b, - 0x65, 0x79, 0x22, 0xe0, 0x02, 0x0a, 0x16, 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x43, - 0x6f, 0x6d, 0x70, 0x72, 0x65, 0x73, 0x73, 0x65, 0x64, 0x53, 0x79, 0x6e, 0x63, 0x12, 0x2a, 0x0a, - 0x11, 0x66, 0x72, 0x6f, 0x6d, 0x5f, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x5f, 0x6e, 0x75, 0x6d, 0x62, - 0x65, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0f, 0x66, 0x72, 0x6f, 0x6d, 0x46, 0x72, - 0x61, 0x6d, 0x65, 0x4e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x12, 0x26, 0x0a, 0x0f, 0x74, 0x6f, 0x5f, - 0x66, 0x72, 0x61, 0x6d, 0x65, 0x5f, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, 0x02, 0x20, 0x01, - 0x28, 0x04, 0x52, 0x0d, 0x74, 0x6f, 0x46, 0x72, 0x61, 0x6d, 0x65, 0x4e, 0x75, 0x6d, 0x62, 0x65, - 0x72, 0x12, 0x5a, 0x0a, 0x16, 0x74, 0x72, 0x75, 0x6e, 0x63, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x63, - 0x6c, 0x6f, 0x63, 0x6b, 0x5f, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x24, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, - 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x6c, 0x6f, 0x63, 0x6b, 0x2e, 0x70, 0x62, 0x2e, 0x43, 0x6c, 0x6f, - 0x63, 0x6b, 0x46, 0x72, 0x61, 0x6d, 0x65, 0x52, 0x14, 0x74, 0x72, 0x75, 0x6e, 0x63, 0x61, 0x74, - 0x65, 0x64, 0x43, 0x6c, 0x6f, 0x63, 0x6b, 0x46, 0x72, 0x61, 0x6d, 0x65, 0x73, 0x12, 0x47, 0x0a, - 0x06, 0x70, 0x72, 0x6f, 0x6f, 0x66, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2f, 0x2e, + 0x61, 0x6e, 0x74, 0x73, 0x22, 0xab, 0x02, 0x0a, 0x17, 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, + 0x79, 0x56, 0x61, 0x6c, 0x69, 0x64, 0x61, 0x74, 0x69, 0x6e, 0x67, 0x53, 0x74, 0x61, 0x74, 0x65, + 0x12, 0x4f, 0x0a, 0x07, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, + 0x0b, 0x32, 0x35, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, + 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x2e, 0x70, 0x62, 0x2e, + 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x54, 0x72, 0x61, 0x6e, 
0x73, 0x63, 0x72, 0x69, + 0x70, 0x74, 0x43, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x52, 0x07, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, + 0x73, 0x12, 0x5e, 0x0a, 0x12, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x74, 0x72, 0x61, + 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x2f, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, - 0x63, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x2e, 0x70, 0x62, 0x2e, 0x49, 0x6e, 0x63, 0x6c, - 0x75, 0x73, 0x69, 0x6f, 0x6e, 0x50, 0x72, 0x6f, 0x6f, 0x66, 0x73, 0x4d, 0x61, 0x70, 0x52, 0x06, - 0x70, 0x72, 0x6f, 0x6f, 0x66, 0x73, 0x12, 0x4d, 0x0a, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, - 0x74, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x31, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, + 0x63, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x2e, 0x70, 0x62, 0x2e, 0x43, 0x65, 0x72, 0x65, + 0x6d, 0x6f, 0x6e, 0x79, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x52, 0x11, + 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x64, 0x54, 0x72, 0x61, 0x6e, 0x73, 0x63, 0x72, 0x69, 0x70, + 0x74, 0x12, 0x5f, 0x0a, 0x17, 0x6e, 0x65, 0x78, 0x74, 0x5f, 0x72, 0x6f, 0x75, 0x6e, 0x64, 0x5f, + 0x70, 0x61, 0x72, 0x74, 0x69, 0x63, 0x69, 0x70, 0x61, 0x6e, 0x74, 0x73, 0x18, 0x03, 0x20, 0x03, + 0x28, 0x0b, 0x32, 0x27, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, + 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x6b, 0x65, 0x79, 0x73, 0x2e, 0x70, 0x62, 0x2e, 0x45, 0x64, 0x34, + 0x34, 0x38, 0x50, 0x75, 0x62, 0x6c, 0x69, 0x63, 0x4b, 0x65, 0x79, 0x52, 0x15, 0x6e, 0x65, 0x78, + 0x74, 0x52, 0x6f, 0x75, 0x6e, 0x64, 0x50, 0x61, 0x72, 0x74, 0x69, 0x63, 0x69, 0x70, 0x61, 0x6e, + 0x74, 0x73, 0x22, 0x62, 0x0a, 0x18, 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x50, 0x65, + 0x65, 0x72, 0x4c, 0x69, 0x73, 0x74, 0x41, 0x6e, 0x6e, 0x6f, 0x75, 0x6e, 0x63, 0x65, 0x12, 0x46, + 0x0a, 0x09, 0x70, 0x65, 0x65, 0x72, 0x5f, 0x6c, 0x69, 0x73, 0x74, 0x18, 0x01, 0x20, 0x03, 0x28, + 0x0b, 0x32, 
0x29, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, + 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x2e, 0x70, 0x62, 0x2e, + 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x50, 0x65, 0x65, 0x72, 0x52, 0x08, 0x70, 0x65, + 0x65, 0x72, 0x4c, 0x69, 0x73, 0x74, 0x22, 0xd7, 0x01, 0x0a, 0x0c, 0x43, 0x65, 0x72, 0x65, 0x6d, + 0x6f, 0x6e, 0x79, 0x50, 0x65, 0x65, 0x72, 0x12, 0x17, 0x0a, 0x07, 0x70, 0x65, 0x65, 0x72, 0x5f, + 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x06, 0x70, 0x65, 0x65, 0x72, 0x49, 0x64, + 0x12, 0x1c, 0x0a, 0x09, 0x6d, 0x75, 0x6c, 0x74, 0x69, 0x61, 0x64, 0x64, 0x72, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x09, 0x6d, 0x75, 0x6c, 0x74, 0x69, 0x61, 0x64, 0x64, 0x72, 0x12, 0x1b, + 0x0a, 0x09, 0x6d, 0x61, 0x78, 0x5f, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x04, 0x52, 0x08, 0x6d, 0x61, 0x78, 0x46, 0x72, 0x61, 0x6d, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x74, + 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, + 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, + 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, + 0x69, 0x6f, 0x6e, 0x12, 0x1c, 0x0a, 0x09, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, 0x65, + 0x18, 0x06, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x09, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, + 0x65, 0x12, 0x1d, 0x0a, 0x0a, 0x70, 0x75, 0x62, 0x6c, 0x69, 0x63, 0x5f, 0x6b, 0x65, 0x79, 0x18, + 0x07, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x09, 0x70, 0x75, 0x62, 0x6c, 0x69, 0x63, 0x4b, 0x65, 0x79, + 0x22, 0xe0, 0x02, 0x0a, 0x16, 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x43, 0x6f, 0x6d, + 0x70, 0x72, 0x65, 0x73, 0x73, 0x65, 0x64, 0x53, 0x79, 0x6e, 0x63, 0x12, 0x2a, 0x0a, 0x11, 0x66, + 0x72, 0x6f, 0x6d, 0x5f, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x5f, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x04, 0x52, 0x0f, 
0x66, 0x72, 0x6f, 0x6d, 0x46, 0x72, 0x61, 0x6d, + 0x65, 0x4e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x12, 0x26, 0x0a, 0x0f, 0x74, 0x6f, 0x5f, 0x66, 0x72, + 0x61, 0x6d, 0x65, 0x5f, 0x6e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x04, + 0x52, 0x0d, 0x74, 0x6f, 0x46, 0x72, 0x61, 0x6d, 0x65, 0x4e, 0x75, 0x6d, 0x62, 0x65, 0x72, 0x12, + 0x5a, 0x0a, 0x16, 0x74, 0x72, 0x75, 0x6e, 0x63, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x63, 0x6c, 0x6f, + 0x63, 0x6b, 0x5f, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, + 0x24, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, + 0x65, 0x2e, 0x63, 0x6c, 0x6f, 0x63, 0x6b, 0x2e, 0x70, 0x62, 0x2e, 0x43, 0x6c, 0x6f, 0x63, 0x6b, + 0x46, 0x72, 0x61, 0x6d, 0x65, 0x52, 0x14, 0x74, 0x72, 0x75, 0x6e, 0x63, 0x61, 0x74, 0x65, 0x64, + 0x43, 0x6c, 0x6f, 0x63, 0x6b, 0x46, 0x72, 0x61, 0x6d, 0x65, 0x73, 0x12, 0x47, 0x0a, 0x06, 0x70, + 0x72, 0x6f, 0x6f, 0x66, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2f, 0x2e, 0x71, 0x75, + 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x65, + 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x2e, 0x70, 0x62, 0x2e, 0x49, 0x6e, 0x63, 0x6c, 0x75, 0x73, + 0x69, 0x6f, 0x6e, 0x50, 0x72, 0x6f, 0x6f, 0x66, 0x73, 0x4d, 0x61, 0x70, 0x52, 0x06, 0x70, 0x72, + 0x6f, 0x6f, 0x66, 0x73, 0x12, 0x4d, 0x0a, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, + 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x31, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, + 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, + 0x79, 0x2e, 0x70, 0x62, 0x2e, 0x49, 0x6e, 0x63, 0x6c, 0x75, 0x73, 0x69, 0x6f, 0x6e, 0x53, 0x65, + 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x4d, 0x61, 0x70, 0x52, 0x08, 0x73, 0x65, 0x67, 0x6d, 0x65, + 0x6e, 0x74, 0x73, 0x22, 0xa5, 0x01, 0x0a, 0x12, 0x49, 0x6e, 0x63, 0x6c, 0x75, 0x73, 0x69, 0x6f, + 0x6e, 0x50, 0x72, 0x6f, 0x6f, 0x66, 0x73, 0x4d, 0x61, 0x70, 0x12, 0x21, 0x0a, 0x0c, 
0x66, 0x72, + 0x61, 0x6d, 0x65, 0x5f, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, + 0x52, 0x0b, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x43, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x12, 0x14, 0x0a, + 0x05, 0x70, 0x72, 0x6f, 0x6f, 0x66, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x05, 0x70, 0x72, + 0x6f, 0x6f, 0x66, 0x12, 0x56, 0x0a, 0x0b, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x6d, 0x65, 0x6e, + 0x74, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x34, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x2e, 0x70, 0x62, 0x2e, 0x49, 0x6e, 0x63, 0x6c, 0x75, 0x73, 0x69, 0x6f, 0x6e, - 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x4d, 0x61, 0x70, 0x52, 0x08, 0x73, 0x65, 0x67, - 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x22, 0xa5, 0x01, 0x0a, 0x12, 0x49, 0x6e, 0x63, 0x6c, 0x75, 0x73, - 0x69, 0x6f, 0x6e, 0x50, 0x72, 0x6f, 0x6f, 0x66, 0x73, 0x4d, 0x61, 0x70, 0x12, 0x21, 0x0a, 0x0c, - 0x66, 0x72, 0x61, 0x6d, 0x65, 0x5f, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x0c, 0x52, 0x0b, 0x66, 0x72, 0x61, 0x6d, 0x65, 0x43, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x12, - 0x14, 0x0a, 0x05, 0x70, 0x72, 0x6f, 0x6f, 0x66, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x05, - 0x70, 0x72, 0x6f, 0x6f, 0x66, 0x12, 0x56, 0x0a, 0x0b, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x6d, - 0x65, 0x6e, 0x74, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x34, 0x2e, 0x71, 0x75, 0x69, - 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x65, 0x72, - 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x2e, 0x70, 0x62, 0x2e, 0x49, 0x6e, 0x63, 0x6c, 0x75, 0x73, 0x69, - 0x6f, 0x6e, 0x43, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x4d, 0x61, 0x70, - 0x52, 0x0b, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x22, 0x3e, 0x0a, - 0x14, 0x49, 0x6e, 0x63, 0x6c, 0x75, 0x73, 0x69, 0x6f, 0x6e, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, - 0x74, 0x73, 0x4d, 0x61, 0x70, 
0x12, 0x12, 0x0a, 0x04, 0x68, 0x61, 0x73, 0x68, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x0c, 0x52, 0x04, 0x68, 0x61, 0x73, 0x68, 0x12, 0x12, 0x0a, 0x04, 0x64, 0x61, 0x74, - 0x61, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x64, 0x61, 0x74, 0x61, 0x22, 0x7b, 0x0a, - 0x17, 0x49, 0x6e, 0x63, 0x6c, 0x75, 0x73, 0x69, 0x6f, 0x6e, 0x43, 0x6f, 0x6d, 0x6d, 0x69, 0x74, - 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x4d, 0x61, 0x70, 0x12, 0x1e, 0x0a, 0x0a, 0x63, 0x6f, 0x6d, 0x6d, - 0x69, 0x74, 0x6d, 0x65, 0x6e, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0a, 0x63, 0x6f, - 0x6d, 0x6d, 0x69, 0x74, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x19, 0x0a, 0x08, 0x74, 0x79, 0x70, 0x65, - 0x5f, 0x75, 0x72, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x74, 0x79, 0x70, 0x65, - 0x55, 0x72, 0x6c, 0x12, 0x25, 0x0a, 0x0e, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x5f, 0x68, - 0x61, 0x73, 0x68, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0c, 0x52, 0x0d, 0x73, 0x65, 0x67, - 0x6d, 0x65, 0x6e, 0x74, 0x48, 0x61, 0x73, 0x68, 0x65, 0x73, 0x32, 0x89, 0x02, 0x0a, 0x0f, 0x43, - 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x7e, - 0x0a, 0x17, 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x72, 0x65, 0x73, 0x73, 0x65, 0x64, 0x53, - 0x79, 0x6e, 0x63, 0x46, 0x72, 0x61, 0x6d, 0x65, 0x73, 0x12, 0x2c, 0x2e, 0x71, 0x75, 0x69, 0x6c, - 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x6c, 0x6f, 0x63, - 0x6b, 0x2e, 0x70, 0x62, 0x2e, 0x43, 0x6c, 0x6f, 0x63, 0x6b, 0x46, 0x72, 0x61, 0x6d, 0x65, 0x73, - 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x33, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, - 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x65, 0x72, 0x65, 0x6d, 0x6f, - 0x6e, 0x79, 0x2e, 0x70, 0x62, 0x2e, 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x43, 0x6f, - 0x6d, 0x70, 0x72, 0x65, 0x73, 0x73, 0x65, 0x64, 0x53, 0x79, 0x6e, 0x63, 0x30, 0x01, 0x12, 0x76, - 0x0a, 0x10, 0x47, 0x65, 0x74, 0x50, 0x75, 0x62, 0x6c, 0x69, 0x63, 
0x43, 0x68, 0x61, 0x6e, 0x6e, - 0x65, 0x6c, 0x12, 0x2e, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, - 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x68, 0x61, 0x6e, 0x6e, 0x65, 0x6c, 0x2e, 0x70, 0x62, 0x2e, - 0x50, 0x32, 0x50, 0x43, 0x68, 0x61, 0x6e, 0x6e, 0x65, 0x6c, 0x45, 0x6e, 0x76, 0x65, 0x6c, 0x6f, - 0x70, 0x65, 0x1a, 0x2e, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, - 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x68, 0x61, 0x6e, 0x6e, 0x65, 0x6c, 0x2e, 0x70, 0x62, 0x2e, - 0x50, 0x32, 0x50, 0x43, 0x68, 0x61, 0x6e, 0x6e, 0x65, 0x6c, 0x45, 0x6e, 0x76, 0x65, 0x6c, 0x6f, - 0x70, 0x65, 0x28, 0x01, 0x30, 0x01, 0x42, 0x3a, 0x5a, 0x38, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, - 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, - 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2f, 0x6d, 0x6f, 0x6e, 0x6f, 0x72, - 0x65, 0x70, 0x6f, 0x2f, 0x6e, 0x6f, 0x64, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, - 0x66, 0x73, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x43, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x4d, 0x61, 0x70, 0x52, 0x0b, + 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x22, 0x3e, 0x0a, 0x14, 0x49, + 0x6e, 0x63, 0x6c, 0x75, 0x73, 0x69, 0x6f, 0x6e, 0x53, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, + 0x4d, 0x61, 0x70, 0x12, 0x12, 0x0a, 0x04, 0x68, 0x61, 0x73, 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x0c, 0x52, 0x04, 0x68, 0x61, 0x73, 0x68, 0x12, 0x12, 0x0a, 0x04, 0x64, 0x61, 0x74, 0x61, 0x18, + 0x02, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x64, 0x61, 0x74, 0x61, 0x22, 0x7b, 0x0a, 0x17, 0x49, + 0x6e, 0x63, 0x6c, 0x75, 0x73, 0x69, 0x6f, 0x6e, 0x43, 0x6f, 0x6d, 0x6d, 0x69, 0x74, 0x6d, 0x65, + 0x6e, 0x74, 0x73, 0x4d, 0x61, 0x70, 0x12, 0x1e, 0x0a, 0x0a, 0x63, 0x6f, 0x6d, 0x6d, 0x69, 0x74, + 0x6d, 0x65, 0x6e, 0x74, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0a, 0x63, 0x6f, 0x6d, 0x6d, + 0x69, 0x74, 0x6d, 0x65, 0x6e, 0x74, 0x12, 
0x19, 0x0a, 0x08, 0x74, 0x79, 0x70, 0x65, 0x5f, 0x75, + 0x72, 0x6c, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x74, 0x79, 0x70, 0x65, 0x55, 0x72, + 0x6c, 0x12, 0x25, 0x0a, 0x0e, 0x73, 0x65, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x5f, 0x68, 0x61, 0x73, + 0x68, 0x65, 0x73, 0x18, 0x03, 0x20, 0x03, 0x28, 0x0c, 0x52, 0x0d, 0x73, 0x65, 0x67, 0x6d, 0x65, + 0x6e, 0x74, 0x48, 0x61, 0x73, 0x68, 0x65, 0x73, 0x32, 0x89, 0x02, 0x0a, 0x0f, 0x43, 0x65, 0x72, + 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x7e, 0x0a, 0x17, + 0x47, 0x65, 0x74, 0x43, 0x6f, 0x6d, 0x70, 0x72, 0x65, 0x73, 0x73, 0x65, 0x64, 0x53, 0x79, 0x6e, + 0x63, 0x46, 0x72, 0x61, 0x6d, 0x65, 0x73, 0x12, 0x2c, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, + 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x6c, 0x6f, 0x63, 0x6b, 0x2e, + 0x70, 0x62, 0x2e, 0x43, 0x6c, 0x6f, 0x63, 0x6b, 0x46, 0x72, 0x61, 0x6d, 0x65, 0x73, 0x52, 0x65, + 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x33, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, + 0x75, 0x6d, 0x2e, 0x6e, 0x6f, 0x64, 0x65, 0x2e, 0x63, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, + 0x2e, 0x70, 0x62, 0x2e, 0x43, 0x65, 0x72, 0x65, 0x6d, 0x6f, 0x6e, 0x79, 0x43, 0x6f, 0x6d, 0x70, + 0x72, 0x65, 0x73, 0x73, 0x65, 0x64, 0x53, 0x79, 0x6e, 0x63, 0x30, 0x01, 0x12, 0x76, 0x0a, 0x10, + 0x47, 0x65, 0x74, 0x50, 0x75, 0x62, 0x6c, 0x69, 0x63, 0x43, 0x68, 0x61, 0x6e, 0x6e, 0x65, 0x6c, + 0x12, 0x2e, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, + 0x64, 0x65, 0x2e, 0x63, 0x68, 0x61, 0x6e, 0x6e, 0x65, 0x6c, 0x2e, 0x70, 0x62, 0x2e, 0x50, 0x32, + 0x50, 0x43, 0x68, 0x61, 0x6e, 0x6e, 0x65, 0x6c, 0x45, 0x6e, 0x76, 0x65, 0x6c, 0x6f, 0x70, 0x65, + 0x1a, 0x2e, 0x2e, 0x71, 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x6e, 0x6f, + 0x64, 0x65, 0x2e, 0x63, 0x68, 0x61, 0x6e, 0x6e, 0x65, 0x6c, 0x2e, 0x70, 0x62, 0x2e, 0x50, 0x32, + 0x50, 0x43, 0x68, 0x61, 0x6e, 0x6e, 0x65, 0x6c, 0x45, 0x6e, 0x76, 0x65, 0x6c, 
0x6f, 0x70, 0x65, + 0x28, 0x01, 0x30, 0x01, 0x42, 0x3a, 0x5a, 0x38, 0x73, 0x6f, 0x75, 0x72, 0x63, 0x65, 0x2e, 0x71, + 0x75, 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x71, 0x75, + 0x69, 0x6c, 0x69, 0x62, 0x72, 0x69, 0x75, 0x6d, 0x2f, 0x6d, 0x6f, 0x6e, 0x6f, 0x72, 0x65, 0x70, + 0x6f, 0x2f, 0x6e, 0x6f, 0x64, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x73, + 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -1822,12 +1823,12 @@ var file_ceremony_proto_depIdxs = []int32{ 22, // 23: quilibrium.node.ceremony.pb.CeremonyLobbyJoin.public_key_signature_ed448:type_name -> quilibrium.node.keys.pb.Ed448Signature 7, // 24: quilibrium.node.ceremony.pb.CeremonyOpenState.joined_participants:type_name -> quilibrium.node.ceremony.pb.CeremonyLobbyJoin 21, // 25: quilibrium.node.ceremony.pb.CeremonyOpenState.preferred_participants:type_name -> quilibrium.node.keys.pb.Ed448PublicKey - 21, // 26: quilibrium.node.ceremony.pb.CeremonyInProgressState.active_participants:type_name -> quilibrium.node.keys.pb.Ed448PublicKey + 7, // 26: quilibrium.node.ceremony.pb.CeremonyInProgressState.active_participants:type_name -> quilibrium.node.ceremony.pb.CeremonyLobbyJoin 2, // 27: quilibrium.node.ceremony.pb.CeremonyInProgressState.latest_seen_prover_attestations:type_name -> quilibrium.node.ceremony.pb.CeremonySeenProverAttestation 3, // 28: quilibrium.node.ceremony.pb.CeremonyInProgressState.dropped_participant_attestations:type_name -> quilibrium.node.ceremony.pb.CeremonyDroppedProverAttestation 6, // 29: quilibrium.node.ceremony.pb.CeremonyInProgressState.transcript_round_advance_commits:type_name -> quilibrium.node.ceremony.pb.CeremonyAdvanceRound 21, // 30: quilibrium.node.ceremony.pb.CeremonyInProgressState.next_round_participants:type_name -> quilibrium.node.keys.pb.Ed448PublicKey - 21, // 31: quilibrium.node.ceremony.pb.CeremonyFinalizingState.active_participants:type_name -> quilibrium.node.keys.pb.Ed448PublicKey + 7, // 31: 
quilibrium.node.ceremony.pb.CeremonyFinalizingState.active_participants:type_name -> quilibrium.node.ceremony.pb.CeremonyLobbyJoin 2, // 32: quilibrium.node.ceremony.pb.CeremonyFinalizingState.latest_seen_prover_attestations:type_name -> quilibrium.node.ceremony.pb.CeremonySeenProverAttestation 3, // 33: quilibrium.node.ceremony.pb.CeremonyFinalizingState.dropped_participant_attestations:type_name -> quilibrium.node.ceremony.pb.CeremonyDroppedProverAttestation 5, // 34: quilibrium.node.ceremony.pb.CeremonyFinalizingState.commits:type_name -> quilibrium.node.ceremony.pb.CeremonyTranscriptCommit diff --git a/node/protobufs/ceremony.proto b/node/protobufs/ceremony.proto index b4ce8dd..3014ce9 100644 --- a/node/protobufs/ceremony.proto +++ b/node/protobufs/ceremony.proto @@ -105,7 +105,7 @@ message CeremonyOpenState { } message CeremonyInProgressState { - repeated quilibrium.node.keys.pb.Ed448PublicKey active_participants = 1; + repeated CeremonyLobbyJoin active_participants = 1; repeated CeremonySeenProverAttestation latest_seen_prover_attestations = 2; repeated CeremonyDroppedProverAttestation dropped_participant_attestations = 3; repeated CeremonyAdvanceRound transcript_round_advance_commits = 4; @@ -113,7 +113,7 @@ message CeremonyInProgressState { } message CeremonyFinalizingState { - repeated quilibrium.node.keys.pb.Ed448PublicKey active_participants = 1; + repeated CeremonyLobbyJoin active_participants = 1; repeated CeremonySeenProverAttestation latest_seen_prover_attestations = 2; repeated CeremonyDroppedProverAttestation dropped_participant_attestations = 3; repeated CeremonyTranscriptCommit commits = 4; diff --git a/node/protobufs/clock.go b/node/protobufs/clock.go index 1bf62d9..e1c5077 100644 --- a/node/protobufs/clock.go +++ b/node/protobufs/clock.go @@ -121,7 +121,9 @@ func (frame *ClockFrame) VerifyMasterClockFrame() error { return nil } -func (frame *ClockFrame) GetParentSelectorAndDistance() ( +func (frame *ClockFrame) GetParentSelectorAndDistance( + 
discriminator *big.Int, +) ( *big.Int, *big.Int, *big.Int, @@ -141,27 +143,20 @@ func (frame *ClockFrame) GetParentSelectorAndDistance() ( parentSelector := new(big.Int).SetBytes(frame.ParentSelector) - var pubkey []byte - ed448PublicKey := frame.GetPublicKeySignatureEd448() - if ed448PublicKey != nil { - pubkey = ed448PublicKey.PublicKey.KeyValue - } else { - return nil, nil, nil, errors.Wrap( - errors.New("no valid signature provided"), - "get parent selector and distance", + var distance *big.Int + if discriminator != nil { + l := new(big.Int).Mod( + new(big.Int).Sub(selector, discriminator), + ff.Modulus(), ) - } - - discriminator, err := poseidon.HashBytes(pubkey) - if err != nil { - return nil, nil, nil, errors.Wrap(err, "get parent selector and distance") - } - - l := new(big.Int).Mod(new(big.Int).Sub(selector, discriminator), ff.Modulus()) - r := new(big.Int).Mod(new(big.Int).Sub(discriminator, selector), ff.Modulus()) - distance := r - if l.Cmp(r) == -1 { - distance = l + r := new(big.Int).Mod( + new(big.Int).Sub(discriminator, selector), + ff.Modulus(), + ) + distance = r + if l.Cmp(r) == 1 { + distance = l + } } return parentSelector, distance, selector, nil diff --git a/node/retroactive_peers.json b/node/retroactive_peers.json new file mode 100644 index 0000000..2a8d5fe --- /dev/null +++ b/node/retroactive_peers.json @@ -0,0 +1,1052 @@ +{ + "rewards": [ + { "peer_id": "EiDt/I7irgZJvxHTKVYBWFC84aZt6t+jH44pTtBwDps2Mw==", "token_balance": 137558 }, + { "peer_id": "EiCFw9CwNODrkiOIVFcyLpwLDVbzw+gJEk9Up36FOcAkYA==", "token_balance": 137558 }, + { "peer_id": "EiA/zYamLhLM7WvEJw76qCsu5BECV7HuHnyXqijSDCj/LA==", "token_balance": 137558 }, + { "peer_id": "EiDN04yQGVtsc8h1kiuZ9/hsp0N8YnWmuD/H4sLqBo4cMQ==", "token_balance": 137558 }, + { "peer_id": "EiBEDZeMlawFcSNZbhMdpp81oeABrkoys9FW5Gpuo+Vx1w==", "token_balance": 137558 }, + { "peer_id": "EiCpNbD4duH1dBHPSKerruhytoJXS4yBzmxtll4/+uTdbw==", "token_balance": 157208 }, + { "peer_id": 
"EiDW4pG0FjdnCCn4Fl3WaFtsuOjewUPyYy1hgxYGcHJU4A==", "token_balance": 137558 }, + { "peer_id": "EiDjVqM33jPfGp3G6hmXGnY/xT9+jJQaNqzaEc5YPh7nrA==", "token_balance": 137558 }, + { "peer_id": "EiDi4THsckwtdtcsVftxOXE5ECGQVt/PlUR5z3CHfdaOvg==", "token_balance": 137558 }, + { "peer_id": "EiA3/AHA4LVWEJBJC7Vj3DwN96vPIFrq7sUMIriesskU+A==", "token_balance": 157208 }, + { "peer_id": "EiB40xAnxyscpEqR+HI4sqtEHX2L5TpOglTr8wgR1Rf8XQ==", "token_balance": 137558 }, + { "peer_id": "EiBDjyTCf4m7wrvkAq795q3/9GTROLo3dQRpJDbpkA7TlA==", "token_balance": 137558 }, + { "peer_id": "EiDnRZq4L5VNWDvT6lj3Lx8hV6uLISnIw5/WbWxPlHJ/YQ==", "token_balance": 137558 }, + { "peer_id": "EiDcwwi2Y/d29+Za4AKneG8WVGzn0B8IGasVAluMJX8kNQ==", "token_balance": 137558 }, + { "peer_id": "EiC/2ozdDeYRHkBa7eHblE0B7iHotFyuutG1OoxiCF/Iuw==", "token_balance": 137558 }, + { "peer_id": "EiCsl0NhFsQfmWeqmGFnAHJO3EvcInss6hwpzs4yCGbYcA==", "token_balance": 137558 }, + { "peer_id": "EiBacHPWZ9GDbROC3ir0iBFWLSRlYiJ+31IrNoJd8IVcew==", "token_balance": 137558 }, + { "peer_id": "EiBQ82JQDutS5d301d7ZwTebXyQaw8boOLxi3eKMxDjMiA==", "token_balance": 137558 }, + { "peer_id": "EiC79invPH4VCatNnehIsRFpI32LHVISg8T6gmBI03L+iw==", "token_balance": 157208 }, + { "peer_id": "EiBqHhBPaZ40hnQkisOf5i72uii3Ft99g/WHSZeABmN+gQ==", "token_balance": 137558 }, + { "peer_id": "EiDLg4I2+SAV7f0dUfT/qwDJWAstv1CAYmbhvJG3LxrgZw==", "token_balance": 157208 }, + { "peer_id": "EiCPSIISA21MOeMW7/jaoEm1X8S0JZtUy49uqxPi/FGbTg==", "token_balance": 137558 }, + { "peer_id": "EiC+fzSzlytoB6Ip0cRcDGmEk12Ogw58Sc3hNdsg/SHkOw==", "token_balance": 137558 }, + { "peer_id": "EiBziJ/ddhCWFhS0qAIyvBqdgFzVxeww8UfIHyR1LLFJJg==", "token_balance": 137558 }, + { "peer_id": "EiDSp9LSre4xNdlcUQUeWZ27Le3z2ux+yXivh+CQ9i+f4A==", "token_balance": 137558 }, + { "peer_id": "EiB7PV8KSHxzLWMdhtKyJetVAwG1MmFAw+Aq2scndWw5ag==", "token_balance": 137558 }, + { "peer_id": "EiCOMYZsW3yEELpS24n3pBVjAiztvHT5PgCPtX2Tlw1OFQ==", "token_balance": 137558 }, + { "peer_id": 
"EiB7RtBwX+XKHvZ60TAmYFwUc8wf/gjr/oxM85oRovHeYg==", "token_balance": 137558 }, + { "peer_id": "EiCGalaov/tWEJ9kefQb3tT2t8Iqhw8I77Hy9LTiqkTFCA==", "token_balance": 137558 }, + { "peer_id": "EiDVxRV2koJRehT22MRKhCUAwVjWoOs7jG3YHkH+H4n/Kg==", "token_balance": 137558 }, + { "peer_id": "EiDw3Ywh65HO9XDhjHrCh9O58F4by8QJYG/BIAj/0k/MCg==", "token_balance": 137558 }, + { "peer_id": "EiDegv3AFdSkzSBIMEa66/4zF4dAy9VWX+PfsgyWbrjMMw==", "token_balance": 137558 }, + { "peer_id": "EiBYvhRhcArCadflEwdD5dCjxnpBR+GTqbzTOeB6mftNBQ==", "token_balance": 137558 }, + { "peer_id": "EiAMbrjjLm8tnSBsGYC5aAB8W+bTgLLfZRZ/vNevcfe1UQ==", "token_balance": 137558 }, + { "peer_id": "EiBK+wXpiV7GnfCYqkUBCQm8+G0YxGKlKwAYIEYdNSPKqg==", "token_balance": 137558 }, + { "peer_id": "EiBh7HnX3m2LJoXl0cFrHYC0wRkeMIu86iXuD3ITfOcTQA==", "token_balance": 137558 }, + { "peer_id": "EiDyQ/H+unN8ABzy3jMO8AXqqRWMkdpp9S5Qo5KxsRk0yA==", "token_balance": 157208 }, + { "peer_id": "EiCtbAIknJCzX3zMa7XKdL2C/Vk5SOAuQvN1QQ7g/NINuA==", "token_balance": 137558 }, + { "peer_id": "EiBPYBYzDA1opc87Pxn1+0W3T1m7r5LLDGKXORuyqb/NrA==", "token_balance": 137558 }, + { "peer_id": "EiCFRWMLrI4Ep1tM/Ypek+bBdRjFNs8VSTYUOmvAmbY++Q==", "token_balance": 137558 }, + { "peer_id": "EiDq5lIy++a/uU3dGPbUo727N4pUfjY4l5aESU0ri91igg==", "token_balance": 137558 }, + { "peer_id": "EiCir42Ak2MRRlHduW0N6EYW/TvZ4iiaCjbrvip2k0YZeg==", "token_balance": 157208 }, + { "peer_id": "EiAMqyEN+tXoJQXA3/JSsaUSEDmAh9BL9ZOd1i9+r8uIzw==", "token_balance": 137558 }, + { "peer_id": "EiBIcjkr470meG42q/bwhhTsKvsWL/SjRjhRwYlWCzmgnQ==", "token_balance": 137558 }, + { "peer_id": "EiDACZWOuYJyI5Zp/z+P4rRshPO81fv9v7w/D0Vb9YOjkw==", "token_balance": 137558 }, + { "peer_id": "EiCqZLOl92jTRBmkq4aVHo08RWPBQuGuieg53X/gfbJX5Q==", "token_balance": 157208 }, + { "peer_id": "EiBb6Z0DMUQ7je1as86/l+cqGYx0lbI/j2ZOMf7Twr3qiQ==", "token_balance": 137558 }, + { "peer_id": "EiDiXM2XUbY95oH/DxoP7zIzTkiBTqIq2Q6oG4/Lzf3KnA==", "token_balance": 137558 }, + { "peer_id": 
"EiAhgQeJqx2PJwubnlXxw/VNRyoUQdSrFjTuGr3ZoNesPQ==", "token_balance": 137558 }, + { "peer_id": "EiAXJMczXLr3cPMAhaEcbgskqDNJR0AYc4h925HYASTp2g==", "token_balance": 137558 }, + { "peer_id": "EiDSsJ9Cz15k1adu13K8cnWpaBfYZQia5D4HNf19HpkMbg==", "token_balance": 137558 }, + { "peer_id": "EiBhpttrR0jqq1LcunsuvGf/UvmpkLE9K3XsmbsKpMmKaw==", "token_balance": 137558 }, + { "peer_id": "EiCIOXvLFLqVeaZ9cbNh+l0su0ZrLcQ+8fYSp8EDtzzpaw==", "token_balance": 137558 }, + { "peer_id": "EiB/w8e9nsOFtyA2SYPeeGqiMgwwPFivXPEscYNWzR+MPA==", "token_balance": 137558 }, + { "peer_id": "EiDiCoY7zYQxXDN0WuNX+gT0bTvpriicypSUX2NULZb1Yw==", "token_balance": 137558 }, + { "peer_id": "EiBri4toaKjBVT+c2ttG082uNPjN2YntOGUz74YUnA5S8g==", "token_balance": 137558 }, + { "peer_id": "EiCKW+WVd/yseDDV2jCI+y7yekbimA4EOjB4NpaQDUzAXw==", "token_balance": 137558 }, + { "peer_id": "EiD90pwUVRhsyBqleRB6+fCFlmyzPEuSShGUBa5TxIIfEw==", "token_balance": 137558 }, + { "peer_id": "EiCx5rf/JCPlaaSu2vfAdf+YpYNp6vAr+CoiV9J/dtfayQ==", "token_balance": 137558 }, + { "peer_id": "EiDOu8gLzNR5ZWmg8a6JjNaFQS6LUp+0JMjJDrrGlwfvlw==", "token_balance": 137558 }, + { "peer_id": "EiA6mZroS+TJ0PvEI3sszwFCfWOMfE8rxfTebdSCjpXB7w==", "token_balance": 137558 }, + { "peer_id": "EiCvx9ZgMW2yiOdLGLrFSm0O8M99wnuVAAwFJotqjpQTNA==", "token_balance": 137558 }, + { "peer_id": "EiBBNOiQksmnZ68ePdBabTngUR97UXxkJYXRPEl2agvdGw==", "token_balance": 137558 }, + { "peer_id": "EiAo3nz97JlE158qfzDRyzBAjpWBOEx/faUrVQePy+vxSg==", "token_balance": 137558 }, + { "peer_id": "EiBkXrxRgcgoZdI1KbsVhQAHDrmOWc55aspWx0MImrRcvA==", "token_balance": 157208 }, + { "peer_id": "EiCbTU+5tg7y6uBqHEISeCGu1R9v0CiIjXxcr4yHngKClw==", "token_balance": 137558 }, + { "peer_id": "EiBJXIAwAW2SRFAQrdhnglMRBhOp5m4SxYF2yJTTW9lF4g==", "token_balance": 137558 }, + { "peer_id": "EiDU0CsEo5ClYdBuO3JPoxMBRx8tEAam5cdYHaBAcPH8kg==", "token_balance": 137558 }, + { "peer_id": "EiBtaK2pOIVjGqj9/f7lTSljT39JWd41YbjLuNX0NWo3eQ==", "token_balance": 157208 }, + { "peer_id": 
"EiBQnAqhRGVsBfSz4+YdbzV05rHDKh++b1vSEcAXvKauoQ==", "token_balance": 137558 }, + { "peer_id": "EiATlGphiTRVCj4/CD1sRTqolV53dk4NA2kG22bIK9D/4Q==", "token_balance": 137558 }, + { "peer_id": "EiAXDYLdsQlcFCcBArHnVbhoq1E2YBQhGLsVhqaV2pjI1g==", "token_balance": 137558 }, + { "peer_id": "EiDiSU/zw55buN1xoWp+EnVEfunIINTN/pNkWiSFx4oA5g==", "token_balance": 137558 }, + { "peer_id": "EiB3i1+vGFtWlC8Ei/8AfpypQexK7qx6F/R5REdPL3NcKA==", "token_balance": 137558 }, + { "peer_id": "EiBHbU8R0nMCaNXqO2g0ewk+4vakrS1f8EL/VCytPqmTFA==", "token_balance": 137558 }, + { "peer_id": "EiDwwS45tB4GWVX11CcliPTQejbRmS+lcTJClEpQbquF2Q==", "token_balance": 137558 }, + { "peer_id": "EiD0CCDAOEx5kecW5b4ICbyg8BhhEhApmhgASK9Bqt8UDg==", "token_balance": 137558 }, + { "peer_id": "EiCZ60/rAs6kL3pE9Qnw3y/bv2GdUMnBBIXNN3VRqpYq2Q==", "token_balance": 137558 }, + { "peer_id": "EiBpvlWQu1uluUEyD266UVTyn22s+GNv9MSdKsLfsyt7Ag==", "token_balance": 137558 }, + { "peer_id": "EiDTu1XyDtM1N43pEjkMzj7ai/Q4X2XnEaFgz31D/7G73w==", "token_balance": 137558 }, + { "peer_id": "EiCM/tgnkDCqFGaULwGhlGmxsH3VKVIEOVHMd6RyTyEPSA==", "token_balance": 137558 }, + { "peer_id": "EiCcaGUxOHidGuLwj3QDnVR5BA3BelUb5GA6UOFWVAcY6g==", "token_balance": 137558 }, + { "peer_id": "EiBVaTZNWB4SCvlWOmwK6d0C5Yzl33hW8q7CAxjgb5zlww==", "token_balance": 137558 }, + { "peer_id": "EiDjNhYf9f1XFjPNjHqat5KFyQGzTJadpcpom20xt0F56A==", "token_balance": 137558 }, + { "peer_id": "EiBoMVXwviMRtKyjX9dtXnBUiErng+hbl4hLPa1LBsDQLA==", "token_balance": 137558 }, + { "peer_id": "EiBoIc1nPO5+W5RqtFwqCo7kxSDFdckdPVwxpkW6UyEiNw==", "token_balance": 137558 }, + { "peer_id": "EiBDwCF8pdxlB93eGtnRSo23g651J/aygpWQEtOnAYUbYA==", "token_balance": 137558 }, + { "peer_id": "EiD24ZVIuHTeXxVpGf6b1azMHq67iyj1LZNLwlFP6eZDrg==", "token_balance": 157208 }, + { "peer_id": "EiB8EGifgNwKMxlBc8o71qYg++hs1FLIXQEq08s2/69ktg==", "token_balance": 137558 }, + { "peer_id": "EiDCPV7yxYd7F9zfXvF/nl02PR3zMZkQkm4KAERVBCFk3Q==", "token_balance": 137558 }, + { "peer_id": 
"EiCp/4Ozp78dzwtKPvqucyDPmAWvKQI0Fc9rf1SX6/CARQ==", "token_balance": 137558 }, + { "peer_id": "EiBT8XJHEtogwc/6GEAkLesKHzbDonM068Mg/mCLYPfUlA==", "token_balance": 137558 }, + { "peer_id": "EiC39fG4ecRzuvl4qpqkSJ3eIyVDr/iEfLviwASR/YtfsQ==", "token_balance": 157208 }, + { "peer_id": "EiDj5Tv5rYO3GX5P/qp14nwaEbDMHtgMkIaDYTAPCnJOwg==", "token_balance": 137558 }, + { "peer_id": "EiBZtWRH2M3P/YGeVcGzeH1WyPAT1EAufTd6yX1bRSR+vA==", "token_balance": 137558 }, + { "peer_id": "EiCsWqPPPT+BhctANRqQHFD+Dj/GdKsTmFHj6Xq84yg3XQ==", "token_balance": 137558 }, + { "peer_id": "EiDOauPSTm/mG6Elcd+EBBlpMXamMY4eCFXkRCGciKE6KQ==", "token_balance": 137558 }, + { "peer_id": "EiBw0l6b5gwe0O0Qr8WnIMx8a2Y2L5Vjziq74x/WfRg1HA==", "token_balance": 137558 }, + { "peer_id": "EiDKtgwh/YwV5hLImqNffl6K2cVyMYKbt243FB6YV4Re5w==", "token_balance": 137558 }, + { "peer_id": "EiBe1BsrDW/oLARa5LVx4d4o7ktYfxf/yoS+CEW1C4PcyQ==", "token_balance": 137558 }, + { "peer_id": "EiCVNzMEqHzb9TsgWV7AlzdT5lFAtVbA0b3g2VbZj7pUWA==", "token_balance": 157208 }, + { "peer_id": "EiBU1uPpjaSSbD+hqspV0xg9ms/a/u1SdZQPKPUwwp95WQ==", "token_balance": 157208 }, + { "peer_id": "EiBcAUabPqHI+YSNuzRpTFm5PxDeRVk3vb/4HYlLVkbxnw==", "token_balance": 137558 }, + { "peer_id": "EiCkMEu6lXBun9op8QbDcJJqwg+y531zJCdBcpKzyyzBVg==", "token_balance": 137558 }, + { "peer_id": "EiC57WWhYCOd1+qqmxW9XnjkhG82xuBDv/hZFT4wbJAbUA==", "token_balance": 157208 }, + { "peer_id": "EiAjhP9B5faB5+2IZI2cSm+FPfw9pfB7SL7AYDGlK4h4AQ==", "token_balance": 137558 }, + { "peer_id": "EiCOGSBY4ly4j3GZ9yCw1vVyr7rd5S9yWjViKuLuzTETsQ==", "token_balance": 137558 }, + { "peer_id": "EiAIIeJwouU3PVWLvUhPtm+MiQntIEKnpaKlsyIMzdgSnQ==", "token_balance": 137558 }, + { "peer_id": "EiBo1PqBDIQRzE3hNL0DttKpjuaj2yWG2tPL01UKY+31LA==", "token_balance": 137558 }, + { "peer_id": "EiD65Eh0LKGj6AgMlsVmEBGnpisJgTcdilvmlqAqBCVBTA==", "token_balance": 137558 }, + { "peer_id": "EiDxhfqRg6RzeaZrJl9FbDrEWc0Aq1iph+xWBamS6FcgTw==", "token_balance": 137558 }, + { "peer_id": 
"EiDrYab6PMYmhZ4e3ry0Qw/+3sbvVfG6M2vJfrUsZcn4hg==", "token_balance": 137558 }, + { "peer_id": "EiDeAUv0KvqElzgVULgPe7J3bZFV1Vi0qNREG7uCR4QOrA==", "token_balance": 137558 }, + { "peer_id": "EiBBqBROgbME6/f9OmDnI+p6gr/XfRWdhCVbaXWIe/It4g==", "token_balance": 137558 }, + { "peer_id": "EiByqJCfrbGJK6SAZJkdaM5LCIk1jsegmD9vQhscSWtFhw==", "token_balance": 137558 }, + { "peer_id": "EiCqM/Pjb5beBN9WYrlsg7jOYpT6qxLxIeflKY3dXXJRUw==", "token_balance": 137558 }, + { "peer_id": "EiDZopN+RnHBtk2ocwTFPuNe7JWxw22Q24q/vfvkcSW7KQ==", "token_balance": 157208 }, + { "peer_id": "EiAWko6omZov8Q02glLaXuFTcYcP2Em7cJ2EUdawyq1ZKQ==", "token_balance": 137558 }, + { "peer_id": "EiC79QU6FfCx3fXWTypurSbaa7MHNawbXlsRGRwD9kIuvw==", "token_balance": 137558 }, + { "peer_id": "EiBPAuih1CryAgv7qNreo/NH6lrJWAT8QYwt71XGMnprkQ==", "token_balance": 137558 }, + { "peer_id": "EiCNsbzzelVdxxhlJSvkwjt7xbZi40tpcojNJAaJlcxWTA==", "token_balance": 137558 }, + { "peer_id": "EiC842UyoMHux38m67Ij7jJz4aL7Mg1IB8CCZd2IF9ZlGA==", "token_balance": 137558 }, + { "peer_id": "EiB5TBHNSRvK9MwnbtW85pM9eaJGBULDivUFlCQauLUcig==", "token_balance": 137558 }, + { "peer_id": "EiB8UECS82ItUDV1t7C0UN4WyNu6nVBfdaM37lzwnXDcJg==", "token_balance": 137558 }, + { "peer_id": "EiDhS3+6Vc3SaMdJpYtrX5fnRS5ZRDEfF8vx+iOwWIO6Bw==", "token_balance": 137558 }, + { "peer_id": "EiAjlO4GFVrt2V+pQikLXmHfBmqcvTmJOQD3+eJXnOJl+w==", "token_balance": 137558 }, + { "peer_id": "EiCNte5+wR+8p342FHVoaR2OGq6Vb3NnXW2AhG4W+dFqLg==", "token_balance": 137558 }, + { "peer_id": "EiBjJ7ZhiNV9aHcjEtTyLANsX2lsrRbE7DUJvn5rHpW+vw==", "token_balance": 137558 }, + { "peer_id": "EiAcdEkxOVC3LS7goYpEO5du4LttGgg9NVM35jurSs99qw==", "token_balance": 137558 }, + { "peer_id": "EiDjXPG7MmNt/AKYQDDziB6rkUVFAQBPEL8rd2fhlnG08A==", "token_balance": 137558 }, + { "peer_id": "EiDrhsEnPMADvxpuN3kj1n18kvvWKXA3oWDHi1WZXFgnvw==", "token_balance": 137558 }, + { "peer_id": "EiCnI2JWuAmhP313X0ywIwgK0BugQ7NgA6ttp7PbBjPB3w==", "token_balance": 137558 }, + { "peer_id": 
"EiDa3uT5jYEMOq55A5jQStBwijgR0KF2GSifgZbGGRTZiA==", "token_balance": 157208 }, + { "peer_id": "EiCVUzWKGlK815nhPRldaii2fU72oiDebdMtOHC1NjxVBQ==", "token_balance": 137558 }, + { "peer_id": "EiAZ46m+gjk1acOEZWaYBE3Mzt/r2U9CKm9fZzkjwuP6pA==", "token_balance": 137558 }, + { "peer_id": "EiBNf+29/yg2TqUgcm5mVAKq2Awd4cIE0JXnehyh8GUVhA==", "token_balance": 137558 }, + { "peer_id": "EiD9ZRy93btwIV4ucAefUMYpuliAjG7dhP/8XV4jTOIgxQ==", "token_balance": 137558 }, + { "peer_id": "EiAk7UZ2QrfW3w7fW3aOnQGTeIx4BkodlcVoqoDj79dCUA==", "token_balance": 137558 }, + { "peer_id": "EiCWy8qgkdwORzG0b0o7x3sFHGMzGR29ZeCM0uUUqCJ3eQ==", "token_balance": 137558 }, + { "peer_id": "EiDz3WxEff1Sa8ByeiwpvFvIX7Nf9mBIGlSmzjW4yVtZPw==", "token_balance": 137558 }, + { "peer_id": "EiBbt7dZ0/SdrpyHEGRAVCNV5tpZOf8iGkf2hxGjGdgIWg==", "token_balance": 137558 }, + { "peer_id": "EiDOQHb6qPJjw4S9iqfiTbRyCpANCGzJ1wjk+DCMbZCqRA==", "token_balance": 137558 }, + { "peer_id": "EiB6VG4YGT/FOpsMlw70GtuxLwRWLtm82on6h8Tu0fZtNA==", "token_balance": 137558 }, + { "peer_id": "EiAk0lqV/eHg9YPtN4ChUQoPctuHRLtfi8X5i8cyP8aYCA==", "token_balance": 137558 }, + { "peer_id": "EiAkLM53kaSeuM8qcBRnWIBWtZbMtoYeTHrr2gOLQA7Y0Q==", "token_balance": 157208 }, + { "peer_id": "EiCA3PAFkKLgOXOPTOPWUvHRtDo4kqaP2A++jWq4FVnEeA==", "token_balance": 137558 }, + { "peer_id": "EiCb9Jvw2ydm4pzukNeYd7uQADWMG06yblmV+DBPlvvTGw==", "token_balance": 137558 }, + { "peer_id": "EiAD/2QUvbHyV9Z/I2YUFwGccdA7tyLJL3gN+78xoHvcJA==", "token_balance": 137558 }, + { "peer_id": "EiCZaMGDkCoAEyJgRozTxZtEkjmJmJK+7yGyiseoeFDsrQ==", "token_balance": 137558 }, + { "peer_id": "EiAr7ITg8LupDC6Ofj1F42es67j4IHuGDLXAbHBkcpX9iA==", "token_balance": 137558 }, + { "peer_id": "EiAuXy5mmGl5HjFoya9gddTfBQI9ltryDq5Jo8kTKrGmnA==", "token_balance": 157208 }, + { "peer_id": "EiA3D9eFBoUeMCLKvEiYi7mYE3pbmgedRj7MAdRWHC6iXA==", "token_balance": 137558 }, + { "peer_id": "EiCCwj7cKZuz7wtrOiw6GhNGg7j/pm1oupCJ5oq/8BR5Tg==", "token_balance": 137558 }, + { "peer_id": 
"EiDu8Y42LEgOfvdNSuX6le505kdL5UPQwfvXrndiVwUCXA==", "token_balance": 137558 }, + { "peer_id": "EiC/fh676OHCz8Vup8mvGJ2BRQuV4w/M2mPQVrjUy9418w==", "token_balance": 137558 }, + { "peer_id": "EiDisvDDXH6qzTtoiBfEo1UAQ/no5de1lOs5mbMvNuzGyw==", "token_balance": 137558 }, + { "peer_id": "EiDftaeCgl6Fl6VuRt7kH03+p83iY4fLCvVBO0awZbSkTg==", "token_balance": 137558 }, + { "peer_id": "EiCskqQddnjW6dsG0EkpjDRQtB0T/2i0HmF95FXq/oc/fw==", "token_balance": 137558 }, + { "peer_id": "EiDfYvHCkMwHQo1SGIrExl+xFo0Rk7WVyLzb3R6Jtdye4w==", "token_balance": 137558 }, + { "peer_id": "EiDPjISeAgCnpggVNcAFeTWP5T69QWbGZr9RK2D6SRoX6A==", "token_balance": 137558 }, + { "peer_id": "EiDjEtnQ3bj7NzDbaS1frrVfbGrjRNs4nXelBRgpdvtbhA==", "token_balance": 137558 }, + { "peer_id": "EiBmrBWcyAr89jZe8YB+hDvj/jl2ozI5fcN+UZU20RYmNA==", "token_balance": 137558 }, + { "peer_id": "EiAQjU11AvNccFg+G2P1xiXo3eAwdjKHDE+njKV+Gplo9A==", "token_balance": 137558 }, + { "peer_id": "EiCH1eO9ezx9Q5ICnHu0qUkWljIXs5sDKIgJ9toSbrzicw==", "token_balance": 137558 }, + { "peer_id": "EiBvj8iR5QnqtyCLMlgwLp17LRkCnPvjggfb+/8b00sQTg==", "token_balance": 137558 }, + { "peer_id": "EiCPkDUg+eAEAx4eMdctzsBJ2W6Q8KkCPz5Ed2WkCMDymg==", "token_balance": 137558 }, + { "peer_id": "EiD433yqmHk/ISwbVapwg4RFNvfOaw9OWtJTzHcB3JbmdQ==", "token_balance": 137558 }, + { "peer_id": "EiCUU9k9AvybFsitkfFweis1z8XdzfjSnFI7VwHxS4+K3w==", "token_balance": 137558 }, + { "peer_id": "EiCgvtp8KntElYqHPcMOqnNGaMRSMQrhiA75JzfpEjI5Kg==", "token_balance": 137558 }, + { "peer_id": "EiAkYt7QV7Fj37K5uPaaFzqSvm1Y6vQpHtYVkX2TXm6vZA==", "token_balance": 137558 }, + { "peer_id": "EiBqkYtumWWv0YqAikqLyskPL2XGvz984JUJao76py2Ftw==", "token_balance": 137558 }, + { "peer_id": "EiB48ZoOAYpAPbh+y1R/NV2KEStNmMtfluDCX9GEqLvrhg==", "token_balance": 137558 }, + { "peer_id": "EiB/LQgpH8BXKxXeT/1I04xa7BSK3Rpqf4gV7mgwecDV6g==", "token_balance": 137558 }, + { "peer_id": "EiBLTzHUsmdyJSU0K6IJ+3nD7deb3j/w4YBe2d9WNq5REA==", "token_balance": 157208 }, + { "peer_id": 
"EiDVngikPhoWPFGQDaukzbPry2+VnuJMNl+4qtNH7qLhQg==", "token_balance": 157208 }, + { "peer_id": "EiAeUgH7prW7wVAXhoMVNqfvN3epBJ75X0dftHiYx8/+kQ==", "token_balance": 137558 }, + { "peer_id": "EiDqQJ9ndp28N7MtMynRou6vRds9J30evUiR2sbzOSQwEA==", "token_balance": 137558 }, + { "peer_id": "EiBFJlOHZZIVEEOUYzN+7m9z8Dyi2JAJ7eMKzXhyxOxGwA==", "token_balance": 137558 }, + { "peer_id": "EiCgtRXQ+69xr3xSexZjmmBo9as5fmdXfeMAmee0LSRhHA==", "token_balance": 137558 }, + { "peer_id": "EiB+SDiUu5zYWg0XrjFABF+v72aDGylM/Xbvh99bLgC/rA==", "token_balance": 137558 }, + { "peer_id": "EiBra5zVAnmYZYfzXu6lB2/gS6BouXV7DcIPkA4qNdxIEA==", "token_balance": 137558 }, + { "peer_id": "EiDaGoc7jYl3OEYhffHVFzIa7lh2T/rwUSM8Lo8wt0wLIg==", "token_balance": 157208 }, + { "peer_id": "EiCc1y5qnoyYev03GMklk9mqPI4vzkRZA8NImSTq3pbHDQ==", "token_balance": 137558 }, + { "peer_id": "EiCreC2KQ/VUXAkh7B7vl/b+amIxVAK8agO4AakNwRhFqg==", "token_balance": 137558 }, + { "peer_id": "EiA+KiRdIaZ0l8JnM7p7b4ixAXsgnetDVV0b92i/0mKkfQ==", "token_balance": 157208 }, + { "peer_id": "EiCntm/fnimBfZVLzMo5FWyCLAecPdfBUjxsNA1ywux6uw==", "token_balance": 137558 }, + { "peer_id": "EiCpGHsztbHwtuhNQwCLjJ3A5mOuUBUSjZz0pOticztOSw==", "token_balance": 137558 }, + { "peer_id": "EiBpztNN0Xqf1t38b2EbhwdiV0uPnh7JouqPAQHf9k4eTA==", "token_balance": 137558 }, + { "peer_id": "EiDNgDbbc8h7d168za3+7OkxsPWfbF2/Wj0C0TqQ6wso8A==", "token_balance": 137558 }, + { "peer_id": "EiD2ODrPmC9doWMq/WCiymEsdeUBSHVdaQahQn8X1JSKGQ==", "token_balance": 137558 }, + { "peer_id": "EiCW3hg2/wfrbRsqXVAlRmNz6yhLJ/euBp5XT1WK0+RBTg==", "token_balance": 137558 }, + { "peer_id": "EiDcHmZCqlve6TljT1uNA4sf9nDkwoitbODaczsrrwDuyQ==", "token_balance": 137558 }, + { "peer_id": "EiCtKApm4Z9at1keNK+D9G+qtcZjcmOYorFsIdgBP4jxMQ==", "token_balance": 137558 }, + { "peer_id": "EiD3E5ybLfrVuIlTNC/PtLT0bxbJ73IkSm6V31WD7f/CgQ==", "token_balance": 137558 }, + { "peer_id": "EiABQZ4CCgA/dROFPMznV8OHlYJwvB7YVpE2QG8q/lYz4w==", "token_balance": 137558 }, + { "peer_id": 
"EiAv0sXFIFO95FO1i6pcm3Cv/etwK3hgnVNtAjIirIi79Q==", "token_balance": 137558 }, + { "peer_id": "EiA9CUVhMXrDwoey5zZ3UDk/9vv5NmVRSrI2E1VbfbwChQ==", "token_balance": 137558 }, + { "peer_id": "EiBF4OtXBF62jVOAd9/aGSmi4yMqJGgiGdlWuHoXXeQbgw==", "token_balance": 137558 }, + { "peer_id": "EiBVjczAYfWR+Cnr7Yr7wAWYVNDRvQB8MK7kimQxvddy9Q==", "token_balance": 157208 }, + { "peer_id": "EiCnqObOi5JPBt+wlcrfrtTOLvtzXcTH+bz8Ho9c0kBPlw==", "token_balance": 137558 }, + { "peer_id": "EiBf+/8AtcguK8Jg7G8uhSk2RrE4zeLyHtKvOq5ivGVG+g==", "token_balance": 137558 }, + { "peer_id": "EiCvbY06avzEq6Z5Fl9dVGpSYxgxvGXYBt2t/pYB5MeJUw==", "token_balance": 137558 }, + { "peer_id": "EiB5Anrm1+fR82PvpuxxA90ZxVjYytw1VNO2YofrhgHJ1w==", "token_balance": 137558 }, + { "peer_id": "EiDDxh5G0ceG81L8ZM5OImUHZCa5FRlBJQ517wpVK2d5pQ==", "token_balance": 157208 }, + { "peer_id": "EiB+lNO1WleKyaoqwVjpkYSBuq5YglwYMtokLaIJool4Mw==", "token_balance": 157208 }, + { "peer_id": "EiAqIczZ3El5VDLORlRmlL7P2PDJymLTOYbqVLVbOCxo+Q==", "token_balance": 137558 }, + { "peer_id": "EiABSmVVj6o/k35ZhWz+XngR//XA/WNXt9dnsOwuWqJomw==", "token_balance": 137558 }, + { "peer_id": "EiBC2kF9sAvD2S2/j95S1equyXP1djna0trNfv+/i7IWMg==", "token_balance": 137558 }, + { "peer_id": "EiB7pQTMuJPWqN4Ov9R4fA01GPo76W7U8wJnvyQ9mMwXyw==", "token_balance": 137558 }, + { "peer_id": "EiD7mDCGtv4wQl7lc9SKxdXWDGnhtCCG4xlnS/eHB6ZO5g==", "token_balance": 137558 }, + { "peer_id": "EiBK42dQx6Omt1qfftHiTirf/LjYcehnhshoLBNRxaNZ1Q==", "token_balance": 137558 }, + { "peer_id": "EiDI9PlqCQ44ItfmAv8res4WcjFBy/qoaLVUz3GgJ0l6tQ==", "token_balance": 137558 }, + { "peer_id": "EiDr82Ty9BU5Des3YkZWk+7yO0h6LwJKXzfkHbUipidHbw==", "token_balance": 137558 }, + { "peer_id": "EiCB0vEE0C3b88qllK92qAlVxjvAU6TZOUsVs0/q6O4BhQ==", "token_balance": 137558 }, + { "peer_id": "EiDeTDldMcJfdupjyie3xCfD6nqokoXvQWtrn6hpqY0x+g==", "token_balance": 137558 }, + { "peer_id": "EiB6e4Y3e7dHwg8qZkpt9uZ02Hopn3kNUkyxRcjFXabhpg==", "token_balance": 157208 }, + { "peer_id": 
"EiAN2SYlXsybAQ9XkdLJWBIYN3rPJJVyBODvA30r0crMgA==", "token_balance": 137558 }, + { "peer_id": "EiBtqGRGMEzBNhbkLDFeBqD0mtxI58LBRNV3wus3bKD18w==", "token_balance": 157208 }, + { "peer_id": "EiCQPlVMmVo2eTY+iZzPtKPtpSFpCkUrODV5bFDNoXu4Yg==", "token_balance": 137558 }, + { "peer_id": "EiBIKOtXIlcuDEBueu1XrNyswYrzSf1ujX8Kg5NWLZzc/g==", "token_balance": 137558 }, + { "peer_id": "EiAA7wfcxZux/SjXo2voSj09na2CREo1UGsWkf15UGZTDA==", "token_balance": 157208 }, + { "peer_id": "EiA4VMDy4uv9XWIgAi6ZhJDM35uaEuTwS3e/VZwecfKmKw==", "token_balance": 137558 }, + { "peer_id": "EiBN6EJOM2NQG4JSB17CIDX4Q1wer6oGycjXt7E2YzqfOQ==", "token_balance": 137558 }, + { "peer_id": "EiCGGLalG9WE6bozbLdGaK5guXTfZ23RBlGs9Uv9uPQVLQ==", "token_balance": 137558 }, + { "peer_id": "EiAvZSddPIdV7D3ljwhRZ+5b2OxVlC0Y8fa4zkILgPDSLA==", "token_balance": 137558 }, + { "peer_id": "EiAtFvzYP8Q3yVI6s0NMyUkjybEmDobuF//x+A3f5BJBmA==", "token_balance": 137558 }, + { "peer_id": "EiAZSsgD67QhAU3SE/6poRBrO5zPOR2VGOTxMM84TY/VDw==", "token_balance": 137558 }, + { "peer_id": "EiAgpQDCV3cnKbhkv16nt/pF8AxH4Zi9KLpkbU3L1j9G9A==", "token_balance": 137558 }, + { "peer_id": "EiCLn0btOlih5XI+OepuUGTh/cfAJ33w5ynGlpp83zyW4w==", "token_balance": 137558 }, + { "peer_id": "EiCDeai2KHQs6kQbLH89DmKkJGNB+bqz5Sf+8HMFRrtRnA==", "token_balance": 137558 }, + { "peer_id": "EiAu5k+odJOG2jRk2ikvsKBxdk1i2kSwXMUhmDCnipmc4A==", "token_balance": 137558 }, + { "peer_id": "EiCs4jsoS8acBYI/EEI3EWkF/LT++aAmJxwGL34NJ8fGaA==", "token_balance": 137558 }, + { "peer_id": "EiBI+4rfaF+ZqC40CLlZ+Rph6EBFLTtmipFLBf+0HIrurA==", "token_balance": 137558 }, + { "peer_id": "EiC5AOkAl2pVyR9LstzYM/NmYWgVfnIs3Icxz+mPmEPG4A==", "token_balance": 137558 }, + { "peer_id": "EiD/R1kMZFs/Uphk6hAObL8T+5tdW+bCUgZ9NVSdHcACyw==", "token_balance": 137558 }, + { "peer_id": "EiCuopZao+OzB/jbBkx4kBygbkbmdDfwGToQ9efHfvhFbA==", "token_balance": 137558 }, + { "peer_id": "EiC50zJitcCJXzRJvHvps2z6JPRhSDFmhibISk7ttGFoTQ==", "token_balance": 137558 }, + { "peer_id": 
"EiAT63M6Px7ZxHiYxiv7ZxZd42brelsauhHsRBt9Cax/uQ==", "token_balance": 137558 }, + { "peer_id": "EiCkrMhJuCmV2zhJIGGPciNuGc2lC6U4L0RxnbA5By4D+Q==", "token_balance": 137558 }, + { "peer_id": "EiB+GN7cVgllTzW8oYTbjHDYxV2p/OeuFkpCwaHZFUIlEA==", "token_balance": 137558 }, + { "peer_id": "EiBLrG3wRVZM++De9v3+ccZzDgMJ6HEAplPh61B4S5YY6w==", "token_balance": 137558 }, + { "peer_id": "EiBqwyHIPiwSaWxXdHRzK2iGUm4okH1Hv1KUjshZTmEZoQ==", "token_balance": 157208 }, + { "peer_id": "EiA0KQCUtI0PEsiNUZGyW+AaJouiK4FI9+FrLtQJxE+aDw==", "token_balance": 137558 }, + { "peer_id": "EiC4tbGUqzjdaz+IxkebyB3R3Pphzup4FvYFsxv4b3i+DQ==", "token_balance": 137558 }, + { "peer_id": "EiCGUQXfBRuokJRXVjxE5MTsmFW1PwJztBOqZ70C3PURdg==", "token_balance": 137558 }, + { "peer_id": "EiDe8g43WqEUHuI/FS1MOjIaBrvLDjiufR83YbwszrVqig==", "token_balance": 137558 }, + { "peer_id": "EiDPGdvmHON/A0ldwFf+R7z8PF49SLqEiAhyTfgqqTwDIw==", "token_balance": 157208 }, + { "peer_id": "EiDcMK4WU6KBZL4zXYvYOMz70aEWGY/npzh+LDzGlP+BKw==", "token_balance": 157208 }, + { "peer_id": "EiB8x4GuPKiBHbL5du5wlBYC5C5oppLajOjIfmLQJ6OTcA==", "token_balance": 137558 }, + { "peer_id": "EiB9s3cB6R2ef6szn78Buh39wNJTT6Zlzr8tOYyizQEGxg==", "token_balance": 157208 }, + { "peer_id": "EiDuLsv0RyrofD3QyQpsR5+1AWqGoxcSDWthqhmR4KlHUQ==", "token_balance": 137558 }, + { "peer_id": "EiAoWM0By5swyHg7oo8rX/+1dCoL4eUloJxAtl1YftCqrQ==", "token_balance": 137558 }, + { "peer_id": "EiBbNKpta8Qsn19wiIWnKTD6Z3Zq8NdZe7yW1rEyXdQwOg==", "token_balance": 137558 }, + { "peer_id": "EiBVxhk+ShOhyzwlS1dzxiBqvmh8zTfU7TBPa6H41Xu1Pg==", "token_balance": 137558 }, + { "peer_id": "EiCRoYv4VQBN7iwhi/yblGxEkTb163UCdcwbwS/VWEjfAg==", "token_balance": 137558 }, + { "peer_id": "EiCZ+OJHLfeVkclYarjJ6U9vVRq2yulDqBK7aPZEY5+oyQ==", "token_balance": 137558 }, + { "peer_id": "EiDhgDJHnu/4e+AIgtohdNveAD9vQN7bRAm89DSqSzP6sw==", "token_balance": 137558 }, + { "peer_id": "EiB9gEMg1+aoU0glO9ykSOOUPfCXNqt1qYUo9SOymO3bAg==", "token_balance": 137558 }, + { "peer_id": 
"EiA100t3PRxiPpxt1rKgVxYO5DLh7BAnZkkeLvveEXuT3Q==", "token_balance": 137558 }, + { "peer_id": "EiB97HDI8LQC0GV6Duon7y2OxJ6sYMhVrGFCwfmLLM3eRw==", "token_balance": 137558 }, + { "peer_id": "EiB3RYnKKJqN8plhodTS744tIZV42fQI2PDR3qi7pVXwoQ==", "token_balance": 137558 }, + { "peer_id": "EiDQAtKJ5IOwyWyJcLfMgQZUXagbv5T5bWJROotr7yhngw==", "token_balance": 137558 }, + { "peer_id": "EiDbQABcKvZHuMaoBOSILSvZ+WT1+HXz9uuh3iB4iCRmFg==", "token_balance": 137558 }, + { "peer_id": "EiDgA09flDCpYk2RZJLhkPNmYIaDPxFC9bc6HFK8t4y8nQ==", "token_balance": 137558 }, + { "peer_id": "EiBDoMkiC6QClOVuQbgQ8fNgF4+9UoYJEu/ofbY+Q2FZJA==", "token_balance": 137558 }, + { "peer_id": "EiDh6xqGmTtqaAcT8fUy42+9uOwSyuaiWu2H4BCemCT5Ig==", "token_balance": 157208 }, + { "peer_id": "EiAAY6pE5i4Va9G24V1wf3xlWAlff4lmAgGLxx5YRMDm+w==", "token_balance": 157208 }, + { "peer_id": "EiAUHyDXncXwDNwXam488KI/ZcHcEeQ3Y1UepIUVa0jZpw==", "token_balance": 137558 }, + { "peer_id": "EiDpDm6ULT0zFLyFt9FoEyDST0UUWENG/tvWVcfrPEEykA==", "token_balance": 137558 }, + { "peer_id": "EiDAgucJmDeH71VTlBr6Tds9Xlgp4wfCXYtPiEZ6EQl54w==", "token_balance": 137558 }, + { "peer_id": "EiCZW+B1BC+/XIBHqPp+TlQetFZtTONViTENNKX9gym+pw==", "token_balance": 137558 }, + { "peer_id": "EiB7jT12NBT0LAhuQgTe9W2ZbZPjh1r7zTw3gqeILXdW8A==", "token_balance": 137558 }, + { "peer_id": "EiAnaYLNqkkD+KsaiEmb3jjMxV5d3SwXZLRWZRWtyaolFA==", "token_balance": 137558 }, + { "peer_id": "EiAUXQsFmfuuGg2K6YXc/PIOcGoV7easZ4FLCZ2sLPubaQ==", "token_balance": 137558 }, + { "peer_id": "EiCKPjtVwy95EHMWusmjDKbuM7y5c2hQMARWuA2lWvozqw==", "token_balance": 137558 }, + { "peer_id": "EiAC36AjoCsacpU3+EDSsSmwn0k+h1kCRU0xDqUKDK1PBg==", "token_balance": 137558 }, + { "peer_id": "EiA9FLFmQFfiVgv42zPf00A1hiwsQ6dP3Ff4VYKhefPLPA==", "token_balance": 157208 }, + { "peer_id": "EiAAGmFUPKTUM+oerZ9OBng/gG5zKv7bNRSjmv/AC+evQA==", "token_balance": 137558 }, + { "peer_id": "EiAN5DlwxKeEIjn5coZTEUi4N464ny+WPYOWu6dzlTPS+g==", "token_balance": 137558 }, + { "peer_id": 
"EiCgtlpVM12OZRU3x1VCjvjsWNkKdjer5Xfy0VkMiZOhvQ==", "token_balance": 157208 }, + { "peer_id": "EiBgwEn7JglAV+D3hYPqQcmipxopjPiqc/nAzmYCngdQHw==", "token_balance": 137558 }, + { "peer_id": "EiAfLa3D9ekAB6XZ1E/P71iTJITjOJs3EKRNDBYtDiexmg==", "token_balance": 137558 }, + { "peer_id": "EiAgLM2GAnbCvpsZKlRfJwJ0eEKl3n4DPsIxE4jvX92l/Q==", "token_balance": 137558 }, + { "peer_id": "EiBZGTYzIzVy5pBAVaSbY9cGqw5xKfUvP0BQDZs5BR09mA==", "token_balance": 137558 }, + { "peer_id": "EiAqO53EIRsj17SqXZWWJECTyUaM6cOf3gW9PSEJKoQbZA==", "token_balance": 137558 }, + { "peer_id": "EiD706mU4PTftS7516daxMQoMj53ixhFrC83jtM62AK9aQ==", "token_balance": 137558 }, + { "peer_id": "EiDWgzeIvL6EEZM/EwjD0Q54TLd2+m2yZpIehC/tsJKOHg==", "token_balance": 137558 }, + { "peer_id": "EiAEogUfqBLPI1O3mUr/b/0AVNZLm7DuM1NT2RA+h4CS7w==", "token_balance": 137558 }, + { "peer_id": "EiCEg/y+xLhzoOykP3oen+G7aFEhcLgm5YNyiIrSlxEFlw==", "token_balance": 137558 }, + { "peer_id": "EiD1ClZn/lr+n/gnS96Q4gKwBDk3yl33kNIhx9wUxJiyEA==", "token_balance": 157208 }, + { "peer_id": "EiCS+UGbICfohPwlkkBArU+suk5ocOvPhGS5rDBavrSysw==", "token_balance": 137558 }, + { "peer_id": "EiBejpof3a17bbGGegjU85qeUIXYefAlXHLPAo1FIuQTYg==", "token_balance": 157208 }, + { "peer_id": "EiAGEpNUss0oz/eSsRLBtcNtd8TyiMVfr7xy48pIG8VawA==", "token_balance": 137558 }, + { "peer_id": "EiDyVGg9sAlZvuJh6sJn38vCxfReQGoSKupyZQT+PxZrBg==", "token_balance": 157208 }, + { "peer_id": "EiAdEYEQJQqGEmJmqnodIqypxuiRKOpULxCzOGk/Drc1Jw==", "token_balance": 137558 }, + { "peer_id": "EiD9Llqctpvwv0RRiL/zTbjoHZ9UxRaxw+rrZe1As8V61Q==", "token_balance": 137558 }, + { "peer_id": "EiA/aIPYU7aZvO8ZaEOf6aWuAo724Zh4wCoH0d0N0Pnn3w==", "token_balance": 157208 }, + { "peer_id": "EiBksN5pbnr4zLiMWshG5SrxhqxJUMBkZW5qvkpDauAq3Q==", "token_balance": 137558 }, + { "peer_id": "EiBE3N94casP7FFAAol1HDV++Nah7QCNKHHjZtRuk+z4QQ==", "token_balance": 137558 }, + { "peer_id": "EiBjrd5gSlAfML4iUDdknXTWrwZ9lfGViPSv7vIjyys99A==", "token_balance": 137558 }, + { "peer_id": 
"EiBLAMpUbItxRg+Q5FvtlvaGi2rYiwjtKzXe1DmqKUi6/g==", "token_balance": 137558 }, + { "peer_id": "EiApVYQygAgEh2IFOXEHou8avd8Qw3FjlwsYqbPVUhK0fg==", "token_balance": 137558 }, + { "peer_id": "EiCD+wJGK3BpFfByuU70A+iO/MhWoZZqkdxIkZqVtzq5Ag==", "token_balance": 157208 }, + { "peer_id": "EiDYS4Kmy1C3HrT/t5UcxpJhY9s274z/+QXmW+rh4qu4yg==", "token_balance": 157208 }, + { "peer_id": "EiBPrs9PE47ZceqK5Fp+jtpInHjfbvpuasWUDKgWx5JTLQ==", "token_balance": 137558 }, + { "peer_id": "EiDq+NOd3OhtBtqWme38fOnyjQ2DHYGp/d/15zk1ynCvEw==", "token_balance": 137558 }, + { "peer_id": "EiBdvGYTySnnmJC+7HY6hJ8mtgNtgEesmVGjM/bAZJO+sA==", "token_balance": 137558 }, + { "peer_id": "EiDHhNSCuO2rII/ptgl/bDU0Sp9M8gRdM4a5eOTk8JGBbA==", "token_balance": 137558 }, + { "peer_id": "EiBk/8MT5SauzL+k1yv6LdJ4IB1ANi8xG/VWP2oStpvVMg==", "token_balance": 137558 }, + { "peer_id": "EiDpJyIFhx17Wfw5i9Ae06RcoPE0h5or+uX7LO71Y+19Pg==", "token_balance": 137558 }, + { "peer_id": "EiCxYrVu5Bln87tUlUlItdaQuEFyNqj0FL9yOENzGRN94g==", "token_balance": 137558 }, + { "peer_id": "EiAV0e0LXPlk6QrL1ZQYD3yHC/As94dBNa5SnXC+8wduGw==", "token_balance": 137558 }, + { "peer_id": "EiBpRLhlsP/UL7lbpmG990Cx1Fs07gLbJn0qT/755fGGWQ==", "token_balance": 137558 }, + { "peer_id": "EiBfXibLxmGFrvVix+zDTo0f43GjXEKt0rYqMZfQyV7MJA==", "token_balance": 137558 }, + { "peer_id": "EiCqaQhJzVnVhpI/JPydNHCpQcc5YtpQ7KtljcByIGaezg==", "token_balance": 137558 }, + { "peer_id": "EiA9+jMngCwreiwZ4BQOkp4MRlEBG9Oopg2EfpSgWQ7zhg==", "token_balance": 137558 }, + { "peer_id": "EiDkWIDnoEvf8QAbWPplSJhlW365RSB92O3nt2Ur7wwtLA==", "token_balance": 137558 }, + { "peer_id": "EiDsoq1sGqFS27UHg5VPJZu/mi4xLVkOsO7aKfaPIiw9Sg==", "token_balance": 137558 }, + { "peer_id": "EiAmKQgu42zcqNBYuGM5Jy2f2KVhIvsrl4dLpJRbdoV3bQ==", "token_balance": 137558 }, + { "peer_id": "EiBRVawJJCCix8NamzTnuJ/eR1y+2AA53Arz29Wux9G4ZA==", "token_balance": 137558 }, + { "peer_id": "EiCKRjGjDdrs7V7YaYuR8whEioPfSdffIw27Hnz6WjOI0g==", "token_balance": 137558 }, + { "peer_id": 
"EiADQPc/+VKrP6PExxIIM29NtWczuEsQkIbFOuYOFK2RrQ==", "token_balance": 137558 }, + { "peer_id": "EiDgNrp4Fppegk+QsosN0y/ABGYGNPVL/bBI+W5+DxBodg==", "token_balance": 137558 }, + { "peer_id": "EiCoEn06vol7BFgli2k6joQ/X8oO35BJ8JqCJJTo8hhv+Q==", "token_balance": 137558 }, + { "peer_id": "EiCVHBFKcgVZvY+MrdeOb31tgLqsM+RFYHAbYBiZkrhMMQ==", "token_balance": 137558 }, + { "peer_id": "EiDysFOhmL573OSnuvgQCeI0uc/pWCyCjyQq2hL/IC2ARw==", "token_balance": 137558 }, + { "peer_id": "EiDZazhdtuv9vjvvHeJBjS51oNvrOwr9cZ+jMltVVbBcNw==", "token_balance": 137558 }, + { "peer_id": "EiCjDzoPmvwLkQKR/ClUHymrezQPixf6nTuTM7YiYkdE/Q==", "token_balance": 137558 }, + { "peer_id": "EiDmZTWRIgsNjFhaPEped34QUt1uBAJYSVvy7X0Hw+fJCg==", "token_balance": 137558 }, + { "peer_id": "EiALJndJOaLZYu9GaJ53HQ+OHqAtOU1s4/+iWOK/8iI42g==", "token_balance": 137558 }, + { "peer_id": "EiAQ1qkmtAgk9qEEJhGNFLaXprDMF+WNWkcZvGDKFVmOng==", "token_balance": 137558 }, + { "peer_id": "EiC2bTcJ8YY4Zz7VoSvfUlqYbRfwx6TPhntmH3PLETHXkQ==", "token_balance": 137558 }, + { "peer_id": "EiAlHUUXZnRgi49XoRmzEvLTBuf3J2WkCidjp9BA/HzslA==", "token_balance": 137558 }, + { "peer_id": "EiC4ftRiGqwMolMTJoV/gHGOykBApNz5E/3HwfvIkubIMA==", "token_balance": 137558 }, + { "peer_id": "EiAINRTHs8IfaTgVX/RztGWaf6XjMcWa9oHsWu9AqPQirQ==", "token_balance": 137558 }, + { "peer_id": "EiDKTuzZGvDyq4nrSLG1dG5EjSaBsTnI9Dhb5u2QR3w2IQ==", "token_balance": 157208 }, + { "peer_id": "EiA8mcKvhNnXtOzDkjarKpL5JPoRAaZIxd+Bxm5epWsh6A==", "token_balance": 137558 }, + { "peer_id": "EiBhB6ThVIxME1Z1BPBfxuKB94rSFUt2RDmc2+r/HUBlYA==", "token_balance": 137558 }, + { "peer_id": "EiD+dsBrCy/3DNorb9cecwT9FpR2MNNHc2OBeSA1Iu3HkQ==", "token_balance": 137558 }, + { "peer_id": "EiBGgDkjoanLHTyAKw8IR8wSnwPgnWOLcOsUQ2U+1oua2g==", "token_balance": 137558 }, + { "peer_id": "EiDk1kU9NOtQR8nMytl0QhxeH3Ssuw52OxqVvOZrslMr5w==", "token_balance": 137558 }, + { "peer_id": "EiCN+oyx7su4px0triKa6EYxl7AB0DbSpi1KJzbO86VTug==", "token_balance": 137558 }, + { "peer_id": 
"EiDcab9PStZfKELc20vvRFHE+fhzqmukDlJOR3+gAl+UmA==", "token_balance": 157208 }, + { "peer_id": "EiDp0kEdQErFqOg07FU/GnjeVB8XEpma44Vyc3mb1Ef9Eg==", "token_balance": 157208 }, + { "peer_id": "EiB89GnA0DEEes4RXAyLBJesXJSYjplUSkgpTNaoWBLsEw==", "token_balance": 157208 }, + { "peer_id": "EiA1VfdGkyx+tJG4N6UGu1hKjodgoAGTEorzWfVs+5Xw8Q==", "token_balance": 137558 }, + { "peer_id": "EiBU0+sJZJH+7Bp3a9j2LVluKd9t7UBFH5rahzHP7SpYWg==", "token_balance": 137558 }, + { "peer_id": "EiDA3iHr5GCI3fIw6p1sqS9Ut5PjuMqUdP+SitnCMvOPWg==", "token_balance": 137558 }, + { "peer_id": "EiCoQxedMtpJfB7t/35aaau4uWesK5ln4KykmSdgHG29AQ==", "token_balance": 137558 }, + { "peer_id": "EiATco/nzGcWFf8ahZJyIgppqW68b7uTDj92dh0QGrUzyA==", "token_balance": 137558 }, + { "peer_id": "EiBR2OQ10cuOu4nKdKeHaQy4Uz7IIyw3X9qCtsaNuH5Uow==", "token_balance": 137558 }, + { "peer_id": "EiCKZX/CNm+h9RK07sfStkvaxK6dHoP4/B1QCyOwQBmtug==", "token_balance": 137558 }, + { "peer_id": "EiABcdEEJJfBZJpdCbWjUh5uSRC4y59UkCp9GoM4biwT1g==", "token_balance": 137558 }, + { "peer_id": "EiCe0aTgx2Cu1zsxaVfDR7cxNu65D9v5I3T7MTIfosNG5A==", "token_balance": 137558 }, + { "peer_id": "EiDHCldHkMNoz266D1tgOti+SEToDtSjujcypt8nNL1G+w==", "token_balance": 137558 }, + { "peer_id": "EiB3vZK8ixEftjLqgkvldnn9yHTxe7mWNk6ba8mBmt9ksQ==", "token_balance": 137558 }, + { "peer_id": "EiDCSLA2xAN0e679iRafJHnACM1bKfnrLY7PIpFMxQmEFg==", "token_balance": 137558 }, + { "peer_id": "EiCLd4Vmux1VYcg6WvsfoOKy5vfL3uGtBnSCd7vE/fylWw==", "token_balance": 137558 }, + { "peer_id": "EiDPsj1bcWNbAKm+HHYmShX0kEZVA5UAcI7NyY14/CPpXQ==", "token_balance": 157208 }, + { "peer_id": "EiDqX4/5Ou8vKMRnayi0O3ltMkdvGkMRCb8G/PL87vQfFA==", "token_balance": 137558 }, + { "peer_id": "EiC68pzDnhtLXMM7nyATOZwbd+DQzQPse1J+ta5jeaOYFA==", "token_balance": 137558 }, + { "peer_id": "EiA/bxjlK17UdlZl5q//iaI+y9OYfz/sDiq/0+SkU6hT7A==", "token_balance": 137558 }, + { "peer_id": "EiCKfybjaoAGq11A2bZMx+W7NQCpEGYPDmj6IhM9TUh3tg==", "token_balance": 157208 }, + { "peer_id": 
"EiB4y3z/324s/Slp/j2Wv8hEP6RZPpL5pfkfNTI//d15/w==", "token_balance": 137558 }, + { "peer_id": "EiCtdbaksp9dOd9DDa72bWUokeG++DGYzI8tESCbN5U6TQ==", "token_balance": 137558 }, + { "peer_id": "EiBNmWmujZOthMkVX4hRXuxhT2UkGQxH3ZQSbRTYG3HkKQ==", "token_balance": 137558 }, + { "peer_id": "EiBHrClYSwP3MZZlFqPz/hycq7ujDlPGzNoHn1a2x7Bwhw==", "token_balance": 137558 }, + { "peer_id": "EiA+1+tnDCb0nAf41vlKbuXmB2NcY0CCe1FopFTwbULbxg==", "token_balance": 137558 }, + { "peer_id": "EiDmZtyzkPJQFaRPNlRXA9xqzFGZCqRI4dMxTI4xLlg5ug==", "token_balance": 137558 }, + { "peer_id": "EiAPdwVGIS5gdcR47qEmaQgYHTLGLDzbo8l+fqjFYA2qPw==", "token_balance": 137558 }, + { "peer_id": "EiDc3b5ykipRZwn2s9JD6GYWgCjjYNkDfEUH5ILBHnNPmw==", "token_balance": 137558 }, + { "peer_id": "EiAL2se2LGR/0tPFRZV1REcHjU2VzFgNfoE6BC07jkGw6w==", "token_balance": 137558 }, + { "peer_id": "EiAWxMdDkXEdvH+UyYTJ7zA/HopO7PdAeWzaFAlAPHxrAg==", "token_balance": 157208 }, + { "peer_id": "EiCUq9h/rTEx1kSSB4/LMVgsmregP6lUTomkgYrBnbjNFw==", "token_balance": 137558 }, + { "peer_id": "EiCHOyTO9Q4IfMaMUnUEiURByDFrt0eIGpgHrfMTH/qy0w==", "token_balance": 137558 }, + { "peer_id": "EiAQu5dD9vuiOMg6HzcsEMBivS/d4IQXN3h77mZjMNhibw==", "token_balance": 137558 }, + { "peer_id": "EiDXPzhsqJ/bjDAWkTTuIqlznKlSvcePJ8Mk4mhZnlCAfA==", "token_balance": 137558 }, + { "peer_id": "EiCkvJZWVRP7e4sH+GHrdXWqE7T5ADwTXCQ9SwZ6y1016g==", "token_balance": 137558 }, + { "peer_id": "EiBP9FJgyNS06x+T78clbNMSvf80nIQ+V7k+9aQhmwD/5g==", "token_balance": 137558 }, + { "peer_id": "EiBV6xdHFCqsbh/V2GHzykdmnpUaqFxNE92w4dlqokV0Xw==", "token_balance": 137558 }, + { "peer_id": "EiCCFD7uHIKJxtKbPL3LgG0wif4b8Vi8tWFd9l+1JqVDLw==", "token_balance": 137558 }, + { "peer_id": "EiDjWJ08Y7NayDPit22SnL/9lh2mdnBgejn0yULZYTkwHw==", "token_balance": 137558 }, + { "peer_id": "EiDMehCt7VlQilNOEWMDrqDTrRNgFQwB445sKjJtrTNb/A==", "token_balance": 137558 }, + { "peer_id": "EiDCwiJOFNOqormuFUv4gY9Fx3AALf2+zCGdINYXVwAU0w==", "token_balance": 137558 }, + { "peer_id": 
"EiB24XtmOig1e2COvo0PNam1LkAdVSqMaCVcoFuWfwPFjQ==", "token_balance": 137558 }, + { "peer_id": "EiCejOSxGqS/PE/DHRDdWqqtZsVg4dE3H/grq4kXA0lDFQ==", "token_balance": 137558 }, + { "peer_id": "EiA/6XxrFKmKK30gWpul7ST18nMrjCmE51M6SgRHEn0YCQ==", "token_balance": 137558 }, + { "peer_id": "EiArCutLI9X+cwDwPqe+ozv0OrjHovrvjPXA3vH5XbnY7w==", "token_balance": 137558 }, + { "peer_id": "EiCvMIjvDRwc9nKfoZGzf4PME3S7FKIQIwzXQndg5pNmWw==", "token_balance": 137558 }, + { "peer_id": "EiBpleJCfpISjLL2Qlpu0CBx0YtT/aitwblYnlbrEQhp0g==", "token_balance": 137558 }, + { "peer_id": "EiB3dJrzQj57hGlL77MICeb+TFDqzVu3ErL+bfFCKhkhtw==", "token_balance": 137558 }, + { "peer_id": "EiB1PX+8OIQEeBJn8aAbbIbhgCW2AeYXoO98ddxyGmiKMQ==", "token_balance": 137558 }, + { "peer_id": "EiB4d8LJCTTQDgqk54x0AJkhgb+WtGOaZIGJ4km8uUodMQ==", "token_balance": 137558 }, + { "peer_id": "EiAAA1mzRx2CKmcp/tmH3JZpmuJ11qFiWIz4iJKEFy5HWQ==", "token_balance": 137558 }, + { "peer_id": "EiB36hBV4anjov+eJuhz3Wu74TX5XEl0Def2x6sipX+/8Q==", "token_balance": 157208 }, + { "peer_id": "EiA/YbtP4FdCmHecgeScK+JyCgGsQXzT86F58sV6c2mWXA==", "token_balance": 137558 }, + { "peer_id": "EiD7OeDPJfZYGHp0Zhad50Zd7Rhx2YiLi/o8oUTwUQQRcw==", "token_balance": 137558 }, + { "peer_id": "EiC9YMdYZazIpQgj49BlcE31ISygEBkSDeYZJE3htKz/3g==", "token_balance": 137558 }, + { "peer_id": "EiD3zWIZoY6PXXeF7y4+a49EMNiviocb6Q7gHYu4wIpKsw==", "token_balance": 137558 }, + { "peer_id": "EiDiRmVhssP3nFyfci/Zh/EOBTzDlH6iht0UBNaaW9xLsQ==", "token_balance": 137558 }, + { "peer_id": "EiDiq1QVXeZvZoRP5lmcavDqMLk8oXKtSz+QJ7ok2zqQOw==", "token_balance": 137558 }, + { "peer_id": "EiAtM9sDIaGs9cvPhotiJDN0eX7TJPdAJ1c5lC1KR5px7Q==", "token_balance": 137558 }, + { "peer_id": "EiC0d2NFu9t8wJTBqSLCIbWLI5XVENcFHtnsMkUYUGcghg==", "token_balance": 137558 }, + { "peer_id": "EiBczdVAatCiWaVsgA4I6Qo9c8VDAndub3F1BYP6aFva1w==", "token_balance": 137558 }, + { "peer_id": "EiCQJsIDaNWniKt7oIH9PVMS4sAjqw9M1h9eI5tJOfcvbg==", "token_balance": 137558 }, + { "peer_id": 
"EiBAoCcTJStOSZN+ae9cxG/GJqJrFBPdAE9LkKxNzp0lOw==", "token_balance": 137558 }, + { "peer_id": "EiDDuejVIWjWB2EkgmzmTMXS0sWKWASp2xbiZ/w048hgLg==", "token_balance": 137558 }, + { "peer_id": "EiBTwCmsrLo9nNe+2v7aI/GfCAgvaVBx+JJlheMboLqmMw==", "token_balance": 137558 }, + { "peer_id": "EiDTBA//SALTW0B78xLh2mpJP/azixsk6pfPd9GE3VKd8g==", "token_balance": 157208 }, + { "peer_id": "EiD8xUedCdFDxKpvVHS6XxHuEon6fuXWIR5d27nWWOneQw==", "token_balance": 157208 }, + { "peer_id": "EiCkA9myYy7GEN28iFlSiunkX2hRWG5no8JKPRBnrGlBCA==", "token_balance": 137558 }, + { "peer_id": "EiC9u/D1Kc7QRXQQvYbtfdGyPORzFeyHAHlfjISFz8hlfw==", "token_balance": 137558 }, + { "peer_id": "EiBtQ+eLb0VB4yW1nXVa3iJiJ0qTYPNu/2gcyOEoV2zHbA==", "token_balance": 137558 }, + { "peer_id": "EiCwOekX3cjvUyWJAPdi1yKUW1QkpWF02joo7SKO75L+Mw==", "token_balance": 137558 }, + { "peer_id": "EiCJ0IkusUB9C9G1uS61LL1LGTKnybWemVgI6EIXnDpJhw==", "token_balance": 137558 }, + { "peer_id": "EiD0EzPqhxDeqVXu9L2f7ejpy1uyi7fMDTseHSicDS/uyQ==", "token_balance": 137558 }, + { "peer_id": "EiDYVPNxD3RkVLAn+NQRXw+Hpo9MndkCpZ9RRc4s56vkKQ==", "token_balance": 137558 }, + { "peer_id": "EiAhwM51CcGxWJu/btMQXLWcHvspYe9qmwOV71kr1saqmg==", "token_balance": 137558 }, + { "peer_id": "EiC3Kzc7YMFvjTQJNRzg3epEfqa2pf0HBUi7mO6/r4g0aw==", "token_balance": 137558 }, + { "peer_id": "EiBudCwHp8+4szbbCSoU/ub7EsqR4Ml2p6bkuKP8oqfB4w==", "token_balance": 128947 }, + { "peer_id": "EiDZobodlnDZ/nNPhlDlw29XUQlevuKLopzM4rsMC7keCA==", "token_balance": 128947 }, + { "peer_id": "EiCIdkcjKNCexeiA79DIPTsN7V6pEuZv9Jf+PIDAbXhT8g==", "token_balance": 128947 }, + { "peer_id": "EiBNcJ4MFi2zU9uRYHZecmx6mh2vnQtdtV42F7KpluNpeg==", "token_balance": 128947 }, + { "peer_id": "EiDFttf6R3TjZyW2/R9XODCUEbn3gSLruOSQclIanTGw3A==", "token_balance": 128947 }, + { "peer_id": "EiCYvXZFY0o48MA5kJL5aP+lX3jVIOyfwuxHE8QsqNsWrA==", "token_balance": 128947 }, + { "peer_id": "EiD1nq5d0976wfxEhbJqXeWO7lZgX+z9HyGuqkr03kj6wQ==", "token_balance": 128947 }, + { "peer_id": 
"EiDBqssLUs550bRROG5rT7Gh2ZSUD3yJt1sG+cR+KfEJbw==", "token_balance": 128947 }, + { "peer_id": "EiBHfY3x3OGPrj5NNv+Sv/v91uRzLunt5xDzfcjye47PMg==", "token_balance": 128947 }, + { "peer_id": "EiBVpYyEsSVHYip42IG4LbYO3R57T89Nckk7Gv0eDUPOAw==", "token_balance": 128947 }, + { "peer_id": "EiCKhTBAZRFHY3zim2MwgIg0ZHP4PEmyp0NJBqrHbouJ/w==", "token_balance": 128947 }, + { "peer_id": "EiDAlZyPyVkSoT396S5/4gqiV+T0xyFIUzfK3cu2/9YCQA==", "token_balance": 128947 }, + { "peer_id": "EiBenIgcHBcpfYF3glMJfMFFzR8vlTvEYKwut2EinyndIQ==", "token_balance": 128947 }, + { "peer_id": "EiBiCbWG5pLCORbJvDHn5WUs++MW3bJV5B18t1uk2x8vrg==", "token_balance": 128947 }, + { "peer_id": "EiB2HzTaxx/MVv/cnCzctCRJlt3dzGHS9GU90DW1acAmgw==", "token_balance": 128947 }, + { "peer_id": "EiB0ySUp4IB2umQ624skEgqsVQj0lY/LXieaABpuDV9+FA==", "token_balance": 128947 }, + { "peer_id": "EiAmVUd8apr3OsmKXyWwhJbMmmWA6BRTyZ+KI4IwAfFyCg==", "token_balance": 128947 }, + { "peer_id": "EiD83N5on3ALN7c5Yrukiz4ZvmtcVDF76I4/zRtLgc0wAA==", "token_balance": 128947 }, + { "peer_id": "EiD4bfZuFjOXKZLTxm71N6HZjzAIUM/8Sqv/cU998qo4mg==", "token_balance": 128947 }, + { "peer_id": "EiCe0FmkQzbuYrL9A0t3mwFeQdiIky2xHB7JMxjBYIeRAw==", "token_balance": 128947 }, + { "peer_id": "EiC8QR2AYCt9T7D/+f7uvyFWOKvcmUo4X0+aanBGzaVrEw==", "token_balance": 128947 }, + { "peer_id": "EiCPHbDqSZGymQQDigwc6p0jw4A13JlDF+BKvzh3O+P8fw==", "token_balance": 128947 }, + { "peer_id": "EiC9hCUmVbouUu0FP38f9zhg6x2AlhuBcQQxnIkau5X9oA==", "token_balance": 128947 }, + { "peer_id": "EiBKkvHpfQkotrQ+ZOcrHSr6Be8+Wr5JqXu9BurDWNvlQw==", "token_balance": 128947 }, + { "peer_id": "EiA/LKqaSLse/YfL2nBR0F3Vd7vrvmekkSMV2QUY8psjVA==", "token_balance": 128947 }, + { "peer_id": "EiA38eVX9pSSs+INUgLi/t3Zihv+LNAKjmW2Qt7+BwCx1Q==", "token_balance": 128947 }, + { "peer_id": "EiAlX1Mw91D9yPSEL479b5zRTfbUb+BSQ5QuGikmkgL3eA==", "token_balance": 128947 }, + { "peer_id": "EiAj8vNP8zEmMcEQTlsucUe82e7oBk8QVtvZDOmsje7Aew==", "token_balance": 128947 }, + { "peer_id": 
"EiAXmwRH8HdwKPDz7APhYqzVTxhoPUpdKmwSM5tPPkRm7Q==", "token_balance": 128947 }, + { "peer_id": "EiCrTZ/hTLYMdmQsS5mdScLKoGJTMoZ3F/U4GRxfB9gguA==", "token_balance": 128947 }, + { "peer_id": "EiAkWA+OKVn1Ajrp0evPyr/0rZD3doU4WmBkY4CtxzVYVw==", "token_balance": 128947 }, + { "peer_id": "EiCXvAsfWvjDuhFloqV/WQitXhHabIRBZqOHR3d5rVSiLA==", "token_balance": 128947 }, + { "peer_id": "EiDugvOk+eWUfMCo6LaEZbCdu6an+96p63qlPEHULHUTCA==", "token_balance": 128947 }, + { "peer_id": "EiDr0x0Yvn1qTyqfzwKne/hAnosvKjk9OWfsnbFI8tB2Aw==", "token_balance": 120115 }, + { "peer_id": "EiD5JmPqKCdDs3RIhBgapI1Ie4S6lddJ4O/iiMPwYoyoGg==", "token_balance": 120115 }, + { "peer_id": "EiCoPeQ+leGveEM5P/TX0fdNuWTg7Dp6T7lQiXzDLhbw7w==", "token_balance": 120115 }, + { "peer_id": "EiDf9LzrOWpezMQjR9rOjU+ZabucZu/yCLkzpvTJAj3dDw==", "token_balance": 120115 }, + { "peer_id": "EiCefkAcM4ut0sZiT2f6aEaNtIZI2hhjzT/itbe6PIqkuw==", "token_balance": 107750 }, + { "peer_id": "EiA24CKWMo0F6Am5cHrzVooLkX5sO0eFb3L3+jq74qY3hQ==", "token_balance": 107750 }, + { "peer_id": "EiAwlQCRHAtudM+JuqRo7h6Spgcp689hQeYufdGOVx9QPg==", "token_balance": 107750 }, + { "peer_id": "EiBfXdvIsVFwKz5WVDS4v4DCp7RY7WqiIL2ZDTWDVEz+yw==", "token_balance": 107750 }, + { "peer_id": "EiB06ZbA3xXEqWhuZGTHPUL1v0LOC9XmTjGLZQSiECcJLg==", "token_balance": 107750 }, + { "peer_id": "EiBa9ClQNKr1YNREaaZn5e7IrA0XkoCur/YTFlCPufsWkQ==", "token_balance": 107750 }, + { "peer_id": "EiCuH4VWAQn9FbvJem/dmswYYfRW8A7H+5Q3URK5gNv+NA==", "token_balance": 107750 }, + { "peer_id": "EiAX3pvxWa621huB5lEUELwY5RYFCrRd1KToebTN3TAXMA==", "token_balance": 107750 }, + { "peer_id": "EiB2EikvzsEXIL0Ybe2ssRjnmiATWfw9jXpWP9cL90dydw==", "token_balance": 107750 }, + { "peer_id": "EiCTzrmo7Gqk6p9gpJJvgyIb/rc9JT1OdjojH6TMKSMf8w==", "token_balance": 107750 }, + { "peer_id": "EiB+FoAYEbJ0RVI9L1rjxelLzUrbaHP1aOn/SQBbuVTFrA==", "token_balance": 107750 }, + { "peer_id": "EiDDwaJAiOcjRggi9W/DS01gwitZ4plOthL+sV/F2WydQQ==", "token_balance": 107750 }, + { "peer_id": 
"EiBk3y1Uw8WXmUHDRxc9ouse00KC9Dr3czFOG4MTZnUhsQ==", "token_balance": 107750 }, + { "peer_id": "EiD9PRQM5Kjk4lA5SV0wvdDJfgZjFrHyN5sZxXVySLuu7w==", "token_balance": 107750 }, + { "peer_id": "EiDmQlALdT6m6Q8wZM16eZGLVB9pjTsZEWivEIn4pdkczw==", "token_balance": 107750 }, + { "peer_id": "EiDp29FXbAscbKZNcH99kEDP0PcGwW22TsMxnn1HK1D1Rw==", "token_balance": 107750 }, + { "peer_id": "EiDj1Bddcb+fXnm4W8HLMHjNqjtlbhWmfED/1Oha+w33Hw==", "token_balance": 107750 }, + { "peer_id": "EiAuMprA+CZlzE1ehX9O3M7Y+ootz05eCVaFVyD8Udi47Q==", "token_balance": 107750 }, + { "peer_id": "EiDoyelvLUjWjyL343FNLw4b1v+6Loyo3YIJuG6Agb+TfA==", "token_balance": 107750 }, + { "peer_id": "EiAm0kOxey9yhwCN/J5WsfIm8fXRZuAX7gx4dWz5OgguKg==", "token_balance": 107750 }, + { "peer_id": "EiDVWI/eL4QK9rtZIRyqa36nRNaCroGY/8yX2Ex3q+4SQw==", "token_balance": 107750 }, + { "peer_id": "EiCUnLSqUJpeIkKltKHECTUl84MDnZtko0WTRh5OqCmO6A==", "token_balance": 107750 }, + { "peer_id": "EiDSP7Mw4TRSBmuF9OOEAXtcK//RTzJLWw3xwuVLQijRVQ==", "token_balance": 107750 }, + { "peer_id": "EiB5FLxFycijk+kbjUCLVN+u/LMdwvEHbiA8UmseaRJUDA==", "token_balance": 107750 }, + { "peer_id": "EiALqqXPhdT+LWlx2cbx3vYiFGxeUhv+KgyhE+MT8g7fLg==", "token_balance": 107750 }, + { "peer_id": "EiAQctJosQjmEukddnIjsML9I8IVPEG9AXnwIgSOwS1tag==", "token_balance": 107750 }, + { "peer_id": "EiB1uhvNx1vVURYaDgyjtKh/kAvKDTA+pizs9Ki1WK/nGg==", "token_balance": 107750 }, + { "peer_id": "EiBe+uGO+zfmX3+O+o4ZMG2FJYj8WNpF/3x8WIG/lpvr2Q==", "token_balance": 61824 }, + { "peer_id": "EiCB9chm8KaN8pZymFIdkbNZo0mi92Td9zwzZ3AoKrO79A==", "token_balance": 61824 }, + { "peer_id": "EiAPtMZC5vXTmvJp3tHtDWN//mufykfif5ytS+lM1xxMwA==", "token_balance": 61824 }, + { "peer_id": "EiD+FBGriVFqbWsGufY4M/7fljQ6Qczaa+ukYopboNnw0Q==", "token_balance": 61824 }, + { "peer_id": "EiAuD4e+ZRXsFJGh05UNVsqLQJQaaJ/E1PQZN6ElMB4Nnw==", "token_balance": 61824 }, + { "peer_id": "EiDTB6eObT+GA+cXvO/5j98NVdqQcU9wyBJ+/vSKIumj4A==", "token_balance": 61824 }, + { "peer_id": 
"EiDca75Qx2Lr6s/Lrbl9O6Z72bivN/mhpX/ckh8rwAK+fA==", "token_balance": 61824 }, + { "peer_id": "EiBvZAiuQHUUkZGpqjuOpwriYtQZwGArCN5E9NsoEEWu2Q==", "token_balance": 61824 }, + { "peer_id": "EiDRpjDjicKK6CO8udUAjJgAGqyCUNXVzAUa9PV/lnkenA==", "token_balance": 61824 }, + { "peer_id": "EiCwOvw+44dQw+xdnOoPehvrg3b/PNFBStXpRzj3t9+J8g==", "token_balance": 61824 }, + { "peer_id": "EiA3l0p54keWgYXyLjbyM6OIstASS0qMdA/8q/CCnvqIZA==", "token_balance": 61824 }, + { "peer_id": "EiCSeatYJDXNTeO5WPM7M8B8ts5SG+KPpda8MntmQAhVAQ==", "token_balance": 61824 }, + { "peer_id": "EiDs483tDwR8UI6GN9TlFNh2/BsrGKRIUJxXLJLKf9Sx2Q==", "token_balance": 61824 }, + { "peer_id": "EiDNkWoJFRRDalKXKC04bPXXg1UxEkGpRj/Dx7+02Z+ptA==", "token_balance": 61824 }, + { "peer_id": "EiCqTs19rOr56QlWFibxPrsgUwl8cWXzDxTmyE83dm+p/Q==", "token_balance": 61824 }, + { "peer_id": "EiB0lN+j8cxNve80vwVj+UnzYC5lkn5AzQxb1AitNkoiaA==", "token_balance": 61824 }, + { "peer_id": "EiCYihN/GnRlHD7rTEie7A5yPyYgFbLgY9mAvxz8gB2lrg==", "token_balance": 61824 }, + { "peer_id": "EiDY6YKI85tRvI+P3PlJ0YZovmCcAVQQHQOMZKZ/Y7hbsA==", "token_balance": 61824 }, + { "peer_id": "EiAk1nTaG+sv0sTHLdoB9v11oH1BmyGIZjMYOLQTLhF8KA==", "token_balance": 61824 }, + { "peer_id": "EiD24dCVyfK2SQFtCr0v+cb7m/R8fz0aSqi3MFdkG0heiw==", "token_balance": 61824 }, + { "peer_id": "EiB7yaEqAc4bQdJIxKNh7xpk86m3P6xgFWua7Y5mRBIaPg==", "token_balance": 61824 }, + { "peer_id": "EiC53Ud7eV3p75zwuoDzkkXdmZPbyz127GYJkNptKauLfw==", "token_balance": 61824 }, + { "peer_id": "EiCb/z7SCCYAb6ZRnj9pmDH/quyo9jJgUg22NNEYa/FgZQ==", "token_balance": 61824 }, + { "peer_id": "EiACcSxsFxdFF5tYlWtEJzu4phV0xY9nvT04UWxuzgaDJQ==", "token_balance": 61824 }, + { "peer_id": "EiCLgtRnsVkmApnbIuS4rKWuhHO/TNKb8RuMtMK/+RE4ow==", "token_balance": 61824 }, + { "peer_id": "EiAa5nZen9aFIi24iHtZ/Ex3lmQ2EPCmkeCPDFMwiOcNMQ==", "token_balance": 61824 }, + { "peer_id": "EiCujlXgMAztpSmJQXfEie+4VsQQ5ycaeyp1BWk4zkkfvA==", "token_balance": 61824 }, + { "peer_id": 
"EiAFPSGu47UX5mK1WZub/cyz+Eqzz/kHojG+EObqOT+Tig==", "token_balance": 61824 }, + { "peer_id": "EiCS2kZI7/gne4GKvMrZ2pZ2M2dvdOQT1SonV/VcHTL6rw==", "token_balance": 61824 }, + { "peer_id": "EiA+C/aDS3W3hp7Q8NU9KQrTk+1JSdVgOQUuZhBU1iQEvQ==", "token_balance": 61824 }, + { "peer_id": "EiCeW9mbI6XPL/+u3SCSBP/vnh3Y+04Nqa5j2aQu/es6Bw==", "token_balance": 61824 }, + { "peer_id": "EiDfDAJkh33TLXipy070sZFs6gkawKYOktAiMJzAlY3dHQ==", "token_balance": 61824 }, + { "peer_id": "EiB8E50DyoPLYs31IOyU5TVjEAHi9fQaLFKpkc6/ybGycg==", "token_balance": 61824 }, + { "peer_id": "EiDbjh0elVYk86eG92sl594syNz+T4kmRUOfGg5OT6jIBw==", "token_balance": 61824 }, + { "peer_id": "EiASb36QWr/w2bfUVBAdeyvJdae6VG6ymGHn1F6ThMqOXg==", "token_balance": 61824 }, + { "peer_id": "EiAOMF0CzwdP71hBZX85HrI/7EL3B0V5R1cQSt6P9tWVHQ==", "token_balance": 61824 }, + { "peer_id": "EiC1km3lus0lSHxQHZBITkn27ALaloZ+tm5wm4eZUQp05w==", "token_balance": 61824 }, + { "peer_id": "EiBn1Q9Yyf/9wN0DNHqi1BXtFnr8MO1rx/GKCBP9SIcD5w==", "token_balance": 61824 }, + { "peer_id": "EiD/aXrgArqCjMprLcySsatTjG3lmN3Vpm21ERGBapnbCQ==", "token_balance": 61824 }, + { "peer_id": "EiBGo3UaRFtHxngQqLNRhkDOEaWs5mNe1ZsliC5H9N0+FA==", "token_balance": 61824 }, + { "peer_id": "EiD4SkT81EXr+ZYU8aEtP5IAbLYRb4hfN4CWIbgoMa15Ew==", "token_balance": 61824 }, + { "peer_id": "EiDgOMRPpkwnokWMhfIulVCx5iRI/ZtVtkvGifd7oplQqA==", "token_balance": 61824 }, + { "peer_id": "EiAOU1Vo85qO+op2b8nHbDxEoQB1PiugGmU6oM1xIxGfIA==", "token_balance": 61824 }, + { "peer_id": "EiDUwdzfroP/KA3GUUhaS+FAUnN7St87xdg2aRbCxPYgBA==", "token_balance": 61824 }, + { "peer_id": "EiBH4XPQlGcXC4Tp64lv/dXntJwHAiwfiyd5KbiCALchpg==", "token_balance": 61824 }, + { "peer_id": "EiDuLC/SyTv5wbp/jsRjHXQz3ME2r65RMBoULwq8GOj8gA==", "token_balance": 61824 }, + { "peer_id": "EiC7FJ83dVoH+KhxBsRiWCOvvmOmgcbOykqjIJZGuMWtQw==", "token_balance": 61824 }, + { "peer_id": "EiCrnRmoeqytLLo2t7gFXN4DoXgDcW5gGxsdfx1NxiDPfQ==", "token_balance": 61824 }, + { "peer_id": 
"EiAUPveUbHBELb34KirMKvDmYFpM6gu7Y+7RFWGqt5G4TQ==", "token_balance": 61824 }, + { "peer_id": "EiBLmdy419du6Cr3FJ2JJ+pJknek6vvVBquFr6Wof24aqg==", "token_balance": 61824 }, + { "peer_id": "EiDet0IKnZSHTrc5runAi0s4fPU66sCrfVnC3YrKxl6FvQ==", "token_balance": 61824 }, + { "peer_id": "EiB8YNNT+f32fQyo5Yb2AZlj5djjuqohs2rS16847nfo7w==", "token_balance": 61824 }, + { "peer_id": "EiBfRtaMHINkOraGjpoveo0xGmXVIiTkcLPY3+s5zCSTBg==", "token_balance": 61824 }, + { "peer_id": "EiBzobVhRUY6NmHFQphTXzSVwGec4Dh6RO5hExcLzrurJA==", "token_balance": 61824 }, + { "peer_id": "EiCgn1324zpvTbmxl+4IkM7QsODIaApQ7szZowWWowBd+Q==", "token_balance": 61824 }, + { "peer_id": "EiBmytP4g2gix8OkjbEFrvPl9lr1KyW6SSjuo1duWg8mcw==", "token_balance": 61824 }, + { "peer_id": "EiBw6ZcCRqk6jnsin0txNjOXU33pycAv0dfvno2slS1orQ==", "token_balance": 61824 }, + { "peer_id": "EiB+pcJzbDvkZt9wFeDouWA0A+uWg1EoiNdkRCjTUshjvg==", "token_balance": 61824 }, + { "peer_id": "EiC0Z7QzQ7+LJMQDShs/t50tSSpgdBLHEVnIi37TCrcI/Q==", "token_balance": 61824 }, + { "peer_id": "EiDIhtoFJIXlDLxa30nAlL1xfVEsPer1PZPtL+OfImXoTQ==", "token_balance": 61824 }, + { "peer_id": "EiDqCjFGTEahSbLOKlHJhBWegH0Qp+ulT6OId6U9UlN82g==", "token_balance": 61824 }, + { "peer_id": "EiCnoPVKUzy4ckmKh29PTYa5YNLEFk7dSJp8jkjDfldZlA==", "token_balance": 61824 }, + { "peer_id": "EiAeijjugEKMsWJK2S4k9ZsdxL2AtNoo3yiu9yzgCUYljw==", "token_balance": 61824 }, + { "peer_id": "EiCgBkybsxAJev8LDuGaHFo+Y/0oXalNGwyNU8bn3dZ2EA==", "token_balance": 61824 }, + { "peer_id": "EiDI04gf7fFIdMgy48woEvhL37qjCLBoXO4FRx9SYjchkw==", "token_balance": 61824 }, + { "peer_id": "EiDXKrbqTGUCD+Sc+vK6dffF2hPyqrb24ZNYdnuGWlZJ0A==", "token_balance": 61824 }, + { "peer_id": "EiAuW/4ekqiqvUQ9uAuW8MmlFab0hGUzed7XYA547uNFVQ==", "token_balance": 61824 }, + { "peer_id": "EiCAisdo4y1a8hhfbTW57cf1mO1B0ozIjWFWBl/Rpuc8NQ==", "token_balance": 61824 }, + { "peer_id": "EiAD7hV/sysaahbEZvB1wgQ0qSZ7TJeCQpTEKJQp8e7gRA==", "token_balance": 61824 }, + { "peer_id": 
"EiAe5RkC1xrO2dLHNKRDfVMPnfQdqkuoFiSUEwq7/kqt7w==", "token_balance": 61824 }, + { "peer_id": "EiBf6AAXBRJNIlo4EVlCowR+GrkS1xtOZqfFDwZrbdj76A==", "token_balance": 61824 }, + { "peer_id": "EiAf6JNp6EJ+slqf6gFNccTE5Weaje/7mH9AC65B/l0XDA==", "token_balance": 61824 }, + { "peer_id": "EiDVQtE9e+nVS3KNnlT+K6I+8yyUbWgOCQ+i7q7ygQQd3A==", "token_balance": 61824 }, + { "peer_id": "EiB6VIK2jYp6H3TaNNvfekSteOtOtDTBR0ouuw08cz8goQ==", "token_balance": 61824 }, + { "peer_id": "EiBsKhzoSZX3OfW7JkaQ4SMhG4OFLQC/dLVS8UdqKRl8iw==", "token_balance": 61824 }, + { "peer_id": "EiBbyy7XmY+K1a7KZ3B1Rhqv8N5QycrRBi+SIMoTd1gCAw==", "token_balance": 61824 }, + { "peer_id": "EiDxKveI0vntWs5k2Gbkp7Jz1gsOWmM965JIkPIfybYM0g==", "token_balance": 61824 }, + { "peer_id": "EiCanoHsHQfIYBX2m61Lx8sQIJgfQR5kwisnqIzqjgoKJA==", "token_balance": 61824 }, + { "peer_id": "EiAm6wr+C8ONNrhJ/C03Urtr6ZymsqwUrQWcFO2sqehWFQ==", "token_balance": 61824 }, + { "peer_id": "EiBKuUHD5fLHHw0zbGDoSnudE0cK9MYqWgIlztDit86Cfw==", "token_balance": 61824 }, + { "peer_id": "EiD4l58piCN43cC082ods0+ZU14ztKJA3oJFYDDdxbZUmw==", "token_balance": 61824 }, + { "peer_id": "EiAv5VroWTdm+bfqpZP/GJNFBsZ4w7yX9MwjzNuk8VrDow==", "token_balance": 61824 }, + { "peer_id": "EiBpnRvySi87aP7LRpnZzTUORl+Xd3hXS+xm7HUHqWMPNQ==", "token_balance": 61824 }, + { "peer_id": "EiCvMFaIEmqxfeWqEIc8wupEbN9/UkgKzGpJp8Y7DSX1Pg==", "token_balance": 61824 }, + { "peer_id": "EiATuJlAk0T59YjAfRW/QU/r2TWsQ25HQs6ufKaKnbjsXg==", "token_balance": 61824 }, + { "peer_id": "EiCq8997oUfbEIBjahCrhxh7H31zVfdNkFkj+BDhhxaYQQ==", "token_balance": 61824 }, + { "peer_id": "EiAuGeUeyMApmQHbnLJNJsAKPshU1DGMZTVYEyVYNgoZyg==", "token_balance": 61824 }, + { "peer_id": "EiADsA5N8dze0U/Uo3KcuIlE4jYpVWNlfb9MkilY3kq7tw==", "token_balance": 61824 }, + { "peer_id": "EiB540GDYq60zLwqV7CKQn0+wN/Pdg67xnwZzFOhoh/2FA==", "token_balance": 61824 }, + { "peer_id": "EiAFs3PzeiYao4UnwO0+fBRE1sQfm3WJu3K0KD9wQyU4gA==", "token_balance": 61824 }, + { "peer_id": 
"EiBxWRcB+i8yYk6JsfjgWsiumDlx1d0rirWObGy+WrgSQw==", "token_balance": 61824 }, + { "peer_id": "EiC6Qc4m9Av+L6T3p2vtwzKhi9IQphqMxdjfjBjzeZAfNA==", "token_balance": 61824 }, + { "peer_id": "EiBWHq49B6I2twbr4ojxEMZ1zsPL3KWFVPz4zAs9pcNCMw==", "token_balance": 61824 }, + { "peer_id": "EiB++1x6xUHvHycpWOBnSyLQJoZI2phig50z+5M/pKAR4A==", "token_balance": 61824 }, + { "peer_id": "EiCtob6JlVawBXqHImckijCy8f3Ge7WNpRDAXCOlTRGuDg==", "token_balance": 61824 }, + { "peer_id": "EiB/Jdh3vbXBDgM3WppnUuct25thmE+eZDpv3yX491D2eg==", "token_balance": 61824 }, + { "peer_id": "EiBac8yDlhETCRXuEjqOBNwaR6VdAsN8Ysz3wd+Iw4WdPQ==", "token_balance": 61824 }, + { "peer_id": "EiDo/GPoHDZ30pnJ59vjSE0+SzcYIeR8lKknAQhzpuytJQ==", "token_balance": 61824 }, + { "peer_id": "EiDodcIFaqlsODm+b2HhJAFL/mAxmixJOVVbWWb+zbetSg==", "token_balance": 61824 }, + { "peer_id": "EiAxlPx8NRUo/Sz/EFfVkIhfa8W906wIz5KuDdpwT7lT+g==", "token_balance": 61824 }, + { "peer_id": "EiB8ev89g0spull/cBised4VxH7ny3cF6cDy9kqnLCvU2Q==", "token_balance": 61824 }, + { "peer_id": "EiC/nVmFcGmGT1CYY93X+PSTki2bLYhpG9y1U/8xxsz6tQ==", "token_balance": 61824 }, + { "peer_id": "EiCsdBs8fE2TDzQBj7hiTo8bJMmHyD3yt5H6D4CN0V99hg==", "token_balance": 61824 }, + { "peer_id": "EiA4yAXe96tr27C0bnyRu2nkBThd6f8s+5OH+WNbVx1Wjg==", "token_balance": 61824 }, + { "peer_id": "EiAcF3iqG1YDBQskroraFGPmgPkQeEToC0PeS0+/4Fxg8g==", "token_balance": 61824 }, + { "peer_id": "EiA8TCBqYy/w8Bkis0Lc9VEgcxntOIoxKFgoEEI9obruZg==", "token_balance": 61824 }, + { "peer_id": "EiCafNjTZ6ikwsNN6IHbT0d8V86sHGPnRsFAXe36xCwUUg==", "token_balance": 61824 }, + { "peer_id": "EiDQ9b0Td2Q61QD5PpXP5oa16wxJc9UDIlPJ4w64ZNvqYw==", "token_balance": 61824 }, + { "peer_id": "EiCHFUr7q08y7RpdkeL8mXLsnGA8M06r3kd9nBUTprDVtQ==", "token_balance": 61824 }, + { "peer_id": "EiDAX0wjwdc/SVHLDZN/LxXHHEN9ErUP9wbmHCU7ptrINA==", "token_balance": 61824 }, + { "peer_id": "EiAB8GYGS0bYBxwtxHRjfk1+NjCjo0qCe8zgKcvrwpGYkw==", "token_balance": 61824 }, + { "peer_id": 
"EiBBaL8rpEK4BzwxcWfBEq2OrEAoHTPymv2uwDYHqbfmRQ==", "token_balance": 61824 }, + { "peer_id": "EiA47GEI33UVHQEetz4Kao7xelFsR5nUMwDIamPxqHvxVA==", "token_balance": 61824 }, + { "peer_id": "EiB0ONqncERGkOWujrOSliQzQXTZBtdbypJ+Pmo8+5SqBA==", "token_balance": 61824 }, + { "peer_id": "EiCIOoud54jxO9jx390end9EIQaPZuqtKsY/F8WnFSmJfA==", "token_balance": 61824 }, + { "peer_id": "EiDnSgQZSMRH0M/Ezf5gypb/QNrbLkdcWC8GyCnpLVZwzg==", "token_balance": 61824 }, + { "peer_id": "EiAptsYmq/a5Icp9tR1AeMP3hxlpFZkOEq/k0BxMSW0wsw==", "token_balance": 61824 }, + { "peer_id": "EiCupmS1ciQafdBAooHOmPSZYGRIqWX1BVGmIz+xhDKN7A==", "token_balance": 61824 }, + { "peer_id": "EiCvpniatiKdJDnEoQeCrBJ5/Ojru5/uOjQQN/Vz1iCmzA==", "token_balance": 61824 }, + { "peer_id": "EiB+qjdMyYs5OIO/Sh//4EHHbfWEizaEGFFLc+w6A/NsYQ==", "token_balance": 61824 }, + { "peer_id": "EiCAK916o+OcyYV/7hCUiFVSfX3MCletSrzSHEMAgAvstQ==", "token_balance": 61824 }, + { "peer_id": "EiC+M2tOkhL/A40NYkgLxmI0/aSzC6A3gLc3wFF1EwQamw==", "token_balance": 61824 }, + { "peer_id": "EiA/akLerobM52O/J17SIMN2BKis+CPamCHdEVrEnCtcGw==", "token_balance": 33561 }, + { "peer_id": "EiA/iZh3KK5VxH9cc3iPp9A8NVZf97E+nbBiiSnvrx++Vg==", "token_balance": 33561 }, + { "peer_id": "EiA0740dd4FBfTVQ5B8XvGYVlyRR0JO/5y0NPkYDTlhx+w==", "token_balance": 33561 }, + { "peer_id": "EiA0HZzbMv0NIfY3mhHxbkuiOybc9eTgZ205Rvmt0LqEtg==", "token_balance": 33561 }, + { "peer_id": "EiA0TrhWj0QVgt6KeqEKuji+TYYGpMD3IuAaKbiTVRHpww==", "token_balance": 33561 }, + { "peer_id": "EiA2baZcR3a8El5GY/I+UPfTqG60Gac55TMZLccPKzFmDw==", "token_balance": 33561 }, + { "peer_id": "EiA5AGZ8I4EIf1nMgc/FBkI/4LjQcXv73DlD+fNVtOkq0Q==", "token_balance": 33561 }, + { "peer_id": "EiA6l1cD3nprm93KF9Y8KfPkogh2bqs7bx8rOZy23TP2kg==", "token_balance": 33561 }, + { "peer_id": "EiA7Pz+t1mGSvtRFLWarCW/okdx1CIOIIFG9+D13uWX6Jw==", "token_balance": 33561 }, + { "peer_id": "EiA9btruVye7Cpf0WiIda67ZKqgPL4YLWC+ABDY0rpFrbg==", "token_balance": 33561 }, + { "peer_id": 
"EiA9kSxfO2PvQAqguGxEOHX5tyrMBOSwwJQgZ/g/h97Dpg==", "token_balance": 33561 }, + { "peer_id": "EiA9vOx1NGXZZXUP906t9G8u1fFV2xH+o29kbPXvSnA5RA==", "token_balance": 33561 }, + { "peer_id": "EiAa3HCAVFV973qGJZEEg3lmgys5cKXH6G7HVx0f9DmBIA==", "token_balance": 33561 }, + { "peer_id": "EiAAhlHx5OR6cLBYnnpX5nXs4Y8k9RjPJrhzf3cHQb2xkg==", "token_balance": 33561 }, + { "peer_id": "EiAAmnX4CodVLPBwcvIEs1Oqcj6ApANp2xYPHdjea50jRw==", "token_balance": 33561 }, + { "peer_id": "EiAAPW1G0+5S+z8nfgFHySiKbsX9CT2+13kcLfRhYalkww==", "token_balance": 33561 }, + { "peer_id": "EiAaxxb4RlJ74mvFyGacvhRjJreBEKTykeYDwBsGmrabOw==", "token_balance": 33561 }, + { "peer_id": "EiAb6kSbftKbXZ8qumYjHn+wxCNY6RtC2wDX0/e1fPDEYg==", "token_balance": 33561 }, + { "peer_id": "EiAbbDQeJ71MCaOfNpqQIF4hJEfxJOolOf3Halfg7pORaw==", "token_balance": 33561 }, + { "peer_id": "EiABQRGzduIk5aWq5hkALpsJdx3MJBDv8fRHoUyyOqjomQ==", "token_balance": 33561 }, + { "peer_id": "EiAbtdBmWtDs1u6iUBlIt+Y5v6Sj8b7fPxfQ0mqisU0lMQ==", "token_balance": 33561 }, + { "peer_id": "EiAbwwA4wpLCEukt6vg4EiRiP/HI4OS+cxRi+WpdIDtBag==", "token_balance": 33561 }, + { "peer_id": "EiAc2s3k+s+MEeaoQ8ANiH28dSxWbbdLic4W9mMJ9M3PeQ==", "token_balance": 33561 }, + { "peer_id": "EiACz7owFX2P2m1qMsODZuLXRLd9fMc83WWf7cHFJMmqHw==", "token_balance": 33561 }, + { "peer_id": "EiADAfzHMYcg5UUGt0sIwVmJDkiiSM7kK/r7n2a3vh1Guw==", "token_balance": 33561 }, + { "peer_id": "EiAdOK/lq4G7SqVxEHZE9Jv6CWfHoK5a1vwfbm4O+knyGA==", "token_balance": 33561 }, + { "peer_id": "EiAdS9WLuPm1c5BU+Irw/Lc31UhYmddscB80aExVsyu9ZQ==", "token_balance": 33561 }, + { "peer_id": "EiAF8Kmvn7JtUwHLtej153fVHzwOpzA9lbL/ShtMRISbjw==", "token_balance": 33561 }, + { "peer_id": "EiAFb9b1iasILaveYLBq+dudCxXZrZ5mYWpqMTDPLP5SjQ==", "token_balance": 33561 }, + { "peer_id": "EiAFLa9OqY3DgnYxvAyUjrcFw/vbnTYXxz1VB28kUhrSdQ==", "token_balance": 33561 }, + { "peer_id": "EiAfqrHIIo2+Dr1aaQVcmeQ/Z0W9PlnCZoZQgRCHi0ue6Q==", "token_balance": 33561 }, + { "peer_id": 
"EiAfvR+3gMSl1rRb06UjW3r4BnqBmKScDq0SwcWnaHFK4w==", "token_balance": 33561 }, + { "peer_id": "EiAGi/THEGK8Xnj+T8j5Ts/E1Tr/gde8GDR4YkU8T9OYRw==", "token_balance": 33561 }, + { "peer_id": "EiAhUaHSoj3BKUUnvVg9abdHuq63Tm0qWIN1nX2xmyCV3Q==", "token_balance": 33561 }, + { "peer_id": "EiAHwIbKCjbm+BmxOEarNRsTtbh1H8/I9lWC6PunWpFGhw==", "token_balance": 33561 }, + { "peer_id": "EiAISnKrs5ASNGb0s7xnZjrrCCWevPDOsd4lHv57J5gW2g==", "token_balance": 33561 }, + { "peer_id": "EiAJdvz9obduntJXgi8Q6bHHrn6Vzhqtb2Qq0n8aNgukUQ==", "token_balance": 33561 }, + { "peer_id": "EiAJS3i+shd9vHDEYbyeMVlZS0RQ7e8kGIGdZ+H1V1+7Wg==", "token_balance": 33561 }, + { "peer_id": "EiAl6DYRNqRMtEKbPs4RQR0IBo1zqfM6QqVsORU5hXCIHA==", "token_balance": 33561 }, + { "peer_id": "EiAlE+Y2efamrIMiM8ykoVS373b9JfRyIUYs9MepXryPyQ==", "token_balance": 33561 }, + { "peer_id": "EiAmbSKaIkxs4INgf+P28hu1U8ZllazNtkhbZzlua6kb7Q==", "token_balance": 33561 }, + { "peer_id": "EiAMjLq1fHta0P+DZ8Lj1qiR6mT1TyWf/OEdHltrViFlpA==", "token_balance": 33561 }, + { "peer_id": "EiAmX3zdnKnKmdFHWEgYVvABA6QqCBCovXoqIF7kFmN3dg==", "token_balance": 33561 }, + { "peer_id": "EiAnTKU212B/Yb64J7v14ZSIioBqbPQXog6PvSGLOE/msA==", "token_balance": 33561 }, + { "peer_id": "EiAO+aV6/Czp74D2m6SI+x0wwtL7lAwIULzf2z+ObV0ydg==", "token_balance": 33561 }, + { "peer_id": "EiAobcBgASGBqo7kpINLnNmwv+v383h/MA2x/V58c7tQPA==", "token_balance": 33561 }, + { "peer_id": "EiAoO4D+jjdejWYA160gIaeJNpowr724X6/4cRKMqOYCJQ==", "token_balance": 33561 }, + { "peer_id": "EiAoyXeMEGhqze7b7QVVS7Tx1xfOEqtfi8LoVbwgs/WHrQ==", "token_balance": 33561 }, + { "peer_id": "EiAp0+gEXAkzJoOuBJMgndElivLXDR3smEwUTl0lYhz1RQ==", "token_balance": 33561 }, + { "peer_id": "EiAp60bHplMpPtpLEndCFc86K+wdGmej8jNK7x6L3/Zn8Q==", "token_balance": 33561 }, + { "peer_id": "EiAp6WcyGNk8adu4zRBt9tsEIErFtxdLB95icnuay68UVw==", "token_balance": 33561 }, + { "peer_id": "EiAq+eHz1CKq+c6gkJdz1Q6bIutfH8uyVjq5PRelTGn1TA==", "token_balance": 33561 }, + { "peer_id": 
"EiAqIi/Ohox19hK3Q57dwCjTGBcUSpprh8JS5CBggd/OAA==", "token_balance": 33561 }, + { "peer_id": "EiAQZ+QgJ5m59tuoSE1l/ieEwNlz6Po7223pYwmnDXGKIA==", "token_balance": 33561 }, + { "peer_id": "EiAR+iLt2TonvFdwsPc+CnhggVMyLJgsUTDv3uP5ViTFww==", "token_balance": 33561 }, + { "peer_id": "EiArIQvipFwtI6XERgNx0pqGjoZ1MX56hYRu8CdpulsZ8w==", "token_balance": 33561 }, + { "peer_id": "EiArK2bArvm4MYl7py3z2Xp3fBxe1ma0tEkuQR8zzPUskQ==", "token_balance": 33561 }, + { "peer_id": "EiAsfjCQGZLXjDJche5we8SDpT7++fH/NTlb8CjSYkMrjQ==", "token_balance": 33561 }, + { "peer_id": "EiCwv4d6oHK+EYLz3vx5bQe75I7Ps0LYrqNpSdw076vHiw==", "token_balance": 33561 }, + { "peer_id": "EiASJpXnYpnbh9KqA1ejjNrx548nvzREhhggyl+jnqkpng==", "token_balance": 33561 }, + { "peer_id": "EiAsydKWNZRjiSX2NMERO0JKVO8SSr9GqhviqQQG+MbS1g==", "token_balance": 33561 }, + { "peer_id": "EiATfXiLdhvhFAm3qzjqrjQc8ayLjCs0hFE5rCWXkTDsUg==", "token_balance": 33561 }, + { "peer_id": "EiAtMEKlsUwrSbsube1d6cJllBLsNq1j+iH56W1OnV7lmQ==", "token_balance": 33561 }, + { "peer_id": "EiATulgo3nHfYPQp0djiFlyhL8c9ZirZ4NSGTbTwI6dR4A==", "token_balance": 33561 }, + { "peer_id": "EiAVMJPE3xobrvDBSYrWYyFIESnB3aedgb1xQNhdvuYZrQ==", "token_balance": 33561 }, + { "peer_id": "EiAWtOu8NzFPJesBacHDG2E3WISh2LWNjuZAHnXd6Cvg3Q==", "token_balance": 33561 }, + { "peer_id": "EiAXFu4gdiCHtG/A6VZdtCywJ5Lplh6APHZfMSrHdjix0Q==", "token_balance": 33561 }, + { "peer_id": "EiAYIsoULoY6bIekM+Yqa5hJYuKhPSVy5+2bGUIaUkx1AA==", "token_balance": 33561 }, + { "peer_id": "EiAyj7Dl3rlh2z6B6gACIJv5IjLCm7gJSk/2D9H5VFOTUw==", "token_balance": 33561 }, + { "peer_id": "EiAZB+lRay451UHeCVksj03K0Zk4rGJo4efSL7mULpeh4A==", "token_balance": 33561 }, + { "peer_id": "EiAzguJnszJo+cNDo40ainUoR0lvSoPG4LT9LCXmUyMGJQ==", "token_balance": 33561 }, + { "peer_id": "EiB+0U1jaQW/33m05dxQ/S0OYnItEB1OMG+NwUyAH0PVxQ==", "token_balance": 33561 }, + { "peer_id": "EiB/8kobVeqeFljr2nMP0n5vDfod0S9WRzrsSFv9nkUpTg==", "token_balance": 33561 }, + { "peer_id": 
"EiB0bwtUxtSyFavpehB/DKEZSzjpg2P+l52DyTCGF88cxg==", "token_balance": 33561 }, + { "peer_id": "EiB0qeqkS1VR2xWI83HMr+ey9ZnMz/M1hQfsalpvrOAZeA==", "token_balance": 33561 }, + { "peer_id": "EiB1a1SKljSPHmso4+EfG1lMSEdFS0Wl84xh2gUNC96Vgw==", "token_balance": 33561 }, + { "peer_id": "EiB3mW1+NHsFaEWr8BS2TBf3okZdEs7LTwEBGLOS9fJUuA==", "token_balance": 33561 }, + { "peer_id": "EiB4JHgXoUSx4r/REx5wHJFk1YNlUR4zkVthOrF2G3qaIg==", "token_balance": 33561 }, + { "peer_id": "EiB4Nxn4n9eYwxSTMRT3PmhUDVQclQwjx6WHpRd5OUB8nA==", "token_balance": 33561 }, + { "peer_id": "EiB5JUEuZV7aXoTuF3DK2h+Rl6qhuq/pgQwh9p3ivjc+Dw==", "token_balance": 33561 }, + { "peer_id": "EiB5lMV8eGdxAq+nl3afjNspbuIOz0DbVcA7JAcXJpip6Q==", "token_balance": 33561 }, + { "peer_id": "EiB7mWD3Zu2E10et6LjGwn/i10hR/QzL7SR5JZ4h7AP4YQ==", "token_balance": 33561 }, + { "peer_id": "EiB7n66XHzp5X16+DiLjrIIoeliYqFUUzT2Nf57Haf1WMg==", "token_balance": 33561 }, + { "peer_id": "EiB8zyMv06OTwZn0HJB0yQh8YMKmaYr9IoT1hZlmT7blsA==", "token_balance": 33561 }, + { "peer_id": "EiBadGrAfFftqRwM/tSofGzpHbF1UEfgxcKCdPwP2pWcaw==", "token_balance": 33561 }, + { "peer_id": "EiBBWJR7DYIW2Ur6/3Lo8PtWruF40mkZsBd9pjhwvRmKBw==", "token_balance": 33561 }, + { "peer_id": "EiBC5VKF5YQfVSwPwKl2Ka+BnEquAYGT6PVBHzm1pzO2kA==", "token_balance": 33561 }, + { "peer_id": "EiBcmsS6EmEBO/YqVA9EtYDhMvSXiwTCbegWaAz7Ilggbg==", "token_balance": 33561 }, + { "peer_id": "EiBdAngPCp4mMyeT3/dp/tU5K8gRLkJbNRLpVBZVDOzrxQ==", "token_balance": 33561 }, + { "peer_id": "EiBDmeSPVzcneJHL3Lg4IG3FOY3hhhGQQHw/DBBDCiWDeA==", "token_balance": 33561 }, + { "peer_id": "EiBdnU3CGuhKS4i3CCOoXWUgd1pbP6BzssQyJ9uyPpsjoA==", "token_balance": 33561 }, + { "peer_id": "EiBDpkY2IAoq2X15Z1N7mzFsvvrkGK5Oo4Zptf64vWI30g==", "token_balance": 33561 }, + { "peer_id": "EiBDUFthmcCe+jlPcCUiTpfYdeL4mTseblHp49KH+HmX3g==", "token_balance": 33561 }, + { "peer_id": "EiBeh2Q4YiInxE24QXd7OCZWIufaP8WiIBVWHiMCN3HHTQ==", "token_balance": 33561 }, + { "peer_id": 
"EiBeKcob5o+L+Lc4xQcPYmayt3nmbDRiaXb+hcWAqeKYPA==", "token_balance": 33561 }, + { "peer_id": "EiBevqQOOZ+75Z3tQGS5UG9qHkUpg7ELZf6fD2t9PyqE/g==", "token_balance": 33561 }, + { "peer_id": "EiBeXZ7YqqTq+QKJig4FyCS+ozHPFJJ1yStJ0s8qnECP1g==", "token_balance": 33561 }, + { "peer_id": "EiBfcXJwnRrubRaLaNMbFCPEF4mIB1aeiMpEojbKDDiL1w==", "token_balance": 33561 }, + { "peer_id": "EiBfuLXHHGhdOu8zGgQF7QTHKuQga+BIizrhKplKvAMvRQ==", "token_balance": 33561 }, + { "peer_id": "EiBFXUH2G2KfAlodMJbXj2ox7QPw1jz5l0mMwICRO5Jo9g==", "token_balance": 33561 }, + { "peer_id": "EiBG4ll7Xd5GnZwnyoRD0CPPHwKI0XaVbXaDuBCwbuxetw==", "token_balance": 33561 }, + { "peer_id": "EiBGJELZMMnglR3l4CQF71ItkOzH042NnJ3Ms51Y4AJsTQ==", "token_balance": 33561 }, + { "peer_id": "EiBgpfYoiSVKy1WqQA0LNsRhHA6F1mkp4QvHWI2MU/UeZA==", "token_balance": 33561 }, + { "peer_id": "EiBgY6eN0MxGT3zLN7KzYnbIL2Sr5piDLyxrkbuPimeGAg==", "token_balance": 33561 }, + { "peer_id": "EiBh52+5wTVp60gTVCmREPZAiThINbKMs1NuT7fIOr2JnQ==", "token_balance": 33561 }, + { "peer_id": "EiBHeY4bA3u2JYRmjMoVIHJLZvK9QaLwsw1pbuD/kn8mnA==", "token_balance": 33561 }, + { "peer_id": "EiBHF2D1YFAMI/hjJAJ4B/y+PBhNafGEAxjccfJPishm+g==", "token_balance": 33561 }, + { "peer_id": "EiBhgKCiuKGNnM2Y1vwRLuIxAPR52monvCuG+qm3pv7pSQ==", "token_balance": 33561 }, + { "peer_id": "EiBHM3LPhhIhXX8oj7T1hJN7fBaKw30cn5zUMitxb8U6ZQ==", "token_balance": 33561 }, + { "peer_id": "EiBhM9eraY/qGeTnW+ZajQWMoOu3gBQ2z9fT0vM41gZwHQ==", "token_balance": 33561 }, + { "peer_id": "EiBhzc7WxJXbPS1IqD2KrtyOGq55aA1vSlSFaang1YTudQ==", "token_balance": 33561 }, + { "peer_id": "EiBi6d1AB30oohOQKpxTPuQcsCf9jAP+8es/m8xql+fxkg==", "token_balance": 33561 }, + { "peer_id": "EiBigEs+ON2j/hMhXl9ctg1oCyZiMIx7wTtYylx0Ou8wKA==", "token_balance": 33561 }, + { "peer_id": "EiBiMcu3TQx9OFyseXMW7Kta5qU9d6j1kfKJJHShymAkTw==", "token_balance": 33561 }, + { "peer_id": "EiBjNoymiVeNXeK8blTJrrpQEv0Pow9wNc1T5jufTpZzbw==", "token_balance": 33561 }, + { "peer_id": 
"EiBKaK+NyybyOJ8/ElLl4f833O0LuI9M7TPBoomWMSwOfQ==", "token_balance": 33561 }, + { "peer_id": "EiBm0As/9wBQb/u9j6++M3P/5Dh52/px0cLLMzgacwHF1g==", "token_balance": 33561 }, + { "peer_id": "EiBmkzxsNd2njEnsB2C9ZF6hSnkiFWgBV8jkb6zrboKR7A==", "token_balance": 33561 }, + { "peer_id": "EiBMXMuMJgVK9Reb9WzXszjSWIxWVNPOR7/wjmLWQuqgPw==", "token_balance": 33561 }, + { "peer_id": "EiBNK/q5mjOOGQonNkaOC/dvUOLMdQuT1FrvahlBzLNBZA==", "token_balance": 33561 }, + { "peer_id": "EiBNqPROwDeTq/wdw/wG+p77gdRxpQxYznJfDyy4PuPZ0w==", "token_balance": 33561 }, + { "peer_id": "EiBoyrGstUmmW796+68m315oYWHzkySxw6KQZhw/2QxjoA==", "token_balance": 33561 }, + { "peer_id": "EiBpCiEwzXp/IwAqBAm0aXquk+r38gkeX2oSwExq5FIkQg==", "token_balance": 33561 }, + { "peer_id": "EiBPilMWfKdMaHK3QUFo8te7MsJ28a/J99BdAwxTKf0tgQ==", "token_balance": 33561 }, + { "peer_id": "EiBpj2rICDPCIRRQ4X67hSXq/z1XOyi+VCztwwexkS3jDw==", "token_balance": 33561 }, + { "peer_id": "EiBPTmUxdd/X1LidP2a5919j9sNTJwmT559heKGcWUX+EA==", "token_balance": 33561 }, + { "peer_id": "EiBQ2y/H0VkSU5OvomqL47R6I+TsHpWouU0Sw4sXiyIUXA==", "token_balance": 33561 }, + { "peer_id": "EiBR9t0JKdnTBtp3FfjIwOQkNuTzEOwrDGBR67ir6O2Drw==", "token_balance": 33561 }, + { "peer_id": "EiBr9VmoOm+4IkBb6w1/Cs5YiECgcKSIFg15mWO3PXtxmw==", "token_balance": 33561 }, + { "peer_id": "EiBrA2jFA/vMnltH2CGgXtTHh6jkIsHU6PeiCH8el2O4tg==", "token_balance": 33561 }, + { "peer_id": "EiBRrIgSQDM/6Z33kSuomTRnaI2bhV/e9TYIR02hrBpKaQ==", "token_balance": 33561 }, + { "peer_id": "EiBrsy7y859RkDavqTzB7xu7d+812SgziYZp310Slx5aCg==", "token_balance": 33561 }, + { "peer_id": "EiBsmsQAsfN1HrPI1gwat4lSiV7gY7wgVj9Bv7Z6s4RQLQ==", "token_balance": 33561 }, + { "peer_id": "EiBsOLRx8jfcINZK/ssxgcwyVTrA4Qblw2j3glZ13bnOtA==", "token_balance": 33561 }, + { "peer_id": "EiBT8DnUHZSGwPCkT61r7dIrX30RXJ3ESnDZBTaRwR50kg==", "token_balance": 33561 }, + { "peer_id": "EiBTBk/X27/DClwVSAWjAFQV5OvcBC+sH8slItb84a3Kjg==", "token_balance": 33561 }, + { "peer_id": 
"EiBTeMZAt5ic7noc06AEJY8l6qScDHGtdNKWxH33mqcpbQ==", "token_balance": 33561 }, + { "peer_id": "EiBtGIu0h+64wjQVG4kQoxclXt6OCnG9JIuvXutYsPFJFw==", "token_balance": 33561 }, + { "peer_id": "EiBtYfZnWJu/Ar1oDXp0FM68VQDvBalJ9Cbq0E6RX7HOXQ==", "token_balance": 33561 }, + { "peer_id": "EiBu/WWMQHxaLsX1XyqYy25uAIE27G6iSKvM5itJ0rs9Kw==", "token_balance": 33561 }, + { "peer_id": "EiBUBgV0XJHRn3PmWtkISus5mlE5rB9vcUi51ZoYBNUm6Q==", "token_balance": 33561 }, + { "peer_id": "EiBUrVQqgIkfw+8jibFTiIRNbH7mSTtsW8HzEZmIeoZrUQ==", "token_balance": 33561 }, + { "peer_id": "EiBus/DIfBSaJGdwgK5Ooue1+jZ0ZVd8/9LtK1HNCqgx7w==", "token_balance": 33561 }, + { "peer_id": "EiBUyrjgnJtVbvfHI5kZTGiKvgvXuXWIGasC/nHWTYy0HQ==", "token_balance": 33561 }, + { "peer_id": "EiBVCf8/gq2S9gGLAKDAJF81I7uYcG4PfT3opAfBviSIIA==", "token_balance": 33561 }, + { "peer_id": "EiBWVKy5Ft92suGdvj4uynD025iyK1bHQZ+S2fKGDMdLkw==", "token_balance": 33561 }, + { "peer_id": "EiBx3EdbtPNcNViV6MR6LOq3lPJvwoArGzbpzxYzAzMG1g==", "token_balance": 33561 }, + { "peer_id": "EiBXDIUOsIXCVQHS+sW7B+Yg+1OudVthocHU0LTcDxZwRw==", "token_balance": 33561 }, + { "peer_id": "EiBxELcqLhVBUY7ZZyW/JNyTxdXzgYkzh51S4C8FBSl7Sw==", "token_balance": 33561 }, + { "peer_id": "EiBxgKPONqQVlIk5VqM4KVDeM6KSrbegG66Ncn4ySAXW5g==", "token_balance": 33561 }, + { "peer_id": "EiBXHNMT5mdoPQY2JfrEsu4amVv6Eggx2AEXrwXx/UHQ6g==", "token_balance": 33561 }, + { "peer_id": "EiBxlyWYOKw3ZQgyOxbDVINASSXczl4lAvccN33rO43JmQ==", "token_balance": 33561 }, + { "peer_id": "EiBXqH9N2MPXhqVE/5T/eCC/XYog8kiO5swlKMB4iMwrtQ==", "token_balance": 33561 }, + { "peer_id": "EiBXUarTiitJ2PEwqzo+l6AEld0m/W5qKw6pfFGZDSHWdg==", "token_balance": 33561 }, + { "peer_id": "EiBXXLcXOI3lI7+Yz3YpiAkeErANUw3zeVkMOHUM0DQvEw==", "token_balance": 33561 }, + { "peer_id": "EiByHO6dP93FY3FsuPVJ98BgFlXs03SaFk3w16GDKjJyuw==", "token_balance": 33561 }, + { "peer_id": "EiBypVSidPouWbIlwiVEgWDYhl0mXf870eIP/kBiCQo8oQ==", "token_balance": 33561 }, + { "peer_id": 
"EiBZzUZGBxNnfPFELzIy//rJmU0FpRk7Mu2aZv60wDGFBQ==", "token_balance": 33561 }, + { "peer_id": "EiC0WRkyHHxUQML+030JsEKoKMUzf0JbXGtl3oxW6K1kdw==", "token_balance": 33561 }, + { "peer_id": "EiC1l7XzOaZul+EnpRlNqicYIEQ3cIwe5FvAert0qvOxUA==", "token_balance": 33561 }, + { "peer_id": "EiC1RVPdNtjwIEH3ZON3OrVrFvOY1acugPS0KKpGO5ZVmQ==", "token_balance": 33561 }, + { "peer_id": "EiC1ST/PIv2uheK9Zt6HWU+PByWA7PaV2SForEClUDHEeQ==", "token_balance": 33561 }, + { "peer_id": "EiC2obFK5cPgQe/IuttjHqHYni8GsYsblYGTkQ4HvY6LZA==", "token_balance": 33561 }, + { "peer_id": "EiC484zYPCYAOQTc7I7BrpO+5plVVeW7HTjvmVqhQw6BHA==", "token_balance": 33561 }, + { "peer_id": "EiC4LgRgN6I8jvTES08ggVznv6CDpYQF7quw4TYVnsml4Q==", "token_balance": 33561 }, + { "peer_id": "EiC68ul1+gDOyG93CVMf3+TIxIRKodKmc/e5Yp4wZqJ+Mg==", "token_balance": 33561 }, + { "peer_id": "EiC7m7Q+geHtVjVqRKYTEqXcyru2IvSNyQXiLbwIVWVw0w==", "token_balance": 33561 }, + { "peer_id": "EiC8ZP9nLpSPeT7nTBYazd9V3RaDWoA4Ekziz0v+gHYRuQ==", "token_balance": 33561 }, + { "peer_id": "EiCagErmVTA8ytscM1QaWqFWRDC7mzQfBeUo3IqdCUnYsQ==", "token_balance": 33561 }, + { "peer_id": "EiCb34w0U9JqmsbKeYoJmhabt4FnNeydaq6Oqlvq1w5/fQ==", "token_balance": 33561 }, + { "peer_id": "EiCb9d8qxJ0iYDgNwO0gx8UIiJdikiIVUhcQrVsK1zGzCQ==", "token_balance": 33561 }, + { "peer_id": "EiCBHXr9Zls4Ov6VNntiy2gEKvb1mNjS2kO7zUhMRSmieQ==", "token_balance": 33561 }, + { "peer_id": "EiCE5oe7nQ7JpnNtSLXCMiHq/2SDuNY/FlIBdpFawDACCA==", "token_balance": 33561 }, + { "peer_id": "EiCEWTPV09cZrP87m0EdX2ZBNF3egeqEiwAMYz2ZYrgjhQ==", "token_balance": 33561 }, + { "peer_id": "EiCeXjEqt4hs/vmdpMsDVtlgnniNDzp9A/8z1O6PKDEukA==", "token_balance": 33561 }, + { "peer_id": "EiCfKRYgC0nEsbmFGY1nI3RGo9RCdzMZRLfQbMys9OAMsg==", "token_balance": 33561 }, + { "peer_id": "EiCFWus17WYdzOD7dcLAZlcPlfKDUy9QvRRYdg/YJBn48A==", "token_balance": 33561 }, + { "peer_id": "EiCg81scwxZyLkhMSgclSIVx6VKeoXMga4i3iMaKCMjUeg==", "token_balance": 33561 }, + { "peer_id": 
"EiCgOhJzjzGYHpwiqntXrfhuMnU1V6/MTZrAFjGpX+ydWA==", "token_balance": 33561 }, + { "peer_id": "EiCh0pcSGxXjRXf7OXcGg9wQGqQA/4DyrEh30XnEF6H4fg==", "token_balance": 33561 }, + { "peer_id": "EiCiC9dRImqGhqKABMUt8Hl81OpQwoxnJ7m0UASpH4lItg==", "token_balance": 33561 }, + { "peer_id": "EiCIm4dpeLlI+qLLHZ2Gsw6QKTM7JcxVobtsee/TTcWODw==", "token_balance": 33561 }, + { "peer_id": "EiCinhN5WURUo7iQv7zb7DYnzsMzij0sM15/shTTQ287DA==", "token_balance": 33561 }, + { "peer_id": "EiCiwC3yv3ePIt70lxe9PtWNhrwr7mXXBMJV+Xi6k/FKmA==", "token_balance": 33561 }, + { "peer_id": "EiCJ0rBmh6l4d2ygoaTxnbgqEX+CG6xdxKTa0MDnx2OC+g==", "token_balance": 33561 }, + { "peer_id": "EiCjCuorEu8LGrdJQyTzSsOPoKWbU1sGMTKENMDBZEkSWQ==", "token_balance": 33561 }, + { "peer_id": "EiCjL1Ek53nbh/WinVrH+kzmXRWOsthbobrTCzc29Jo7xQ==", "token_balance": 33561 }, + { "peer_id": "EiCjtvzkM8CqHvUHiQSzVjz/gSeg+2705f4/bZphUkf/NQ==", "token_balance": 33561 }, + { "peer_id": "EiCk5x+DdW2U38dgxiiQnSP/KB4rxn04lvdPJuubVrQdjw==", "token_balance": 33561 }, + { "peer_id": "EiCk7DYBnTkEz/UVj+X6AAZ66zrtDHPWQA8X2pah2hoccg==", "token_balance": 33561 }, + { "peer_id": "EiCl0cat1aSXx4nGYdOKNnvRFZQAYcqqDgtrh+g32j0uqw==", "token_balance": 33561 }, + { "peer_id": "EiClic3+AUmTDVqH8juQxGu1w9FEOf76bEBJW3l6J4S6cg==", "token_balance": 33561 }, + { "peer_id": "EiClV7TNuDYNywNzLpIxPTYsT9Gy2xlvNlBIIwgSMDYsbg==", "token_balance": 33561 }, + { "peer_id": "EiCNmcgNpLGN9Jwjyf/q7eCyDSA9U/Ba7RK7BX0kc/iOQw==", "token_balance": 33561 }, + { "peer_id": "EiCO9rhg1Rf8aFhOdBBxhXb4hOcXalPhFBJN1752/X+hdg==", "token_balance": 33561 }, + { "peer_id": "EiCOlhap9AJPpU8Bo2ZqhalrLyCtXfA9ta1kfPJ4QH4siw==", "token_balance": 33561 }, + { "peer_id": "EiCOnJABC9Nas3tONmczSklaxqkZDOaC+Pf4rEt4ufpu1A==", "token_balance": 33561 }, + { "peer_id": "EiCPE6wq/EFjgy8XntAioIOZQBtyBMuoJNdVmYRr25CIZQ==", "token_balance": 33561 }, + { "peer_id": "EiCq4Qqp06UxkbjKXHytu0yjUZLC1vXmYtNvydr3PMFj6w==", "token_balance": 33561 }, + { "peer_id": 
"EiCQDcloEnVyFTU04lxDTIP2XlJvlan0T8MCw/chwfZrSA==", "token_balance": 33561 }, + { "peer_id": "EiCqJwAtWrf4FJfj9GRsPSVaJ0p7OX5onfgrSURWeH8xIQ==", "token_balance": 33561 }, + { "peer_id": "EiCr1+4YvnxvquEKEVr8aZlkPdv9xPO1tvb5j/5EjjXnLQ==", "token_balance": 33561 }, + { "peer_id": "EiCRgIfoKLobbbSet+CIcoxpwmRGfZQmtexQGAL/fqParw==", "token_balance": 33561 }, + { "peer_id": "EiCSAdTRYWnxcQGQ9czd8RVql4W7unD+kCYz9/6WSjtmhw==", "token_balance": 33561 }, + { "peer_id": "EiCT+3Yt9UCVkwSbFxeJVvNUHgeqVVVO5QWeLnIGo9F7wA==", "token_balance": 33561 }, + { "peer_id": "EiCTa4639f/X5q+D74mSuC3QleLO5PMpCedo6bSnvhPveg==", "token_balance": 33561 }, + { "peer_id": "EiCTehwHrsswcYCnXB1PtjLhspaKVqU8rcCaD7crN0JKXQ==", "token_balance": 33561 }, + { "peer_id": "EiCtn8WO1aBlmjhXe+H1enGXJ4qq7owbchsgbClCRe6zEw==", "token_balance": 33561 }, + { "peer_id": "EiCV4gsPbqhe3dbMmrM9dY8rQRl0nSyzbD8bdFv7JVbB/Q==", "token_balance": 33561 }, + { "peer_id": "EiCvgCZr6YyWRGqiqEXv5ZABVwGyptZomlFkZINV68Uo5w==", "token_balance": 33561 }, + { "peer_id": "EiCw/Q8lTBPKAVWIoTdHzLTnngu/wZLlLnGONUAavDUdQQ==", "token_balance": 33561 }, + { "peer_id": "EiCWGm9hyulhgWc9FH9ejedN5kK0FlXCIYMV4JaNRh4o4g==", "token_balance": 33561 }, + { "peer_id": "EiCXGTygpITlerQe56QbBPQvH91IjalW5EM+iRKspD1NKA==", "token_balance": 33561 }, + { "peer_id": "EiCYE8yfBKAhsLg3c+nNsgZfuPkpqesC2MzEA7cGagZO0Q==", "token_balance": 33561 }, + { "peer_id": "EiCZ+QsNHYSeupXEwwpqZTcbmiVhs83YpXnqpAiZZN0QEg==", "token_balance": 33561 }, + { "peer_id": "EiD/WU32JObv7p8TukO4W1HTpbWFHPKMinRzt9XnHC3tJQ==", "token_balance": 33561 }, + { "peer_id": "EiD0PxxnNwV8tUqsqEL8u+tbFOO2/DgTam+E1iJyymECHA==", "token_balance": 33561 }, + { "peer_id": "EiD27CTm1hX5sfjJAp5aZQ3ZGQMJoKYjS6Ew08SLbWEroQ==", "token_balance": 33561 }, + { "peer_id": "EiD29GMnzfaDVuX0yzSkhdVKcUdklLReD0OO0J4rkA+ydA==", "token_balance": 33561 }, + { "peer_id": "EiD29YiZZBZnDL8O32tMrG7lfGmBEI1RJLVCposmEXrcNA==", "token_balance": 33561 }, + { "peer_id": 
"EiD3bLzoV0vdqAVD0B/lTSbKlV2qitlQuWf9TrFolf6kBg==", "token_balance": 33561 }, + { "peer_id": "EiD4EmSxter0Gv6sGLKSPgKQv3/e49vt9x5xgdOqyWFwWg==", "token_balance": 33561 }, + { "peer_id": "EiD5Bn9Ln6lI1zklAyIsM2+KR/fwyKL/rwV4YY3DuqjUew==", "token_balance": 33561 }, + { "peer_id": "EiD6/Q3HhVJWC9tNL+2kTroT/vRsNFv8nIKuAYRDZXStoA==", "token_balance": 33561 }, + { "peer_id": "EiD60ZIpO3Lzd+mmDUqxWOxD2aJCuENTdDudWcyrCehOzQ==", "token_balance": 33561 }, + { "peer_id": "EiD66rvM4SOZimHZwjosNyaL966f6109BLPuhkxGOYl1Lw==", "token_balance": 33561 }, + { "peer_id": "EiD66WAy4F6v0gXPFAZq1DIbejNT/PiHtO9cZmSgzAba6A==", "token_balance": 33561 }, + { "peer_id": "EiD7HhMlv0I+DO7LgaCXldw77czuyU3MWDX0E73e4DHQMw==", "token_balance": 33561 }, + { "peer_id": "EiD7KNmH7BJGvmXs5U2ULcLfEvjckhuzZyAgfQ4iLBsIOg==", "token_balance": 33561 }, + { "peer_id": "EiD7YoR7BoOlrDkV6SDbSZ5R1XxhAYpTXf9ZZkc0JywXNQ==", "token_balance": 33561 }, + { "peer_id": "EiD9W4Gx3C2Ir0lV2G22pbOX6w/pl5U5Xj8/QqwpmypuwQ==", "token_balance": 33561 }, + { "peer_id": "EiDA6nzZspLd9A2WXA86Wos45y3/6hauVJMJpWVVRhKMpQ==", "token_balance": 33561 }, + { "peer_id": "EiDAbSr47sbAy8IWtA7l43iYx8KcsXC/4UVmf9IYFYZXXA==", "token_balance": 33561 }, + { "peer_id": "EiDAKQjpZjFKd+e2xpv+zWxXlJtzRRJVIQln8Q/sJeV2FA==", "token_balance": 33561 }, + { "peer_id": "EiDB0azciKU6nhYyVmQnhdiESyemEKSBAyC+0v1Lxk6vXQ==", "token_balance": 33561 }, + { "peer_id": "EiDBeD7AHN9N8lDgqekN5xMCgu6AG26BkizF+3KlK8//SQ==", "token_balance": 33561 }, + { "peer_id": "EiDbPN8P4wl9UftHptd6nID1mHVaUn1pjH9RaUbULQfM1g==", "token_balance": 33561 }, + { "peer_id": "EiDC3Qg9wxNXBNXUg4etRoXcDZ77zs4sha2EjDFGjkBs3A==", "token_balance": 33561 }, + { "peer_id": "EiDc4eGssM/nIMWdlRQYFN+I+FKkTiqsyLeAmMI7u+DqGQ==", "token_balance": 33561 }, + { "peer_id": "EiDdhCrFlFXziz3Ks2EP4aGA+baZH+aSq88HFF/tHLegDg==", "token_balance": 33561 }, + { "peer_id": "EiDdJ2VBhCLCdxy5W0uRM7DQu1N+u34uS2mmzhx3Gi98JA==", "token_balance": 33561 }, + { "peer_id": 
"EiDe9W1f6gSofLBD292UzOL/a7y5ve9nro04d7P1TT+xmw==", "token_balance": 33561 }, + { "peer_id": "EiDeqYbTSJxcKTK8OVrTV5Zfh9hfEV6yzL+DJ35qyPUyNg==", "token_balance": 33561 }, + { "peer_id": "EiDfIbOJWhFFdAtd7Ypr2JbiN/Fdv0+7O9Yp1ceLezOLrw==", "token_balance": 33561 }, + { "peer_id": "EiDGdp6wRE2Vr3H6jMTc9hw7vsW0ekIjhySU+oxLHHIDcw==", "token_balance": 33561 }, + { "peer_id": "EiDGFcew84luMddwYdBRUrau8wUFqBfVmXXkeCa/yzT+jg==", "token_balance": 33561 }, + { "peer_id": "EiDgIQPTwxPNU+sxzBYH852WdNaWzCP8N8Q4HzY3sf88VA==", "token_balance": 33561 }, + { "peer_id": "EiDglUe1WsMkRs1bf1h8Ul+wEOnLUmih9S6FeE6aDWTGhw==", "token_balance": 33561 }, + { "peer_id": "EiDGu0ifK+mUbzKnHimQhDzO5V5xiF9pWry/gvIySPE8ow==", "token_balance": 33561 }, + { "peer_id": "EiDHRYbkf4lnWOKiySMj+Sz0bN0R7gckauP9rI277lYJNw==", "token_balance": 33561 }, + { "peer_id": "EiDJux9BM5SomowX1NTnYFBFSPOokE41C6DJ9Is864CvKw==", "token_balance": 33561 }, + { "peer_id": "EiDK1hOcfGrOh5UO4Pw+yYDv2MRs7aQoj+Znatddm1zN7Q==", "token_balance": 33561 }, + { "peer_id": "EiDK4CxNrEUn2moWMgEqIK/XqQozt4EJbS34ffrs7P/ItQ==", "token_balance": 33561 }, + { "peer_id": "EiDLAOrrGUnhrsjHAZMdqEmAlMdhyQw821xbs5c3t88EiA==", "token_balance": 33561 }, + { "peer_id": "EiDmgHCG4xZhd2Ks/t07nFDXARlzV/+N3yexBUlNTmqnrg==", "token_balance": 33561 }, + { "peer_id": "EiDMixUJXTtGcRyxwE+uM5UlfsxED4NuI4LVI95oxAm4BQ==", "token_balance": 33561 }, + { "peer_id": "EiDMY7IXiZB3issJiG8wX8Lb9Iku9i3fVV6dqqVFjBvz4Q==", "token_balance": 33561 }, + { "peer_id": "EiDn2Q10/Hym961SYwzfVmbp1uEfueMmiqZk4mlNOzuvbA==", "token_balance": 33561 }, + { "peer_id": "EiDnYFVX8Rr3ctyH9Nz0rlDXfwc+U8WQKeXoK7k43rP0UQ==", "token_balance": 33561 }, + { "peer_id": "EiDOn1yXt4t1bOLMBM+Lh6Pm4JBpKP6K3lV2oPMtEbmjkg==", "token_balance": 33561 }, + { "peer_id": "EiDOPuCY+XHn+deTf7FScuJSPK7uRN7bLekSMGSY0SXgiw==", "token_balance": 33561 }, + { "peer_id": "EiDoQskpaCTOEuP1QuawoVdewVHlc24Nxv1IHmXcjvbXgA==", "token_balance": 33561 }, + { "peer_id": 
"EiDOsXWYGeFSQCNs8jzysDXMpwhRCnfTiRdM7vFxw2tOPQ==", "token_balance": 33561 }, + { "peer_id": "EiDpbxb5eRcbqsvgLSLcTphknlUUmZg7E9sDz1wftKxCUw==", "token_balance": 33561 }, + { "peer_id": "EiDPiFiPckjEifgudwkmcfVueFX//ZS4t8LTq69N4kKIrQ==", "token_balance": 33561 }, + { "peer_id": "EiDpk+sQ42fIHEPCXjKswQVnq7XVQ/ySn3kfCg6tDUzBfA==", "token_balance": 33561 }, + { "peer_id": "EiDQKKqh6xsXAGdT9P29p+kcVobboAnwZt77bnLiuRal/Q==", "token_balance": 33561 }, + { "peer_id": "EiDRoqSJBW18n+qGu1RBMDsL2Sglkz3YGteYixIpRORvpw==", "token_balance": 33561 }, + { "peer_id": "EiDrVC3vX1ahb38Q5GJz9u7osC4EimbyhEFd6x23AijPqQ==", "token_balance": 33561 }, + { "peer_id": "EiDsI9lx0jiJ5L9Q6CWsnhnZfP0kCQ7Pvq6g7WdcQhyhew==", "token_balance": 33561 }, + { "peer_id": "EiDsSAzu5rDFHenTqj1GdXQw+mVUMDoMvWLeQKdK546F5A==", "token_balance": 33561 }, + { "peer_id": "EiDT/3ugdOvxgb4vg6x5CGFMf4js65U55aWKCMwK14AJGw==", "token_balance": 33561 }, + { "peer_id": "EiDtAtMfou/MhHrQNuCAKY/nExJGhUqPHy6q4s+kGzkTbA==", "token_balance": 33561 }, + { "peer_id": "EiDTxgd6YQr8q720CBjgBVHn8RH7cm8V4vp/TcPZznBR8g==", "token_balance": 33561 }, + { "peer_id": "EiDUUpBTQqf4O9avsTWT04PGDAwSjluLCXYrlZrmcnIAzg==", "token_balance": 33561 }, + { "peer_id": "EiDUxwzOEhSqfmr1I+gKAjOHOiRIzT5Lxi6JZdTEbn+9ZA==", "token_balance": 33561 }, + { "peer_id": "EiDvKwaMeYr3wtknCAbVV7S6lbEVbLKxKsdydZr7vAJLRw==", "token_balance": 33561 }, + { "peer_id": "EiDvlFO6IxfAJGQFHQQ1KNlFRFgGdnVCXf3QO42fz0O7NQ==", "token_balance": 33561 }, + { "peer_id": "EiDVRXtI2+zXpUC0tNe7h284vUwSN7xUde43hxUx5KiD1g==", "token_balance": 33561 }, + { "peer_id": "EiDwQWP9U75DFY1t1wRFbp/U+cj9l/UaTLY+MuGBYsLLDA==", "token_balance": 33561 }, + { "peer_id": "EiDX/W0a43KqHcx3jmbzO8SQxNHUi6+j9dq7F3Nzxy3vCA==", "token_balance": 33561 }, + { "peer_id": "EiDyedbXzqeXwwQyirQTKae/E5WQNhzKYf56KRP7xFyysA==", "token_balance": 33561 }, + { "peer_id": "EiDYOZL8lMNDKPiV3i86wbTjQitdNHW2VW0cy3gFJ0axgQ==", "token_balance": 33561 }, + { "peer_id": 
"EiDYt8fxGG/ULy6eX/t2kZxp8f16taumsvEX55JSgTJtwQ==", "token_balance": 33561 }, + { "peer_id": "EiDz/eTYn8bsQW0nD6iDSUAVd/ZQuUOy3CQewg2IQQx3vw==", "token_balance": 33561 }, + { "peer_id": "EiDZ06PQVTfdNqed8CeAK1Iv2u2UmlWqEo83OYfrblE/6w==", "token_balance": 33561 }, + { "peer_id": "EiDz0lmuI58BkBDLD744KQ04rbHFoG9mynSN8zzmQOjypA==", "token_balance": 33561 }, + { "peer_id": "EiDZGlv2zmXBgSz5r0a0GgUzyhcgfnkRHn3uj656cQL2KQ==", "token_balance": 33561 }, + { "peer_id": "EiDZnFHAENyupayig0aaBuEpD9LI/l/MarlPFvOXx1uYBA==", "token_balance": 33561 }, + { "peer_id": "EiCCL8IIBky56tp+bZIx0NHono36QgPVhoVFnbXXimZpjg==", "token_balance": 33561 }, + { "peer_id": "EiB3vW1YNxiqw2q6W0Qszof/Jv3BLvJvTNKRAzvlirfm0A==", "token_balance": 33561 }, + { "peer_id": "EiDiOz2nMrxqGBqsck8CcamwMsnZY8NlhXbl0IoqX0UZ4A==", "token_balance": 33561 }, + { "peer_id": "EiCJQgxkDhPQ7lBR9HYet5wKumBPl6YKCLThRcEo1BRPvg==", "token_balance": 33561 }, + { "peer_id": "EiAvwrixOsPLPWD06V7YvZbgHYFoCj8aTX/4V6RNIWSeFQ==", "token_balance": 33561 }, + { "peer_id": "EiA0uBCHhPvWzprp9sUCHiGBsR8cR6FGeiY2EP0w0yfKDg==", "token_balance": 33561 }, + { "peer_id": "EiBGXBHH7axfRHYhZIx03raLDe2kBgznSYRTNRbv0nrZqg==", "token_balance": 33561 }, + { "peer_id": "EiBZYHxq3m+Hmx6MxQD5r2NPjkHQ6IJXvEKEz6GSbsNsrw==", "token_balance": 33561 }, + { "peer_id": "EiAm45a2yURP5fS/qhslXzx/jbHyoJ+aYlh9JB4mZPKenA==", "token_balance": 33561 }, + { "peer_id": "EiC5wYjJ8IIGWXiNSRLJ55er/hUv++PoN6i9DgXq7aq+wg==", "token_balance": 33561 }, + { "peer_id": "EiA+p5KYXfEdSFVw9BhsgGQQVX9XyxOeRpmKI8guCf0Qwg==", "token_balance": 14131 }, + { "peer_id": "EiA2LTxEOq+aY3oXqW6II4rxMs++I3m6b1F/385DazbhAQ==", "token_balance": 14131 }, + { "peer_id": "EiA3iH1DhRv2Yq1foZJvJrSkBxnJpyBHgPWOKOzrSXlywA==", "token_balance": 14131 }, + { "peer_id": "EiA79Q7Re/ov7XBzR0oxeJgrPKzWlTt0/2deGNrvLSowbg==", "token_balance": 14131 }, + { "peer_id": "EiA7C6zF40t7xoAjwePQjrCfIdh1t+1hmU510igs0hV6kw==", "token_balance": 14131 }, + { "peer_id": 
"EiA8pL87yzOUyMDx1FzIM4muQv6Jpaa2CQ3PdGEFSdNpGw==", "token_balance": 14131 }, + { "peer_id": "EiAACXXXzU9DAQrN4gaGVgl4b216C/AH9hZxNv7eyhjazA==", "token_balance": 14131 }, + { "peer_id": "EiAahlQqT3/UlFvc1p+ww8S8S7gbK14Dyde50sK8hTINIw==", "token_balance": 14131 }, + { "peer_id": "EiAbO5mcT16RxhiZOozqX6rM3jVswTWBTOkq3QbQKgN8Yw==", "token_balance": 14131 }, + { "peer_id": "EiAcWp157pz7+CWI3VR2kWexFjndqceZkDJmaEnRYK2Eyg==", "token_balance": 14131 }, + { "peer_id": "EiAeCRrFuX5C/kdZYTVfriE1e3cQSQTKOEdnb62SCCKxag==", "token_balance": 14131 }, + { "peer_id": "EiAfa1YbG6suWnaDdeKePJDtcCdKn7sOotk+6evEaYGSVA==", "token_balance": 14131 }, + { "peer_id": "EiAgo0D0ewh2ozj2o4w6eifg47bAqg/aASPnODHq5DbJsw==", "token_balance": 14131 }, + { "peer_id": "EiAGpy+7f6G8KJO90sZhEwtms2pb/uV6y46FrGOAfLWciA==", "token_balance": 14131 }, + { "peer_id": "EiAh3ml2qwcOLuYmLNkCzdoqhKQubSGodc5c52TX7O5FIw==", "token_balance": 14131 }, + { "peer_id": "EiAIOvtUvZbLmyuACX8m3iVXBlWcbTeDe/qvT/Z/8WEoIQ==", "token_balance": 14131 }, + { "peer_id": "EiAJ05XAk9ozclScMhUY0JjMScIoltFIRnr9S2+FlFuU7Q==", "token_balance": 14131 }, + { "peer_id": "EiAJUiqPKU7Mtzv1uNUo2Eb1dLQH1AQXHCs9Y/mdE8LZFw==", "token_balance": 14131 }, + { "peer_id": "EiAKHUO4r8n420rLFpwpPGXvhnyjPmE/I6XU0UXLNkF8QA==", "token_balance": 14131 }, + { "peer_id": "EiAkmBx/IweHwGUFtS+wTJee30lN/IjD0WKaoezBzXTtYQ==", "token_balance": 14131 }, + { "peer_id": "EiAl1/Bp+0eIMrQQHz/Cq70g4LAqCPFm9pL8fXjynTDWIg==", "token_balance": 14131 }, + { "peer_id": "EiAlVBObQQZQlXJog9KHjcwCNn6My7QPDeUrQ5jhczFKJQ==", "token_balance": 14131 }, + { "peer_id": "EiAmclu9uZtgv3s7OKK1RgSZthUzSHfgKp30/RpB2sVsYA==", "token_balance": 14131 }, + { "peer_id": "EiAmwRgulhRQZ3k7ZfMzwfQu01svPWc0mRZUfzisyp2eXg==", "token_balance": 14131 }, + { "peer_id": "EiANWjxJzSc613LL8OuVeGuoCGsUAH1ffsyAzIXf/amqeg==", "token_balance": 14131 }, + { "peer_id": "EiAogWRRTBr2swYNDUDx8pkbW3fbSFV5XLzvyHbSP74mUA==", "token_balance": 14131 }, + { "peer_id": 
"EiAoW+AnTP/1jG2nGDcKEky5yLjj10rxOK/cS4VcHWHTgg==", "token_balance": 14131 }, + { "peer_id": "EiAP80CvojfPuRBG+VNJxSFonvhgrY8bflDWEsHuDlP3Jw==", "token_balance": 14131 }, + { "peer_id": "EiAPjOb8we+ocPGyciL5pgUFjLQEo4seVztvbI5qDowpIg==", "token_balance": 14131 }, + { "peer_id": "EiAq/9aanJWTgxULhlycv44eYd2sT/OSFkXYnQPVE70XKw==", "token_balance": 14131 }, + { "peer_id": "EiAq9OQhcZTIEVWNUMkAgAIdOyVyK9Pb0GoF6JzMtfz8ow==", "token_balance": 14131 }, + { "peer_id": "EiAr7LQ1goFdkwaYnLWZZnL7pnatm4rr5SDff8XU/k4fTQ==", "token_balance": 14131 }, + { "peer_id": "EiARCleCxEfq9N1oHMEGiEwm6M75QixANqXQGEIAsN8iyg==", "token_balance": 14131 }, + { "peer_id": "EiArYOBCuzHjKaOvc16KZWSaK02j+0z/ezXwS4I5tnSySA==", "token_balance": 14131 }, + { "peer_id": "EiATomioP0BdxT8l6xtqM1pidOs6XIkAFn3PnvnjjTgYlg==", "token_balance": 14131 }, + { "peer_id": "EiAv/HAyL9Flsw3UOBQaRiHbYvKn7u2SbZwfZz+Gwk88fA==", "token_balance": 14131 }, + { "peer_id": "EiAveS8oMO93ID3JT1eEgjUF3as/DpOZ2U/fY4TY3sClJg==", "token_balance": 14131 }, + { "peer_id": "EiAzALXcTZtECDgNMn7EbHYeGdFqumYukgPuvlEtY9Hszg==", "token_balance": 14131 }, + { "peer_id": "EiAZl/e92G7m7+MWa/q/CCliixw/LgSjRLMf+EoXJo2A2g==", "token_balance": 14131 }, + { "peer_id": "EiB104ofyZgg4SbNV1hbCxrupU2wBlA9RAyFL182vf7dpQ==", "token_balance": 14131 }, + { "peer_id": "EiB44PpKKYsov6QTsEL8qg4xjdfWmseav2cEq1wQF2q45w==", "token_balance": 14131 }, + { "peer_id": "EiB4CLe1iirwASrbNR2u1SIM53xaqJNpaaWnEpVLqFNf5A==", "token_balance": 14131 }, + { "peer_id": "EiB4SQlsGRD1xkWRy0zHa0wvrBlIUVmOGsownttpWHM8SA==", "token_balance": 14131 }, + { "peer_id": "EiB5Mzanmx9YS9nPK4c4FmMZULvJVydieWVRLTa2KbUmjA==", "token_balance": 14131 }, + { "peer_id": "EiB7dQs9O4zOkLS7TzjUNfxquzcoIezZ91YAg8DkzK4heA==", "token_balance": 14131 }, + { "peer_id": "EiB7DY3BGfeQ7My3lzZv2ZrGj2UFn5a7UC8n3dQdAguwcg==", "token_balance": 14131 }, + { "peer_id": "EiB7h+COLmrKzVuh6revYAeuGf0NuP62L7qTaTi3Pp4W2Q==", "token_balance": 14131 }, + { "peer_id": 
"EiB93kvIpMxjMQQNEWCmxoKeM5XD+2Lqqk8OJHYsZNCGMw==", "token_balance": 14131 }, + { "peer_id": "EiBABy5T0sFNCLuoLjNULkXJlLstpo13RsPvuAIKCiYQPA==", "token_balance": 14131 }, + { "peer_id": "EiBaFhAiwplT1pJf3uxKMcspM07osb3dOdwGSJk11rTR1Q==", "token_balance": 14131 }, + { "peer_id": "EiBaKSSnCi/ZfokmQApSYtAi4xvxbT5B263CkJqAt7YSAQ==", "token_balance": 14131 }, + { "peer_id": "EiBaKt+mklDecWvS1m17WSFlb1JjJDA6uXBcMRIr2NTz/w==", "token_balance": 14131 }, + { "peer_id": "EiBaUNq3wdeadsrrfv7UeOMCCnb+ilJ8b9rez8BxnXFDlQ==", "token_balance": 14131 }, + { "peer_id": "EiBavPp4YwTyr9/3pCeWOBVySprBSsr3n2U5u/0nmQ4bxg==", "token_balance": 14131 }, + { "peer_id": "EiBEi4r/vpOjt9Wp5obXZdHQoLTFoJNZutbtxdpEp6USPQ==", "token_balance": 14131 }, + { "peer_id": "EiBFJEoAMl9Y2wD1es0PIxJeo9zVVblq2yCrSAXEMs6ecw==", "token_balance": 14131 }, + { "peer_id": "EiBFNztvBF1fiJdeRZvqvwPykbvDJejXrak1Ex5+U+lu3g==", "token_balance": 14131 }, + { "peer_id": "EiBg0bACYD3Hqn6pi6K8lX7Q0z1d9BY7+qQuCgVao84yzg==", "token_balance": 14131 }, + { "peer_id": "EiBIQXASyYPnVMmSrQcchsWtsf/woOYQXfF6pYex/3IxkA==", "token_balance": 14131 }, + { "peer_id": "EiBiX3Di7eSOIV15fJn9MQa/2rk4t33vV5oP52Bz8qjzyA==", "token_balance": 14131 }, + { "peer_id": "EiBJQSjehIVAEQPADz1EPnq+/YtndF/gzfHhnAo6/Ytn3w==", "token_balance": 14131 }, + { "peer_id": "EiBkd2IgkaS1fd/3sHQc2XeM93gueoQPymSJnpy62PdhDQ==", "token_balance": 14131 }, + { "peer_id": "EiBlbDATmftg5CHxy9uQ95hOmBa+dQMEfrCT6ax6V55H3g==", "token_balance": 14131 }, + { "peer_id": "EiBNwIGR5eqXMXoFkg/Co0tiHXg/4G8i721v6yqFPynNLg==", "token_balance": 14131 }, + { "peer_id": "EiBo1ySjJJFfzv+E0OJTfG3Bc/vr03X7YU7bb6kIFsRTRA==", "token_balance": 14131 }, + { "peer_id": "EiBP+L50XDFFnSpAqLEpmOd78BNyXkBm3+1hMWRYWv5Tzg==", "token_balance": 14131 }, + { "peer_id": "EiBPSC7f1vRRRHH5oy1xCTKjSglLpRDDJxbFhBswHXKtNw==", "token_balance": 14131 }, + { "peer_id": "EiBR8FV0T0jpAuxBZCI6B+91USsfOb0l/3HVB0i+gnN3Wg==", "token_balance": 14131 }, + { "peer_id": 
"EiBRRGlEPmJifZ0MMoy1Utw8Q7o2UA8BKDRIYA6bx097GQ==", "token_balance": 14131 }, + { "peer_id": "EiBSMFZZ4TJffoXAwdUBGSnR+Um/G9ZCrrAiS8du2+ChKQ==", "token_balance": 14131 }, + { "peer_id": "EiBSRkj61j5iARkjIsvoRDtqbuiFXJAZei4TdQvLF7oEHA==", "token_balance": 14131 }, + { "peer_id": "EiBULRje4iGOFbRskmsV3hDFb0JxHIiAwqacthUkd+qJgA==", "token_balance": 14131 }, + { "peer_id": "EiBVLbCAfL1PI281LA2x+3zRtljiwMZomOA+uMuGRRvHnw==", "token_balance": 14131 }, + { "peer_id": "EiBwCm57KOKTW3KojF5hcYr5uny0gLOufZCQ9z29OXevgA==", "token_balance": 14131 }, + { "peer_id": "EiBWkUrOeYF9fBjgYjM1pL/d0F2ecDEUKJ3MwtK7nz2ihw==", "token_balance": 14131 }, + { "peer_id": "EiBX2BFQK4mDRNTJkQZi4t0AsAj0+t6YvVgSA62Cr1lLgA==", "token_balance": 14131 }, + { "peer_id": "EiBXXiIu7TEJZ8iubowxgHX2SXps2hvcoz7UOvU86yEZtw==", "token_balance": 14131 }, + { "peer_id": "EiBz/9XZy8GzqQAMSsh33WH7gSl9BjCe2CCosxuqL3kEyA==", "token_balance": 14131 }, + { "peer_id": "EiBZDzjcmWq411H6xhuYjPm2UinwNQPnjtqAcvacV6KTxg==", "token_balance": 14131 }, + { "peer_id": "EiBZrZpZEbVlmhzUnPUY/TmmgTXDFukYbxUe1fDFBobN2A==", "token_balance": 14131 }, + { "peer_id": "EiC/WW9wiPF6oV4P61KF+x63PP7bxUYKKn7KdJtduxpSOQ==", "token_balance": 14131 }, + { "peer_id": "EiC5PG8ELRDfFs57o/Lzo3wbl/0sUv+jfFysqgT+f4zF9g==", "token_balance": 14131 }, + { "peer_id": "EiC61EfGsE1TxPfXQIu8sIo8agpOIag5OWzuxy78So8p5g==", "token_balance": 14131 }, + { "peer_id": "EiC6Eqpi4B1w02f/sMTzCK6yza1FfrLcfajM1roAXwBW4g==", "token_balance": 14131 }, + { "peer_id": "EiC8PpB0i1w/qlTfAjSkF+71BpioCFJKx83PeRbj2dMGkA==", "token_balance": 14131 }, + { "peer_id": "EiCAKalEIOVZll5qTaJkoOORPvlvWy4dliRkvU4DGkTBPw==", "token_balance": 14131 }, + { "peer_id": "EiCAm8vtd0n4znyA2/wvOm0z6b1Np+Da0CILLlpKNDTVLQ==", "token_balance": 14131 }, + { "peer_id": "EiCbENzmjAe1NBaulfMVwu8WeIw4EsTZ1y9Wb9jM8aACvw==", "token_balance": 14131 }, + { "peer_id": "EiCBoUdPb0/A7SwR3PGVoS1B1NE+A3JschDYrrznjP/2/w==", "token_balance": 14131 }, + { "peer_id": 
"EiCEIc+lI/0h1nwaHM+jwyIwdNv9DuTwzsi9vXSYkvFGzQ==", "token_balance": 14131 }, + { "peer_id": "EiCEJTohv96gw6sui4NezA/5zS3DShpHcS3yrT0ifXBITg==", "token_balance": 14131 }, + { "peer_id": "EiCFAQ8TKGZxMN4SNekcYJRHGBIOXKbkVFRr2ScCd7msOg==", "token_balance": 14131 }, + { "peer_id": "EiCfOozM8WwgvzAOuhH4QTSqtzVjYYFolT9J3zBC4Vuo+A==", "token_balance": 14131 }, + { "peer_id": "EiCFTKONzz79nk8pq4Cx8w4Rj9lRLUQSUuqGJPKyR5eLQQ==", "token_balance": 14131 }, + { "peer_id": "EiCgA2vgV5TkNbB1BCDvfvCHwoN77QnBdLYVKbB+iucROQ==", "token_balance": 14131 }, + { "peer_id": "EiChBy5gzOvUBJGjlGwu8D0AOdyRTnZxkWf6xj3wtdfoLg==", "token_balance": 14131 }, + { "peer_id": "EiCHt1daeYFg3cqqGco15WbsDvK3/EMX7iSW0l3cG934Ig==", "token_balance": 14131 }, + { "peer_id": "EiChTVF5cYU31FtNQN4pi5TbcYZbl9Mm4vvgRX3WXkBRpg==", "token_balance": 14131 }, + { "peer_id": "EiChuk1RuNAGMf/jsHBaevQqu1Lc6CQgEnIUVc6+a9MfZw==", "token_balance": 14131 }, + { "peer_id": "EiCJIIdUQWygzHxU4N3eipgAYfA6oWME2+zeprOjHDuG+g==", "token_balance": 14131 }, + { "peer_id": "EiCKA61pUxmFcMaKmn/ykOD8UUPeun96ATttwdb96eP7iQ==", "token_balance": 14131 }, + { "peer_id": "EiCKFxs1XPwcYROzEWcBx3cm+6sAUXD4ytcHrvKfO1ecEA==", "token_balance": 14131 }, + { "peer_id": "EiCMU3UKFEm09ry84SM+cJWd6tQIVKp4a2FsDYv4KueM4w==", "token_balance": 14131 }, + { "peer_id": "EiCMUwONUM+HZjbSJogaFwryZUqEhVZfOOtsxfc15595dw==", "token_balance": 14131 }, + { "peer_id": "EiCP8Tvwyr0X13ErQ08wwJfQDQlIHJf36xcp36daZax84A==", "token_balance": 14131 }, + { "peer_id": "EiCPEA5+vd9bWr35axyuo+aGl8sPNzuWmuYqiDbLsKmq8A==", "token_balance": 14131 }, + { "peer_id": "EiCpXjiJFcyuQDSo9bHvZaN0lysbBqSyQdPL0ncRWjgqMg==", "token_balance": 14131 }, + { "peer_id": "EiCqm2uoj682Zm9wLC34QS6PZzT8MK+1VouN2uS7ZynLyA==", "token_balance": 14131 }, + { "peer_id": "EiCSiwM6vnVlcBve5sx5A31GJVpCW3LkTtgyLtcVZHSckg==", "token_balance": 14131 }, + { "peer_id": "EiCttygf7ST4sHqshAHusQazOBFVIAOPxQGv/dUwKmYDyQ==", "token_balance": 14131 }, + { "peer_id": 
"EiCU3/SNW2GUnF9j2m/AqEKvSlbj89RCIUe61hb+aaI4SA==", "token_balance": 14131 }, + { "peer_id": "EiCWe4C+eEuLmkui1FCgMHzw8dVCXG3zNRLm3COkdKxE0Q==", "token_balance": 14131 }, + { "peer_id": "EiCyDcq6dy4ZHHMo+S/zsxZbVF1J8qv/hkHjQBXtChj/pg==", "token_balance": 14131 }, + { "peer_id": "EiD/sK801+gJVhriWwPInV8K7yB0OVCqWQaShpZy+JMy4g==", "token_balance": 14131 }, + { "peer_id": "EiD2WIIwy5DcHuhy8jAG7xrvB00aB4dNJEYuv/6Fs4X7Fg==", "token_balance": 14131 }, + { "peer_id": "EiD38tGZ8118CmVi324OI6HjiOdiZfoKpQ0DM6QroSLGmA==", "token_balance": 14131 }, + { "peer_id": "EiD5C+It9Fi5ae81Bud8egyByPyYHJNMDBGLMKR8h8pCrA==", "token_balance": 14131 }, + { "peer_id": "EiD8qqXLTPxTS0tChKYkxtrYIiOs4YTxA0EcM3KfFYlCBg==", "token_balance": 14131 }, + { "peer_id": "EiDB7gTrbys+YCIGXqohCrJgSIKKhLkuIQ4Lt3XgcwUiYQ==", "token_balance": 14131 }, + { "peer_id": "EiDbn7CabET039d7TULd5bRWSW5a4FnuPyO7l3a7Qu6ULQ==", "token_balance": 14131 }, + { "peer_id": "EiDc3vxkQQxMDZh5/shT9asIXxRKADOmsIT/yUKm+lErsA==", "token_balance": 14131 }, + { "peer_id": "EiDDNED6DphDR/MfWD/LcmT1cF+myrhu8DqRDjpVG9kN1Q==", "token_balance": 14131 }, + { "peer_id": "EiDFZ3WZDDPjYO4+inRDTuyAQN0bNCCkDX7YcxVaGL5isQ==", "token_balance": 14131 }, + { "peer_id": "EiDghtmPnBgBl9aEfAEoX7zqGjRqyikcf8imBRSIx79SpQ==", "token_balance": 14131 }, + { "peer_id": "EiDI3IWtdE6+riV4Ag3pe32Ek0g7QETFsQD6IuCx7ZzkNQ==", "token_balance": 14131 }, + { "peer_id": "EiDLU5mqRefq3Lnleo4LXgk5+JG2Tq1LQ1daJErAkSmQkQ==", "token_balance": 14131 }, + { "peer_id": "EiDmlBVIHR+jqAZab1JomprMk4Zd60Kk9pfmvhxPRGwIzQ==", "token_balance": 14131 }, + { "peer_id": "EiDMLuuSCxqmmAZwl0fhAEB29T2NrYuxNX3eoVkoB+LgFw==", "token_balance": 14131 }, + { "peer_id": "EiDIXso0zaNVYv9o1oa6Xx4DbheVEht+mMcPJIvJWKSxuQ==", "token_balance": 14131 }, + { "peer_id": "EiDOt1mXOV0cA1q1yWHO7At79CFOSxBAFidPQhwMD4licg==", "token_balance": 14131 }, + { "peer_id": "EiDP21JvwRuBXadS9TWZ+AHvvsAOvucLV2GUf5CehuninA==", "token_balance": 14131 }, + { "peer_id": 
"EiDQYHxH3uptGi8btm3JlSllnuJv4psDL0mrgVmE4Tg0nA==", "token_balance": 14131 }, + { "peer_id": "EiDSJUeAmaspN5qvLSnOR/ku1cUtqA75i1nqgBND+X8VkA==", "token_balance": 14131 }, + { "peer_id": "EiDsx6A1ZJobkwHzI1L4x8MV/qSTnLo7B4dVwCOcmFntQw==", "token_balance": 14131 }, + { "peer_id": "EiDTUujybIA/oPuG8hSjPUioegcd1pOillvksY5zeOTUqw==", "token_balance": 14131 }, + { "peer_id": "EiDucIlcuEY1ohpR421WkGUOMimWFqAfMG3tp7lUtx534w==", "token_balance": 14131 }, + { "peer_id": "EiDueSVvXpj2B5IKYkjmjnGZtEeXuyqvCxgsNyClTeSZqA==", "token_balance": 14131 }, + { "peer_id": "EiDuXb8WGtb41p0fTfCECraBXI71y8TLCqgjyQP8/Cfw8A==", "token_balance": 14131 }, + { "peer_id": "EiDV19Jt3o+SI2JnbAub+zMddORpSZ+dfGO/j+P1lGDhPg==", "token_balance": 14131 }, + { "peer_id": "EiDWzbv+2wYUL9q5uDhQZq+j/OPxjzLZxCrwZCYQtPMAZQ==", "token_balance": 14131 }, + { "peer_id": "EiDx6BMj1AEzyv71+ZcJz8lpYZJo7TpFjj4aJ2jf8pXq3g==", "token_balance": 14131 }, + { "peer_id": "EiDXMvSSOv+nF6qZuBEU+cVCczPoclZgsWEzaDek8dIGjA==", "token_balance": 14131 }, + { "peer_id": "EiDyqp7F9IKBL5BGiAYdExLw+8ItsAYL2AVcS3XMDtPp+Q==", "token_balance": 14131 }, + { "peer_id": "EiDZGMJApfat9XujI1X19ulYZV9KdCjUP94kTp4ypv1JCw==", "token_balance": 14131 } + ] +} \ No newline at end of file diff --git a/node/store/clock.go b/node/store/clock.go index b688a58..073095b 100644 --- a/node/store/clock.go +++ b/node/store/clock.go @@ -97,29 +97,29 @@ type ClockStore interface { } type PebbleClockStore struct { - db *pebble.DB + db KVDB logger *zap.Logger } var _ ClockStore = (*PebbleClockStore)(nil) type PebbleMasterClockIterator struct { - i *pebble.Iterator + i Iterator } type PebbleClockIterator struct { - i *pebble.Iterator + i Iterator db *PebbleClockStore } type PebbleCandidateClockIterator struct { - i *pebble.Iterator + i Iterator db *PebbleClockStore } -var _ Iterator[*protobufs.ClockFrame] = (*PebbleMasterClockIterator)(nil) -var _ Iterator[*protobufs.ClockFrame] = (*PebbleClockIterator)(nil) -var _ Iterator[*protobufs.ClockFrame] = 
(*PebbleCandidateClockIterator)(nil) +var _ TypedIterator[*protobufs.ClockFrame] = (*PebbleMasterClockIterator)(nil) +var _ TypedIterator[*protobufs.ClockFrame] = (*PebbleClockIterator)(nil) +var _ TypedIterator[*protobufs.ClockFrame] = (*PebbleCandidateClockIterator)(nil) func (p *PebbleMasterClockIterator) First() bool { return p.i.First() @@ -173,7 +173,7 @@ func (p *PebbleMasterClockIterator) Value() (*protobufs.ClockFrame, error) { return nil, errors.Wrap(err, "get master clock frame iterator value") } - frame.ParentSelector = parent.Bytes() + frame.ParentSelector = parent.FillBytes(make([]byte, 32)) return frame, nil } @@ -306,7 +306,7 @@ func (p *PebbleCandidateClockIterator) Close() error { return errors.Wrap(p.i.Close(), "closing candidate clock frame iterator") } -func NewPebbleClockStore(db *pebble.DB, logger *zap.Logger) *PebbleClockStore { +func NewPebbleClockStore(db KVDB, logger *zap.Logger) *PebbleClockStore { return &PebbleClockStore{ db, logger, @@ -446,9 +446,7 @@ func clockProverTrieKey(filter []byte, frameNumber uint64) []byte { } func (p *PebbleClockStore) NewTransaction() (Transaction, error) { - return &PebbleTransaction{ - b: p.db.NewBatch(), - }, nil + return p.db.NewBatch(), nil } // GetEarliestMasterClockFrame implements ClockStore. 
@@ -530,7 +528,7 @@ func (p *PebbleClockStore) GetMasterClockFrame( return nil, errors.Wrap(err, "get master clock frame") } - frame.ParentSelector = parent.Bytes() + frame.ParentSelector = parent.FillBytes(make([]byte, 32)) return frame, nil } @@ -547,10 +545,10 @@ func (p *PebbleClockStore) RangeMasterClockFrames( startFrameNumber = temp } - iter, err := p.db.NewIter(&pebble.IterOptions{ - LowerBound: clockMasterFrameKey(filter, startFrameNumber), - UpperBound: clockMasterFrameKey(filter, endFrameNumber), - }) + iter, err := p.db.NewIter( + clockMasterFrameKey(filter, startFrameNumber), + clockMasterFrameKey(filter, endFrameNumber), + ) if err != nil { return nil, errors.Wrap(err, "range master clock frames") } @@ -863,7 +861,7 @@ func (p *PebbleClockStore) PutCandidateDataClockFrame( frame *protobufs.ClockFrame, txn Transaction, ) error { - if err := p.saveAggregateProofs(nil, frame); err != nil { + if err := p.saveAggregateProofs(txn, frame); err != nil { return errors.Wrap( errors.Wrap(err, ErrInvalidData.Error()), "put candidate data clock frame", @@ -920,7 +918,7 @@ func (p *PebbleClockStore) PutDataClockFrame( backfill bool, ) error { if frame.FrameNumber != 0 { - if err := p.saveAggregateProofs(nil, frame); err != nil { + if err := p.saveAggregateProofs(txn, frame); err != nil { return errors.Wrap( errors.Wrap(err, ErrInvalidData.Error()), "put candidate data clock frame", @@ -1004,8 +1002,8 @@ func (p *PebbleClockStore) GetCandidateDataClockFrames( filter []byte, frameNumber uint64, ) ([]*protobufs.ClockFrame, error) { - iter, err := p.db.NewIter(&pebble.IterOptions{ - LowerBound: clockDataCandidateFrameKey( + iter, err := p.db.NewIter( + clockDataCandidateFrameKey( filter, frameNumber, []byte{ @@ -1021,7 +1019,7 @@ func (p *PebbleClockStore) GetCandidateDataClockFrames( 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }, ), - UpperBound: clockDataCandidateFrameKey( + clockDataCandidateFrameKey( filter, frameNumber, []byte{ @@ -1037,7 +1035,7 @@ func (p 
*PebbleClockStore) GetCandidateDataClockFrames( 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, }, ), - }) + ) if err != nil { return nil, errors.Wrap(err, "get candidate data clock frames") } @@ -1084,8 +1082,8 @@ func (p *PebbleClockStore) RangeCandidateDataClockFrames( 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, } } - iter, err := p.db.NewIter(&pebble.IterOptions{ - LowerBound: clockDataCandidateFrameKey( + iter, err := p.db.NewIter( + clockDataCandidateFrameKey( filter, frameNumber, fromParent, @@ -1096,7 +1094,7 @@ func (p *PebbleClockStore) RangeCandidateDataClockFrames( 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }, ), - UpperBound: clockDataCandidateFrameKey( + clockDataCandidateFrameKey( filter, frameNumber, toParent, @@ -1107,7 +1105,7 @@ func (p *PebbleClockStore) RangeCandidateDataClockFrames( 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, }, ), - }) + ) if err != nil { return nil, errors.Wrap(err, "range candidate data clock frames") } @@ -1127,10 +1125,10 @@ func (p *PebbleClockStore) RangeDataClockFrames( startFrameNumber = temp } - iter, err := p.db.NewIter(&pebble.IterOptions{ - LowerBound: clockDataFrameKey(filter, startFrameNumber), - UpperBound: clockDataFrameKey(filter, endFrameNumber), - }) + iter, err := p.db.NewIter( + clockDataFrameKey(filter, startFrameNumber), + clockDataFrameKey(filter, endFrameNumber), + ) if err != nil { return nil, errors.Wrap(err, "get data clock frames") } @@ -1161,10 +1159,7 @@ func (p *PebbleClockStore) Deduplicate(filter []byte) error { }, ) - iter, err := p.db.NewIter(&pebble.IterOptions{ - LowerBound: from, - UpperBound: to, - }) + iter, err := p.db.NewIter(from, to) if err != nil { return errors.Wrap(err, "deduplicate") } @@ -1187,7 +1182,7 @@ func (p *PebbleClockStore) Deduplicate(filter []byte) error { return err } - err = p.db.Set(iter.Key(), newValue, &pebble.WriteOptions{Sync: true}) + err = p.db.Set(iter.Key(), newValue) if err != nil { return err } @@ -1205,10 +1200,7 @@ func (p 
*PebbleClockStore) Deduplicate(filter []byte) error { from = clockDataFrameKey(filter, 1) to = clockDataFrameKey(filter, 20000) - iter, err = p.db.NewIter(&pebble.IterOptions{ - LowerBound: from, - UpperBound: to, - }) + iter, err = p.db.NewIter(from, to) if err != nil { return errors.Wrap(err, "deduplicate") } @@ -1231,7 +1223,7 @@ func (p *PebbleClockStore) Deduplicate(filter []byte) error { return err } - err = p.db.Set(iter.Key(), newValue, &pebble.WriteOptions{Sync: true}) + err = p.db.Set(iter.Key(), newValue) if err != nil { return err } @@ -1279,10 +1271,7 @@ func (p *PebbleClockStore) Deduplicate(filter []byte) error { }, ) - iter, err = p.db.NewIter(&pebble.IterOptions{ - LowerBound: from, - UpperBound: to, - }) + iter, err = p.db.NewIter(from, to) if err != nil { return errors.Wrap(err, "deduplicate") } @@ -1305,7 +1294,7 @@ func (p *PebbleClockStore) Deduplicate(filter []byte) error { return err } - err = p.db.Set(iter.Key(), newValue, &pebble.WriteOptions{Sync: true}) + err = p.db.Set(iter.Key(), newValue) if err != nil { return err } @@ -1334,10 +1323,7 @@ func (p *PebbleClockStore) GetCompressedDataClockFrames( from := clockDataFrameKey(filter, fromFrameNumber) to := clockDataFrameKey(filter, toFrameNumber+1) - iter, err := p.db.NewIter(&pebble.IterOptions{ - LowerBound: from, - UpperBound: to, - }) + iter, err := p.db.NewIter(from, to) if err != nil { return nil, errors.Wrap(err, "get compressed data clock frames") } @@ -1418,10 +1404,7 @@ func (p *PebbleClockStore) GetCompressedDataClockFrames( }, ) - iter, err := p.db.NewIter(&pebble.IterOptions{ - LowerBound: from, - UpperBound: to, - }) + iter, err := p.db.NewIter(from, to) if err != nil { return nil, errors.Wrap(err, "get compressed data clock frames") } @@ -1458,7 +1441,7 @@ func (p *PebbleClockStore) GetCompressedDataClockFrames( if err != nil { return nil, errors.Wrap(err, "get compressed data clock frames") } - parentSelector, _, _, err := frame.GetParentSelectorAndDistance() + 
parentSelector, _, _, err := frame.GetParentSelectorAndDistance(nil) if err != nil { return nil, errors.Wrap(err, "get compressed data clock frames") } @@ -1480,8 +1463,28 @@ func (p *PebbleClockStore) GetCompressedDataClockFrames( break } score := new(big.Int) - for _, p := range paths[i] { - _, distance, _, err := p.GetParentSelectorAndDistance() + for _, path := range paths[i] { + master, err := p.GetMasterClockFrame( + []byte{ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + }, + path.FrameNumber, + ) + if err != nil { + return nil, errors.Wrap(err, "get compressed data clock frames") + } + + discriminator, err := master.GetSelector() + if err != nil { + return nil, errors.Wrap(err, "get compressed data clock frames") + } + + _, distance, _, err := path.GetParentSelectorAndDistance( + discriminator, + ) if err != nil { return nil, errors.Wrap(err, "get compressed data clock frames") } @@ -1535,10 +1538,13 @@ func (p *PebbleClockStore) GetCompressedDataClockFrames( return nil, errors.Wrap(err, "get compressed data clock frames") } - iter, err := p.db.NewIter(&pebble.IterOptions{ - LowerBound: dataProofInclusionKey(filter, []byte(k), 0), - UpperBound: dataProofInclusionKey(filter, []byte(k), limit+1), - }) + iter, err := p.db.NewIter( + dataProofInclusionKey(filter, []byte(k), 0), + dataProofInclusionKey(filter, []byte(k), limit+1), + ) + if err != nil { + return nil, errors.Wrap(err, "get compressed data clock frames") + } for iter.First(); iter.Valid(); iter.Next() { incCommit := iter.Value() @@ -1632,9 +1638,6 @@ func (p *PebbleClockStore) SetLatestDataClockFrameNumber( err := p.db.Set( clockDataLatestIndex(filter), binary.BigEndian.AppendUint64(nil, frameNumber), - &pebble.WriteOptions{ - Sync: true, - }, ) return errors.Wrap(err, "set latest data clock frame number") @@ -1678,9 +1681,6 @@ func (p 
*PebbleClockStore) DeleteCandidateDataClockFrameRange( 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, }, ), - &pebble.WriteOptions{ - Sync: true, - }, ) return errors.Wrap(err, "delete candidate data clock frame range") } @@ -1727,10 +1727,13 @@ func (p *PebbleClockStore) GetHighestCandidateDataClockFrame( }, ) - iter, err := p.db.NewIter(&pebble.IterOptions{ - LowerBound: from, - UpperBound: to, - }) + iter, err := p.db.NewIter(from, to) + if err != nil { + return nil, errors.Wrap( + errors.Wrap(err, ErrInvalidData.Error()), + "get highest candidate data clock frame", + ) + } found := iter.SeekLT(to) if found { diff --git a/node/store/data_proof.go b/node/store/data_proof.go index 6221447..0952fc2 100644 --- a/node/store/data_proof.go +++ b/node/store/data_proof.go @@ -30,12 +30,12 @@ type DataProofStore interface { } type PebbleDataProofStore struct { - db *pebble.DB + db KVDB logger *zap.Logger } func NewPebbleDataProofStore( - db *pebble.DB, + db KVDB, logger *zap.Logger, ) *PebbleDataProofStore { return &PebbleDataProofStore{ @@ -81,13 +81,11 @@ func dataProofSegmentKey( } func (p *PebbleDataProofStore) NewTransaction() (Transaction, error) { - return &PebbleTransaction{ - b: p.db.NewBatch(), - }, nil + return p.db.NewBatch(), nil } func internalGetAggregateProof( - db *pebble.DB, + db KVDB, filter []byte, commitment []byte, frameNumber uint64, @@ -114,10 +112,10 @@ func internalGetAggregateProof( Proof: copied, } - iter, err := db.NewIter(&pebble.IterOptions{ - LowerBound: dataProofInclusionKey(filter, commitment, 0), - UpperBound: dataProofInclusionKey(filter, commitment, limit+1), - }) + iter, err := db.NewIter( + dataProofInclusionKey(filter, commitment, 0), + dataProofInclusionKey(filter, commitment, limit+1), + ) if err != nil { return nil, errors.Wrap(err, "get aggregate proof") } @@ -206,7 +204,7 @@ func (p *PebbleDataProofStore) GetAggregateProof( } func internalPutAggregateProof( - db *pebble.DB, + db KVDB, txn Transaction, aggregateProof 
*protobufs.InclusionAggregateProof, commitment []byte, diff --git a/node/store/inmem.go b/node/store/inmem.go new file mode 100644 index 0000000..7ec5661 --- /dev/null +++ b/node/store/inmem.go @@ -0,0 +1,349 @@ +package store + +import ( + "errors" + "io" + "math/rand" + "sort" + "sync" + + "github.com/cockroachdb/pebble" +) + +type InMemKVDB struct { + open bool + sortedKeys []string + store map[string][]byte + storeMx sync.Mutex +} + +type Operation int + +const ( + SetOperation Operation = iota + DeleteOperation +) + +type InMemKVDBOperation struct { + op Operation + key []byte + value []byte +} + +type InMemKVDBTransaction struct { + id int + changes []InMemKVDBOperation + db *InMemKVDB +} + +type InMemKVDBIterator struct { + db *InMemKVDB + start []byte + end []byte + pos int + open bool +} + +func (i *InMemKVDBIterator) Key() []byte { + if !i.open { + return nil + } + i.db.storeMx.Lock() + if _, ok := i.db.store[i.db.sortedKeys[i.pos]]; !ok { + return nil + } + i.db.storeMx.Unlock() + + return []byte(i.db.sortedKeys[i.pos]) +} + +func (i *InMemKVDBIterator) First() bool { + if !i.open { + return false + } + i.db.storeMx.Lock() + found := false + idx := sort.SearchStrings(i.db.sortedKeys, string(i.start)) + final := sort.SearchStrings(i.db.sortedKeys, string(i.end)) + if idx < final { + i.pos = idx + found = true + } + i.db.storeMx.Unlock() + + return found +} + +func (i *InMemKVDBIterator) Next() bool { + if !i.open { + return false + } + i.db.storeMx.Lock() + found := false + if _, ok := i.db.store[i.db.sortedKeys[i.pos]]; ok { + final := sort.SearchStrings(i.db.sortedKeys, string(i.end)) + if i.pos < final { + i.pos = i.pos + 1 + found = true + } + } + i.db.storeMx.Unlock() + + return found +} + +func (i *InMemKVDBIterator) Prev() bool { + if !i.open { + return false + } + i.db.storeMx.Lock() + found := false + if _, ok := i.db.store[i.db.sortedKeys[i.pos]]; ok { + start := sort.SearchStrings(i.db.sortedKeys, string(i.start)) + if i.pos-1 > start { + i.pos 
= i.pos - 1 + found = true + } + } + i.db.storeMx.Unlock() + + return found +} + +func (i *InMemKVDBIterator) Valid() bool { + if !i.open { + return false + } + i.db.storeMx.Lock() + start := sort.SearchStrings(i.db.sortedKeys, string(i.start)) + final := sort.SearchStrings(i.db.sortedKeys, string(i.end)) + i.db.storeMx.Unlock() + + return i.pos < final && i.pos >= start +} + +func (i *InMemKVDBIterator) Value() []byte { + if !i.open { + return nil + } + + i.db.storeMx.Lock() + value := i.db.store[i.db.sortedKeys[i.pos]] + i.db.storeMx.Unlock() + + return value +} + +func (i *InMemKVDBIterator) Close() error { + if !i.open { + return errors.New("already closed iterator") + } + + i.open = false + return nil +} + +func (i *InMemKVDBIterator) SeekLT(lt []byte) bool { + if !i.open { + return false + } + i.db.storeMx.Lock() + found := false + if _, ok := i.db.store[i.db.sortedKeys[i.pos]]; ok { + idx := sort.SearchStrings(i.db.sortedKeys, string(lt)) + start := sort.SearchStrings(i.db.sortedKeys, string(i.start)) + if idx >= start { + i.pos = idx + 1 + found = true + } + } + i.db.storeMx.Unlock() + + return found +} + +func (t *InMemKVDBTransaction) Set(key []byte, value []byte) error { + if !t.db.open { + return errors.New("inmem db closed") + } + + t.changes = append(t.changes, InMemKVDBOperation{ + op: SetOperation, + key: key, + value: value, + }) + + return nil +} + +func (t *InMemKVDBTransaction) Commit() error { + if !t.db.open { + return errors.New("inmem db closed") + } + + var err error +loop: + for _, op := range t.changes { + switch op.op { + case SetOperation: + err = t.db.Set(op.key, op.value) + if err != nil { + break loop + } + case DeleteOperation: + err = t.db.Delete(op.key) + if err != nil { + break loop + } + } + } + + return err +} + +func (t *InMemKVDBTransaction) Delete(key []byte) error { + if !t.db.open { + return errors.New("inmem db closed") + } + + t.changes = append(t.changes, InMemKVDBOperation{ + op: DeleteOperation, + key: key, + }) + + 
return nil +} + +func (t *InMemKVDBTransaction) Abort() error { + return nil +} + +func NewInMemKVDB() *InMemKVDB { + return &InMemKVDB{ + open: true, + store: map[string][]byte{}, + sortedKeys: []string{}, + } +} + +func (d *InMemKVDB) Get(key []byte) ([]byte, io.Closer, error) { + if !d.open { + return nil, nil, errors.New("inmem db closed") + } + + d.storeMx.Lock() + b, ok := d.store[string(key)] + d.storeMx.Unlock() + if !ok { + return nil, nil, pebble.ErrNotFound + } + return b, io.NopCloser(nil), nil +} + +func (d *InMemKVDB) Set(key, value []byte) error { + if !d.open { + return errors.New("inmem db closed") + } + + d.storeMx.Lock() + _, ok := d.store[string(key)] + if !ok { + i := sort.SearchStrings(d.sortedKeys, string(key)) + if len(d.sortedKeys) > i { + d.sortedKeys = append(d.sortedKeys[:i+1], d.sortedKeys[i:]...) + d.sortedKeys[i] = string(key) + } else { + d.sortedKeys = append(d.sortedKeys, string(key)) + } + } + d.store[string(key)] = value + + d.storeMx.Unlock() + return nil +} + +func (d *InMemKVDB) Delete(key []byte) error { + if !d.open { + return errors.New("inmem db closed") + } + + d.storeMx.Lock() + _, ok := d.store[string(key)] + if ok { + i := sort.SearchStrings(d.sortedKeys, string(key)) + if len(d.sortedKeys)-1 > i { + d.sortedKeys = append(d.sortedKeys[:i], d.sortedKeys[i+1:]...) 
+ } else { + d.sortedKeys = d.sortedKeys[:i] + } + } + delete(d.store, string(key)) + d.storeMx.Unlock() + return nil +} + +func (d *InMemKVDB) NewBatch() Transaction { + if !d.open { + return nil + } + + id := rand.Int() + return &InMemKVDBTransaction{ + id: id, + db: d, + changes: []InMemKVDBOperation{}, + } +} + +func (d *InMemKVDB) NewIter(lowerBound []byte, upperBound []byte) (Iterator, error) { + if !d.open { + return nil, errors.New("inmem db closed") + } + + return &InMemKVDBIterator{ + open: true, + db: d, + start: lowerBound, + end: upperBound, + pos: -1, + }, nil +} + +func (d *InMemKVDB) Compact(start, end []byte, parallelize bool) error { + if !d.open { + return errors.New("inmem db closed") + } + + return nil +} + +func (d *InMemKVDB) Close() error { + if !d.open { + return errors.New("inmem db closed") + } + + d.open = false + return nil +} + +func (d *InMemKVDB) DeleteRange(start, end []byte) error { + if !d.open { + return errors.New("inmem db closed") + } + + iter, err := d.NewIter(start, end) + if err != nil { + return err + } + + for iter.First(); iter.Valid(); iter.Next() { + err = d.Delete(iter.Key()) + if err != nil { + return err + } + } + + return nil +} + +var _ KVDB = (*InMemKVDB)(nil) diff --git a/node/store/inmem_test.go b/node/store/inmem_test.go new file mode 100644 index 0000000..4c5424a --- /dev/null +++ b/node/store/inmem_test.go @@ -0,0 +1,90 @@ +package store_test + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "source.quilibrium.com/quilibrium/monorepo/node/store" +) + +func TestIter(t *testing.T) { + db := store.NewInMemKVDB() + db.Set([]byte{0x01}, []byte{0x01}) + db.Set([]byte{0x02}, []byte{0x02}) + db.Set([]byte{0x03}, []byte{0x03}) + db.Set([]byte{0x04}, []byte{0x04}) + db.Set([]byte{0x06}, []byte{0x06}) + db.Set([]byte{0x07}, []byte{0x07}) + db.Set([]byte{0x08}, []byte{0x08}) + db.Set([]byte{0x010}, []byte{0x010}) + db.Set([]byte{0x012}, []byte{0x012}) + db.Set([]byte{0x014}, []byte{0x014}) + iter, err 
:= db.NewIter([]byte{0x01}, []byte{0x04}) + assert.NoError(t, err) + assert.True(t, iter.First()) + assert.True(t, iter.Valid()) + assert.ElementsMatch(t, iter.Value(), []byte{0x01}) + assert.ElementsMatch(t, iter.Key(), []byte{0x01}) + assert.True(t, iter.Next()) + assert.True(t, iter.Valid()) + assert.ElementsMatch(t, iter.Value(), []byte{0x02}) + assert.ElementsMatch(t, iter.Key(), []byte{0x02}) + assert.True(t, iter.Next()) + assert.True(t, iter.Valid()) + assert.ElementsMatch(t, iter.Value(), []byte{0x03}) + assert.ElementsMatch(t, iter.Key(), []byte{0x03}) + assert.True(t, iter.Next()) + assert.False(t, iter.Valid()) + assert.NoError(t, iter.Close()) + + iter, err = db.NewIter([]byte{0x06}, []byte{0x09}) + assert.NoError(t, err) + assert.True(t, iter.First()) + assert.True(t, iter.Valid()) + assert.ElementsMatch(t, iter.Value(), []byte{0x06}) + assert.ElementsMatch(t, iter.Key(), []byte{0x06}) + assert.True(t, iter.Next()) + assert.True(t, iter.Valid()) + assert.ElementsMatch(t, iter.Value(), []byte{0x07}) + assert.ElementsMatch(t, iter.Key(), []byte{0x07}) + assert.True(t, iter.Next()) + assert.True(t, iter.Valid()) + assert.ElementsMatch(t, iter.Value(), []byte{0x08}) + assert.ElementsMatch(t, iter.Key(), []byte{0x08}) + assert.True(t, iter.Next()) + assert.False(t, iter.Valid()) + + iter, err = db.NewIter([]byte{0x05}, []byte{0x09}) + assert.NoError(t, err) + assert.True(t, iter.First()) + assert.True(t, iter.Valid()) + assert.ElementsMatch(t, iter.Value(), []byte{0x06}) + assert.ElementsMatch(t, iter.Key(), []byte{0x06}) + assert.True(t, iter.Next()) + assert.True(t, iter.Valid()) + assert.ElementsMatch(t, iter.Value(), []byte{0x07}) + assert.ElementsMatch(t, iter.Key(), []byte{0x07}) + assert.True(t, iter.Next()) + assert.True(t, iter.Valid()) + assert.ElementsMatch(t, iter.Value(), []byte{0x08}) + assert.ElementsMatch(t, iter.Key(), []byte{0x08}) + assert.True(t, iter.Next()) + assert.False(t, iter.Valid()) + + iter, err = db.NewIter([]byte{0x010}, 
[]byte{0x015}) + assert.NoError(t, err) + assert.True(t, iter.First()) + assert.True(t, iter.Valid()) + assert.ElementsMatch(t, iter.Value(), []byte{0x10}) + assert.ElementsMatch(t, iter.Key(), []byte{0x10}) + assert.True(t, iter.Next()) + assert.True(t, iter.Valid()) + assert.ElementsMatch(t, iter.Value(), []byte{0x12}) + assert.ElementsMatch(t, iter.Key(), []byte{0x12}) + assert.True(t, iter.Next()) + assert.True(t, iter.Valid()) + assert.ElementsMatch(t, iter.Value(), []byte{0x14}) + assert.ElementsMatch(t, iter.Key(), []byte{0x14}) + assert.True(t, iter.Next()) + assert.False(t, iter.Valid()) +} diff --git a/node/store/iterator.go b/node/store/iterator.go index d239ee1..b19247d 100644 --- a/node/store/iterator.go +++ b/node/store/iterator.go @@ -2,7 +2,18 @@ package store import "google.golang.org/protobuf/proto" -type Iterator[T proto.Message] interface { +type Iterator interface { + Key() []byte + First() bool + Next() bool + Prev() bool + Valid() bool + Value() []byte + Close() error + SeekLT([]byte) bool +} + +type TypedIterator[T proto.Message] interface { First() bool Next() bool Valid() bool diff --git a/node/store/key.go b/node/store/key.go index 0adc8fc..0031e3c 100644 --- a/node/store/key.go +++ b/node/store/key.go @@ -37,28 +37,28 @@ type KeyStore interface { } type PebbleKeyStore struct { - db *pebble.DB + db KVDB logger *zap.Logger } type PebbleProvingKeyIterator struct { - i *pebble.Iterator + i Iterator } type PebbleStagedProvingKeyIterator struct { - i *pebble.Iterator + i Iterator } type PebbleKeyBundleIterator struct { - i *pebble.Iterator + i Iterator } var pki = (*PebbleProvingKeyIterator)(nil) var spki = (*PebbleStagedProvingKeyIterator)(nil) var kbi = (*PebbleKeyBundleIterator)(nil) -var _ Iterator[*protobufs.InclusionCommitment] = pki -var _ Iterator[*protobufs.ProvingKeyAnnouncement] = spki -var _ Iterator[*protobufs.InclusionCommitment] = kbi +var _ TypedIterator[*protobufs.InclusionCommitment] = pki +var _ 
TypedIterator[*protobufs.ProvingKeyAnnouncement] = spki +var _ TypedIterator[*protobufs.InclusionCommitment] = kbi var _ KeyStore = (*PebbleKeyStore)(nil) func (p *PebbleProvingKeyIterator) First() bool { @@ -169,7 +169,7 @@ func (p *PebbleKeyBundleIterator) Close() error { return errors.Wrap(p.i.Close(), "closing iterator") } -func NewPebbleKeyStore(db *pebble.DB, logger *zap.Logger) *PebbleKeyStore { +func NewPebbleKeyStore(db KVDB, logger *zap.Logger) *PebbleKeyStore { return &PebbleKeyStore{ db, logger, @@ -217,9 +217,7 @@ func keyBundleEarliestKey(provingKey []byte) []byte { } func (p *PebbleKeyStore) NewTransaction() (Transaction, error) { - return &PebbleTransaction{ - b: p.db.NewBatch(), - }, nil + return p.db.NewBatch(), nil } // Stages a proving key for later inclusion on proof of meaningful work. @@ -235,9 +233,6 @@ func (p *PebbleKeyStore) StageProvingKey( err = p.db.Set( stagedProvingKeyKey(provingKey.PublicKey()), data, - &pebble.WriteOptions{ - Sync: true, - }, ) if err != nil { return errors.Wrap(err, "stage proving key") @@ -462,8 +457,8 @@ func (p *PebbleKeyStore) PutKeyBundle( } func (p *PebbleKeyStore) RangeProvingKeys() (*PebbleProvingKeyIterator, error) { - iter, err := p.db.NewIter(&pebble.IterOptions{ - LowerBound: provingKeyKey([]byte{ + iter, err := p.db.NewIter( + provingKeyKey([]byte{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -473,7 +468,7 @@ func (p *PebbleKeyStore) RangeProvingKeys() (*PebbleProvingKeyIterator, error) { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }), - UpperBound: provingKeyKey([]byte{ + provingKeyKey([]byte{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, @@ -483,7 +478,7 @@ func (p *PebbleKeyStore) RangeProvingKeys() (*PebbleProvingKeyIterator, error) { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, }), 
- }) + ) if err != nil { return nil, errors.Wrap(err, "range proving keys") } @@ -495,8 +490,8 @@ func (p *PebbleKeyStore) RangeStagedProvingKeys() ( *PebbleStagedProvingKeyIterator, error, ) { - iter, err := p.db.NewIter(&pebble.IterOptions{ - LowerBound: stagedProvingKeyKey([]byte{ + iter, err := p.db.NewIter( + stagedProvingKeyKey([]byte{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -506,7 +501,7 @@ func (p *PebbleKeyStore) RangeStagedProvingKeys() ( 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }), - UpperBound: stagedProvingKeyKey([]byte{ + stagedProvingKeyKey([]byte{ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, @@ -516,7 +511,7 @@ func (p *PebbleKeyStore) RangeStagedProvingKeys() ( 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, }), - }) + ) if err != nil { return nil, errors.Wrap(err, "range staged proving keys") } @@ -528,10 +523,10 @@ func (p *PebbleKeyStore) RangeKeyBundleKeys(provingKey []byte) ( *PebbleKeyBundleIterator, error, ) { - iter, err := p.db.NewIter(&pebble.IterOptions{ - LowerBound: keyBundleKey(provingKey, 0), - UpperBound: keyBundleKey(provingKey, 0xffffffffffffffff), - }) + iter, err := p.db.NewIter( + keyBundleKey(provingKey, 0), + keyBundleKey(provingKey, 0xffffffffffffffff), + ) if err != nil { return nil, errors.Wrap(err, "range key bundle keys") } diff --git a/node/store/kvdb.go b/node/store/kvdb.go new file mode 100644 index 0000000..01ebadf --- /dev/null +++ b/node/store/kvdb.go @@ -0,0 +1,16 @@ +package store + +import ( + "io" +) + +type KVDB interface { + Get(key []byte) ([]byte, io.Closer, error) + Set(key, value []byte) error + Delete(key []byte) error + NewBatch() Transaction + NewIter(lowerBound []byte, upperBound []byte) (Iterator, error) + Compact(start, end []byte, parallelize bool) error + Close() error + 
DeleteRange(start, end []byte) error +} diff --git a/node/store/pebble.go b/node/store/pebble.go index fc475b9..91b39fd 100644 --- a/node/store/pebble.go +++ b/node/store/pebble.go @@ -1,19 +1,67 @@ package store import ( + "io" + "github.com/cockroachdb/pebble" "source.quilibrium.com/quilibrium/monorepo/node/config" ) -func NewPebbleDB(config *config.DBConfig) *pebble.DB { +type PebbleDB struct { + db *pebble.DB +} + +func NewPebbleDB(config *config.DBConfig) *PebbleDB { db, err := pebble.Open(config.Path, &pebble.Options{}) if err != nil { panic(err) } - return db + return &PebbleDB{db} } +func (p *PebbleDB) Get(key []byte) ([]byte, io.Closer, error) { + return p.db.Get(key) +} + +func (p *PebbleDB) Set(key, value []byte) error { + return p.db.Set(key, value, &pebble.WriteOptions{Sync: true}) +} + +func (p *PebbleDB) Delete(key []byte) error { + return p.db.Delete(key, &pebble.WriteOptions{Sync: true}) +} + +func (p *PebbleDB) NewBatch() Transaction { + return &PebbleTransaction{ + b: p.db.NewBatch(), + } +} + +func (p *PebbleDB) NewIter(lowerBound []byte, upperBound []byte) ( + Iterator, + error, +) { + return p.db.NewIter(&pebble.IterOptions{ + LowerBound: lowerBound, + UpperBound: upperBound, + }) +} + +func (p *PebbleDB) Compact(start, end []byte, parallelize bool) error { + return p.db.Compact(start, end, parallelize) +} + +func (p *PebbleDB) Close() error { + return p.db.Close() +} + +func (p *PebbleDB) DeleteRange(start, end []byte) error { + return p.db.DeleteRange(start, end, &pebble.WriteOptions{Sync: true}) +} + +var _ KVDB = (*PebbleDB)(nil) + type Transaction interface { Set(key []byte, value []byte) error Commit() error diff --git a/pebble/.editorconfig b/pebble/.editorconfig new file mode 100644 index 0000000..0e4642a --- /dev/null +++ b/pebble/.editorconfig @@ -0,0 +1,10 @@ +# See http://editorconfig.org + +[*] +end_of_line = lf +insert_final_newline = true +charset = utf-8 + +# For non-go files, we indent with two spaces. 
In go files we indent +# with tabs but still set indent_size to control the github web viewer. +indent_size=2 diff --git a/pebble/.github/workflows/ci.yaml b/pebble/.github/workflows/ci.yaml new file mode 100644 index 0000000..a8fbb26 --- /dev/null +++ b/pebble/.github/workflows/ci.yaml @@ -0,0 +1,160 @@ +name: Test + +on: + push: + branches: + - master + - crl-release-* + pull_request: + branches: + - master + - crl-release-* + +jobs: + + linux: + name: go-linux + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Set up Go + uses: actions/setup-go@v4 + with: + go-version: "1.21" + + - run: make test generate + + linux-32bit: + name: go-linux-32bit + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Set up Go + uses: actions/setup-go@v4 + with: + go-version: "1.21" + + - run: GOARCH=386 make test + + linux-crossversion: + name: go-linux-crossversion + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Set up Go + uses: actions/setup-go@v4 + with: + go-version: "1.21" + + - run: make crossversion-meta + + linux-race: + name: go-linux-race + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Set up Go + uses: actions/setup-go@v4 + with: + go-version: "1.21" + + - run: make testrace TAGS= + + linux-no-invariants: + name: go-linux-no-invariants + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Set up Go + uses: actions/setup-go@v4 + with: + go-version: "1.21" + + - run: make test TAGS= + + linux-no-cgo: + name: go-linux-no-cgo + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Set up Go + uses: actions/setup-go@v4 + with: + go-version: "1.21" + + - run: CGO_ENABLED=0 make test TAGS= + + darwin: + name: go-macos + runs-on: macos-12 + steps: + - uses: actions/checkout@v2 + + - name: Set up Go + uses: actions/setup-go@v4 + with: + go-version: "1.21" + + - run: make test + + windows: + name: go-windows + runs-on: windows-latest + 
steps: + - uses: actions/checkout@v2 + + - name: Set up Go + uses: actions/setup-go@v4 + with: + go-version: "1.21" + + - run: go test -v ./... + + bsds: + name: go-bsds + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Set up Go + uses: actions/setup-go@v4 + with: + go-version: "1.21" + + - name: FreeBSD build + env: + GOOS: freebsd + run: go build -v ./... + + - name: NetBSD build + env: + GOOS: netbsd + run: go build -v ./... + + - name: OpenBSD build + env: + GOOS: openbsd + run: go build -v ./... + + go-lint-checks: + name: go-lint-checks + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Set up Go + uses: actions/setup-go@v4 + with: + go-version: "1.21" + + - name: mod-tidy-check + run: make mod-tidy-check + + - name: format-check + run: make format-check diff --git a/pebble/.github/workflows/code-cover-gen.yaml b/pebble/.github/workflows/code-cover-gen.yaml new file mode 100644 index 0000000..e6ec42a --- /dev/null +++ b/pebble/.github/workflows/code-cover-gen.yaml @@ -0,0 +1,71 @@ +name: PR code coverage (generate) + +on: + # This workflow does not have access to secrets because it runs on top of + # potentially unsafe changes. + pull_request: + types: [ opened, reopened, synchronize ] + branches: [ master ] + +jobs: + # The results of this job are uploaded as artifacts. A separate job will + # download the artifacts and upload them to a GCS bucket. + code-cover-gen: + runs-on: ubuntu-latest + env: + PR: ${{ github.event.pull_request.number }} + HEAD_SHA: ${{ github.event.pull_request.head.sha }} + GH_TOKEN: ${{ github.token }} + steps: + - uses: actions/checkout@v3 + with: + # By default, checkout merges the PR into the current master. + # Instead, we want to check out the PR as-is. + ref: ${{ github.event.pull_request.head.sha }} + # Fetch all branches and history (we'll need the origin/master ref and + # the base commit). 
+ fetch-depth: 0 + + - name: Set up Go + uses: actions/setup-go@v2 + with: + go-version: "1.21" + + - name: Get list of changed packages + shell: bash + run: | + set -euxo pipefail + # To get the base commit, we get the number of commits in the PR. + # Note that github.event.pull_request.base.sha is not what we want, + # that is the tip of master and not necessarily the PR fork point. + NUM_COMMITS=$(gh pr view $PR --json commits --jq '.commits | length') + BASE_SHA=$(git rev-parse HEAD~${NUM_COMMITS}) + CHANGED_PKGS=$(scripts/changed-go-pkgs.sh ${BASE_SHA} ${HEAD_SHA}) + echo "BASE_SHA=${BASE_SHA}" >> "${GITHUB_ENV}" + echo "CHANGED_PKGS=${CHANGED_PKGS}" >> "${GITHUB_ENV}" + + - name: Generate "after" coverage + shell: bash + run: | + set -euxo pipefail + CHANGED_PKGS='${{ env.CHANGED_PKGS }}' + mkdir -p artifacts + # Make a copy of the script so that the "before" run below uses the + # same version. + cp scripts/pr-codecov-run-tests.sh ${RUNNER_TEMP}/ + ${RUNNER_TEMP}/pr-codecov-run-tests.sh artifacts/cover-${PR}-${HEAD_SHA}.json "${CHANGED_PKGS}" + + - name: Generate "before" coverage + shell: bash + run: | + set -euxo pipefail + BASE_SHA='${{ env.BASE_SHA }}' + CHANGED_PKGS='${{ env.CHANGED_PKGS }}' + git checkout -f ${BASE_SHA} + ${RUNNER_TEMP}/pr-codecov-run-tests.sh artifacts/cover-${PR}-${BASE_SHA}.json "${CHANGED_PKGS}" + + - name: Upload artifacts + uses: actions/upload-artifact@v2 + with: + name: cover + path: artifacts/cover-*.json diff --git a/pebble/.github/workflows/code-cover-publish.yaml b/pebble/.github/workflows/code-cover-publish.yaml new file mode 100644 index 0000000..ba5f63c --- /dev/null +++ b/pebble/.github/workflows/code-cover-publish.yaml @@ -0,0 +1,55 @@ +name: PR code coverage (publish) + +on: + workflow_run: + workflows: [ "PR code coverage (generate)" ] + types: [ "completed" ] + + +jobs: + # This job downloads the artifacts genearted by the code-cover-gen job and + # uploads them to a GCS bucket, from where Reviewable can access 
them. + code-cover-publish: + runs-on: ubuntu-latest + if: > + github.event.workflow_run.event == 'pull_request' && + github.event.workflow_run.conclusion == 'success' + steps: + - name: 'Download artifact' + uses: actions/github-script@v3.1.0 + with: + script: | + var artifacts = await github.actions.listWorkflowRunArtifacts({ + owner: context.repo.owner, + repo: context.repo.repo, + run_id: ${{github.event.workflow_run.id }}, + }); + var matchArtifact = artifacts.data.artifacts.filter((artifact) => { + return artifact.name == "cover" + })[0]; + var download = await github.actions.downloadArtifact({ + owner: context.repo.owner, + repo: context.repo.repo, + artifact_id: matchArtifact.id, + archive_format: 'zip', + }); + var fs = require('fs'); + fs.writeFileSync('${{github.workspace}}/cover.zip', Buffer.from(download.data)); + + - run: | + mkdir -p cover + unzip cover.zip -d cover + + - name: 'Authenticate to Google Cloud' + uses: 'google-github-actions/auth@v1' + with: + credentials_json: '${{ secrets.CODECOVER_SERVICE_ACCOUNT_KEY }}' + + - name: 'Upload to GCS' + uses: 'google-github-actions/upload-cloud-storage@v1' + with: + path: 'cover' + glob: '**/cover-*.json' + parent: false + destination: 'crl-codecover-public/pr-pebble/' + process_gcloudignore: false diff --git a/pebble/.github/workflows/nightly-code-cover.yaml b/pebble/.github/workflows/nightly-code-cover.yaml new file mode 100644 index 0000000..5c444c3 --- /dev/null +++ b/pebble/.github/workflows/nightly-code-cover.yaml @@ -0,0 +1,48 @@ +name: Nightly code coverage + +on: + schedule: + - cron: '00 08 * * * ' + workflow_dispatch: + +jobs: + coverage-gen-and-publish: + runs-on: ubuntu-latest + env: + GH_TOKEN: ${{ github.token }} + + steps: + - uses: actions/checkout@v3 + with: + # By default, checkout merges the PR into the current master. + # Instead, we want to check out the PR as-is. 
+ ref: ${{ github.event.pull_request.head.sha }} + # Fetch all branches and history (we'll need the origin/master ref and + # the base commit). + fetch-depth: 0 + + - name: Set up Go + uses: actions/setup-go@v2 + with: + go-version: "1.21" + + - name: Generate coverage + run: scripts/code-coverage.sh + + - name: Install lcov + run: | + sudo apt-get update + sudo apt-get install lcov + + - name: 'Authenticate to Google Cloud' + uses: 'google-github-actions/auth@v1' + with: + credentials_json: '${{ secrets.CODECOVER_SERVICE_ACCOUNT_KEY }}' + + - name: 'Set up Cloud SDK' + uses: 'google-github-actions/setup-gcloud@v1' + with: + version: '>= 363.0.0' + + - name: Publish coverage + run: scripts/code-coverage-publish.sh diff --git a/pebble/.github/workflows/sanitizers.yaml b/pebble/.github/workflows/sanitizers.yaml new file mode 100644 index 0000000..a9da116 --- /dev/null +++ b/pebble/.github/workflows/sanitizers.yaml @@ -0,0 +1,32 @@ +name: Sanitizers + +on: + schedule: + - cron: "0 0 * * *" # Midnight UTC, daily. 
+ +jobs: + linux-asan: + name: go-linux-asan + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Set up Go + uses: actions/setup-go@v2 + with: + go-version: "1.21" + + - run: make testasan + + linux-msan: + name: go-linux-msan + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Set up Go + uses: actions/setup-go@v2 + with: + go-version: "1.21" + + - run: make testmsan diff --git a/pebble/.github/workflows/stale.yml b/pebble/.github/workflows/stale.yml new file mode 100644 index 0000000..92d39cf --- /dev/null +++ b/pebble/.github/workflows/stale.yml @@ -0,0 +1,34 @@ +name: Mark stale issues and pull requests + +on: + schedule: + - cron: "0 11 * * 1-4" + workflow_dispatch: + +jobs: + stale: + runs-on: ubuntu-latest + permissions: + issues: write + pull-requests: write + steps: + - uses: actions/stale@v3 + with: + operations-per-run: 1000 + repo-token: ${{ secrets.GITHUB_TOKEN }} + stale-issue-message: | + We have marked this issue as stale because it has been inactive for + 18 months. If this issue is still relevant, removing the stale label + or adding a comment will keep it active. Otherwise, we'll close it + in 10 days to keep the issue queue tidy. Thank you for your + contribution to Pebble! + stale-pr-message: 'Stale pull request message' + stale-issue-label: 'no-issue-activity' + stale-pr-label: 'no-pr-activity' + close-issue-label: 'X-stale' + close-pr-label: 'X-stale' + # Disable this for PR's, by setting a very high bar + days-before-pr-stale: 99999 + days-before-issue-stale: 540 + days-before-close: 10 + exempt-issue-labels: 'X-nostale' diff --git a/pebble/.gitignore b/pebble/.gitignore new file mode 100644 index 0000000..87ef192 --- /dev/null +++ b/pebble/.gitignore @@ -0,0 +1,9 @@ +# Github action artifacts. +artifacts +# Profiling artifacts. 
+cpu.*.prof +heap.prof +mutex.prof +coverprofile.out +# Testing artifacts +meta.*.test diff --git a/pebble/LICENSE b/pebble/LICENSE new file mode 100644 index 0000000..fec05ce --- /dev/null +++ b/pebble/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2011 The LevelDB-Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/pebble/Makefile b/pebble/Makefile new file mode 100644 index 0000000..e430ff2 --- /dev/null +++ b/pebble/Makefile @@ -0,0 +1,131 @@ +GO := go +PKG := ./... +GOFLAGS := +STRESSFLAGS := +TAGS := invariants +TESTS := . 
+COVER_PROFILE := coverprofile.out + +.PHONY: all +all: + @echo usage: + @echo " make test" + @echo " make testrace" + @echo " make stress" + @echo " make stressrace" + @echo " make stressmeta" + @echo " make crossversion-meta" + @echo " make testcoverage" + @echo " make mod-update" + @echo " make generate" + @echo " make generate-test-data" + @echo " make clean" + +override testflags := +.PHONY: test +test: + ${GO} test -tags '$(TAGS)' ${testflags} -run ${TESTS} ${PKG} + +.PHONY: testcoverage +testcoverage: + ${GO} test -tags '$(TAGS)' ${testflags} -run ${TESTS} ${PKG} -coverprofile ${COVER_PROFILE} + +.PHONY: testrace +testrace: testflags += -race -timeout 20m +testrace: test + +testasan: testflags += -asan -timeout 20m +testasan: test + +testmsan: export CC=clang +testmsan: testflags += -msan -timeout 20m +testmsan: test + +.PHONY: testobjiotracing +testobjiotracing: + ${GO} test -tags '$(TAGS) pebble_obj_io_tracing' ${testflags} -run ${TESTS} ./objstorage/objstorageprovider/objiotracing + +.PHONY: lint +lint: + ${GO} test -tags '$(TAGS)' ${testflags} -run ${TESTS} ./internal/lint + +.PHONY: stress stressrace +stressrace: testflags += -race +stress stressrace: testflags += -exec 'stress ${STRESSFLAGS}' -timeout 0 -test.v +stress stressrace: test + +.PHONY: stressmeta +stressmeta: override PKG = ./internal/metamorphic +stressmeta: override STRESSFLAGS += -p 1 +stressmeta: override TESTS = TestMeta$$ +stressmeta: stress + +.PHONY: crossversion-meta +crossversion-meta: + $(eval LATEST_RELEASE := $(shell git fetch origin && git branch -r --list '*/crl-release-*' | grep -o 'crl-release-.*$$' | sort | tail -1)) + git checkout ${LATEST_RELEASE}; \ + ${GO} test -c ./internal/metamorphic -o './internal/metamorphic/crossversion/${LATEST_RELEASE}.test'; \ + git checkout -; \ + ${GO} test -c ./internal/metamorphic -o './internal/metamorphic/crossversion/head.test'; \ + ${GO} test -tags '$(TAGS)' ${testflags} -v -run 'TestMetaCrossVersion' ./internal/metamorphic/crossversion 
--version '${LATEST_RELEASE},${LATEST_RELEASE},${LATEST_RELEASE}.test' --version 'HEAD,HEAD,./head.test' + +.PHONY: stress-crossversion +stress-crossversion: + STRESS=1 ./scripts/run-crossversion-meta.sh crl-release-21.2 crl-release-22.1 crl-release-22.2 crl-release-23.1 master + +.PHONY: generate +generate: + ${GO} generate ${PKG} + +generate: + +# Note that the output of generate-test-data is not deterministic. This should +# only be run manually as needed. +.PHONY: generate-test-data +generate-test-data: + ${GO} run -tags make_incorrect_manifests ./tool/make_incorrect_manifests.go + ${GO} run -tags make_test_find_db ./tool/make_test_find_db.go + ${GO} run -tags make_test_sstables ./tool/make_test_sstables.go + ${GO} run -tags make_test_remotecat ./tool/make_test_remotecat.go + +mod-update: + ${GO} get -u + ${GO} mod tidy + +.PHONY: clean +clean: + rm -f $(patsubst %,%.test,$(notdir $(shell go list ${PKG}))) + +git_dirty := $(shell git status -s) + +.PHONY: git-clean-check +git-clean-check: +ifneq ($(git_dirty),) + @echo "Git repository is dirty!" + @false +else + @echo "Git repository is clean." +endif + +.PHONY: mod-tidy-check +mod-tidy-check: +ifneq ($(git_dirty),) + $(error mod-tidy-check must be invoked on a clean repository) +endif + @${GO} mod tidy + $(MAKE) git-clean-check + +# TODO(radu): switch back to @latest once bogus doc changes are +# addressed; see https://github.com/cockroachdb/crlfmt/pull/44 +.PHONY: format +format: + go install github.com/cockroachdb/crlfmt@44a36ec7 && crlfmt -w -tab 2 . 
+ +.PHONY: format-check +format-check: +ifneq ($(git_dirty),) + $(error format-check must be invoked on a clean repository) +endif + $(MAKE) format + git diff + $(MAKE) git-clean-check diff --git a/pebble/README.md b/pebble/README.md new file mode 100644 index 0000000..c09e45d --- /dev/null +++ b/pebble/README.md @@ -0,0 +1,226 @@ +# Pebble [![Build Status](https://github.com/cockroachdb/pebble/actions/workflows/ci.yaml/badge.svg?branch=master)](https://github.com/cockroachdb/pebble/actions/workflows/ci.yaml) [![GoDoc](https://godoc.org/github.com/cockroachdb/pebble?status.svg)](https://godoc.org/github.com/cockroachdb/pebble) [Coverage](https://storage.googleapis.com/crl-codecover-public/pebble/index.html) + +#### [Nightly benchmarks](https://cockroachdb.github.io/pebble/) + +Pebble is a LevelDB/RocksDB inspired key-value store focused on +performance and internal usage by CockroachDB. Pebble inherits the +RocksDB file formats and a few extensions such as range deletion +tombstones, table-level bloom filters, and updates to the MANIFEST +format. 
+ +Pebble intentionally does not aspire to include every feature in RocksDB and +specifically targets the use case and feature set needed by CockroachDB: + +* Block-based tables +* Checkpoints +* Indexed batches +* Iterator options (lower/upper bound, table filter) +* Level-based compaction +* Manual compaction +* Merge operator +* Prefix bloom filters +* Prefix iteration +* Range deletion tombstones +* Reverse iteration +* SSTable ingestion +* Single delete +* Snapshots +* Table-level bloom filters + +RocksDB has a large number of features that are not implemented in +Pebble: + +* Backups +* Column families +* Delete files in range +* FIFO compaction style +* Forward iterator / tailing iterator +* Hash table format +* Memtable bloom filter +* Persistent cache +* Pin iterator key / value +* Plain table format +* SSTable ingest-behind +* Sub-compactions +* Transactions +* Universal compaction style + +***WARNING***: Pebble may silently corrupt data or behave incorrectly if +used with a RocksDB database that uses a feature Pebble doesn't +support. Caveat emptor! + +## Production Ready + +Pebble was introduced as an alternative storage engine to RocksDB in +CockroachDB v20.1 (released May 2020) and was used in production +successfully at that time. Pebble was made the default storage engine +in CockroachDB v20.2 (released Nov 2020). Pebble is being used in +production by users of CockroachDB at scale and is considered stable +and production ready. + +## Advantages + +Pebble offers several improvements over RocksDB: + +* Faster reverse iteration via backwards links in the memtable's + skiplist. +* Faster commit pipeline that achieves better concurrency. +* Seamless merged iteration of indexed batches. The mutations in the + batch conceptually occupy another memtable level. +* L0 sublevels and flush splitting for concurrent compactions out of L0 and + reduced read-amplification during heavy write load. 
+* Faster LSM edits in LSMs with large numbers of sstables through use of a + copy-on-write B-tree to hold file metadata. +* Delete-only compactions that drop whole sstables that fall within the bounds + of a range deletion. +* Block-property collectors and filters that enable iterators to skip tables, + index blocks and data blocks that are irrelevant, according to user-defined + properties over key-value pairs. +* Range keys API, allowing KV pairs defined over a range of keyspace with + user-defined semantics and interleaved during iteration. +* Smaller, more approachable code base. + +See the [Pebble vs RocksDB: Implementation +Differences](docs/rocksdb.md) doc for more details on implementation +differences. + +## RocksDB Compatibility + +Pebble strives for forward compatibility with RocksDB 6.2.1 (the latest +version of RocksDB used by CockroachDB). Forward compatibility means +that a DB generated by RocksDB can be used by Pebble. Currently, Pebble +provides bidirectional compatibility with RocksDB (a Pebble generated DB +can be used by RocksDB) when using its FormatMostCompatible format. New +functionality that is backwards incompatible is gated behind new format +major versions. In general, Pebble only provides compatibility with the +subset of functionality and configuration used by CockroachDB. The scope +of RocksDB functionality and configuration is too large to adequately +test and document all the incompatibilities. The list below contains +known incompatibilities. + +* Pebble's use of WAL recycling is only compatible with RocksDB's + `kTolerateCorruptedTailRecords` WAL recovery mode. Older versions of + RocksDB would automatically map incompatible WAL recovery modes to + `kTolerateCorruptedTailRecords`. New versions of RocksDB will + disable WAL recycling. +* Column families. Pebble does not support column families, nor does + it attempt to detect their usage when opening a DB that may contain + them. +* Hash table format. 
Pebble does not support the hash table sstable
+  format.
+* Plain table format. Pebble does not support the plain table sstable
+  format.
+* SSTable format version 3 and 4. Pebble does not support version 3
+  and version 4 format sstables. The sstable format version is
+  controlled by the `BlockBasedTableOptions::format_version` option.
+  See [#97](https://github.com/cockroachdb/pebble/issues/97).
+
+## Format major versions
+
+Over time Pebble has introduced new physical file formats. Backwards
+incompatible changes are made through the introduction of 'format major
+versions'. By default, when Pebble opens a database, it defaults to
+`FormatMostCompatible`. This version is bi-directionally compatible with RocksDB
+6.2.1 (with the caveats described above).
+
+To opt into new formats, a user may set `FormatMajorVersion` on the
+[`Options`](https://pkg.go.dev/github.com/cockroachdb/pebble#Options)
+supplied to
+[`Open`](https://pkg.go.dev/github.com/cockroachdb/pebble#Open), or
+upgrade the format major version at runtime using
+[`DB.RatchetFormatMajorVersion`](https://pkg.go.dev/github.com/cockroachdb/pebble#DB.RatchetFormatMajorVersion).
+Format major version upgrades are permanent; there is no option to
+return to an earlier format.
+ +The table below outlines the history of format major versions: + +| Name | Value | Migration | +|------------------------------------|-------|------------| +| FormatMostCompatible | 1 | No | +| FormatVersioned | 3 | No | +| FormatSetWithDelete | 4 | No | +| FormatBlockPropertyCollector | 5 | No | +| FormatSplitUserKeysMarked | 6 | Background | +| FormatSplitUserKeysMarkedCompacted | 7 | Blocking | +| FormatRangeKeys | 8 | No | +| FormatMinTableFormatPebblev1 | 9 | No | +| FormatPrePebblev1Marked | 10 | Background | +| FormatSSTableValueBlocks | 12 | No | +| FormatFlushableIngest | 13 | No | +| FormatPrePebblev1MarkedCompacted | 14 | Blocking | +| FormatDeleteSizedAndObsolete | 15 | No | +| FormatVirtualSSTables | 16 | No | + +Upgrading to a format major version with 'Background' in the migration +column may trigger background activity to rewrite physical file +formats, typically through compactions. Upgrading to a format major +version with 'Blocking' in the migration column will block until a +migration is complete. The database may continue to serve reads and +writes if upgrading a live database through +`RatchetFormatMajorVersion`, but the method call will not return until +the migration is complete. + +For reference, the table below lists the range of supported Pebble format major +versions for CockroachDB releases. 
+
+| CockroachDB release | Earliest supported | Latest supported |
+|---------------------|------------------------------------|---------------------------|
+| 20.1 through 21.1 | FormatMostCompatible | FormatMostCompatible |
+| 21.2 | FormatMostCompatible | FormatSetWithDelete |
+| 22.1 | FormatMostCompatible | FormatSplitUserKeysMarked |
+| 22.2 | FormatMostCompatible | FormatPrePebblev1Marked |
+| 23.1 | FormatSplitUserKeysMarkedCompacted | FormatFlushableIngest |
+| 23.2 | FormatSplitUserKeysMarkedCompacted | FormatVirtualSSTables |
+| 24.1 plan | FormatSSTableValueBlocks | |
+
+## Pedigree
+
+Pebble is based on the incomplete Go version of LevelDB:
+
+https://github.com/golang/leveldb
+
+The Go version of LevelDB is based on the C++ original:
+
+https://github.com/google/leveldb
+
+Optimizations and inspiration were drawn from RocksDB:
+
+https://github.com/facebook/rocksdb
+
+## Getting Started
+
+### Example Code
+
+```go
+package main
+
+import (
+	"fmt"
+	"log"
+
+	"github.com/cockroachdb/pebble"
+)
+
+func main() {
+	db, err := pebble.Open("demo", &pebble.Options{})
+	if err != nil {
+		log.Fatal(err)
+	}
+	key := []byte("hello")
+	if err := db.Set(key, []byte("world"), pebble.Sync); err != nil {
+		log.Fatal(err)
+	}
+	value, closer, err := db.Get(key)
+	if err != nil {
+		log.Fatal(err)
+	}
+	fmt.Printf("%s %s\n", key, value)
+	if err := closer.Close(); err != nil {
+		log.Fatal(err)
+	}
+	if err := db.Close(); err != nil {
+		log.Fatal(err)
+	}
+}
+```
diff --git a/pebble/batch.go b/pebble/batch.go
new file mode 100644
index 0000000..c3dbfcc
--- /dev/null
+++ b/pebble/batch.go
@@ -0,0 +1,2312 @@
+// Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use
+// of this source code is governed by a BSD-style license that can be found in
+// the LICENSE file.
+ +package pebble + +import ( + "context" + "encoding/binary" + "fmt" + "io" + "math" + "sort" + "sync" + "sync/atomic" + "time" + "unsafe" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/batchskl" + "github.com/cockroachdb/pebble/internal/humanize" + "github.com/cockroachdb/pebble/internal/keyspan" + "github.com/cockroachdb/pebble/internal/private" + "github.com/cockroachdb/pebble/internal/rangedel" + "github.com/cockroachdb/pebble/internal/rangekey" + "github.com/cockroachdb/pebble/internal/rawalloc" +) + +const ( + batchCountOffset = 8 + batchHeaderLen = 12 + batchInitialSize = 1 << 10 // 1 KB + batchMaxRetainedSize = 1 << 20 // 1 MB + invalidBatchCount = 1<<32 - 1 + maxVarintLen32 = 5 +) + +// ErrNotIndexed means that a read operation on a batch failed because the +// batch is not indexed and thus doesn't support reads. +var ErrNotIndexed = errors.New("pebble: batch not indexed") + +// ErrInvalidBatch indicates that a batch is invalid or otherwise corrupted. +var ErrInvalidBatch = base.MarkCorruptionError(errors.New("pebble: invalid batch")) + +// ErrBatchTooLarge indicates that a batch is invalid or otherwise corrupted. +var ErrBatchTooLarge = base.MarkCorruptionError(errors.Newf("pebble: batch too large: >= %s", humanize.Bytes.Uint64(maxBatchSize))) + +// DeferredBatchOp represents a batch operation (eg. set, merge, delete) that is +// being inserted into the batch. Indexing is not performed on the specified key +// until Finish is called, hence the name deferred. This struct lets the caller +// copy or encode keys/values directly into the batch representation instead of +// copying into an intermediary buffer then having pebble.Batch copy off of it. +type DeferredBatchOp struct { + index *batchskl.Skiplist + + // Key and Value point to parts of the binary batch representation where + // keys and values should be encoded/copied into. 
len(Key) and len(Value) + // bytes must be copied into these slices respectively before calling + // Finish(). Changing where these slices point to is not allowed. + Key, Value []byte + offset uint32 +} + +// Finish completes the addition of this batch operation, and adds it to the +// index if necessary. Must be called once (and exactly once) keys/values +// have been filled into Key and Value. Not calling Finish or not +// copying/encoding keys will result in an incomplete index, and calling Finish +// twice may result in a panic. +func (d DeferredBatchOp) Finish() error { + if d.index != nil { + if err := d.index.Add(d.offset); err != nil { + return err + } + } + return nil +} + +// A Batch is a sequence of Sets, Merges, Deletes, DeleteRanges, RangeKeySets, +// RangeKeyUnsets, and/or RangeKeyDeletes that are applied atomically. Batch +// implements the Reader interface, but only an indexed batch supports reading +// (without error) via Get or NewIter. A non-indexed batch will return +// ErrNotIndexed when read from. A batch is not safe for concurrent use, and +// consumers should use a batch per goroutine or provide their own +// synchronization. +// +// # Indexing +// +// Batches can be optionally indexed (see DB.NewIndexedBatch). An indexed batch +// allows iteration via an Iterator (see Batch.NewIter). The iterator provides +// a merged view of the operations in the batch and the underlying +// database. This is implemented by treating the batch as an additional layer +// in the LSM where every entry in the batch is considered newer than any entry +// in the underlying database (batch entries have the InternalKeySeqNumBatch +// bit set). By treating the batch as an additional layer in the LSM, iteration +// supports all batch operations (i.e. Set, Merge, Delete, DeleteRange, +// RangeKeySet, RangeKeyUnset, RangeKeyDelete) with minimal effort. 
+// +// The same key can be operated on multiple times in a batch, though only the +// latest operation will be visible. For example, Put("a", "b"), Delete("a") +// will cause the key "a" to not be visible in the batch. Put("a", "b"), +// Put("a", "c") will cause a read of "a" to return the value "c". +// +// The batch index is implemented via an skiplist (internal/batchskl). While +// the skiplist implementation is very fast, inserting into an indexed batch is +// significantly slower than inserting into a non-indexed batch. Only use an +// indexed batch if you require reading from it. +// +// # Atomic commit +// +// The operations in a batch are persisted by calling Batch.Commit which is +// equivalent to calling DB.Apply(batch). A batch is committed atomically by +// writing the internal batch representation to the WAL, adding all of the +// batch operations to the memtable associated with the WAL, and then +// incrementing the visible sequence number so that subsequent reads can see +// the effects of the batch operations. If WriteOptions.Sync is true, a call to +// Batch.Commit will guarantee that the batch is persisted to disk before +// returning. See commitPipeline for more on the implementation details. +// +// # Large batches +// +// The size of a batch is limited only by available memory (be aware that +// indexed batches require considerably additional memory for the skiplist +// structure). A given WAL file has a single memtable associated with it (this +// restriction could be removed, but doing so is onerous and complex). And a +// memtable has a fixed size due to the underlying fixed size arena. Note that +// this differs from RocksDB where a memtable can grow arbitrarily large using +// a list of arena chunks. In RocksDB this is accomplished by storing pointers +// in the arena memory, but that isn't possible in Go. 
+// +// During Batch.Commit, a batch which is larger than a threshold (> +// MemTableSize/2) is wrapped in a flushableBatch and inserted into the queue +// of memtables. A flushableBatch forces WAL to be rotated, but that happens +// anyways when the memtable becomes full so this does not cause significant +// WAL churn. Because the flushableBatch is readable as another layer in the +// LSM, Batch.Commit returns as soon as the flushableBatch has been added to +// the queue of memtables. +// +// Internally, a flushableBatch provides Iterator support by sorting the batch +// contents (the batch is sorted once, when it is added to the memtable +// queue). Sorting the batch contents and insertion of the contents into a +// memtable have the same big-O time, but the constant factor dominates +// here. Sorting is significantly faster and uses significantly less memory. +// +// # Internal representation +// +// The internal batch representation is a contiguous byte buffer with a fixed +// 12-byte header, followed by a series of records. +// +// +-------------+------------+--- ... ---+ +// | SeqNum (8B) | Count (4B) | Entries | +// +-------------+------------+--- ... ---+ +// +// Each record has a 1-byte kind tag prefix, followed by 1 or 2 length prefixed +// strings (varstring): +// +// +-----------+-----------------+-------------------+ +// | Kind (1B) | Key (varstring) | Value (varstring) | +// +-----------+-----------------+-------------------+ +// +// A varstring is a varint32 followed by N bytes of data. The Kind tags are +// exactly those specified by InternalKeyKind. 
The following table shows the +// format for records of each kind: +// +// InternalKeyKindDelete varstring +// InternalKeyKindLogData varstring +// InternalKeyKindIngestSST varstring +// InternalKeyKindSet varstring varstring +// InternalKeyKindMerge varstring varstring +// InternalKeyKindRangeDelete varstring varstring +// InternalKeyKindRangeKeySet varstring varstring +// InternalKeyKindRangeKeyUnset varstring varstring +// InternalKeyKindRangeKeyDelete varstring varstring +// +// The intuitive understanding here are that the arguments to Delete, Set, +// Merge, DeleteRange and RangeKeyDelete are encoded into the batch. The +// RangeKeySet and RangeKeyUnset operations are slightly more complicated, +// encoding their end key, suffix and value [in the case of RangeKeySet] within +// the Value varstring. For more information on the value encoding for +// RangeKeySet and RangeKeyUnset, see the internal/rangekey package. +// +// The internal batch representation is the on disk format for a batch in the +// WAL, and thus stable. New record kinds may be added, but the existing ones +// will not be modified. +type Batch struct { + batchInternal + applied atomic.Bool +} + +// batchInternal contains the set of fields within Batch that are non-atomic and +// capable of being reset using a *b = batchInternal{} struct copy. +type batchInternal struct { + // Data is the wire format of a batch's log entry: + // - 8 bytes for a sequence number of the first batch element, + // or zeroes if the batch has not yet been applied, + // - 4 bytes for the count: the number of elements in the batch, + // or "\xff\xff\xff\xff" if the batch is invalid, + // - count elements, being: + // - one byte for the kind + // - the varint-string user key, + // - the varint-string value (if kind != delete). + // The sequence number and count are stored in little-endian order. + // + // The data field can be (but is not guaranteed to be) nil for new + // batches. 
Large batches will set the data field to nil when committed as + // the data has been moved to a flushableBatch and inserted into the queue of + // memtables. + data []byte + cmp Compare + formatKey base.FormatKey + abbreviatedKey AbbreviatedKey + + // An upper bound on required space to add this batch to a memtable. + // Note that although batches are limited to 4 GiB in size, that limit + // applies to len(data), not the memtable size. The upper bound on the + // size of a memtable node is larger than the overhead of the batch's log + // encoding, so memTableSize is larger than len(data) and may overflow a + // uint32. + memTableSize uint64 + + // The db to which the batch will be committed. Do not change this field + // after the batch has been created as it might invalidate internal state. + // Batch.memTableSize is only refreshed if Batch.db is set. Setting db to + // nil once it has been set implies that the Batch has encountered an error. + db *DB + + // The count of records in the batch. This count will be stored in the batch + // data whenever Repr() is called. + count uint64 + + // The count of range deletions in the batch. Updated every time a range + // deletion is added. + countRangeDels uint64 + + // The count of range key sets, unsets and deletes in the batch. Updated + // every time a RANGEKEYSET, RANGEKEYUNSET or RANGEKEYDEL key is added. + countRangeKeys uint64 + + // A deferredOp struct, stored in the Batch so that a pointer can be returned + // from the *Deferred() methods rather than a value. + deferredOp DeferredBatchOp + + // An optional skiplist keyed by offset into data of the entry. + index *batchskl.Skiplist + rangeDelIndex *batchskl.Skiplist + rangeKeyIndex *batchskl.Skiplist + + // Fragmented range deletion tombstones. Cached the first time a range + // deletion iterator is requested. The cache is invalidated whenever a new + // range deletion is added to the batch. 
This cache can only be used when + // opening an iterator to read at a batch sequence number >= + // tombstonesSeqNum. This is the case for all new iterators created over a + // batch but it's not the case for all cloned iterators. + tombstones []keyspan.Span + tombstonesSeqNum uint64 + + // Fragmented range key spans. Cached the first time a range key iterator is + // requested. The cache is invalidated whenever a new range key + // (RangeKey{Set,Unset,Del}) is added to the batch. This cache can only be + // used when opening an iterator to read at a batch sequence number >= + // tombstonesSeqNum. This is the case for all new iterators created over a + // batch but it's not the case for all cloned iterators. + rangeKeys []keyspan.Span + rangeKeysSeqNum uint64 + + // The flushableBatch wrapper if the batch is too large to fit in the + // memtable. + flushable *flushableBatch + + // minimumFormatMajorVersion indicates the format major version required in + // order to commit this batch. If an operation requires a particular format + // major version, it ratchets the batch's minimumFormatMajorVersion. When + // the batch is committed, this is validated against the database's current + // format major version. + minimumFormatMajorVersion FormatMajorVersion + + // Synchronous Apply uses the commit WaitGroup for both publishing the + // seqnum and waiting for the WAL fsync (if needed). Asynchronous + // ApplyNoSyncWait, which implies WriteOptions.Sync is true, uses the commit + // WaitGroup for publishing the seqnum and the fsyncWait WaitGroup for + // waiting for the WAL fsync. 
+ // + // TODO(sumeer): if we find that ApplyNoSyncWait in conjunction with + // SyncWait is causing higher memory usage because of the time duration + // between when the sync is already done, and a goroutine calls SyncWait + // (followed by Batch.Close), we could separate out {fsyncWait, commitErr} + // into a separate struct that is allocated separately (using another + // sync.Pool), and only that struct needs to outlive Batch.Close (which + // could then be called immediately after ApplyNoSyncWait). commitStats + // will also need to be in this separate struct. + commit sync.WaitGroup + fsyncWait sync.WaitGroup + + commitStats BatchCommitStats + + commitErr error + + // Position bools together to reduce the sizeof the struct. + + // ingestedSSTBatch indicates that the batch contains one or more key kinds + // of InternalKeyKindIngestSST. If the batch contains key kinds of IngestSST + // then it will only contain key kinds of IngestSST. + ingestedSSTBatch bool + + // committing is set to true when a batch begins to commit. It's used to + // ensure the batch is not mutated concurrently. It is not an atomic + // deliberately, so as to avoid the overhead on batch mutations. This is + // okay, because under correct usage this field will never be accessed + // concurrently. It's only under incorrect usage the memory accesses of this + // variable may violate memory safety. Since we don't use atomics here, + // false negatives are possible. + committing bool +} + +// BatchCommitStats exposes stats related to committing a batch. +// +// NB: there is no Pebble internal tracing (using LoggerAndTracer) of slow +// batch commits. The caller can use these stats to do their own tracing as +// needed. +type BatchCommitStats struct { + // TotalDuration is the time spent in DB.{Apply,ApplyNoSyncWait} or + // Batch.Commit, plus the time waiting in Batch.SyncWait. 
If there is a gap + // between calling ApplyNoSyncWait and calling SyncWait, that gap could + // include some duration in which real work was being done for the commit + // and will not be included here. This missing time is considered acceptable + // since the goal of these stats is to understand user-facing latency. + // + // TotalDuration includes time spent in various queues both inside Pebble + // and outside Pebble (I/O queues, goroutine scheduler queue, mutex wait + // etc.). For some of these queues (which we consider important) the wait + // times are included below -- these expose low-level implementation detail + // and are meant for expert diagnosis and subject to change. There may be + // unaccounted time after subtracting those values from TotalDuration. + TotalDuration time.Duration + // SemaphoreWaitDuration is the wait time for semaphores in + // commitPipeline.Commit. + SemaphoreWaitDuration time.Duration + // WALQueueWaitDuration is the wait time for allocating memory blocks in the + // LogWriter (due to the LogWriter not writing fast enough). At the moment + // this is duration is always zero because a single WAL will allow + // allocating memory blocks up to the entire memtable size. In the future, + // we may pipeline WALs and bound the WAL queued blocks separately, so this + // field is preserved for that possibility. + WALQueueWaitDuration time.Duration + // MemTableWriteStallDuration is the wait caused by a write stall due to too + // many memtables (due to not flushing fast enough). + MemTableWriteStallDuration time.Duration + // L0ReadAmpWriteStallDuration is the wait caused by a write stall due to + // high read amplification in L0 (due to not compacting fast enough out of + // L0). + L0ReadAmpWriteStallDuration time.Duration + // WALRotationDuration is the wait time for WAL rotation, which includes + // syncing and closing the old WAL and creating (or reusing) a new one. 
+ WALRotationDuration time.Duration + // CommitWaitDuration is the wait for publishing the seqnum plus the + // duration for the WAL sync (if requested). The former should be tiny and + // one can assume that this is all due to the WAL sync. + CommitWaitDuration time.Duration +} + +var _ Reader = (*Batch)(nil) +var _ Writer = (*Batch)(nil) + +var batchPool = sync.Pool{ + New: func() interface{} { + return &Batch{} + }, +} + +type indexedBatch struct { + batch Batch + index batchskl.Skiplist +} + +var indexedBatchPool = sync.Pool{ + New: func() interface{} { + return &indexedBatch{} + }, +} + +func newBatch(db *DB) *Batch { + b := batchPool.Get().(*Batch) + b.db = db + return b +} + +func newBatchWithSize(db *DB, size int) *Batch { + b := newBatch(db) + if cap(b.data) < size { + b.data = rawalloc.New(0, size) + } + return b +} + +func newIndexedBatch(db *DB, comparer *Comparer) *Batch { + i := indexedBatchPool.Get().(*indexedBatch) + i.batch.cmp = comparer.Compare + i.batch.formatKey = comparer.FormatKey + i.batch.abbreviatedKey = comparer.AbbreviatedKey + i.batch.db = db + i.batch.index = &i.index + i.batch.index.Init(&i.batch.data, i.batch.cmp, i.batch.abbreviatedKey) + return &i.batch +} + +func newIndexedBatchWithSize(db *DB, comparer *Comparer, size int) *Batch { + b := newIndexedBatch(db, comparer) + if cap(b.data) < size { + b.data = rawalloc.New(0, size) + } + return b +} + +// nextSeqNum returns the batch "sequence number" that will be given to the next +// key written to the batch. During iteration keys within an indexed batch are +// given a sequence number consisting of their offset within the batch combined +// with the base.InternalKeySeqNumBatch bit. These sequence numbers are only +// used during iteration, and the keys are assigned ordinary sequence numbers +// when the batch is committed. 
+func (b *Batch) nextSeqNum() uint64 { + return uint64(len(b.data)) | base.InternalKeySeqNumBatch +} + +func (b *Batch) release() { + if b.db == nil { + // The batch was not created using newBatch or newIndexedBatch, or an error + // was encountered. We don't try to reuse batches that encountered an error + // because they might be stuck somewhere in the system and attempting to + // reuse such batches is a recipe for onerous debugging sessions. Instead, + // let the GC do its job. + return + } + b.db = nil + + // NB: This is ugly (it would be cleaner if we could just assign a Batch{}), + // but necessary so that we can use atomic.StoreUint32 for the Batch.applied + // field. Without using an atomic to clear that field the Go race detector + // complains. + b.Reset() + b.cmp = nil + b.formatKey = nil + b.abbreviatedKey = nil + + if b.index == nil { + batchPool.Put(b) + } else { + b.index, b.rangeDelIndex, b.rangeKeyIndex = nil, nil, nil + indexedBatchPool.Put((*indexedBatch)(unsafe.Pointer(b))) + } +} + +func (b *Batch) refreshMemTableSize() error { + b.memTableSize = 0 + if len(b.data) < batchHeaderLen { + return nil + } + + b.countRangeDels = 0 + b.countRangeKeys = 0 + b.minimumFormatMajorVersion = 0 + for r := b.Reader(); ; { + kind, key, value, ok, err := r.Next() + if !ok { + if err != nil { + return err + } + break + } + switch kind { + case InternalKeyKindRangeDelete: + b.countRangeDels++ + case InternalKeyKindRangeKeySet, InternalKeyKindRangeKeyUnset, InternalKeyKindRangeKeyDelete: + b.countRangeKeys++ + case InternalKeyKindDeleteSized: + if b.minimumFormatMajorVersion < FormatDeleteSizedAndObsolete { + b.minimumFormatMajorVersion = FormatDeleteSizedAndObsolete + } + case InternalKeyKindIngestSST: + if b.minimumFormatMajorVersion < FormatFlushableIngest { + b.minimumFormatMajorVersion = FormatFlushableIngest + } + // This key kind doesn't contribute to the memtable size. 
+ continue + } + b.memTableSize += memTableEntrySize(len(key), len(value)) + } + if b.countRangeKeys > 0 && b.minimumFormatMajorVersion < FormatRangeKeys { + b.minimumFormatMajorVersion = FormatRangeKeys + } + return nil +} + +// Apply the operations contained in the batch to the receiver batch. +// +// It is safe to modify the contents of the arguments after Apply returns. +func (b *Batch) Apply(batch *Batch, _ *WriteOptions) error { + if b.ingestedSSTBatch { + panic("pebble: invalid batch application") + } + if len(batch.data) == 0 { + return nil + } + if len(batch.data) < batchHeaderLen { + return ErrInvalidBatch + } + + offset := len(b.data) + if offset == 0 { + b.init(offset) + offset = batchHeaderLen + } + b.data = append(b.data, batch.data[batchHeaderLen:]...) + + b.setCount(b.Count() + batch.Count()) + + if b.db != nil || b.index != nil { + // Only iterate over the new entries if we need to track memTableSize or in + // order to update the index. + for iter := BatchReader(b.data[offset:]); len(iter) > 0; { + offset := uintptr(unsafe.Pointer(&iter[0])) - uintptr(unsafe.Pointer(&b.data[0])) + kind, key, value, ok, err := iter.Next() + if !ok { + if err != nil { + return err + } + break + } + switch kind { + case InternalKeyKindRangeDelete: + b.countRangeDels++ + case InternalKeyKindRangeKeySet, InternalKeyKindRangeKeyUnset, InternalKeyKindRangeKeyDelete: + b.countRangeKeys++ + case InternalKeyKindIngestSST: + panic("pebble: invalid key kind for batch") + } + if b.index != nil { + var err error + switch kind { + case InternalKeyKindRangeDelete: + b.tombstones = nil + b.tombstonesSeqNum = 0 + if b.rangeDelIndex == nil { + b.rangeDelIndex = batchskl.NewSkiplist(&b.data, b.cmp, b.abbreviatedKey) + } + err = b.rangeDelIndex.Add(uint32(offset)) + case InternalKeyKindRangeKeySet, InternalKeyKindRangeKeyUnset, InternalKeyKindRangeKeyDelete: + b.rangeKeys = nil + b.rangeKeysSeqNum = 0 + if b.rangeKeyIndex == nil { + b.rangeKeyIndex = batchskl.NewSkiplist(&b.data, 
b.cmp, b.abbreviatedKey) + } + err = b.rangeKeyIndex.Add(uint32(offset)) + default: + err = b.index.Add(uint32(offset)) + } + if err != nil { + return err + } + } + b.memTableSize += memTableEntrySize(len(key), len(value)) + } + } + return nil +} + +// Get gets the value for the given key. It returns ErrNotFound if the Batch +// does not contain the key. +// +// The caller should not modify the contents of the returned slice, but it is +// safe to modify the contents of the argument after Get returns. The returned +// slice will remain valid until the returned Closer is closed. On success, the +// caller MUST call closer.Close() or a memory leak will occur. +func (b *Batch) Get(key []byte) ([]byte, io.Closer, error) { + if b.index == nil { + return nil, nil, ErrNotIndexed + } + return b.db.getInternal(key, b, nil /* snapshot */) +} + +func (b *Batch) prepareDeferredKeyValueRecord(keyLen, valueLen int, kind InternalKeyKind) { + if b.committing { + panic("pebble: batch already committing") + } + if len(b.data) == 0 { + b.init(keyLen + valueLen + 2*binary.MaxVarintLen64 + batchHeaderLen) + } + b.count++ + b.memTableSize += memTableEntrySize(keyLen, valueLen) + + pos := len(b.data) + b.deferredOp.offset = uint32(pos) + b.grow(1 + 2*maxVarintLen32 + keyLen + valueLen) + b.data[pos] = byte(kind) + pos++ + + { + // TODO(peter): Manually inlined version binary.PutUvarint(). This is 20% + // faster on BenchmarkBatchSet on go1.13. Remove if go1.14 or future + // versions show this to not be a performance win. + x := uint32(keyLen) + for x >= 0x80 { + b.data[pos] = byte(x) | 0x80 + x >>= 7 + pos++ + } + b.data[pos] = byte(x) + pos++ + } + + b.deferredOp.Key = b.data[pos : pos+keyLen] + pos += keyLen + + { + // TODO(peter): Manually inlined version binary.PutUvarint(). This is 20% + // faster on BenchmarkBatchSet on go1.13. Remove if go1.14 or future + // versions show this to not be a performance win. 
+ x := uint32(valueLen) + for x >= 0x80 { + b.data[pos] = byte(x) | 0x80 + x >>= 7 + pos++ + } + b.data[pos] = byte(x) + pos++ + } + + b.deferredOp.Value = b.data[pos : pos+valueLen] + // Shrink data since varints may be shorter than the upper bound. + b.data = b.data[:pos+valueLen] +} + +func (b *Batch) prepareDeferredKeyRecord(keyLen int, kind InternalKeyKind) { + if b.committing { + panic("pebble: batch already committing") + } + if len(b.data) == 0 { + b.init(keyLen + binary.MaxVarintLen64 + batchHeaderLen) + } + b.count++ + b.memTableSize += memTableEntrySize(keyLen, 0) + + pos := len(b.data) + b.deferredOp.offset = uint32(pos) + b.grow(1 + maxVarintLen32 + keyLen) + b.data[pos] = byte(kind) + pos++ + + { + // TODO(peter): Manually inlined version binary.PutUvarint(). Remove if + // go1.13 or future versions show this to not be a performance win. See + // BenchmarkBatchSet. + x := uint32(keyLen) + for x >= 0x80 { + b.data[pos] = byte(x) | 0x80 + x >>= 7 + pos++ + } + b.data[pos] = byte(x) + pos++ + } + + b.deferredOp.Key = b.data[pos : pos+keyLen] + b.deferredOp.Value = nil + + // Shrink data since varint may be shorter than the upper bound. + b.data = b.data[:pos+keyLen] +} + +// AddInternalKey allows the caller to add an internal key of point key or range +// key kinds (but not RangeDelete) to a batch. Passing in an internal key of +// kind RangeDelete will result in a panic. Note that the seqnum in the internal +// key is effectively ignored, even though the Kind is preserved. This is +// because the batch format does not allow for a per-key seqnum to be specified, +// only a batch-wide one. +// +// Note that non-indexed keys (IngestKeyKind{LogData,IngestSST}) are not +// supported with this method as they require specialized logic. 
+func (b *Batch) AddInternalKey(key *base.InternalKey, value []byte, _ *WriteOptions) error { + keyLen := len(key.UserKey) + hasValue := false + switch kind := key.Kind(); kind { + case InternalKeyKindRangeDelete: + panic("unexpected range delete in AddInternalKey") + case InternalKeyKindSingleDelete, InternalKeyKindDelete: + b.prepareDeferredKeyRecord(keyLen, kind) + b.deferredOp.index = b.index + case InternalKeyKindRangeKeySet, InternalKeyKindRangeKeyUnset, InternalKeyKindRangeKeyDelete: + b.prepareDeferredKeyValueRecord(keyLen, len(value), kind) + hasValue = true + b.incrementRangeKeysCount() + default: + b.prepareDeferredKeyValueRecord(keyLen, len(value), kind) + hasValue = true + b.deferredOp.index = b.index + } + copy(b.deferredOp.Key, key.UserKey) + if hasValue { + copy(b.deferredOp.Value, value) + } + + // TODO(peter): Manually inline DeferredBatchOp.Finish(). Mid-stack inlining + // in go1.13 will remove the need for this. + if b.index != nil { + if err := b.index.Add(b.deferredOp.offset); err != nil { + return err + } + } + return nil +} + +// Set adds an action to the batch that sets the key to map to the value. +// +// It is safe to modify the contents of the arguments after Set returns. +func (b *Batch) Set(key, value []byte, _ *WriteOptions) error { + deferredOp := b.SetDeferred(len(key), len(value)) + copy(deferredOp.Key, key) + copy(deferredOp.Value, value) + // TODO(peter): Manually inline DeferredBatchOp.Finish(). Mid-stack inlining + // in go1.13 will remove the need for this. + if b.index != nil { + if err := b.index.Add(deferredOp.offset); err != nil { + return err + } + } + return nil +} + +// SetDeferred is similar to Set in that it adds a set operation to the batch, +// except it only takes in key/value lengths instead of complete slices, +// letting the caller encode into those objects and then call Finish() on the +// returned object. 
+func (b *Batch) SetDeferred(keyLen, valueLen int) *DeferredBatchOp { + b.prepareDeferredKeyValueRecord(keyLen, valueLen, InternalKeyKindSet) + b.deferredOp.index = b.index + return &b.deferredOp +} + +// Merge adds an action to the batch that merges the value at key with the new +// value. The details of the merge are dependent upon the configured merge +// operator. +// +// It is safe to modify the contents of the arguments after Merge returns. +func (b *Batch) Merge(key, value []byte, _ *WriteOptions) error { + deferredOp := b.MergeDeferred(len(key), len(value)) + copy(deferredOp.Key, key) + copy(deferredOp.Value, value) + // TODO(peter): Manually inline DeferredBatchOp.Finish(). Mid-stack inlining + // in go1.13 will remove the need for this. + if b.index != nil { + if err := b.index.Add(deferredOp.offset); err != nil { + return err + } + } + return nil +} + +// MergeDeferred is similar to Merge in that it adds a merge operation to the +// batch, except it only takes in key/value lengths instead of complete slices, +// letting the caller encode into those objects and then call Finish() on the +// returned object. +func (b *Batch) MergeDeferred(keyLen, valueLen int) *DeferredBatchOp { + b.prepareDeferredKeyValueRecord(keyLen, valueLen, InternalKeyKindMerge) + b.deferredOp.index = b.index + return &b.deferredOp +} + +// Delete adds an action to the batch that deletes the entry for key. +// +// It is safe to modify the contents of the arguments after Delete returns. +func (b *Batch) Delete(key []byte, _ *WriteOptions) error { + deferredOp := b.DeleteDeferred(len(key)) + copy(deferredOp.Key, key) + // TODO(peter): Manually inline DeferredBatchOp.Finish(). Mid-stack inlining + // in go1.13 will remove the need for this. 
+ if b.index != nil { + if err := b.index.Add(deferredOp.offset); err != nil { + return err + } + } + return nil +} + +// DeleteDeferred is similar to Delete in that it adds a delete operation to +// the batch, except it only takes in key/value lengths instead of complete +// slices, letting the caller encode into those objects and then call Finish() +// on the returned object. +func (b *Batch) DeleteDeferred(keyLen int) *DeferredBatchOp { + b.prepareDeferredKeyRecord(keyLen, InternalKeyKindDelete) + b.deferredOp.index = b.index + return &b.deferredOp +} + +// DeleteSized behaves identically to Delete, but takes an additional +// argument indicating the size of the value being deleted. DeleteSized +// should be preferred when the caller has the expectation that there exists +// a single internal KV pair for the key (eg, the key has not been +// overwritten recently), and the caller knows the size of its value. +// +// DeleteSized will record the value size within the tombstone and use it to +// inform compaction-picking heuristics which strive to reduce space +// amplification in the LSM. This "calling your shot" mechanic allows the +// storage engine to more accurately estimate and reduce space amplification. +// +// It is safe to modify the contents of the arguments after DeleteSized +// returns. +func (b *Batch) DeleteSized(key []byte, deletedValueSize uint32, _ *WriteOptions) error { + deferredOp := b.DeleteSizedDeferred(len(key), deletedValueSize) + copy(b.deferredOp.Key, key) + // TODO(peter): Manually inline DeferredBatchOp.Finish(). Check if in a + // later Go release this is unnecessary. 
+ if b.index != nil { + if err := b.index.Add(deferredOp.offset); err != nil { + return err + } + } + return nil +} + +// DeleteSizedDeferred is similar to DeleteSized in that it adds a sized delete +// operation to the batch, except it only takes in key length instead of a +// complete key slice, letting the caller encode into the DeferredBatchOp.Key +// slice and then call Finish() on the returned object. +func (b *Batch) DeleteSizedDeferred(keyLen int, deletedValueSize uint32) *DeferredBatchOp { + if b.minimumFormatMajorVersion < FormatDeleteSizedAndObsolete { + b.minimumFormatMajorVersion = FormatDeleteSizedAndObsolete + } + + // Encode the sum of the key length and the value in the value. + v := uint64(deletedValueSize) + uint64(keyLen) + + // Encode `v` as a varint. + var buf [binary.MaxVarintLen64]byte + n := 0 + { + x := v + for x >= 0x80 { + buf[n] = byte(x) | 0x80 + x >>= 7 + n++ + } + buf[n] = byte(x) + n++ + } + + // NB: In batch entries and sstable entries, values are stored as + // varstrings. Here, the value is itself a simple varint. This results in an + // unnecessary double layer of encoding: + // varint(n) varint(deletedValueSize) + // The first varint will always be 1-byte, since a varint-encoded uint64 + // will never exceed 128 bytes. This unnecessary extra byte and wrapping is + // preserved to avoid special casing across the database, and in particular + // in sstable block decoding which is performance sensitive. + b.prepareDeferredKeyValueRecord(keyLen, n, InternalKeyKindDeleteSized) + b.deferredOp.index = b.index + copy(b.deferredOp.Value, buf[:n]) + return &b.deferredOp +} + +// SingleDelete adds an action to the batch that single deletes the entry for key. +// See Writer.SingleDelete for more details on the semantics of SingleDelete. +// +// It is safe to modify the contents of the arguments after SingleDelete returns. 
+func (b *Batch) SingleDelete(key []byte, _ *WriteOptions) error { + deferredOp := b.SingleDeleteDeferred(len(key)) + copy(deferredOp.Key, key) + // TODO(peter): Manually inline DeferredBatchOp.Finish(). Mid-stack inlining + // in go1.13 will remove the need for this. + if b.index != nil { + if err := b.index.Add(deferredOp.offset); err != nil { + return err + } + } + return nil +} + +// SingleDeleteDeferred is similar to SingleDelete in that it adds a single delete +// operation to the batch, except it only takes in key/value lengths instead of +// complete slices, letting the caller encode into those objects and then call +// Finish() on the returned object. +func (b *Batch) SingleDeleteDeferred(keyLen int) *DeferredBatchOp { + b.prepareDeferredKeyRecord(keyLen, InternalKeyKindSingleDelete) + b.deferredOp.index = b.index + return &b.deferredOp +} + +// DeleteRange deletes all of the point keys (and values) in the range +// [start,end) (inclusive on start, exclusive on end). DeleteRange does NOT +// delete overlapping range keys (eg, keys set via RangeKeySet). +// +// It is safe to modify the contents of the arguments after DeleteRange +// returns. +func (b *Batch) DeleteRange(start, end []byte, _ *WriteOptions) error { + deferredOp := b.DeleteRangeDeferred(len(start), len(end)) + copy(deferredOp.Key, start) + copy(deferredOp.Value, end) + // TODO(peter): Manually inline DeferredBatchOp.Finish(). Mid-stack inlining + // in go1.13 will remove the need for this. + if deferredOp.index != nil { + if err := deferredOp.index.Add(deferredOp.offset); err != nil { + return err + } + } + return nil +} + +// DeleteRangeDeferred is similar to DeleteRange in that it adds a delete range +// operation to the batch, except it only takes in key lengths instead of +// complete slices, letting the caller encode into those objects and then call +// Finish() on the returned object. 
Note that DeferredBatchOp.Key should be +// populated with the start key, and DeferredBatchOp.Value should be populated +// with the end key. +func (b *Batch) DeleteRangeDeferred(startLen, endLen int) *DeferredBatchOp { + b.prepareDeferredKeyValueRecord(startLen, endLen, InternalKeyKindRangeDelete) + b.countRangeDels++ + if b.index != nil { + b.tombstones = nil + b.tombstonesSeqNum = 0 + // Range deletions are rare, so we lazily allocate the index for them. + if b.rangeDelIndex == nil { + b.rangeDelIndex = batchskl.NewSkiplist(&b.data, b.cmp, b.abbreviatedKey) + } + b.deferredOp.index = b.rangeDelIndex + } + return &b.deferredOp +} + +// RangeKeySet sets a range key mapping the key range [start, end) at the MVCC +// timestamp suffix to value. The suffix is optional. If any portion of the key +// range [start, end) is already set by a range key with the same suffix value, +// RangeKeySet overrides it. +// +// It is safe to modify the contents of the arguments after RangeKeySet returns. +func (b *Batch) RangeKeySet(start, end, suffix, value []byte, _ *WriteOptions) error { + suffixValues := [1]rangekey.SuffixValue{{Suffix: suffix, Value: value}} + internalValueLen := rangekey.EncodedSetValueLen(end, suffixValues[:]) + + deferredOp := b.rangeKeySetDeferred(len(start), internalValueLen) + copy(deferredOp.Key, start) + n := rangekey.EncodeSetValue(deferredOp.Value, end, suffixValues[:]) + if n != internalValueLen { + panic("unexpected internal value length mismatch") + } + + // Manually inline DeferredBatchOp.Finish(). 
+ if deferredOp.index != nil { + if err := deferredOp.index.Add(deferredOp.offset); err != nil { + return err + } + } + return nil +} + +func (b *Batch) rangeKeySetDeferred(startLen, internalValueLen int) *DeferredBatchOp { + b.prepareDeferredKeyValueRecord(startLen, internalValueLen, InternalKeyKindRangeKeySet) + b.incrementRangeKeysCount() + return &b.deferredOp +} + +func (b *Batch) incrementRangeKeysCount() { + b.countRangeKeys++ + if b.minimumFormatMajorVersion < FormatRangeKeys { + b.minimumFormatMajorVersion = FormatRangeKeys + } + if b.index != nil { + b.rangeKeys = nil + b.rangeKeysSeqNum = 0 + // Range keys are rare, so we lazily allocate the index for them. + if b.rangeKeyIndex == nil { + b.rangeKeyIndex = batchskl.NewSkiplist(&b.data, b.cmp, b.abbreviatedKey) + } + b.deferredOp.index = b.rangeKeyIndex + } +} + +// RangeKeyUnset removes a range key mapping the key range [start, end) at the +// MVCC timestamp suffix. The suffix may be omitted to remove an unsuffixed +// range key. RangeKeyUnset only removes portions of range keys that fall within +// the [start, end) key span, and only range keys with suffixes that exactly +// match the unset suffix. +// +// It is safe to modify the contents of the arguments after RangeKeyUnset +// returns. 
+func (b *Batch) RangeKeyUnset(start, end, suffix []byte, _ *WriteOptions) error { + suffixes := [1][]byte{suffix} + internalValueLen := rangekey.EncodedUnsetValueLen(end, suffixes[:]) + + deferredOp := b.rangeKeyUnsetDeferred(len(start), internalValueLen) + copy(deferredOp.Key, start) + n := rangekey.EncodeUnsetValue(deferredOp.Value, end, suffixes[:]) + if n != internalValueLen { + panic("unexpected internal value length mismatch") + } + + // Manually inline DeferredBatchOp.Finish() + if deferredOp.index != nil { + if err := deferredOp.index.Add(deferredOp.offset); err != nil { + return err + } + } + return nil +} + +func (b *Batch) rangeKeyUnsetDeferred(startLen, internalValueLen int) *DeferredBatchOp { + b.prepareDeferredKeyValueRecord(startLen, internalValueLen, InternalKeyKindRangeKeyUnset) + b.incrementRangeKeysCount() + return &b.deferredOp +} + +// RangeKeyDelete deletes all of the range keys in the range [start,end) +// (inclusive on start, exclusive on end). It does not delete point keys (for +// that use DeleteRange). RangeKeyDelete removes all range keys within the +// bounds, including those with or without suffixes. +// +// It is safe to modify the contents of the arguments after RangeKeyDelete +// returns. +func (b *Batch) RangeKeyDelete(start, end []byte, _ *WriteOptions) error { + deferredOp := b.RangeKeyDeleteDeferred(len(start), len(end)) + copy(deferredOp.Key, start) + copy(deferredOp.Value, end) + // Manually inline DeferredBatchOp.Finish(). + if deferredOp.index != nil { + if err := deferredOp.index.Add(deferredOp.offset); err != nil { + return err + } + } + return nil +} + +// RangeKeyDeleteDeferred is similar to RangeKeyDelete in that it adds an +// operation to delete range keys to the batch, except it only takes in key +// lengths instead of complete slices, letting the caller encode into those +// objects and then call Finish() on the returned object. 
Note that +// DeferredBatchOp.Key should be populated with the start key, and +// DeferredBatchOp.Value should be populated with the end key. +func (b *Batch) RangeKeyDeleteDeferred(startLen, endLen int) *DeferredBatchOp { + b.prepareDeferredKeyValueRecord(startLen, endLen, InternalKeyKindRangeKeyDelete) + b.incrementRangeKeysCount() + return &b.deferredOp +} + +// LogData adds the specified to the batch. The data will be written to the +// WAL, but not added to memtables or sstables. Log data is never indexed, +// which makes it useful for testing WAL performance. +// +// It is safe to modify the contents of the argument after LogData returns. +func (b *Batch) LogData(data []byte, _ *WriteOptions) error { + origCount, origMemTableSize := b.count, b.memTableSize + b.prepareDeferredKeyRecord(len(data), InternalKeyKindLogData) + copy(b.deferredOp.Key, data) + // Since LogData only writes to the WAL and does not affect the memtable, we + // restore b.count and b.memTableSize to their origin values. Note that + // Batch.count only refers to records that are added to the memtable. + b.count, b.memTableSize = origCount, origMemTableSize + return nil +} + +// IngestSST adds the FileNum for an sstable to the batch. The data will only be +// written to the WAL (not added to memtables or sstables). +func (b *Batch) ingestSST(fileNum base.FileNum) { + if b.Empty() { + b.ingestedSSTBatch = true + } else if !b.ingestedSSTBatch { + // Batch contains other key kinds. + panic("pebble: invalid call to ingestSST") + } + + origMemTableSize := b.memTableSize + var buf [binary.MaxVarintLen64]byte + length := binary.PutUvarint(buf[:], uint64(fileNum)) + b.prepareDeferredKeyRecord(length, InternalKeyKindIngestSST) + copy(b.deferredOp.Key, buf[:length]) + // Since IngestSST writes only to the WAL and does not affect the memtable, + // we restore b.memTableSize to its original value. 
Note that Batch.count + // is not reset because for the InternalKeyKindIngestSST the count is the + // number of sstable paths which have been added to the batch. + b.memTableSize = origMemTableSize + b.minimumFormatMajorVersion = FormatFlushableIngest +} + +// Empty returns true if the batch is empty, and false otherwise. +func (b *Batch) Empty() bool { + return len(b.data) <= batchHeaderLen +} + +// Len returns the current size of the batch in bytes. +func (b *Batch) Len() int { + if len(b.data) <= batchHeaderLen { + return batchHeaderLen + } + return len(b.data) +} + +// Repr returns the underlying batch representation. It is not safe to modify +// the contents. Reset() will not change the contents of the returned value, +// though any other mutation operation may do so. +func (b *Batch) Repr() []byte { + if len(b.data) == 0 { + b.init(batchHeaderLen) + } + binary.LittleEndian.PutUint32(b.countData(), b.Count()) + return b.data +} + +// SetRepr sets the underlying batch representation. The batch takes ownership +// of the supplied slice. It is not safe to modify it afterwards until the +// Batch is no longer in use. +func (b *Batch) SetRepr(data []byte) error { + if len(data) < batchHeaderLen { + return base.CorruptionErrorf("invalid batch") + } + b.data = data + b.count = uint64(binary.LittleEndian.Uint32(b.countData())) + var err error + if b.db != nil { + // Only track memTableSize for batches that will be committed to the DB. + err = b.refreshMemTableSize() + } + return err +} + +// NewIter returns an iterator that is unpositioned (Iterator.Valid() will +// return false). The iterator can be positioned via a call to SeekGE, +// SeekPrefixGE, SeekLT, First or Last. Only indexed batches support iterators. +// +// The returned Iterator observes all of the Batch's existing mutations, but no +// later mutations. Its view can be refreshed via RefreshBatchSnapshot or +// SetOptions(). 
+func (b *Batch) NewIter(o *IterOptions) (*Iterator, error) { + return b.NewIterWithContext(context.Background(), o) +} + +// NewIterWithContext is like NewIter, and additionally accepts a context for +// tracing. +func (b *Batch) NewIterWithContext(ctx context.Context, o *IterOptions) (*Iterator, error) { + if b.index == nil { + return nil, ErrNotIndexed + } + return b.db.newIter(ctx, b, newIterOpts{}, o), nil +} + +// NewBatchOnlyIter constructs an iterator that only reads the contents of the +// batch, and does not overlay the batch mutations on top of the DB state. +// +// The returned Iterator observes all of the Batch's existing mutations, but +// no later mutations. Its view can be refreshed via RefreshBatchSnapshot or +// SetOptions(). +func (b *Batch) NewBatchOnlyIter(ctx context.Context, o *IterOptions) (*Iterator, error) { + if b.index == nil { + return nil, ErrNotIndexed + } + return b.db.newIter(ctx, b, newIterOpts{batch: batchIterOpts{batchOnly: true}}, o), nil +} + +// newInternalIter creates a new internalIterator that iterates over the +// contents of the batch. +func (b *Batch) newInternalIter(o *IterOptions) *batchIter { + iter := &batchIter{} + b.initInternalIter(o, iter) + return iter +} + +func (b *Batch) initInternalIter(o *IterOptions, iter *batchIter) { + *iter = batchIter{ + cmp: b.cmp, + batch: b, + iter: b.index.NewIter(o.GetLowerBound(), o.GetUpperBound()), + // NB: We explicitly do not propagate the batch snapshot to the point + // key iterator. Filtering point keys within the batch iterator can + // cause pathological behavior where a batch iterator advances + // significantly farther than necessary filtering many batch keys that + // are not visible at the batch sequence number. Instead, the merging + // iterator enforces bounds. + // + // For example, consider an engine that contains the committed keys + // 'bar' and 'bax', with no keys between them. Consider a batch + // containing keys 1,000 keys within the range [a,z]. 
All of the + // batch keys were added to the batch after the iterator was + // constructed, so they are not visible to the iterator. A call to + // SeekGE('bax') would seek the LSM iterators and discover the key + // 'bax'. It would also seek the batch iterator, landing on the key + // 'baz' but discover it that it's not visible. The batch iterator would + // next through the rest of the batch's keys, only to discover there are + // no visible keys greater than or equal to 'bax'. + // + // Filtering these batch points within the merging iterator ensures that + // the batch iterator never needs to iterate beyond 'baz', because it + // already found a smaller, visible key 'bax'. + snapshot: base.InternalKeySeqNumMax, + } +} + +func (b *Batch) newRangeDelIter(o *IterOptions, batchSnapshot uint64) *keyspan.Iter { + // Construct an iterator even if rangeDelIndex is nil, because it is allowed + // to refresh later, so we need the container to exist. + iter := new(keyspan.Iter) + b.initRangeDelIter(o, iter, batchSnapshot) + return iter +} + +func (b *Batch) initRangeDelIter(_ *IterOptions, iter *keyspan.Iter, batchSnapshot uint64) { + if b.rangeDelIndex == nil { + iter.Init(b.cmp, nil) + return + } + + // Fragment the range tombstones the first time a range deletion iterator is + // requested. The cached tombstones are invalidated if another range + // deletion tombstone is added to the batch. This cache is only guaranteed + // to be correct if we're opening an iterator to read at a batch sequence + // number at least as high as tombstonesSeqNum. The cache is guaranteed to + // include all tombstones up to tombstonesSeqNum, and if any additional + // tombstones were added after that sequence number the cache would've been + // cleared. 
+ nextSeqNum := b.nextSeqNum() + if b.tombstones != nil && b.tombstonesSeqNum <= batchSnapshot { + iter.Init(b.cmp, b.tombstones) + return + } + + tombstones := make([]keyspan.Span, 0, b.countRangeDels) + frag := &keyspan.Fragmenter{ + Cmp: b.cmp, + Format: b.formatKey, + Emit: func(s keyspan.Span) { + tombstones = append(tombstones, s) + }, + } + it := &batchIter{ + cmp: b.cmp, + batch: b, + iter: b.rangeDelIndex.NewIter(nil, nil), + snapshot: batchSnapshot, + } + fragmentRangeDels(frag, it, int(b.countRangeDels)) + iter.Init(b.cmp, tombstones) + + // If we just read all the tombstones in the batch (eg, batchSnapshot was + // set to b.nextSeqNum()), then cache the tombstones so that a subsequent + // call to initRangeDelIter may use them without refragmenting. + if nextSeqNum == batchSnapshot { + b.tombstones = tombstones + b.tombstonesSeqNum = nextSeqNum + } +} + +func fragmentRangeDels(frag *keyspan.Fragmenter, it internalIterator, count int) { + // The memory management here is a bit subtle. The keys and values returned + // by the iterator are slices in Batch.data. Thus the fragmented tombstones + // are slices within Batch.data. If additional entries are added to the + // Batch, Batch.data may be reallocated. The references in the fragmented + // tombstones will remain valid, pointing into the old Batch.data. GC for + // the win. + + // Use a single []keyspan.Key buffer to avoid allocating many + // individual []keyspan.Key slices with a single element each. + keyBuf := make([]keyspan.Key, 0, count) + for key, val := it.First(); key != nil; key, val = it.Next() { + s := rangedel.Decode(*key, val.InPlaceValue(), keyBuf) + keyBuf = s.Keys[len(s.Keys):] + + // Set a fixed capacity to avoid accidental overwriting. 
+ s.Keys = s.Keys[:len(s.Keys):len(s.Keys)] + frag.Add(s) + } + frag.Finish() +} + +func (b *Batch) newRangeKeyIter(o *IterOptions, batchSnapshot uint64) *keyspan.Iter { + // Construct an iterator even if rangeKeyIndex is nil, because it is allowed + // to refresh later, so we need the container to exist. + iter := new(keyspan.Iter) + b.initRangeKeyIter(o, iter, batchSnapshot) + return iter +} + +func (b *Batch) initRangeKeyIter(_ *IterOptions, iter *keyspan.Iter, batchSnapshot uint64) { + if b.rangeKeyIndex == nil { + iter.Init(b.cmp, nil) + return + } + + // Fragment the range keys the first time a range key iterator is requested. + // The cached spans are invalidated if another range key is added to the + // batch. This cache is only guaranteed to be correct if we're opening an + // iterator to read at a batch sequence number at least as high as + // rangeKeysSeqNum. The cache is guaranteed to include all range keys up to + // rangeKeysSeqNum, and if any additional range keys were added after that + // sequence number the cache would've been cleared. + nextSeqNum := b.nextSeqNum() + if b.rangeKeys != nil && b.rangeKeysSeqNum <= batchSnapshot { + iter.Init(b.cmp, b.rangeKeys) + return + } + + rangeKeys := make([]keyspan.Span, 0, b.countRangeKeys) + frag := &keyspan.Fragmenter{ + Cmp: b.cmp, + Format: b.formatKey, + Emit: func(s keyspan.Span) { + rangeKeys = append(rangeKeys, s) + }, + } + it := &batchIter{ + cmp: b.cmp, + batch: b, + iter: b.rangeKeyIndex.NewIter(nil, nil), + snapshot: batchSnapshot, + } + fragmentRangeKeys(frag, it, int(b.countRangeKeys)) + iter.Init(b.cmp, rangeKeys) + + // If we just read all the range keys in the batch (eg, batchSnapshot was + // set to b.nextSeqNum()), then cache the range keys so that a subsequent + // call to initRangeKeyIter may use them without refragmenting. 
+ if nextSeqNum == batchSnapshot { + b.rangeKeys = rangeKeys + b.rangeKeysSeqNum = nextSeqNum + } +} + +func fragmentRangeKeys(frag *keyspan.Fragmenter, it internalIterator, count int) error { + // The memory management here is a bit subtle. The keys and values + // returned by the iterator are slices in Batch.data. Thus the + // fragmented key spans are slices within Batch.data. If additional + // entries are added to the Batch, Batch.data may be reallocated. The + // references in the fragmented keys will remain valid, pointing into + // the old Batch.data. GC for the win. + + // Use a single []keyspan.Key buffer to avoid allocating many + // individual []keyspan.Key slices with a single element each. + keyBuf := make([]keyspan.Key, 0, count) + for ik, val := it.First(); ik != nil; ik, val = it.Next() { + s, err := rangekey.Decode(*ik, val.InPlaceValue(), keyBuf) + if err != nil { + return err + } + keyBuf = s.Keys[len(s.Keys):] + + // Set a fixed capacity to avoid accidental overwriting. + s.Keys = s.Keys[:len(s.Keys):len(s.Keys)] + frag.Add(s) + } + frag.Finish() + return nil +} + +// Commit applies the batch to its parent writer. +func (b *Batch) Commit(o *WriteOptions) error { + return b.db.Apply(b, o) +} + +// Close closes the batch without committing it. +func (b *Batch) Close() error { + b.release() + return nil +} + +// Indexed returns true if the batch is indexed (i.e. supports read +// operations). +func (b *Batch) Indexed() bool { + return b.index != nil +} + +// init ensures that the batch data slice is initialized to meet the +// minimum required size and allocates space for the batch header. +func (b *Batch) init(size int) { + n := batchInitialSize + for n < size { + n *= 2 + } + if cap(b.data) < n { + b.data = rawalloc.New(batchHeaderLen, n) + } + b.setCount(0) + b.setSeqNum(0) + b.data = b.data[:batchHeaderLen] +} + +// Reset resets the batch for reuse. The underlying byte slice (that is +// returned by Repr()) may not be modified. 
It is only necessary to call this +// method if a batch is explicitly being reused. Close automatically takes are +// of releasing resources when appropriate for batches that are internally +// being reused. +func (b *Batch) Reset() { + // Zero out the struct, retaining only the fields necessary for manual + // reuse. + b.batchInternal = batchInternal{ + data: b.data, + cmp: b.cmp, + formatKey: b.formatKey, + abbreviatedKey: b.abbreviatedKey, + index: b.index, + db: b.db, + } + b.applied.Store(false) + if b.data != nil { + if cap(b.data) > batchMaxRetainedSize { + // If the capacity of the buffer is larger than our maximum + // retention size, don't re-use it. Let it be GC-ed instead. + // This prevents the memory from an unusually large batch from + // being held on to indefinitely. + b.data = nil + } else { + // Otherwise, reset the buffer for re-use. + b.data = b.data[:batchHeaderLen] + b.setSeqNum(0) + } + } + if b.index != nil { + b.index.Init(&b.data, b.cmp, b.abbreviatedKey) + } +} + +// seqNumData returns the 8 byte little-endian sequence number. Zero means that +// the batch has not yet been applied. +func (b *Batch) seqNumData() []byte { + return b.data[:8] +} + +// countData returns the 4 byte little-endian count data. "\xff\xff\xff\xff" +// means that the batch is invalid. +func (b *Batch) countData() []byte { + return b.data[8:12] +} + +func (b *Batch) grow(n int) { + newSize := len(b.data) + n + if uint64(newSize) >= maxBatchSize { + panic(ErrBatchTooLarge) + } + if newSize > cap(b.data) { + newCap := 2 * cap(b.data) + for newCap < newSize { + newCap *= 2 + } + newData := rawalloc.New(len(b.data), newCap) + copy(newData, b.data) + b.data = newData + } + b.data = b.data[:newSize] +} + +func (b *Batch) setSeqNum(seqNum uint64) { + binary.LittleEndian.PutUint64(b.seqNumData(), seqNum) +} + +// SeqNum returns the batch sequence number which is applied to the first +// record in the batch. The sequence number is incremented for each subsequent +// record. 
It returns zero if the batch is empty. +func (b *Batch) SeqNum() uint64 { + if len(b.data) == 0 { + b.init(batchHeaderLen) + } + return binary.LittleEndian.Uint64(b.seqNumData()) +} + +func (b *Batch) setCount(v uint32) { + b.count = uint64(v) +} + +// Count returns the count of memtable-modifying operations in this batch. All +// operations with the except of LogData increment this count. For IngestSSTs, +// count is only used to indicate the number of SSTs ingested in the record, the +// batch isn't applied to the memtable. +func (b *Batch) Count() uint32 { + if b.count > math.MaxUint32 { + panic(ErrInvalidBatch) + } + return uint32(b.count) +} + +// Reader returns a BatchReader for the current batch contents. If the batch is +// mutated, the new entries will not be visible to the reader. +func (b *Batch) Reader() BatchReader { + if len(b.data) == 0 { + b.init(batchHeaderLen) + } + return b.data[batchHeaderLen:] +} + +func batchDecodeStr(data []byte) (odata []byte, s []byte, ok bool) { + // TODO(jackson): This will index out of bounds if there's no varint or an + // invalid varint (eg, a single 0xff byte). Correcting will add a bit of + // overhead. We could avoid that overhead whenever len(data) >= + // binary.MaxVarint32? 
+ + var v uint32 + var n int + ptr := unsafe.Pointer(&data[0]) + if a := *((*uint8)(ptr)); a < 128 { + v = uint32(a) + n = 1 + } else if a, b := a&0x7f, *((*uint8)(unsafe.Pointer(uintptr(ptr) + 1))); b < 128 { + v = uint32(b)<<7 | uint32(a) + n = 2 + } else if b, c := b&0x7f, *((*uint8)(unsafe.Pointer(uintptr(ptr) + 2))); c < 128 { + v = uint32(c)<<14 | uint32(b)<<7 | uint32(a) + n = 3 + } else if c, d := c&0x7f, *((*uint8)(unsafe.Pointer(uintptr(ptr) + 3))); d < 128 { + v = uint32(d)<<21 | uint32(c)<<14 | uint32(b)<<7 | uint32(a) + n = 4 + } else { + d, e := d&0x7f, *((*uint8)(unsafe.Pointer(uintptr(ptr) + 4))) + v = uint32(e)<<28 | uint32(d)<<21 | uint32(c)<<14 | uint32(b)<<7 | uint32(a) + n = 5 + } + + data = data[n:] + if v > uint32(len(data)) { + return nil, nil, false + } + return data[v:], data[:v], true +} + +// SyncWait is to be used in conjunction with DB.ApplyNoSyncWait. +func (b *Batch) SyncWait() error { + now := time.Now() + b.fsyncWait.Wait() + if b.commitErr != nil { + b.db = nil // prevent batch reuse on error + } + waitDuration := time.Since(now) + b.commitStats.CommitWaitDuration += waitDuration + b.commitStats.TotalDuration += waitDuration + return b.commitErr +} + +// CommitStats returns stats related to committing the batch. Should be called +// after Batch.Commit, DB.Apply. If DB.ApplyNoSyncWait is used, should be +// called after Batch.SyncWait. +func (b *Batch) CommitStats() BatchCommitStats { + return b.commitStats +} + +// BatchReader iterates over the entries contained in a batch. +type BatchReader []byte + +// ReadBatch constructs a BatchReader from a batch representation. The +// header is not validated. ReadBatch returns a new batch reader and the +// count of entries contained within the batch. 
+func ReadBatch(repr []byte) (r BatchReader, count uint32) { + if len(repr) <= batchHeaderLen { + return nil, count + } + count = binary.LittleEndian.Uint32(repr[batchCountOffset:batchHeaderLen]) + return repr[batchHeaderLen:], count +} + +// Next returns the next entry in this batch, if there is one. If the reader has +// reached the end of the batch, Next returns ok=false and a nil error. If the +// batch is corrupt and the next entry is illegible, Next returns ok=false and a +// non-nil error. +func (r *BatchReader) Next() (kind InternalKeyKind, ukey []byte, value []byte, ok bool, err error) { + if len(*r) == 0 { + return 0, nil, nil, false, nil + } + kind = InternalKeyKind((*r)[0]) + if kind > InternalKeyKindMax { + return 0, nil, nil, false, errors.Wrapf(ErrInvalidBatch, "invalid key kind 0x%x", (*r)[0]) + } + *r, ukey, ok = batchDecodeStr((*r)[1:]) + if !ok { + return 0, nil, nil, false, errors.Wrapf(ErrInvalidBatch, "decoding user key") + } + switch kind { + case InternalKeyKindSet, InternalKeyKindMerge, InternalKeyKindRangeDelete, + InternalKeyKindRangeKeySet, InternalKeyKindRangeKeyUnset, InternalKeyKindRangeKeyDelete, + InternalKeyKindDeleteSized: + *r, value, ok = batchDecodeStr(*r) + if !ok { + return 0, nil, nil, false, errors.Wrapf(ErrInvalidBatch, "decoding %s value", kind) + } + } + return kind, ukey, value, true, nil +} + +// Note: batchIter mirrors the implementation of flushableBatchIter. Keep the +// two in sync. +type batchIter struct { + cmp Compare + batch *Batch + iter batchskl.Iterator + err error + // snapshot holds a batch "sequence number" at which the batch is being + // read. This sequence number has the InternalKeySeqNumBatch bit set, so it + // encodes an offset within the batch. Only batch entries earlier than the + // offset are visible during iteration. + snapshot uint64 +} + +// batchIter implements the base.InternalIterator interface. 
var _ base.InternalIterator = (*batchIter)(nil)

// String implements fmt.Stringer.
func (i *batchIter) String() string {
	return "batch"
}

// SeekGE positions the iterator at the first key >= the given key. Entries
// with sequence numbers at or beyond i.snapshot are skipped over, so only the
// visible portion of the batch is surfaced.
func (i *batchIter) SeekGE(key []byte, flags base.SeekGEFlags) (*InternalKey, base.LazyValue) {
	// Ignore TrySeekUsingNext if the view of the batch changed.
	if flags.TrySeekUsingNext() && flags.BatchJustRefreshed() {
		flags = flags.DisableTrySeekUsingNext()
	}

	i.err = nil // clear cached iteration error
	ikey := i.iter.SeekGE(key, flags)
	for ikey != nil && ikey.SeqNum() >= i.snapshot {
		ikey = i.iter.Next()
	}
	if ikey == nil {
		return nil, base.LazyValue{}
	}
	return ikey, base.MakeInPlaceValue(i.value())
}

// SeekPrefixGE delegates to SeekGE; the prefix argument is not used by the
// batch iterator.
func (i *batchIter) SeekPrefixGE(
	prefix, key []byte, flags base.SeekGEFlags,
) (*base.InternalKey, base.LazyValue) {
	i.err = nil // clear cached iteration error
	return i.SeekGE(key, flags)
}

// SeekLT positions the iterator at the last key < the given key, skipping
// entries not visible at i.snapshot.
func (i *batchIter) SeekLT(key []byte, flags base.SeekLTFlags) (*InternalKey, base.LazyValue) {
	i.err = nil // clear cached iteration error
	ikey := i.iter.SeekLT(key)
	for ikey != nil && ikey.SeqNum() >= i.snapshot {
		ikey = i.iter.Prev()
	}
	if ikey == nil {
		return nil, base.LazyValue{}
	}
	return ikey, base.MakeInPlaceValue(i.value())
}

// First positions the iterator at the first visible entry.
func (i *batchIter) First() (*InternalKey, base.LazyValue) {
	i.err = nil // clear cached iteration error
	ikey := i.iter.First()
	for ikey != nil && ikey.SeqNum() >= i.snapshot {
		ikey = i.iter.Next()
	}
	if ikey == nil {
		return nil, base.LazyValue{}
	}
	return ikey, base.MakeInPlaceValue(i.value())
}

// Last positions the iterator at the last visible entry.
func (i *batchIter) Last() (*InternalKey, base.LazyValue) {
	i.err = nil // clear cached iteration error
	ikey := i.iter.Last()
	for ikey != nil && ikey.SeqNum() >= i.snapshot {
		ikey = i.iter.Prev()
	}
	if ikey == nil {
		return nil, base.LazyValue{}
	}
	return ikey, base.MakeInPlaceValue(i.value())
}

// Next advances to the next visible entry. Note that, unlike the absolute
// positioning methods above, Next does not clear a cached iteration error.
func (i *batchIter) Next() (*InternalKey, base.LazyValue) {
	ikey := i.iter.Next()
	for ikey != nil && ikey.SeqNum() >= i.snapshot {
		ikey = i.iter.Next()
	}
	if ikey == nil {
		return nil, base.LazyValue{}
	}
	return ikey, base.MakeInPlaceValue(i.value())
}

func (i *batchIter) NextPrefix(succKey []byte) (*InternalKey, LazyValue) {
	// Because NextPrefix was invoked `succKey` must be ≥ the key at i's current
	// position. Seek the arena iterator using TrySeekUsingNext.
	ikey := i.iter.SeekGE(succKey, base.SeekGEFlagsNone.EnableTrySeekUsingNext())
	for ikey != nil && ikey.SeqNum() >= i.snapshot {
		ikey = i.iter.Next()
	}
	if ikey == nil {
		return nil, base.LazyValue{}
	}
	return ikey, base.MakeInPlaceValue(i.value())
}

// Prev steps back to the previous visible entry.
func (i *batchIter) Prev() (*InternalKey, base.LazyValue) {
	ikey := i.iter.Prev()
	for ikey != nil && ikey.SeqNum() >= i.snapshot {
		ikey = i.iter.Prev()
	}
	if ikey == nil {
		return nil, base.LazyValue{}
	}
	return ikey, base.MakeInPlaceValue(i.value())
}

// value decodes the value for the entry at the iterator's current position
// directly from the batch repr. On corruption it records the error in i.err
// and returns nil. Kinds that carry no value also return nil.
func (i *batchIter) value() []byte {
	offset, _, keyEnd := i.iter.KeyInfo()
	data := i.batch.data
	if len(data[offset:]) == 0 {
		i.err = base.CorruptionErrorf("corrupted batch")
		return nil
	}

	switch InternalKeyKind(data[offset]) {
	case InternalKeyKindSet, InternalKeyKindMerge, InternalKeyKindRangeDelete,
		InternalKeyKindRangeKeySet, InternalKeyKindRangeKeyUnset, InternalKeyKindRangeKeyDelete,
		InternalKeyKindDeleteSized:
		_, value, ok := batchDecodeStr(data[keyEnd:])
		if !ok {
			return nil
		}
		return value
	default:
		return nil
	}
}

// Error returns any cached iteration error.
func (i *batchIter) Error() error {
	return i.err
}

// Close closes the underlying skiplist iterator and returns any cached
// iteration error.
func (i *batchIter) Close() error {
	_ = i.iter.Close()
	return i.err
}

func (i *batchIter) SetBounds(lower, upper []byte) {
	i.iter.SetBounds(lower, upper)
}

func (i *batchIter) SetContext(_ context.Context) {}

type flushableBatchEntry struct {
	// offset is the byte offset of the record within the batch repr.
	offset uint32
	// index is the 0-based ordinal number of the record within the batch. Used
	// to compute the seqnum for the record.
	index uint32
	// key{Start,End} are the start and end byte offsets of the key within the
	// batch repr. Cached to avoid decoding the key length on every
	// comparison. The value is stored starting at keyEnd.
	keyStart uint32
	keyEnd   uint32
}

// flushableBatch wraps an existing batch and provides the interfaces needed
// for making the batch flushable (i.e. able to mimic a memtable).
type flushableBatch struct {
	cmp       Compare
	formatKey base.FormatKey
	data      []byte

	// The base sequence number for the entries in the batch. This is the same
	// value as Batch.seqNum() and is cached here for performance.
	seqNum uint64

	// A slice of offsets and indices for the entries in the batch. Used to
	// implement flushableBatchIter. Unlike the indexing on a normal batch, a
	// flushable batch is indexed such that batch entry i will be given the
	// sequence number flushableBatch.seqNum+i.
	//
	// Sorted in increasing order of key and decreasing order of offset (since
	// higher offsets correspond to higher sequence numbers).
	//
	// Does not include range deletion entries or range key entries.
	offsets []flushableBatchEntry

	// Fragmented range deletion tombstones.
	tombstones []keyspan.Span

	// Fragmented range keys.
	rangeKeys []keyspan.Span
}

var _ flushable = (*flushableBatch)(nil)

// newFlushableBatch creates a new batch that implements the flushable
// interface. This allows the batch to act like a memtable and be placed in the
// queue of flushable memtables. Note that the flushable batch takes ownership
// of the batch data.
func newFlushableBatch(batch *Batch, comparer *Comparer) (*flushableBatch, error) {
	b := &flushableBatch{
		data:      batch.data,
		cmp:       comparer.Compare,
		formatKey: comparer.FormatKey,
		offsets:   make([]flushableBatchEntry, 0, batch.Count()),
	}
	if b.data != nil {
		// Note that this sequence number is not correct when this batch has not
		// been applied since the sequence number has not been assigned yet. The
		// correct sequence number will be set later. But it is correct when the
		// batch is being replayed from the WAL.
		b.seqNum = batch.SeqNum()
	}
	var rangeDelOffsets []flushableBatchEntry
	var rangeKeyOffsets []flushableBatchEntry
	if len(b.data) > batchHeaderLen {
		// Non-empty batch.
		var index uint32
		for iter := BatchReader(b.data[batchHeaderLen:]); len(iter) > 0; index++ {
			// Compute the entry's byte offset within the repr via pointer
			// arithmetic: the reader slice aliases b.data.
			offset := uintptr(unsafe.Pointer(&iter[0])) - uintptr(unsafe.Pointer(&b.data[0]))
			kind, key, _, ok, err := iter.Next()
			if !ok {
				if err != nil {
					return nil, err
				}
				break
			}
			entry := flushableBatchEntry{
				offset: uint32(offset),
				index:  uint32(index),
			}
			if keySize := uint32(len(key)); keySize == 0 {
				// Must add 2 to the offset. One byte encodes `kind` and the next
				// byte encodes `0`, which is the length of the key.
				entry.keyStart = uint32(offset) + 2
				entry.keyEnd = entry.keyStart
			} else {
				entry.keyStart = uint32(uintptr(unsafe.Pointer(&key[0])) -
					uintptr(unsafe.Pointer(&b.data[0])))
				entry.keyEnd = entry.keyStart + keySize
			}
			switch kind {
			case InternalKeyKindRangeDelete:
				rangeDelOffsets = append(rangeDelOffsets, entry)
			case InternalKeyKindRangeKeySet, InternalKeyKindRangeKeyUnset, InternalKeyKindRangeKeyDelete:
				rangeKeyOffsets = append(rangeKeyOffsets, entry)
			default:
				b.offsets = append(b.offsets, entry)
			}
		}
	}

	// Sort all of offsets, rangeDelOffsets and rangeKeyOffsets, using *batch's
	// sort.Interface implementation.
	pointOffsets := b.offsets
	sort.Sort(b)
	b.offsets = rangeDelOffsets
	sort.Sort(b)
	b.offsets = rangeKeyOffsets
	sort.Sort(b)
	b.offsets = pointOffsets

	if len(rangeDelOffsets) > 0 {
		frag := &keyspan.Fragmenter{
			Cmp:    b.cmp,
			Format: b.formatKey,
			Emit: func(s keyspan.Span) {
				b.tombstones = append(b.tombstones, s)
			},
		}
		it := &flushableBatchIter{
			batch:   b,
			data:    b.data,
			offsets: rangeDelOffsets,
			cmp:     b.cmp,
			index:   -1,
		}
		fragmentRangeDels(frag, it, len(rangeDelOffsets))
	}
	if len(rangeKeyOffsets) > 0 {
		frag := &keyspan.Fragmenter{
			Cmp:    b.cmp,
			Format: b.formatKey,
			Emit: func(s keyspan.Span) {
				b.rangeKeys = append(b.rangeKeys, s)
			},
		}
		it := &flushableBatchIter{
			batch:   b,
			data:    b.data,
			offsets: rangeKeyOffsets,
			cmp:     b.cmp,
			index:   -1,
		}
		fragmentRangeKeys(frag, it, len(rangeKeyOffsets))
	}
	return b, nil
}

// setSeqNum records the base sequence number for the batch's entries and
// rebases the trailers of all fragmented tombstones and range keys by that
// amount. Panics if a sequence number has already been set.
func (b *flushableBatch) setSeqNum(seqNum uint64) {
	if b.seqNum != 0 {
		panic(fmt.Sprintf("pebble: flushableBatch.seqNum already set: %d", b.seqNum))
	}
	b.seqNum = seqNum
	for i := range b.tombstones {
		for j := range b.tombstones[i].Keys {
			b.tombstones[i].Keys[j].Trailer = base.MakeTrailer(
				b.tombstones[i].Keys[j].SeqNum()+seqNum,
				b.tombstones[i].Keys[j].Kind(),
			)
		}
	}
	for i := range b.rangeKeys {
		for j := range b.rangeKeys[i].Keys {
			b.rangeKeys[i].Keys[j].Trailer = base.MakeTrailer(
				b.rangeKeys[i].Keys[j].SeqNum()+seqNum,
				b.rangeKeys[i].Keys[j].Kind(),
			)
		}
	}
}

// Len implements sort.Interface over b.offsets.
func (b *flushableBatch) Len() int {
	return len(b.offsets)
}

// Less implements sort.Interface: entries order by increasing key, and ties
// break by decreasing offset (higher offsets correspond to higher seqnums).
func (b *flushableBatch) Less(i, j int) bool {
	ei := &b.offsets[i]
	ej := &b.offsets[j]
	ki := b.data[ei.keyStart:ei.keyEnd]
	kj := b.data[ej.keyStart:ej.keyEnd]
	switch c := b.cmp(ki, kj); {
	case c < 0:
		return true
	case c > 0:
		return false
	default:
		return ei.offset > ej.offset
	}
}

// Swap implements sort.Interface.
func (b *flushableBatch) Swap(i, j int) {
	b.offsets[i], b.offsets[j] = b.offsets[j], b.offsets[i]
}

// newIter is part of the flushable interface.
func (b *flushableBatch) newIter(o *IterOptions) internalIterator {
	return &flushableBatchIter{
		batch:   b,
		data:    b.data,
		offsets: b.offsets,
		cmp:     b.cmp,
		index:   -1,
		lower:   o.GetLowerBound(),
		upper:   o.GetUpperBound(),
	}
}

// newFlushIter is part of the flushable interface.
func (b *flushableBatch) newFlushIter(o *IterOptions, bytesFlushed *uint64) internalIterator {
	return &flushFlushableBatchIter{
		flushableBatchIter: flushableBatchIter{
			batch:   b,
			data:    b.data,
			offsets: b.offsets,
			cmp:     b.cmp,
			index:   -1,
		},
		bytesIterated: bytesFlushed,
	}
}

// newRangeDelIter is part of the flushable interface.
func (b *flushableBatch) newRangeDelIter(o *IterOptions) keyspan.FragmentIterator {
	if len(b.tombstones) == 0 {
		return nil
	}
	return keyspan.NewIter(b.cmp, b.tombstones)
}

// newRangeKeyIter is part of the flushable interface.
func (b *flushableBatch) newRangeKeyIter(o *IterOptions) keyspan.FragmentIterator {
	if len(b.rangeKeys) == 0 {
		return nil
	}
	return keyspan.NewIter(b.cmp, b.rangeKeys)
}

// containsRangeKeys is part of the flushable interface.
func (b *flushableBatch) containsRangeKeys() bool { return len(b.rangeKeys) > 0 }

// inuseBytes is part of the flushable interface.
func (b *flushableBatch) inuseBytes() uint64 {
	return uint64(len(b.data) - batchHeaderLen)
}

// totalBytes is part of the flushable interface.
func (b *flushableBatch) totalBytes() uint64 {
	return uint64(cap(b.data))
}

// readyForFlush is part of the flushable interface.
func (b *flushableBatch) readyForFlush() bool {
	// A flushable batch is always ready for flush; it must be flushed together
	// with the previous memtable.
	return true
}

// Note: flushableBatchIter mirrors the implementation of batchIter. Keep the
// two in sync.
type flushableBatchIter struct {
	// Members to be initialized by creator.
	batch *flushableBatch
	// The bytes backing the batch. Always the same as batch.data?
	data []byte
	// The sorted entries. This is not always equal to batch.offsets.
	offsets []flushableBatchEntry
	cmp     Compare
	// Must be initialized to -1. It is the index into offsets that represents
	// the current iterator position.
	index int

	// For internal use by the implementation.
	key InternalKey
	err error

	// Optionally initialize to bounds of iteration, if any.
	lower []byte
	upper []byte
}

// flushableBatchIter implements the base.InternalIterator interface.
var _ base.InternalIterator = (*flushableBatchIter)(nil)

// String implements fmt.Stringer.
func (i *flushableBatchIter) String() string {
	return "flushable-batch"
}

// SeekGE implements internalIterator.SeekGE, as documented in the pebble
// package. Ignore flags.TrySeekUsingNext() since we don't expect this
// optimization to provide much benefit here at the moment.
func (i *flushableBatchIter) SeekGE(
	key []byte, flags base.SeekGEFlags,
) (*InternalKey, base.LazyValue) {
	i.err = nil // clear cached iteration error
	ikey := base.MakeSearchKey(key)
	// Binary-search i.offsets (sorted by key) for the first entry >= ikey.
	i.index = sort.Search(len(i.offsets), func(j int) bool {
		return base.InternalCompare(i.cmp, ikey, i.getKey(j)) <= 0
	})
	if i.index >= len(i.offsets) {
		return nil, base.LazyValue{}
	}
	i.key = i.getKey(i.index)
	if i.upper != nil && i.cmp(i.key.UserKey, i.upper) >= 0 {
		i.index = len(i.offsets)
		return nil, base.LazyValue{}
	}
	return &i.key, i.value()
}

// SeekPrefixGE implements internalIterator.SeekPrefixGE, as documented in the
// pebble package.
func (i *flushableBatchIter) SeekPrefixGE(
	prefix, key []byte, flags base.SeekGEFlags,
) (*base.InternalKey, base.LazyValue) {
	return i.SeekGE(key, flags)
}

// SeekLT implements internalIterator.SeekLT, as documented in the pebble
// package.
func (i *flushableBatchIter) SeekLT(
	key []byte, flags base.SeekLTFlags,
) (*InternalKey, base.LazyValue) {
	i.err = nil // clear cached iteration error
	ikey := base.MakeSearchKey(key)
	// Find the first entry >= ikey, then step back one to land on the last
	// entry strictly less than the search key.
	i.index = sort.Search(len(i.offsets), func(j int) bool {
		return base.InternalCompare(i.cmp, ikey, i.getKey(j)) <= 0
	})
	i.index--
	if i.index < 0 {
		return nil, base.LazyValue{}
	}
	i.key = i.getKey(i.index)
	if i.lower != nil && i.cmp(i.key.UserKey, i.lower) < 0 {
		i.index = -1
		return nil, base.LazyValue{}
	}
	return &i.key, i.value()
}

// First implements internalIterator.First, as documented in the pebble
// package.
func (i *flushableBatchIter) First() (*InternalKey, base.LazyValue) {
	i.err = nil // clear cached iteration error
	if len(i.offsets) == 0 {
		return nil, base.LazyValue{}
	}
	i.index = 0
	i.key = i.getKey(i.index)
	if i.upper != nil && i.cmp(i.key.UserKey, i.upper) >= 0 {
		i.index = len(i.offsets)
		return nil, base.LazyValue{}
	}
	return &i.key, i.value()
}

// Last implements internalIterator.Last, as documented in the pebble
// package.
func (i *flushableBatchIter) Last() (*InternalKey, base.LazyValue) {
	i.err = nil // clear cached iteration error
	if len(i.offsets) == 0 {
		return nil, base.LazyValue{}
	}
	i.index = len(i.offsets) - 1
	i.key = i.getKey(i.index)
	if i.lower != nil && i.cmp(i.key.UserKey, i.lower) < 0 {
		i.index = -1
		return nil, base.LazyValue{}
	}
	return &i.key, i.value()
}

// Note: flushFlushableBatchIter.Next mirrors the implementation of
// flushableBatchIter.Next due to performance. Keep the two in sync.
+func (i *flushableBatchIter) Next() (*InternalKey, base.LazyValue) { + if i.index == len(i.offsets) { + return nil, base.LazyValue{} + } + i.index++ + if i.index == len(i.offsets) { + return nil, base.LazyValue{} + } + i.key = i.getKey(i.index) + if i.upper != nil && i.cmp(i.key.UserKey, i.upper) >= 0 { + i.index = len(i.offsets) + return nil, base.LazyValue{} + } + return &i.key, i.value() +} + +func (i *flushableBatchIter) Prev() (*InternalKey, base.LazyValue) { + if i.index < 0 { + return nil, base.LazyValue{} + } + i.index-- + if i.index < 0 { + return nil, base.LazyValue{} + } + i.key = i.getKey(i.index) + if i.lower != nil && i.cmp(i.key.UserKey, i.lower) < 0 { + i.index = -1 + return nil, base.LazyValue{} + } + return &i.key, i.value() +} + +// Note: flushFlushableBatchIter.NextPrefix mirrors the implementation of +// flushableBatchIter.NextPrefix due to performance. Keep the two in sync. +func (i *flushableBatchIter) NextPrefix(succKey []byte) (*InternalKey, LazyValue) { + return i.SeekGE(succKey, base.SeekGEFlagsNone.EnableTrySeekUsingNext()) +} + +func (i *flushableBatchIter) getKey(index int) InternalKey { + e := &i.offsets[index] + kind := InternalKeyKind(i.data[e.offset]) + key := i.data[e.keyStart:e.keyEnd] + return base.MakeInternalKey(key, i.batch.seqNum+uint64(e.index), kind) +} + +func (i *flushableBatchIter) value() base.LazyValue { + p := i.data[i.offsets[i.index].offset:] + if len(p) == 0 { + i.err = base.CorruptionErrorf("corrupted batch") + return base.LazyValue{} + } + kind := InternalKeyKind(p[0]) + if kind > InternalKeyKindMax { + i.err = base.CorruptionErrorf("corrupted batch") + return base.LazyValue{} + } + var value []byte + var ok bool + switch kind { + case InternalKeyKindSet, InternalKeyKindMerge, InternalKeyKindRangeDelete, + InternalKeyKindRangeKeySet, InternalKeyKindRangeKeyUnset, InternalKeyKindRangeKeyDelete, + InternalKeyKindDeleteSized: + keyEnd := i.offsets[i.index].keyEnd + _, value, ok = batchDecodeStr(i.data[keyEnd:]) + 
if !ok { + i.err = base.CorruptionErrorf("corrupted batch") + return base.LazyValue{} + } + } + return base.MakeInPlaceValue(value) +} + +func (i *flushableBatchIter) Valid() bool { + return i.index >= 0 && i.index < len(i.offsets) +} + +func (i *flushableBatchIter) Error() error { + return i.err +} + +func (i *flushableBatchIter) Close() error { + return i.err +} + +func (i *flushableBatchIter) SetBounds(lower, upper []byte) { + i.lower = lower + i.upper = upper +} + +func (i *flushableBatchIter) SetContext(_ context.Context) {} + +// flushFlushableBatchIter is similar to flushableBatchIter but it keeps track +// of number of bytes iterated. +type flushFlushableBatchIter struct { + flushableBatchIter + bytesIterated *uint64 +} + +// flushFlushableBatchIter implements the base.InternalIterator interface. +var _ base.InternalIterator = (*flushFlushableBatchIter)(nil) + +func (i *flushFlushableBatchIter) String() string { + return "flushable-batch" +} + +func (i *flushFlushableBatchIter) SeekGE( + key []byte, flags base.SeekGEFlags, +) (*InternalKey, base.LazyValue) { + panic("pebble: SeekGE unimplemented") +} + +func (i *flushFlushableBatchIter) SeekPrefixGE( + prefix, key []byte, flags base.SeekGEFlags, +) (*base.InternalKey, base.LazyValue) { + panic("pebble: SeekPrefixGE unimplemented") +} + +func (i *flushFlushableBatchIter) SeekLT( + key []byte, flags base.SeekLTFlags, +) (*InternalKey, base.LazyValue) { + panic("pebble: SeekLT unimplemented") +} + +func (i *flushFlushableBatchIter) First() (*InternalKey, base.LazyValue) { + i.err = nil // clear cached iteration error + key, val := i.flushableBatchIter.First() + if key == nil { + return nil, base.LazyValue{} + } + entryBytes := i.offsets[i.index].keyEnd - i.offsets[i.index].offset + *i.bytesIterated += uint64(entryBytes) + i.valueSize() + return key, val +} + +func (i *flushFlushableBatchIter) NextPrefix(succKey []byte) (*InternalKey, base.LazyValue) { + panic("pebble: Prev unimplemented") +} + +// Note: 
flushFlushableBatchIter.Next mirrors the implementation of +// flushableBatchIter.Next due to performance. Keep the two in sync. +func (i *flushFlushableBatchIter) Next() (*InternalKey, base.LazyValue) { + if i.index == len(i.offsets) { + return nil, base.LazyValue{} + } + i.index++ + if i.index == len(i.offsets) { + return nil, base.LazyValue{} + } + i.key = i.getKey(i.index) + entryBytes := i.offsets[i.index].keyEnd - i.offsets[i.index].offset + *i.bytesIterated += uint64(entryBytes) + i.valueSize() + return &i.key, i.value() +} + +func (i flushFlushableBatchIter) Prev() (*InternalKey, base.LazyValue) { + panic("pebble: Prev unimplemented") +} + +func (i flushFlushableBatchIter) valueSize() uint64 { + p := i.data[i.offsets[i.index].offset:] + if len(p) == 0 { + i.err = base.CorruptionErrorf("corrupted batch") + return 0 + } + kind := InternalKeyKind(p[0]) + if kind > InternalKeyKindMax { + i.err = base.CorruptionErrorf("corrupted batch") + return 0 + } + var length uint64 + switch kind { + case InternalKeyKindSet, InternalKeyKindMerge, InternalKeyKindRangeDelete: + keyEnd := i.offsets[i.index].keyEnd + v, n := binary.Uvarint(i.data[keyEnd:]) + if n <= 0 { + i.err = base.CorruptionErrorf("corrupted batch") + return 0 + } + length = v + uint64(n) + } + return length +} + +// batchSort returns iterators for the sorted contents of the batch. It is +// intended for testing use only. The batch.Sort dance is done to prevent +// exposing this method in the public pebble interface. 
+func batchSort( + i interface{}, +) ( + points internalIterator, + rangeDels keyspan.FragmentIterator, + rangeKeys keyspan.FragmentIterator, +) { + b := i.(*Batch) + if b.Indexed() { + pointIter := b.newInternalIter(nil) + rangeDelIter := b.newRangeDelIter(nil, math.MaxUint64) + rangeKeyIter := b.newRangeKeyIter(nil, math.MaxUint64) + return pointIter, rangeDelIter, rangeKeyIter + } + f, err := newFlushableBatch(b, b.db.opts.Comparer) + if err != nil { + panic(err) + } + return f.newIter(nil), f.newRangeDelIter(nil), f.newRangeKeyIter(nil) +} + +func init() { + private.BatchSort = batchSort +} diff --git a/pebble/batch_test.go b/pebble/batch_test.go new file mode 100644 index 0000000..c977874 --- /dev/null +++ b/pebble/batch_test.go @@ -0,0 +1,1652 @@ +// Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import ( + "bytes" + "context" + "encoding/binary" + "encoding/hex" + "fmt" + "io" + "math" + "math/rand" + "strconv" + "strings" + "sync" + "testing" + "time" + "unicode" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/batchskl" + "github.com/cockroachdb/pebble/internal/itertest" + "github.com/cockroachdb/pebble/internal/keyspan" + "github.com/cockroachdb/pebble/internal/testkeys" + "github.com/cockroachdb/pebble/vfs" + "github.com/stretchr/testify/require" +) + +func TestBatch(t *testing.T) { + testBatch(t, 0) + testBatch(t, batchInitialSize) +} + +func testBatch(t *testing.T, size int) { + type testCase struct { + kind InternalKeyKind + key, value string + valueInt uint32 + } + + verifyTestCases := func(b *Batch, testCases []testCase, indexedPointKindsOnly bool) { + r := b.Reader() + + for _, tc := range testCases { + if indexedPointKindsOnly && (tc.kind == InternalKeyKindLogData || tc.kind == 
InternalKeyKindIngestSST || + tc.kind == InternalKeyKindRangeDelete) { + continue + } + kind, k, v, ok, err := r.Next() + if !ok { + if err != nil { + t.Fatal(err) + } + t.Fatalf("next returned !ok: test case = %v", tc) + } + key, value := string(k), string(v) + if kind != tc.kind || key != tc.key || value != tc.value { + t.Errorf("got (%d, %q, %q), want (%d, %q, %q)", + kind, key, value, tc.kind, tc.key, tc.value) + } + } + if len(r) != 0 { + t.Errorf("reader was not exhausted: remaining bytes = %q", r) + } + } + + encodeFileNum := func(n base.FileNum) string { + return string(binary.AppendUvarint(nil, uint64(n))) + } + decodeFileNum := func(d []byte) base.FileNum { + val, n := binary.Uvarint(d) + if n <= 0 { + t.Fatalf("invalid filenum encoding") + } + return base.FileNum(val) + } + + // RangeKeySet and RangeKeyUnset are untested here because they don't expose + // deferred variants. This is a consequence of these keys' more complex + // value encodings. + testCases := []testCase{ + {InternalKeyKindIngestSST, encodeFileNum(1), "", 0}, + {InternalKeyKindSet, "roses", "red", 0}, + {InternalKeyKindSet, "violets", "blue", 0}, + {InternalKeyKindDelete, "roses", "", 0}, + {InternalKeyKindSingleDelete, "roses", "", 0}, + {InternalKeyKindSet, "", "", 0}, + {InternalKeyKindSet, "", "non-empty", 0}, + {InternalKeyKindDelete, "", "", 0}, + {InternalKeyKindSingleDelete, "", "", 0}, + {InternalKeyKindSet, "grass", "green", 0}, + {InternalKeyKindSet, "grass", "greener", 0}, + {InternalKeyKindSet, "eleventy", strings.Repeat("!!11!", 100), 0}, + {InternalKeyKindDelete, "nosuchkey", "", 0}, + {InternalKeyKindDeleteSized, "eleventy", string(binary.AppendUvarint([]byte(nil), 508)), 500}, + {InternalKeyKindSingleDelete, "nosuchkey", "", 0}, + {InternalKeyKindSet, "binarydata", "\x00", 0}, + {InternalKeyKindSet, "binarydata", "\xff", 0}, + {InternalKeyKindMerge, "merge", "mergedata", 0}, + {InternalKeyKindMerge, "merge", "", 0}, + {InternalKeyKindMerge, "", "", 0}, + 
{InternalKeyKindRangeDelete, "a", "b", 0}, + {InternalKeyKindRangeDelete, "", "", 0}, + {InternalKeyKindLogData, "logdata", "", 0}, + {InternalKeyKindLogData, "", "", 0}, + {InternalKeyKindRangeKeyDelete, "grass", "green", 0}, + {InternalKeyKindRangeKeyDelete, "", "", 0}, + {InternalKeyKindDeleteSized, "nosuchkey", string(binary.AppendUvarint([]byte(nil), 11)), 2}, + } + b := newBatchWithSize(nil, size) + for _, tc := range testCases { + switch tc.kind { + case InternalKeyKindSet: + _ = b.Set([]byte(tc.key), []byte(tc.value), nil) + case InternalKeyKindMerge: + _ = b.Merge([]byte(tc.key), []byte(tc.value), nil) + case InternalKeyKindDelete: + _ = b.Delete([]byte(tc.key), nil) + case InternalKeyKindDeleteSized: + _ = b.DeleteSized([]byte(tc.key), tc.valueInt, nil) + case InternalKeyKindSingleDelete: + _ = b.SingleDelete([]byte(tc.key), nil) + case InternalKeyKindRangeDelete: + _ = b.DeleteRange([]byte(tc.key), []byte(tc.value), nil) + case InternalKeyKindLogData: + _ = b.LogData([]byte(tc.key), nil) + case InternalKeyKindRangeKeyDelete: + _ = b.RangeKeyDelete([]byte(tc.key), []byte(tc.value), nil) + case InternalKeyKindIngestSST: + b.ingestSST(decodeFileNum([]byte(tc.key))) + } + } + verifyTestCases(b, testCases, false /* indexedKindsOnly */) + + b.Reset() + // Run the same operations, this time using the Deferred variants of each + // operation (eg. SetDeferred). 
+ for _, tc := range testCases { + key := []byte(tc.key) + value := []byte(tc.value) + switch tc.kind { + case InternalKeyKindSet: + d := b.SetDeferred(len(key), len(value)) + copy(d.Key, key) + copy(d.Value, value) + d.Finish() + case InternalKeyKindMerge: + d := b.MergeDeferred(len(key), len(value)) + copy(d.Key, key) + copy(d.Value, value) + d.Finish() + case InternalKeyKindDelete: + d := b.DeleteDeferred(len(key)) + copy(d.Key, key) + copy(d.Value, value) + d.Finish() + case InternalKeyKindDeleteSized: + d := b.DeleteSizedDeferred(len(tc.key), tc.valueInt) + copy(d.Key, key) + d.Finish() + case InternalKeyKindSingleDelete: + d := b.SingleDeleteDeferred(len(key)) + copy(d.Key, key) + copy(d.Value, value) + d.Finish() + case InternalKeyKindRangeDelete: + d := b.DeleteRangeDeferred(len(key), len(value)) + copy(d.Key, key) + copy(d.Value, value) + d.Finish() + case InternalKeyKindLogData: + _ = b.LogData([]byte(tc.key), nil) + case InternalKeyKindIngestSST: + b.ingestSST(decodeFileNum([]byte(tc.key))) + case InternalKeyKindRangeKeyDelete: + d := b.RangeKeyDeleteDeferred(len(key), len(value)) + copy(d.Key, key) + copy(d.Value, value) + d.Finish() + } + } + verifyTestCases(b, testCases, false /* indexedKindsOnly */) + + b.Reset() + // Run the same operations, this time using AddInternalKey instead of the + // Kind-specific methods. 
+ for _, tc := range testCases { + if tc.kind == InternalKeyKindLogData || tc.kind == InternalKeyKindIngestSST || + tc.kind == InternalKeyKindRangeDelete { + continue + } + key := []byte(tc.key) + value := []byte(tc.value) + b.AddInternalKey(&InternalKey{UserKey: key, Trailer: base.MakeTrailer(0, tc.kind)}, value, nil) + } + verifyTestCases(b, testCases, true /* indexedKindsOnly */) +} + +func TestBatchPreAlloc(t *testing.T) { + var cases = []struct { + size int + exp int + }{ + {0, batchInitialSize}, + {batchInitialSize, batchInitialSize}, + {2 * batchInitialSize, 2 * batchInitialSize}, + } + for _, c := range cases { + b := newBatchWithSize(nil, c.size) + b.Set([]byte{0x1}, []byte{0x2}, nil) + if cap(b.data) != c.exp { + t.Errorf("Unexpected memory space, required: %d, got: %d", c.exp, cap(b.data)) + } + } +} + +func TestBatchIngestSST(t *testing.T) { + // Verify that Batch.IngestSST has the correct batch count and memtable + // size. + var b Batch + b.ingestSST(1) + require.Equal(t, int(b.Count()), 1) + b.ingestSST(2) + require.Equal(t, int(b.Count()), 2) + require.Equal(t, int(b.memTableSize), 0) + require.Equal(t, b.ingestedSSTBatch, true) +} + +func TestBatchLen(t *testing.T) { + var b Batch + + requireLenAndReprEq := func(size int) { + require.Equal(t, size, b.Len()) + require.Equal(t, size, len(b.Repr())) + } + + requireLenAndReprEq(batchHeaderLen) + + key := "test-key" + value := "test-value" + + err := b.Set([]byte(key), []byte(value), nil) + require.NoError(t, err) + + requireLenAndReprEq(33) + + err = b.Delete([]byte(key), nil) + require.NoError(t, err) + + requireLenAndReprEq(43) +} + +func TestBatchEmpty(t *testing.T) { + testBatchEmpty(t, 0) + testBatchEmpty(t, batchInitialSize) +} + +func testBatchEmpty(t *testing.T, size int) { + b := newBatchWithSize(nil, size) + require.True(t, b.Empty()) + + ops := []func(*Batch) error{ + func(b *Batch) error { return b.Set(nil, nil, nil) }, + func(b *Batch) error { return b.Merge(nil, nil, nil) }, + func(b 
*Batch) error { return b.Delete(nil, nil) }, + func(b *Batch) error { return b.DeleteRange(nil, nil, nil) }, + func(b *Batch) error { return b.LogData(nil, nil) }, + func(b *Batch) error { return b.RangeKeySet(nil, nil, nil, nil, nil) }, + func(b *Batch) error { return b.RangeKeyUnset(nil, nil, nil, nil) }, + func(b *Batch) error { return b.RangeKeyDelete(nil, nil, nil) }, + } + + for _, op := range ops { + require.NoError(t, op(b)) + require.False(t, b.Empty()) + b.Reset() + require.True(t, b.Empty()) + // Reset may choose to reuse b.data, so clear it to the zero value in + // order to test the lazy initialization of b.data. + b = newBatchWithSize(nil, size) + } + + _ = b.Reader() + require.True(t, b.Empty()) + b.Reset() + require.True(t, b.Empty()) + b = newBatchWithSize(nil, size) + + require.Equal(t, uint64(0), b.SeqNum()) + require.True(t, b.Empty()) + b.Reset() + require.True(t, b.Empty()) + b = &Batch{} + + d, err := Open("", &Options{ + FS: vfs.NewMem(), + }) + require.NoError(t, err) + defer d.Close() + ib := newIndexedBatch(d, DefaultComparer) + iter, _ := ib.NewIter(nil) + require.False(t, iter.First()) + iter2, err := iter.Clone(CloneOptions{}) + require.NoError(t, err) + require.NoError(t, iter.Close()) + _, err = iter.Clone(CloneOptions{}) + require.True(t, err != nil) + require.False(t, iter2.First()) + require.NoError(t, iter2.Close()) + iter3, err := ib.NewBatchOnlyIter(context.Background(), nil) + require.NoError(t, err) + require.False(t, iter3.First()) + _, err = iter3.Clone(CloneOptions{}) + require.Error(t, err) + require.NoError(t, iter3.Close()) +} + +func TestBatchApplyNoSyncWait(t *testing.T) { + db, err := Open("", &Options{ + FS: vfs.NewMem(), + }) + require.NoError(t, err) + defer db.Close() + var batches []*Batch + options := &WriteOptions{Sync: true} + for i := 0; i < 10000; i++ { + b := db.NewBatch() + str := fmt.Sprintf("a%d", i) + require.NoError(t, b.Set([]byte(str), []byte(str), nil)) + require.NoError(t, db.ApplyNoSyncWait(b, 
options)) + // k-v pair is visible even if not yet synced. + val, closer, err := db.Get([]byte(str)) + require.NoError(t, err) + require.Equal(t, str, string(val)) + closer.Close() + batches = append(batches, b) + } + for _, b := range batches { + require.NoError(t, b.SyncWait()) + b.Close() + } +} + +func TestBatchReset(t *testing.T) { + db, err := Open("", &Options{ + FS: vfs.NewMem(), + }) + require.NoError(t, err) + defer db.Close() + key := "test-key" + value := "test-value" + b := db.NewBatch() + require.NoError(t, b.Set([]byte(key), []byte(value), nil)) + dd := b.DeleteRangeDeferred(len(key), len(value)) + copy(dd.Key, key) + copy(dd.Value, value) + dd.Finish() + + require.NoError(t, b.RangeKeySet([]byte(key), []byte(value), []byte(value), []byte(value), nil)) + + b.setSeqNum(100) + b.applied.Store(true) + b.commitErr = errors.New("test-error") + b.commit.Add(1) + b.fsyncWait.Add(1) + require.Equal(t, uint32(3), b.Count()) + require.Equal(t, uint64(1), b.countRangeDels) + require.Equal(t, uint64(1), b.countRangeKeys) + require.True(t, len(b.data) > 0) + require.True(t, b.SeqNum() > 0) + require.True(t, b.memTableSize > 0) + require.NotEqual(t, b.deferredOp, DeferredBatchOp{}) + // At this point b.data has not been modified since the db.NewBatch() and is + // either nil or contains a byte slice of length batchHeaderLen, with a 0 + // seqnum encoded in data[0:8] and an arbitrary count encoded in data[8:12]. + // The following commented code will often fail. + // count := binary.LittleEndian.Uint32(b.countData()) + // if count != 0 && count != 3 { + // t.Fatalf("count: %d", count) + // } + // If we simply called b.Reset now and later used b.data to initialize + // expected, the count in expected will also be arbitrary. So we fix the + // count in b.data now by calling b.Repr(). This call isn't essential, since + // we will call b.Repr() again, and just shows that it fixes the count in + // b.data. 
+ _ = b.Repr() + require.Equal(t, uint32(3), binary.LittleEndian.Uint32(b.countData())) + + b.Reset() + require.Equal(t, db, b.db) + require.Equal(t, false, b.applied.Load()) + require.Nil(t, b.commitErr) + require.Equal(t, uint32(0), b.Count()) + require.Equal(t, uint64(0), b.countRangeDels) + require.Equal(t, uint64(0), b.countRangeKeys) + require.Equal(t, batchHeaderLen, len(b.data)) + require.Equal(t, uint64(0), b.SeqNum()) + require.Equal(t, uint64(0), b.memTableSize) + require.Equal(t, FormatMajorVersion(0x00), b.minimumFormatMajorVersion) + require.Equal(t, b.deferredOp, DeferredBatchOp{}) + _ = b.Repr() + + var expected Batch + require.NoError(t, expected.SetRepr(b.data)) + expected.db = db + require.Equal(t, &expected, b) + + // Reset batch can be used to write and commit a new record. + b.Set([]byte(key), []byte(value), nil) + require.NoError(t, db.Apply(b, nil)) + v, closer, err := db.Get([]byte(key)) + require.NoError(t, err) + defer closer.Close() + require.Equal(t, v, []byte(value)) +} + +func TestIndexedBatchReset(t *testing.T) { + indexCount := func(sl *batchskl.Skiplist) int { + count := 0 + iter := sl.NewIter(nil, nil) + defer iter.Close() + for iter.First(); iter.Valid(); iter.Next() { + count++ + } + return count + } + db, err := Open("", &Options{ + FS: vfs.NewMem(), + }) + require.NoError(t, err) + defer db.Close() + b := newIndexedBatch(db, DefaultComparer) + start := "start-key" + end := "end-key" + key := "test-key" + value := "test-value" + b.DeleteRange([]byte(start), []byte(end), nil) + b.Set([]byte(key), []byte(value), nil) + require.NoError(t, b. 
+ RangeKeySet([]byte(start), []byte(end), []byte("suffix"), []byte(value), nil)) + require.NotNil(t, b.rangeKeyIndex) + require.NotNil(t, b.rangeDelIndex) + require.NotNil(t, b.index) + require.Equal(t, 1, indexCount(b.index)) + + b.Reset() + require.NotNil(t, b.cmp) + require.NotNil(t, b.formatKey) + require.NotNil(t, b.abbreviatedKey) + require.NotNil(t, b.index) + require.Nil(t, b.rangeDelIndex) + require.Nil(t, b.rangeKeyIndex) + + count := func(ib *Batch) int { + iter, _ := ib.NewIter(nil) + defer iter.Close() + iter2, err := iter.Clone(CloneOptions{}) + require.NoError(t, err) + defer iter2.Close() + iter3, err := ib.NewBatchOnlyIter(context.Background(), nil) + require.NoError(t, err) + defer iter3.Close() + var count [3]int + for i, it := range []*Iterator{iter, iter2, iter3} { + for it.First(); it.Valid(); it.Next() { + count[i]++ + } + } + require.Equal(t, count[0], count[1]) + require.Equal(t, count[0], count[2]) + return count[0] + } + contains := func(ib *Batch, key, value string) bool { + iter, _ := ib.NewIter(nil) + defer iter.Close() + iter2, err := iter.Clone(CloneOptions{}) + require.NoError(t, err) + defer iter2.Close() + iter3, err := ib.NewBatchOnlyIter(context.Background(), nil) + require.NoError(t, err) + defer iter3.Close() + var found [3]bool + for i, it := range []*Iterator{iter, iter2, iter3} { + for it.First(); it.Valid(); it.Next() { + if string(it.Key()) == key && + string(it.Value()) == value { + found[i] = true + } + } + } + require.Equal(t, found[0], found[1]) + require.Equal(t, found[0], found[2]) + return found[0] + } + // Set a key and check whether the key-value pair is visible. + b.Set([]byte(key), []byte(value), nil) + require.Equal(t, 1, indexCount(b.index)) + require.Equal(t, 1, count(b)) + require.True(t, contains(b, key, value)) + + // Use range delete to delete the above inserted key-value pair. 
+ b.DeleteRange([]byte(key), []byte(value), nil) + require.NotNil(t, b.rangeDelIndex) + require.Equal(t, 1, indexCount(b.rangeDelIndex)) + require.Equal(t, 0, count(b)) + require.False(t, contains(b, key, value)) +} + +// TestIndexedBatchMutation tests mutating an indexed batch with an open +// iterator. +func TestIndexedBatchMutation(t *testing.T) { + opts := &Options{ + Comparer: testkeys.Comparer, + FS: vfs.NewMem(), + FormatMajorVersion: internalFormatNewest, + } + d, err := Open("", opts) + require.NoError(t, err) + defer func() { d.Close() }() + + b := newIndexedBatch(d, DefaultComparer) + iters := map[string]*Iterator{} + defer func() { + for _, iter := range iters { + require.NoError(t, iter.Close()) + } + }() + + datadriven.RunTest(t, "testdata/indexed_batch_mutation", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "batch": + writeBatch := newBatch(d) + if err := runBatchDefineCmd(td, writeBatch); err != nil { + return err.Error() + } + if err := writeBatch.Commit(nil); err != nil { + return err.Error() + } + return "" + case "new-batch-iter": + name := td.CmdArgs[0].String() + iters[name], _ = b.NewIter(&IterOptions{ + KeyTypes: IterKeyTypePointsAndRanges, + }) + return "" + case "new-batch-only-iter": + name := td.CmdArgs[0].String() + iters[name], _ = b.NewBatchOnlyIter(context.Background(), &IterOptions{ + KeyTypes: IterKeyTypePointsAndRanges, + }) + return "" + case "new-db-iter": + name := td.CmdArgs[0].String() + iters[name], _ = d.NewIter(&IterOptions{ + KeyTypes: IterKeyTypePointsAndRanges, + }) + return "" + case "new-batch": + if b != nil { + require.NoError(t, b.Close()) + } + b = newIndexedBatch(d, opts.Comparer) + if err := runBatchDefineCmd(td, b); err != nil { + return err.Error() + } + return "" + case "flush": + require.NoError(t, d.Flush()) + return "" + case "iter": + var iter string + td.ScanArgs(t, "iter", &iter) + return runIterCmd(td, iters[iter], false /* closeIter */) + case "mutate": + mut := 
newBatch(d) + if err := runBatchDefineCmd(td, mut); err != nil { + return err.Error() + } + if err := b.Apply(mut, nil); err != nil { + return err.Error() + } + return "" + case "clone": + var from, to string + var refreshBatchView bool + td.ScanArgs(t, "from", &from) + td.ScanArgs(t, "to", &to) + td.ScanArgs(t, "refresh-batch", &refreshBatchView) + var err error + iters[to], err = iters[from].Clone(CloneOptions{RefreshBatchView: refreshBatchView}) + if err != nil { + return err.Error() + } + return "" + case "reset": + for key, iter := range iters { + if err := iter.Close(); err != nil { + return err.Error() + } + delete(iters, key) + } + if d != nil { + if err := d.Close(); err != nil { + return err.Error() + } + } + opts.FS = vfs.NewMem() + d, err = Open("", opts) + require.NoError(t, err) + return "" + default: + return fmt.Sprintf("unrecognized command %q", td.Cmd) + } + }) +} + +func TestIndexedBatch_GlobalVisibility(t *testing.T) { + opts := &Options{ + FS: vfs.NewMem(), + FormatMajorVersion: internalFormatNewest, + Comparer: testkeys.Comparer, + } + d, err := Open("", opts) + require.NoError(t, err) + defer d.Close() + + require.NoError(t, d.Set([]byte("foo"), []byte("foo"), nil)) + + // Create an iterator over an empty indexed batch. + b := newIndexedBatch(d, DefaultComparer) + iterOpts := IterOptions{KeyTypes: IterKeyTypePointsAndRanges} + iter, _ := b.NewIter(&iterOpts) + defer iter.Close() + + // Mutate the database's committed state. 
+ mut := newBatch(d) + require.NoError(t, mut.Set([]byte("bar"), []byte("bar"), nil)) + require.NoError(t, mut.DeleteRange([]byte("e"), []byte("g"), nil)) + require.NoError(t, mut.RangeKeySet([]byte("a"), []byte("c"), []byte("@1"), []byte("v"), nil)) + require.NoError(t, mut.Commit(nil)) + + scanIter := func() string { + var buf bytes.Buffer + for valid := iter.First(); valid; valid = iter.Next() { + fmt.Fprintf(&buf, "%s: (", iter.Key()) + hasPoint, hasRange := iter.HasPointAndRange() + if hasPoint { + fmt.Fprintf(&buf, "%s,", iter.Value()) + } else { + fmt.Fprintf(&buf, ".,") + } + if hasRange { + start, end := iter.RangeBounds() + fmt.Fprintf(&buf, "[%s-%s)", start, end) + writeRangeKeys(&buf, iter) + } else { + fmt.Fprintf(&buf, ".") + } + fmt.Fprintln(&buf, ")") + } + return strings.TrimSpace(buf.String()) + } + // Scanning the iterator should only see the point key written before the + // iterator was constructed. + require.Equal(t, `foo: (foo,.)`, scanIter()) + + // After calling SetOptions, the iterator should still only see the point + // key written before the iterator was constructed. SetOptions refreshes the + // iterator's view of its own indexed batch, but not committed state. 
+ iter.SetOptions(&iterOpts) + require.Equal(t, `foo: (foo,.)`, scanIter()) +} + +func TestFlushableBatchReset(t *testing.T) { + var b Batch + var err error + b.flushable, err = newFlushableBatch(&b, DefaultComparer) + require.NoError(t, err) + + b.Reset() + require.Nil(t, b.flushable) +} + +func TestBatchIncrement(t *testing.T) { + testCases := []uint32{ + 0x00000000, + 0x00000001, + 0x00000002, + 0x0000007f, + 0x00000080, + 0x000000fe, + 0x000000ff, + 0x00000100, + 0x00000101, + 0x000001ff, + 0x00000200, + 0x00000fff, + 0x00001234, + 0x0000fffe, + 0x0000ffff, + 0x00010000, + 0x00010001, + 0x000100fe, + 0x000100ff, + 0x00020100, + 0x03fffffe, + 0x03ffffff, + 0x04000000, + 0x04000001, + 0x7fffffff, + 0xfffffffe, + } + for _, tc := range testCases { + var buf [batchHeaderLen]byte + binary.LittleEndian.PutUint32(buf[8:12], tc) + var b Batch + b.SetRepr(buf[:]) + b.count++ + got := binary.LittleEndian.Uint32(b.Repr()[8:12]) + want := tc + 1 + if got != want { + t.Errorf("input=%d: got %d, want %d", tc, got, want) + } + _, count := ReadBatch(b.Repr()) + if got != want { + t.Errorf("input=%d: got %d, want %d", tc, count, want) + } + } + + err := func() (err error) { + defer func() { + if v := recover(); v != nil { + if verr, ok := v.(error); ok { + err = verr + } + } + }() + var buf [batchHeaderLen]byte + binary.LittleEndian.PutUint32(buf[8:12], 0xffffffff) + var b Batch + b.SetRepr(buf[:]) + b.count++ + b.Repr() + return nil + }() + if err != ErrInvalidBatch { + t.Fatalf("expected %v, but found %v", ErrInvalidBatch, err) + } +} + +func TestBatchOpDoesIncrement(t *testing.T) { + var b Batch + key := []byte("foo") + value := []byte("bar") + + if b.Count() != 0 { + t.Fatalf("new batch has a nonzero count: %d", b.Count()) + } + + // Should increment count by 1 + _ = b.Set(key, value, nil) + if b.Count() != 1 { + t.Fatalf("expected count: %d, got %d", 1, b.Count()) + } + + var b2 Batch + // Should increment count by 1 each + _ = b2.Set(key, value, nil) + _ = b2.Delete(key, 
nil) + if b2.Count() != 2 { + t.Fatalf("expected count: %d, got %d", 2, b2.Count()) + } + + // Should increment count by b2.count() + _ = b.Apply(&b2, nil) + if b.Count() != 3 { + t.Fatalf("expected count: %d, got %d", 3, b.Count()) + } + + // Should increment count by 1 + _ = b.Merge(key, value, nil) + if b.Count() != 4 { + t.Fatalf("expected count: %d, got %d", 4, b.Count()) + } + + // Should NOT increment count. + _ = b.LogData([]byte("foobarbaz"), nil) + if b.Count() != 4 { + t.Fatalf("expected count: %d, got %d", 4, b.Count()) + } +} + +func TestBatchGet(t *testing.T) { + testCases := []struct { + method string + memTableSize uint64 + }{ + {"build", 64 << 20}, + {"build", 2 << 10}, + {"apply", 64 << 20}, + } + + for _, c := range testCases { + t.Run(fmt.Sprintf("%s,mem=%d", c.method, c.memTableSize), func(t *testing.T) { + d, err := Open("", &Options{ + FS: vfs.NewMem(), + MemTableSize: c.memTableSize, + }) + if err != nil { + t.Fatalf("Open: %v", err) + } + defer d.Close() + var b *Batch + + datadriven.RunTest(t, "testdata/batch_get", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "define": + switch c.method { + case "build": + b = d.NewIndexedBatch() + case "apply": + b = d.NewBatch() + } + + if err := runBatchDefineCmd(td, b); err != nil { + return err.Error() + } + + switch c.method { + case "apply": + tmp := d.NewIndexedBatch() + tmp.Apply(b, nil) + b = tmp + } + return "" + + case "commit": + if err := b.Commit(nil); err != nil { + return err.Error() + } + b = nil + return "" + + case "get": + if len(td.CmdArgs) != 1 { + return fmt.Sprintf("%s expects 1 argument", td.Cmd) + } + v, closer, err := b.Get([]byte(td.CmdArgs[0].String())) + if err != nil { + return err.Error() + } + defer closer.Close() + return string(v) + + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) + }) + } +} + +func TestBatchIter(t *testing.T) { + var b *Batch + + for _, method := range []string{"build", "apply"} { + for _, 
testdata := range []string{ + "testdata/internal_iter_next", "testdata/internal_iter_bounds"} { + t.Run(method, func(t *testing.T) { + datadriven.RunTest(t, testdata, func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "define": + switch method { + case "build": + b = newIndexedBatch(nil, DefaultComparer) + case "apply": + b = newBatch(nil) + } + + for _, key := range strings.Split(d.Input, "\n") { + j := strings.Index(key, ":") + ikey := base.ParseInternalKey(key[:j]) + value := []byte(key[j+1:]) + b.Set(ikey.UserKey, value, nil) + } + + switch method { + case "apply": + tmp := newIndexedBatch(nil, DefaultComparer) + tmp.Apply(b, nil) + b = tmp + } + return "" + + case "iter": + var options IterOptions + for _, arg := range d.CmdArgs { + switch arg.Key { + case "lower": + if len(arg.Vals) != 1 { + return fmt.Sprintf( + "%s expects at most 1 value for lower", d.Cmd) + } + options.LowerBound = []byte(arg.Vals[0]) + case "upper": + if len(arg.Vals) != 1 { + return fmt.Sprintf( + "%s expects at most 1 value for upper", d.Cmd) + } + options.UpperBound = []byte(arg.Vals[0]) + default: + return fmt.Sprintf("unknown arg: %s", arg.Key) + } + } + iter := b.newInternalIter(&options) + defer iter.Close() + return itertest.RunInternalIterCmd(t, d, iter) + + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) + }) + } + } +} + +func TestBatchRangeOps(t *testing.T) { + var b *Batch + + datadriven.RunTest(t, "testdata/batch_range_ops", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "clear": + b = nil + return "" + + case "apply": + if b == nil { + b = newIndexedBatch(nil, DefaultComparer) + } + t := newBatch(nil) + if err := runBatchDefineCmd(td, t); err != nil { + return err.Error() + } + if err := b.Apply(t, nil); err != nil { + return err.Error() + } + return "" + + case "define": + if b == nil { + b = newIndexedBatch(nil, DefaultComparer) + } + if err := runBatchDefineCmd(td, b); err != nil { + return 
err.Error() + } + return "" + + case "scan": + if len(td.CmdArgs) > 1 { + return fmt.Sprintf("%s expects at most 1 argument", td.Cmd) + } + var fragmentIter keyspan.FragmentIterator + var internalIter base.InternalIterator + switch { + case td.HasArg("range-del"): + fragmentIter = b.newRangeDelIter(nil, math.MaxUint64) + defer fragmentIter.Close() + case td.HasArg("range-key"): + fragmentIter = b.newRangeKeyIter(nil, math.MaxUint64) + defer fragmentIter.Close() + default: + internalIter = b.newInternalIter(nil) + defer internalIter.Close() + } + + var buf bytes.Buffer + if fragmentIter != nil { + for s := fragmentIter.First(); s != nil; s = fragmentIter.Next() { + for i := range s.Keys { + s.Keys[i].Trailer = base.MakeTrailer( + s.Keys[i].SeqNum()&^base.InternalKeySeqNumBatch, + s.Keys[i].Kind(), + ) + } + fmt.Fprintln(&buf, s) + } + } else { + for k, v := internalIter.First(); k != nil; k, v = internalIter.Next() { + k.SetSeqNum(k.SeqNum() &^ InternalKeySeqNumBatch) + fmt.Fprintf(&buf, "%s:%s\n", k, v.InPlaceValue()) + } + } + return buf.String() + + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +func TestBatchTooLarge(t *testing.T) { + var b Batch + var result interface{} + func() { + defer func() { + if r := recover(); r != nil { + result = r + } + }() + b.grow(maxBatchSize) + }() + require.EqualValues(t, ErrBatchTooLarge, result) +} + +func TestFlushableBatchIter(t *testing.T) { + var b *flushableBatch + datadriven.RunTest(t, "testdata/internal_iter_next", func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "define": + batch := newBatch(nil) + for _, key := range strings.Split(d.Input, "\n") { + j := strings.Index(key, ":") + ikey := base.ParseInternalKey(key[:j]) + value := []byte(fmt.Sprint(ikey.SeqNum())) + batch.Set(ikey.UserKey, value, nil) + } + var err error + b, err = newFlushableBatch(batch, DefaultComparer) + require.NoError(t, err) + return "" + + case "iter": + iter := b.newIter(nil) + defer 
iter.Close() + return itertest.RunInternalIterCmd(t, d, iter) + + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} + +func TestFlushableBatch(t *testing.T) { + var b *flushableBatch + datadriven.RunTest(t, "testdata/flushable_batch", func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "define": + batch := newBatch(nil) + for _, key := range strings.Split(d.Input, "\n") { + j := strings.Index(key, ":") + ikey := base.ParseInternalKey(key[:j]) + value := []byte(fmt.Sprint(ikey.SeqNum())) + switch ikey.Kind() { + case InternalKeyKindDelete: + require.NoError(t, batch.Delete(ikey.UserKey, nil)) + case InternalKeyKindSet: + require.NoError(t, batch.Set(ikey.UserKey, value, nil)) + case InternalKeyKindMerge: + require.NoError(t, batch.Merge(ikey.UserKey, value, nil)) + case InternalKeyKindRangeDelete: + require.NoError(t, batch.DeleteRange(ikey.UserKey, value, nil)) + case InternalKeyKindRangeKeyDelete: + require.NoError(t, batch.RangeKeyDelete(ikey.UserKey, value, nil)) + case InternalKeyKindRangeKeySet: + require.NoError(t, batch.RangeKeySet(ikey.UserKey, value, value, value, nil)) + case InternalKeyKindRangeKeyUnset: + require.NoError(t, batch.RangeKeyUnset(ikey.UserKey, value, value, nil)) + } + } + var err error + b, err = newFlushableBatch(batch, DefaultComparer) + require.NoError(t, err) + return "" + + case "iter": + var opts IterOptions + for _, arg := range d.CmdArgs { + if len(arg.Vals) != 1 { + return fmt.Sprintf("%s: %s=", d.Cmd, arg.Key) + } + switch arg.Key { + case "lower": + opts.LowerBound = []byte(arg.Vals[0]) + case "upper": + opts.UpperBound = []byte(arg.Vals[0]) + default: + return fmt.Sprintf("%s: unknown arg: %s", d.Cmd, arg.Key) + } + } + + iter := b.newIter(&opts) + defer iter.Close() + return itertest.RunInternalIterCmd(t, d, iter) + + case "dump": + if len(d.CmdArgs) != 1 || len(d.CmdArgs[0].Vals) != 1 || d.CmdArgs[0].Key != "seq" { + return "dump seq=\n" + } + seqNum, err := 
strconv.Atoi(d.CmdArgs[0].Vals[0]) + if err != nil { + return err.Error() + } + b.setSeqNum(uint64(seqNum)) + + var buf bytes.Buffer + + iter := newInternalIterAdapter(b.newIter(nil)) + for valid := iter.First(); valid; valid = iter.Next() { + fmt.Fprintf(&buf, "%s:%s\n", iter.Key(), iter.Value()) + } + iter.Close() + + if rangeDelIter := b.newRangeDelIter(nil); rangeDelIter != nil { + scanKeyspanIterator(&buf, rangeDelIter) + rangeDelIter.Close() + } + if rangeKeyIter := b.newRangeKeyIter(nil); rangeKeyIter != nil { + scanKeyspanIterator(&buf, rangeKeyIter) + rangeKeyIter.Close() + } + return buf.String() + + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} + +func TestFlushableBatchDeleteRange(t *testing.T) { + var fb *flushableBatch + var input string + + datadriven.RunTest(t, "testdata/delete_range", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "clear": + input = "" + return "" + + case "define": + b := newBatch(nil) + // NB: We can't actually add to the flushable batch as we can to a + // memtable (which shares the "testdata/delete_range" data), so we fake + // it by concatenating the input and rebuilding the flushable batch from + // scratch. 
+ input += "\n" + td.Input + td.Input = input + if err := runBatchDefineCmd(td, b); err != nil { + return err.Error() + } + var err error + fb, err = newFlushableBatch(b, DefaultComparer) + require.NoError(t, err) + return "" + + case "scan": + var buf bytes.Buffer + if td.HasArg("range-del") { + fi := fb.newRangeDelIter(nil) + defer fi.Close() + scanKeyspanIterator(&buf, fi) + } else { + ii := fb.newIter(nil) + defer ii.Close() + scanInternalIter(&buf, ii) + } + return buf.String() + + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +func scanInternalIter(w io.Writer, ii internalIterator) { + for k, v := ii.First(); k != nil; k, v = ii.Next() { + fmt.Fprintf(w, "%s:%s\n", k, v.InPlaceValue()) + } +} + +func scanKeyspanIterator(w io.Writer, ki keyspan.FragmentIterator) { + for s := ki.First(); s != nil; s = ki.Next() { + fmt.Fprintln(w, s) + } +} + +func TestFlushableBatchBytesIterated(t *testing.T) { + batch := newBatch(nil) + for j := 0; j < 1000; j++ { + key := make([]byte, 8+j%3) + value := make([]byte, 7+j%5) + batch.Set(key, value, nil) + + fb, err := newFlushableBatch(batch, DefaultComparer) + require.NoError(t, err) + + var bytesIterated uint64 + it := fb.newFlushIter(nil, &bytesIterated) + + var prevIterated uint64 + for key, _ := it.First(); key != nil; key, _ = it.Next() { + if bytesIterated < prevIterated { + t.Fatalf("bytesIterated moved backward: %d < %d", bytesIterated, prevIterated) + } + prevIterated = bytesIterated + } + + expected := fb.inuseBytes() + if bytesIterated != expected { + t.Fatalf("bytesIterated: got %d, want %d", bytesIterated, expected) + } + } +} + +func TestEmptyFlushableBatch(t *testing.T) { + // Verify that we can create a flushable batch on an empty batch. 
+ fb, err := newFlushableBatch(newBatch(nil), DefaultComparer) + require.NoError(t, err) + it := newInternalIterAdapter(fb.newIter(nil)) + require.False(t, it.First()) +} + +func TestBatchCommitStats(t *testing.T) { + testFunc := func() error { + db, err := Open("", &Options{ + FS: vfs.NewMem(), + }) + require.NoError(t, err) + defer db.Close() + b := db.NewBatch() + defer b.Close() + stats := b.CommitStats() + require.Equal(t, BatchCommitStats{}, stats) + + // The stall code peers into the internals, instead of adding general + // purpose hooks, to avoid changing production code. We can revisit this + // choice if it becomes hard to maintain. + + // Commit semaphore stall funcs. + var unstallCommitSemaphore func() + stallCommitSemaphore := func() { + commitPipeline := db.commit + commitSemaphoreReserved := 0 + done := false + for !done { + select { + case commitPipeline.commitQueueSem <- struct{}{}: + commitSemaphoreReserved++ + default: + done = true + } + if done { + break + } + } + unstallCommitSemaphore = func() { + for i := 0; i < commitSemaphoreReserved; i++ { + <-commitPipeline.commitQueueSem + } + } + } + + // Memstable stall funcs. + var unstallMemtable func() + stallMemtable := func() { + db.mu.Lock() + defer db.mu.Unlock() + prev := db.opts.MemTableStopWritesThreshold + db.opts.MemTableStopWritesThreshold = 0 + unstallMemtable = func() { + db.mu.Lock() + defer db.mu.Unlock() + db.opts.MemTableStopWritesThreshold = prev + db.mu.compact.cond.Broadcast() + } + } + + // L0 read-amp stall funcs. + var unstallL0ReadAmp func() + stallL0ReadAmp := func() { + db.mu.Lock() + defer db.mu.Unlock() + prev := db.opts.L0StopWritesThreshold + db.opts.L0StopWritesThreshold = 0 + unstallL0ReadAmp = func() { + db.mu.Lock() + defer db.mu.Unlock() + db.opts.L0StopWritesThreshold = prev + db.mu.compact.cond.Broadcast() + } + } + + // Commit wait stall funcs. 
+ var unstallCommitWait func() + stallCommitWait := func() { + b.commit.Add(1) + unstallCommitWait = func() { + b.commit.Done() + } + } + + // Stall everything. + stallCommitSemaphore() + stallMemtable() + stallL0ReadAmp() + stallCommitWait() + + // Exceed initialMemTableSize -- this is needed to make stallMemtable work. + require.NoError(t, b.Set(make([]byte, initialMemTableSize), nil, nil)) + + var commitWG sync.WaitGroup + commitWG.Add(1) + go func() { + require.NoError(t, db.Apply(b, &WriteOptions{Sync: true})) + commitWG.Done() + }() + // Unstall things in the order that the stalls will happen. + sleepDuration := 10 * time.Millisecond + time.Sleep(sleepDuration) + unstallCommitSemaphore() + time.Sleep(sleepDuration) + unstallMemtable() + time.Sleep(sleepDuration) + unstallL0ReadAmp() + time.Sleep(sleepDuration) + unstallCommitWait() + + // Wait for Apply to return. + commitWG.Wait() + stats = b.CommitStats() + expectedDuration := (2 * sleepDuration) / 3 + if expectedDuration > stats.SemaphoreWaitDuration { + return errors.Errorf("SemaphoreWaitDuration %s is too low", + stats.SemaphoreWaitDuration.String()) + } + if expectedDuration > stats.MemTableWriteStallDuration { + return errors.Errorf("MemTableWriteStallDuration %s is too low", + stats.MemTableWriteStallDuration.String()) + } + if expectedDuration > stats.L0ReadAmpWriteStallDuration { + return errors.Errorf("L0ReadAmpWriteStallDuration %s is too low", + stats.L0ReadAmpWriteStallDuration) + } + if expectedDuration > stats.CommitWaitDuration { + return errors.Errorf("CommitWaitDuration %s is too low", + stats.CommitWaitDuration) + } + if 5*expectedDuration > stats.TotalDuration { + return errors.Errorf("TotalDuration %s is too low", + stats.TotalDuration) + } + return nil + } + // Try a few times, and succeed if one of them succeeds. 
+ var err error + for i := 0; i < 5; i++ { + err = testFunc() + if err == nil { + break + } + } + require.NoError(t, err) +} + +func TestBatchReader(t *testing.T) { + datadriven.RunTest(t, "testdata/batch_reader", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "scan": + var repr bytes.Buffer + for i, l := range strings.Split(td.Input, "\n") { + // Remove any trailing comments behind #. + if i := strings.IndexRune(l, '#'); i >= 0 { + l = l[:i] + } + // Strip all whitespace from the line. + l = strings.Map(func(r rune) rune { + if unicode.IsSpace(r) { + return -1 + } + return r + }, l) + b, err := hex.DecodeString(l) + if err != nil { + return fmt.Sprintf("failed to decode hex; line %d", i) + } + repr.Write(b) + } + r, count := ReadBatch(repr.Bytes()) + var out strings.Builder + fmt.Fprintf(&out, "Count: %d\n", count) + for { + kind, ukey, value, ok, err := r.Next() + if !ok { + if err != nil { + fmt.Fprintf(&out, "err: %s\n", err) + } else { + fmt.Fprint(&out, "eof") + } + break + } + fmt.Fprintf(&out, "%s: %q: %q\n", kind, ukey, value) + } + return out.String() + + default: + return fmt.Sprintf("unrecognized command %q", td.Cmd) + } + }) +} + +func BenchmarkBatchSet(b *testing.B) { + value := make([]byte, 10) + for i := range value { + value[i] = byte(i) + } + key := make([]byte, 8) + batch := newBatch(nil) + + b.ResetTimer() + + const batchSize = 1000 + for i := 0; i < b.N; i += batchSize { + end := i + batchSize + if end > b.N { + end = b.N + } + + for j := i; j < end; j++ { + binary.BigEndian.PutUint64(key, uint64(j)) + batch.Set(key, value, nil) + } + batch.Reset() + } + + b.StopTimer() +} + +func BenchmarkIndexedBatchSet(b *testing.B) { + value := make([]byte, 10) + for i := range value { + value[i] = byte(i) + } + key := make([]byte, 8) + batch := newIndexedBatch(nil, DefaultComparer) + + b.ResetTimer() + + const batchSize = 1000 + for i := 0; i < b.N; i += batchSize { + end := i + batchSize + if end > b.N { + end = b.N + } + + 
for j := i; j < end; j++ { + binary.BigEndian.PutUint64(key, uint64(j)) + batch.Set(key, value, nil) + } + batch.Reset() + } + + b.StopTimer() +} + +func BenchmarkBatchSetDeferred(b *testing.B) { + value := make([]byte, 10) + for i := range value { + value[i] = byte(i) + } + key := make([]byte, 8) + batch := newBatch(nil) + + b.ResetTimer() + + const batchSize = 1000 + for i := 0; i < b.N; i += batchSize { + end := i + batchSize + if end > b.N { + end = b.N + } + + for j := i; j < end; j++ { + binary.BigEndian.PutUint64(key, uint64(j)) + deferredOp := batch.SetDeferred(len(key), len(value)) + + copy(deferredOp.Key, key) + copy(deferredOp.Value, value) + + deferredOp.Finish() + } + batch.Reset() + } + + b.StopTimer() +} + +func BenchmarkIndexedBatchSetDeferred(b *testing.B) { + value := make([]byte, 10) + for i := range value { + value[i] = byte(i) + } + key := make([]byte, 8) + batch := newIndexedBatch(nil, DefaultComparer) + + b.ResetTimer() + + const batchSize = 1000 + for i := 0; i < b.N; i += batchSize { + end := i + batchSize + if end > b.N { + end = b.N + } + + for j := i; j < end; j++ { + binary.BigEndian.PutUint64(key, uint64(j)) + deferredOp := batch.SetDeferred(len(key), len(value)) + + copy(deferredOp.Key, key) + copy(deferredOp.Value, value) + + deferredOp.Finish() + } + batch.Reset() + } + + b.StopTimer() +} + +func TestBatchMemTableSizeOverflow(t *testing.T) { + opts := &Options{ + FS: vfs.NewMem(), + } + opts.EnsureDefaults() + d, err := Open("", opts) + require.NoError(t, err) + + bigValue := make([]byte, 1000) + b := d.NewBatch() + + // memTableSize can overflow as a uint32. 
+ b.memTableSize = math.MaxUint32 - 50 + for i := 0; i < 10; i++ { + k := fmt.Sprintf("key-%05d", i) + require.NoError(t, b.Set([]byte(k), bigValue, nil)) + } + require.Greater(t, b.memTableSize, uint64(math.MaxUint32)) + require.NoError(t, b.Close()) + require.NoError(t, d.Close()) +} + +// TestBatchSpanCaching stress tests the caching of keyspan.Spans for range +// tombstones and range keys. +func TestBatchSpanCaching(t *testing.T) { + opts := &Options{ + Comparer: testkeys.Comparer, + FS: vfs.NewMem(), + FormatMajorVersion: internalFormatNewest, + } + d, err := Open("", opts) + require.NoError(t, err) + defer d.Close() + + ks := testkeys.Alpha(1) + b := d.NewIndexedBatch() + for i := int64(0); i < ks.Count(); i++ { + k := testkeys.Key(ks, i) + require.NoError(t, b.Set(k, k, nil)) + } + + seed := int64(time.Now().UnixNano()) + t.Logf("seed = %d", seed) + rng := rand.New(rand.NewSource(seed)) + iters := make([][]*Iterator, ks.Count()) + defer func() { + for _, keyIters := range iters { + for _, iter := range keyIters { + _ = iter.Close() + } + } + }() + + // This test begins with one point key for every letter of the alphabet. + // Over the course of the test, point keys are 'replaced' with range keys + // with narrow bounds from left to right. Iterators are created at random, + // sometimes from the batch and sometimes by cloning existing iterators. + + checkIter := func(iter *Iterator, nextKey int64) { + var i int64 + for valid := iter.First(); valid; valid = iter.Next() { + hasPoint, hasRange := iter.HasPointAndRange() + require.Equal(t, testkeys.Key(ks, i), iter.Key()) + if i < nextKey { + // This key should not exist as a point key, just a range key. + require.False(t, hasPoint) + require.True(t, hasRange) + } else { + require.True(t, hasPoint) + require.False(t, hasRange) + } + i++ + } + require.Equal(t, ks.Count(), i) + } + + // Each iteration of the below loop either reads or writes. 
+ // + // A write iteration writes a new RANGEDEL and RANGEKEYSET into the batch, + // covering a single point key seeded above. Writing these two span keys + // together 'replaces' the point key with a range key. Each write iteration + // ratchets nextWriteKey so the next write iteration will write the next + // key. + // + // A read iteration creates a new iterator and ensures its state is + // expected: some prefix of only point keys, followed by a suffix of only + // range keys. Iterators created through Clone should observe the point keys + // that existed when the cloned iterator was created. + for nextWriteKey := int64(0); nextWriteKey < ks.Count(); { + p := rng.Float64() + switch { + case p < .10: /* 10 % */ + // Write a new range deletion and range key. + start := testkeys.Key(ks, nextWriteKey) + end := append(start, 0x00) + require.NoError(t, b.DeleteRange(start, end, nil)) + require.NoError(t, b.RangeKeySet(start, end, nil, []byte("foo"), nil)) + nextWriteKey++ + case p < .55: /* 45 % */ + // Create a new iterator directly from the batch and check that it + // observes the correct state. + iter, _ := b.NewIter(&IterOptions{KeyTypes: IterKeyTypePointsAndRanges}) + checkIter(iter, nextWriteKey) + iters[nextWriteKey] = append(iters[nextWriteKey], iter) + default: /* 45 % */ + // Create a new iterator through cloning a random existing iterator + // and check that it observes the right state. + readKey := rng.Int63n(nextWriteKey + 1) + itersForReadKey := iters[readKey] + if len(itersForReadKey) == 0 { + continue + } + iter, err := itersForReadKey[rng.Intn(len(itersForReadKey))].Clone(CloneOptions{}) + require.NoError(t, err) + checkIter(iter, readKey) + iters[readKey] = append(iters[readKey], iter) + } + } +} diff --git a/pebble/bloom/bloom.go b/pebble/bloom/bloom.go new file mode 100644 index 0000000..bf72e1d --- /dev/null +++ b/pebble/bloom/bloom.go @@ -0,0 +1,250 @@ +// Copyright 2013 The LevelDB-Go and Pebble Authors. All rights reserved. 
Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +// Package bloom implements Bloom filters. +package bloom // import "github.com/cockroachdb/pebble/bloom" + +import ( + "encoding/binary" + "fmt" + "sync" + + "github.com/cockroachdb/pebble/internal/base" +) + +const ( + cacheLineSize = 64 + cacheLineBits = cacheLineSize * 8 +) + +type tableFilter []byte + +func (f tableFilter) MayContain(key []byte) bool { + if len(f) <= 5 { + return false + } + n := len(f) - 5 + nProbes := f[n] + nLines := binary.LittleEndian.Uint32(f[n+1:]) + cacheLineBits := 8 * (uint32(n) / nLines) + + h := hash(key) + delta := h>>17 | h<<15 + b := (h % nLines) * cacheLineBits + + for j := uint8(0); j < nProbes; j++ { + bitPos := b + (h % cacheLineBits) + if f[bitPos/8]&(1<<(bitPos%8)) == 0 { + return false + } + h += delta + } + return true +} + +func calculateProbes(bitsPerKey int) uint32 { + // We intentionally round down to reduce probing cost a little bit + n := uint32(float64(bitsPerKey) * 0.69) // 0.69 =~ ln(2) + if n < 1 { + n = 1 + } + if n > 30 { + n = 30 + } + return n +} + +// extend appends n zero bytes to b. It returns the overall slice (of length +// n+len(originalB)) and the slice of n trailing zeroes. +func extend(b []byte, n int) (overall, trailer []byte) { + want := n + len(b) + if want <= cap(b) { + overall = b[:want] + trailer = overall[len(b):] + for i := range trailer { + trailer[i] = 0 + } + } else { + // Grow the capacity exponentially, with a 1KiB minimum. + c := 1024 + for c < want { + c += c / 4 + } + overall = make([]byte, want, c) + trailer = overall[len(b):] + copy(overall, b) + } + return overall, trailer +} + +// hash implements a hashing algorithm similar to the Murmur hash. 
+func hash(b []byte) uint32 { + const ( + seed = 0xbc9f1d34 + m = 0xc6a4a793 + ) + h := uint32(seed) ^ uint32(uint64(uint32(len(b))*m)) + for ; len(b) >= 4; b = b[4:] { + h += uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 + h *= m + h ^= h >> 16 + } + + // The code below first casts each byte to a signed 8-bit integer. This is + // necessary to match RocksDB's behavior. Note that the `byte` type in Go is + // unsigned. What is the difference between casting a signed 8-bit value vs + // unsigned 8-bit value into an unsigned 32-bit value? + // Sign-extension. Consider the value 250 which has the bit pattern 11111010: + // + // uint32(250) = 00000000000000000000000011111010 + // uint32(int8(250)) = 11111111111111111111111111111010 + // + // Note that the original LevelDB code did not explicitly cast to a signed + // 8-bit value which left the behavior dependent on whether C characters were + // signed or unsigned which is a compiler flag for gcc (-funsigned-char). + switch len(b) { + case 3: + h += uint32(int8(b[2])) << 16 + fallthrough + case 2: + h += uint32(int8(b[1])) << 8 + fallthrough + case 1: + h += uint32(int8(b[0])) + h *= m + h ^= h >> 24 + } + return h +} + +const hashBlockLen = 16384 + +type hashBlock [hashBlockLen]uint32 + +var hashBlockPool = sync.Pool{ + New: func() interface{} { + return &hashBlock{} + }, +} + +type tableFilterWriter struct { + bitsPerKey int + + numHashes int + // We store the hashes in blocks. + blocks []*hashBlock + lastHash uint32 + + // Initial "in-line" storage for the blocks slice (to avoid some small + // allocations). + blocksBuf [16]*hashBlock +} + +func newTableFilterWriter(bitsPerKey int) *tableFilterWriter { + w := &tableFilterWriter{ + bitsPerKey: bitsPerKey, + } + w.blocks = w.blocksBuf[:0] + return w +} + +// AddKey implements the base.FilterWriter interface. 
+func (w *tableFilterWriter) AddKey(key []byte) { + h := hash(key) + if w.numHashes != 0 && h == w.lastHash { + return + } + ofs := w.numHashes % hashBlockLen + if ofs == 0 { + // Time for a new block. + w.blocks = append(w.blocks, hashBlockPool.Get().(*hashBlock)) + } + w.blocks[len(w.blocks)-1][ofs] = h + w.numHashes++ + w.lastHash = h +} + +// Finish implements the base.FilterWriter interface. +func (w *tableFilterWriter) Finish(buf []byte) []byte { + // The table filter format matches the RocksDB full-file filter format. + var nLines int + if w.numHashes != 0 { + nLines = (w.numHashes*w.bitsPerKey + cacheLineBits - 1) / (cacheLineBits) + // Make nLines an odd number to make sure more bits are involved when + // determining which block. + if nLines%2 == 0 { + nLines++ + } + } + + nBytes := nLines * cacheLineSize + // +5: 4 bytes for num-lines, 1 byte for num-probes + buf, filter := extend(buf, nBytes+5) + + if nLines != 0 { + nProbes := calculateProbes(w.bitsPerKey) + for bIdx, b := range w.blocks { + length := hashBlockLen + if bIdx == len(w.blocks)-1 && w.numHashes%hashBlockLen != 0 { + length = w.numHashes % hashBlockLen + } + for _, h := range b[:length] { + delta := h>>17 | h<<15 // rotate right 17 bits + b := (h % uint32(nLines)) * (cacheLineBits) + for i := uint32(0); i < nProbes; i++ { + bitPos := b + (h % cacheLineBits) + filter[bitPos/8] |= (1 << (bitPos % 8)) + h += delta + } + } + } + filter[nBytes] = byte(nProbes) + binary.LittleEndian.PutUint32(filter[nBytes+1:], uint32(nLines)) + } + + // Release the hash blocks. + for i, b := range w.blocks { + hashBlockPool.Put(b) + w.blocks[i] = nil + } + w.blocks = w.blocks[:0] + w.numHashes = 0 + return buf +} + +// FilterPolicy implements the FilterPolicy interface from the pebble package. +// +// The integer value is the approximate number of bits used per key. A good +// value is 10, which yields a filter with ~ 1% false positive rate. 
+type FilterPolicy int + +var _ base.FilterPolicy = FilterPolicy(0) + +// Name implements the pebble.FilterPolicy interface. +func (p FilterPolicy) Name() string { + // This string looks arbitrary, but its value is written to LevelDB .sst + // files, and should be this exact value to be compatible with those files + // and with the C++ LevelDB code. + return "rocksdb.BuiltinBloomFilter" +} + +// MayContain implements the pebble.FilterPolicy interface. +func (p FilterPolicy) MayContain(ftype base.FilterType, f, key []byte) bool { + switch ftype { + case base.TableFilter: + return tableFilter(f).MayContain(key) + default: + panic(fmt.Sprintf("unknown filter type: %v", ftype)) + } +} + +// NewWriter implements the pebble.FilterPolicy interface. +func (p FilterPolicy) NewWriter(ftype base.FilterType) base.FilterWriter { + switch ftype { + case base.TableFilter: + return newTableFilterWriter(int(p)) + default: + panic(fmt.Sprintf("unknown filter type: %v", ftype)) + } +} diff --git a/pebble/bloom/bloom_test.go b/pebble/bloom/bloom_test.go new file mode 100644 index 0000000..74a6f62 --- /dev/null +++ b/pebble/bloom/bloom_test.go @@ -0,0 +1,219 @@ +// Copyright 2013 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package bloom + +import ( + "crypto/rand" + "strings" + "testing" + + "github.com/cockroachdb/pebble/internal/base" + "github.com/stretchr/testify/require" +) + +func (f tableFilter) String() string { + var buf strings.Builder + for i, x := range f { + if i > 0 { + if i%8 == 0 { + buf.WriteString("\n") + } else { + buf.WriteString(" ") + } + } + + for j := uint(0); j < 8; j++ { + if x&(1<<(7-j)) != 0 { + buf.WriteString("1") + } else { + buf.WriteString(".") + } + } + } + buf.WriteString("\n") + return buf.String() +} + +func newTableFilter(bitsPerKey int, keys ...[]byte) tableFilter { + w := FilterPolicy(bitsPerKey).NewWriter(base.TableFilter) + for _, key := range keys { + w.AddKey(key) + } + return tableFilter(w.Finish(nil)) +} + +func TestSmallBloomFilter(t *testing.T) { + f := newTableFilter(10, []byte("hello"), []byte("world")) + + // The magic expected string comes from running RocksDB's util/bloom_test.cc:FullBloomTest.FullSmall. + want := ` +........ ........ ........ .......1 ........ ........ ........ ........ +........ .1...... ........ .1...... ........ ........ ........ ........ +...1.... ........ ........ ........ ........ ........ ........ ........ +........ ........ ........ ........ ........ ........ ........ ...1.... +........ ........ ........ ........ .....1.. ........ ........ ........ +.......1 ........ ........ ........ ........ ........ .1...... ........ +........ ........ ........ ........ ........ ...1.... ........ ........ +.......1 ........ ........ ........ .1...1.. ........ ........ ........ +.....11. .......1 ........ ........ ........ 
+` + want = strings.TrimLeft(want, "\n") + require.EqualValues(t, want, f.String()) + + m := map[string]bool{ + "hello": true, + "world": true, + "x": false, + "foo": false, + } + for k, want := range m { + require.EqualValues(t, want, f.MayContain([]byte(k))) + } +} + +func TestBloomFilter(t *testing.T) { + nextLength := func(x int) int { + if x < 10 { + return x + 1 + } + if x < 100 { + return x + 10 + } + if x < 1000 { + return x + 100 + } + return x + 1000 + } + le32 := func(i int) []byte { + b := make([]byte, 4) + b[0] = uint8(uint32(i) >> 0) + b[1] = uint8(uint32(i) >> 8) + b[2] = uint8(uint32(i) >> 16) + b[3] = uint8(uint32(i) >> 24) + return b + } + + nMediocreFilters, nGoodFilters := 0, 0 +loop: + for length := 1; length <= 10000; length = nextLength(length) { + keys := make([][]byte, 0, length) + for i := 0; i < length; i++ { + keys = append(keys, le32(i)) + } + f := newTableFilter(10, keys...) + // The size of the table bloom filter is measured in multiples of the + // cache line size. The '+2' contribution captures the rounding up in the + // length division plus preferring an odd number of cache lines. As such, + // this formula isn't exact, but the exact formula is hard to read. + maxLen := 5 + ((length*10)/cacheLineBits+2)*cacheLineSize + if len(f) > maxLen { + t.Errorf("length=%d: len(f)=%d > max len %d", length, len(f), maxLen) + continue + } + + // All added keys must match. + for _, key := range keys { + if !f.MayContain(key) { + t.Errorf("length=%d: did not contain key %q", length, key) + continue loop + } + } + + // Check false positive rate. 
+ nFalsePositive := 0 + for i := 0; i < 10000; i++ { + if f.MayContain(le32(1e9 + i)) { + nFalsePositive++ + } + } + if nFalsePositive > 0.02*10000 { + t.Errorf("length=%d: %d false positives in 10000", length, nFalsePositive) + continue + } + if nFalsePositive > 0.0125*10000 { + nMediocreFilters++ + } else { + nGoodFilters++ + } + } + + if nMediocreFilters > nGoodFilters/5 { + t.Errorf("%d mediocre filters but only %d good filters", nMediocreFilters, nGoodFilters) + } +} + +func TestHash(t *testing.T) { + testCases := []struct { + s string + expected uint32 + }{ + // The magic expected numbers come from RocksDB's util/hash_test.cc:TestHash. + {"", 3164544308}, + {"\x08", 422599524}, + {"\x17", 3168152998}, + {"\x9a", 3195034349}, + {"\x1c", 2651681383}, + {"\x4d\x76", 2447836956}, + {"\x52\xd5", 3854228105}, + {"\x91\xf7", 31066776}, + {"\xd6\x27", 1806091603}, + {"\x30\x46\x0b", 3808221797}, + {"\x56\xdc\xd6", 2157698265}, + {"\xd4\x52\x33", 1721992661}, + {"\x6a\xb5\xf4", 2469105222}, + {"\x67\x53\x81\x1c", 118283265}, + {"\x69\xb8\xc0\x88", 3416318611}, + {"\x1e\x84\xaf\x2d", 3315003572}, + {"\x46\xdc\x54\xbe", 447346355}, + {"\xd0\x7a\x6e\xea\x56", 4255445370}, + {"\x86\x83\xd5\xa4\xd8", 2390603402}, + {"\xb7\x46\xbb\x77\xce", 2048907743}, + {"\x6c\xa8\xbc\xe5\x99", 2177978500}, + {"\x5c\x5e\xe1\xa0\x73\x81", 1036846008}, + {"\x08\x5d\x73\x1c\xe5\x2e", 229980482}, + {"\x42\xfb\xf2\x52\xb4\x10", 3655585422}, + {"\x73\xe1\xff\x56\x9c\xce", 3502708029}, + {"\x5c\xbe\x97\x75\x54\x9a\x52", 815120748}, + {"\x16\x82\x39\x49\x88\x2b\x36", 3056033698}, + {"\x59\x77\xf0\xa7\x24\xf4\x78", 587205227}, + {"\xd3\xa5\x7c\x0e\xc0\x02\x07", 2030937252}, + {"\x31\x1b\x98\x75\x96\x22\xd3\x9a", 469635402}, + {"\x38\xd6\xf7\x28\x20\xb4\x8a\xe9", 3530274698}, + {"\xbb\x18\x5d\xf4\x12\x03\xf7\x99", 1974545809}, + {"\x80\xd4\x3b\x3b\xae\x22\xa2\x78", 3563570120}, + {"\x1a\xb5\xd0\xfe\xab\xc3\x61\xb2\x99", 2706087434}, + {"\x8e\x4a\xc3\x18\x20\x2f\x06\xe6\x3c", 1534654151}, + 
{"\xb6\xc0\xdd\x05\x3f\xc4\x86\x4c\xef", 2355554696}, + {"\x9a\x5f\x78\x0d\xaf\x50\xe1\x1f\x55", 1400800912}, + {"\x22\x6f\x39\x1f\xf8\xdd\x4f\x52\x17\x94", 3420325137}, + {"\x32\x89\x2a\x75\x48\x3a\x4a\x02\x69\xdd", 3427803584}, + {"\x06\x92\x5c\xf4\x88\x0e\x7e\x68\x38\x3e", 1152407945}, + {"\xbd\x2c\x63\x38\xbf\xe9\x78\xb7\xbf\x15", 3382479516}, + } + for _, tc := range testCases { + t.Run("", func(t *testing.T) { + require.EqualValues(t, tc.expected, hash([]byte(tc.s))) + }) + } +} + +func BenchmarkBloomFilter(b *testing.B) { + const keyLen = 128 + const numKeys = 1024 + keys := make([][]byte, numKeys) + for i := range keys { + keys[i] = make([]byte, keyLen) + _, _ = rand.Read(keys[i]) + } + b.ResetTimer() + policy := FilterPolicy(10) + for i := 0; i < b.N; i++ { + w := policy.NewWriter(base.TableFilter) + for _, key := range keys { + w.AddKey(key) + } + w.Finish(nil) + } +} diff --git a/pebble/cache.go b/pebble/cache.go new file mode 100644 index 0000000..91f5532 --- /dev/null +++ b/pebble/cache.go @@ -0,0 +1,23 @@ +// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import "github.com/cockroachdb/pebble/internal/cache" + +// Cache exports the cache.Cache type. +type Cache = cache.Cache + +// NewCache creates a new cache of the specified size. Memory for the cache is +// allocated on demand, not during initialization. The cache is created with a +// reference count of 1. Each DB it is associated with adds a reference, so the +// creator of the cache should usually release their reference after the DB is +// created. +// +// c := pebble.NewCache(...) 
+// defer c.Unref() +// d, err := pebble.Open(pebble.Options{Cache: c}) +func NewCache(size int64) *cache.Cache { + return cache.New(size) +} diff --git a/pebble/checkpoint.go b/pebble/checkpoint.go new file mode 100644 index 0000000..f321c01 --- /dev/null +++ b/pebble/checkpoint.go @@ -0,0 +1,428 @@ +// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import ( + "io" + "os" + + "github.com/cockroachdb/errors/oserror" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/record" + "github.com/cockroachdb/pebble/vfs" + "github.com/cockroachdb/pebble/vfs/atomicfs" +) + +// checkpointOptions hold the optional parameters to construct checkpoint +// snapshots. +type checkpointOptions struct { + // flushWAL set to true will force a flush and sync of the WAL prior to + // checkpointing. + flushWAL bool + + // If set, any SSTs that don't overlap with these spans are excluded from a checkpoint. + restrictToSpans []CheckpointSpan +} + +// CheckpointOption set optional parameters used by `DB.Checkpoint`. +type CheckpointOption func(*checkpointOptions) + +// WithFlushedWAL enables flushing and syncing the WAL prior to constructing a +// checkpoint. This guarantees that any writes committed before calling +// DB.Checkpoint will be part of that checkpoint. +// +// Note that this setting can only be useful in cases when some writes are +// performed with Sync = false. Otherwise, the guarantee will already be met. +// +// Passing this option is functionally equivalent to calling +// DB.LogData(nil, Sync) right before DB.Checkpoint. +func WithFlushedWAL() CheckpointOption { + return func(opt *checkpointOptions) { + opt.flushWAL = true + } +} + +// WithRestrictToSpans specifies spans of interest for the checkpoint. Any SSTs +// that don't overlap with any of these spans are excluded from the checkpoint. 
+// +// Note that the checkpoint can still surface keys outside of these spans (from +// the WAL and from SSTs that partially overlap with these spans). Moreover, +// these surface keys aren't necessarily "valid" in that they could have been +// modified but the SST containing the modification is excluded. +func WithRestrictToSpans(spans []CheckpointSpan) CheckpointOption { + return func(opt *checkpointOptions) { + opt.restrictToSpans = spans + } +} + +// CheckpointSpan is a key range [Start, End) (inclusive on Start, exclusive on +// End) of interest for a checkpoint. +type CheckpointSpan struct { + Start []byte + End []byte +} + +// excludeFromCheckpoint returns true if an SST file should be excluded from the +// checkpoint because it does not overlap with the spans of interest +// (opt.restrictToSpans). +func excludeFromCheckpoint(f *fileMetadata, opt *checkpointOptions, cmp Compare) bool { + if len(opt.restrictToSpans) == 0 { + // Option not set; don't exclude anything. + return false + } + for _, s := range opt.restrictToSpans { + if f.Overlaps(cmp, s.Start, s.End, true /* exclusiveEnd */) { + return false + } + } + // None of the restrictToSpans overlapped; we can exclude this file. + return true +} + +// mkdirAllAndSyncParents creates destDir and any of its missing parents. +// Those missing parents, as well as the closest existing ancestor, are synced. +// Returns a handle to the directory created at destDir. +func mkdirAllAndSyncParents(fs vfs.FS, destDir string) (vfs.File, error) { + // Collect paths for all directories between destDir (excluded) and its + // closest existing ancestor (included). + var parentPaths []string + foundExistingAncestor := false + for parentPath := fs.PathDir(destDir); parentPath != "."; parentPath = fs.PathDir(parentPath) { + parentPaths = append(parentPaths, parentPath) + _, err := fs.Stat(parentPath) + if err == nil { + // Exit loop at the closest existing ancestor. 
+ foundExistingAncestor = true + break + } + if !oserror.IsNotExist(err) { + return nil, err + } + } + // Handle empty filesystem edge case. + if !foundExistingAncestor { + parentPaths = append(parentPaths, "") + } + // Create destDir and any of its missing parents. + if err := fs.MkdirAll(destDir, 0755); err != nil { + return nil, err + } + // Sync all the parent directories up to the closest existing ancestor, + // included. + for _, parentPath := range parentPaths { + parentDir, err := fs.OpenDir(parentPath) + if err != nil { + return nil, err + } + err = parentDir.Sync() + if err != nil { + _ = parentDir.Close() + return nil, err + } + err = parentDir.Close() + if err != nil { + return nil, err + } + } + return fs.OpenDir(destDir) +} + +// Checkpoint constructs a snapshot of the DB instance in the specified +// directory. The WAL, MANIFEST, OPTIONS, and sstables will be copied into the +// snapshot. Hard links will be used when possible. Beware of the significant +// space overhead for a checkpoint if hard links are disabled. Also beware that +// even if hard links are used, the space overhead for the checkpoint will +// increase over time as the DB performs compactions. +func (d *DB) Checkpoint( + destDir string, opts ...CheckpointOption, +) ( + ckErr error, /* used in deferred cleanup */ +) { + opt := &checkpointOptions{} + for _, fn := range opts { + fn(opt) + } + + if _, err := d.opts.FS.Stat(destDir); !oserror.IsNotExist(err) { + if err == nil { + return &os.PathError{ + Op: "checkpoint", + Path: destDir, + Err: oserror.ErrExist, + } + } + return err + } + + if opt.flushWAL && !d.opts.DisableWAL { + // Write an empty log-data record to flush and sync the WAL. + if err := d.LogData(nil /* data */, Sync); err != nil { + return err + } + } + + // Disable file deletions. 
+ d.mu.Lock() + d.disableFileDeletions() + defer func() { + d.mu.Lock() + defer d.mu.Unlock() + d.enableFileDeletions() + }() + + // TODO(peter): RocksDB provides the option to roll the manifest if the + // MANIFEST size is too large. Should we do this too? + + // Lock the manifest before getting the current version. We need the + // length of the manifest that we read to match the current version that + // we read, otherwise we might copy a versionEdit not reflected in the + // sstables we copy/link. + d.mu.versions.logLock() + // Get the unflushed log files, the current version, and the current manifest + // file number. + memQueue := d.mu.mem.queue + current := d.mu.versions.currentVersion() + formatVers := d.FormatMajorVersion() + manifestFileNum := d.mu.versions.manifestFileNum + manifestSize := d.mu.versions.manifest.Size() + optionsFileNum := d.optionsFileNum + virtualBackingFiles := make(map[base.DiskFileNum]struct{}) + for diskFileNum := range d.mu.versions.backingState.fileBackingMap { + virtualBackingFiles[diskFileNum] = struct{}{} + } + // Release the manifest and DB.mu so we don't block other operations on + // the database. + d.mu.versions.logUnlock() + d.mu.Unlock() + + // Wrap the normal filesystem with one which wraps newly created files with + // vfs.NewSyncingFile. + fs := vfs.NewSyncingFS(d.opts.FS, vfs.SyncingFileOptions{ + NoSyncOnClose: d.opts.NoSyncOnClose, + BytesPerSync: d.opts.BytesPerSync, + }) + + // Create the dir and its parents (if necessary), and sync them. + var dir vfs.File + defer func() { + if dir != nil { + _ = dir.Close() + } + if ckErr != nil { + // Attempt to cleanup on error. + _ = fs.RemoveAll(destDir) + } + }() + dir, ckErr = mkdirAllAndSyncParents(fs, destDir) + if ckErr != nil { + return ckErr + } + + { + // Link or copy the OPTIONS. 
+ srcPath := base.MakeFilepath(fs, d.dirname, fileTypeOptions, optionsFileNum) + destPath := fs.PathJoin(destDir, fs.PathBase(srcPath)) + ckErr = vfs.LinkOrCopy(fs, srcPath, destPath) + if ckErr != nil { + return ckErr + } + } + + { + // Set the format major version in the destination directory. + var versionMarker *atomicfs.Marker + versionMarker, _, ckErr = atomicfs.LocateMarker(fs, destDir, formatVersionMarkerName) + if ckErr != nil { + return ckErr + } + + // We use the marker to encode the active format version in the + // marker filename. Unlike other uses of the atomic marker, + // there is no file with the filename `formatVers.String()` on + // the filesystem. + ckErr = versionMarker.Move(formatVers.String()) + if ckErr != nil { + return ckErr + } + ckErr = versionMarker.Close() + if ckErr != nil { + return ckErr + } + } + + var excludedFiles map[deletedFileEntry]*fileMetadata + // Set of FileBacking.DiskFileNum which will be required by virtual sstables + // in the checkpoint. + requiredVirtualBackingFiles := make(map[base.DiskFileNum]struct{}) + // Link or copy the sstables. 
+ for l := range current.Levels { + iter := current.Levels[l].Iter() + for f := iter.First(); f != nil; f = iter.Next() { + if excludeFromCheckpoint(f, opt, d.cmp) { + if excludedFiles == nil { + excludedFiles = make(map[deletedFileEntry]*fileMetadata) + } + excludedFiles[deletedFileEntry{ + Level: l, + FileNum: f.FileNum, + }] = f + continue + } + + fileBacking := f.FileBacking + if f.Virtual { + if _, ok := requiredVirtualBackingFiles[fileBacking.DiskFileNum]; ok { + continue + } + requiredVirtualBackingFiles[fileBacking.DiskFileNum] = struct{}{} + } + + srcPath := base.MakeFilepath(fs, d.dirname, fileTypeTable, fileBacking.DiskFileNum) + destPath := fs.PathJoin(destDir, fs.PathBase(srcPath)) + ckErr = vfs.LinkOrCopy(fs, srcPath, destPath) + if ckErr != nil { + return ckErr + } + } + } + + var removeBackingTables []base.DiskFileNum + for diskFileNum := range virtualBackingFiles { + if _, ok := requiredVirtualBackingFiles[diskFileNum]; !ok { + // The backing sstable associated with fileNum is no longer + // required. + removeBackingTables = append(removeBackingTables, diskFileNum) + } + } + + ckErr = d.writeCheckpointManifest( + fs, formatVers, destDir, dir, manifestFileNum, manifestSize, + excludedFiles, removeBackingTables, + ) + if ckErr != nil { + return ckErr + } + + // Copy the WAL files. We copy rather than link because WAL file recycling + // will cause the WAL files to be reused which would invalidate the + // checkpoint. + for i := range memQueue { + logNum := memQueue[i].logNum + if logNum == 0 { + continue + } + srcPath := base.MakeFilepath(fs, d.walDirname, fileTypeLog, logNum) + destPath := fs.PathJoin(destDir, fs.PathBase(srcPath)) + ckErr = vfs.Copy(fs, srcPath, destPath) + if ckErr != nil { + return ckErr + } + } + + // Sync and close the checkpoint directory. 
+ ckErr = dir.Sync() + if ckErr != nil { + return ckErr + } + ckErr = dir.Close() + dir = nil + return ckErr +} + +func (d *DB) writeCheckpointManifest( + fs vfs.FS, + formatVers FormatMajorVersion, + destDirPath string, + destDir vfs.File, + manifestFileNum base.DiskFileNum, + manifestSize int64, + excludedFiles map[deletedFileEntry]*fileMetadata, + removeBackingTables []base.DiskFileNum, +) error { + // Copy the MANIFEST, and create a pointer to it. We copy rather + // than link because additional version edits added to the + // MANIFEST after we took our snapshot of the sstables will + // reference sstables that aren't in our checkpoint. For a + // similar reason, we need to limit how much of the MANIFEST we + // copy. + // If some files are excluded from the checkpoint, also append a block that + // records those files as deleted. + if err := func() error { + srcPath := base.MakeFilepath(fs, d.dirname, fileTypeManifest, manifestFileNum) + destPath := fs.PathJoin(destDirPath, fs.PathBase(srcPath)) + src, err := fs.Open(srcPath, vfs.SequentialReadsOption) + if err != nil { + return err + } + defer src.Close() + + dst, err := fs.Create(destPath) + if err != nil { + return err + } + defer dst.Close() + + // Copy all existing records. We need to copy at the record level in case we + // need to append another record with the excluded files (we cannot simply + // append a record after a raw data copy; see + // https://github.com/cockroachdb/cockroach/issues/100935). + r := record.NewReader(&io.LimitedReader{R: src, N: manifestSize}, manifestFileNum) + w := record.NewWriter(dst) + for { + rr, err := r.Next() + if err != nil { + if err == io.EOF { + break + } + return err + } + + rw, err := w.Next() + if err != nil { + return err + } + if _, err := io.Copy(rw, rr); err != nil { + return err + } + } + + if len(excludedFiles) > 0 { + // Write out an additional VersionEdit that deletes the excluded SST files. 
+ ve := versionEdit{ + DeletedFiles: excludedFiles, + RemovedBackingTables: removeBackingTables, + } + + rw, err := w.Next() + if err != nil { + return err + } + if err := ve.Encode(rw); err != nil { + return err + } + } + if err := w.Close(); err != nil { + return err + } + return dst.Sync() + }(); err != nil { + return err + } + + // Recent format versions use an atomic marker for setting the + // active manifest. Older versions use the CURRENT file. The + // setCurrentFunc function will return a closure that will + // take the appropriate action for the database's format + // version. + var manifestMarker *atomicfs.Marker + manifestMarker, _, err := atomicfs.LocateMarker(fs, destDirPath, manifestMarkerName) + if err != nil { + return err + } + if err := setCurrentFunc(formatVers, manifestMarker, fs, destDirPath, destDir)(manifestFileNum); err != nil { + return err + } + return manifestMarker.Close() +} diff --git a/pebble/checkpoint_test.go b/pebble/checkpoint_test.go new file mode 100644 index 0000000..e5e20a9 --- /dev/null +++ b/pebble/checkpoint_test.go @@ -0,0 +1,415 @@ +// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package pebble + +import ( + "bytes" + "context" + "fmt" + "math/rand" + "slices" + "sort" + "strings" + "sync" + "testing" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/vfs" + "github.com/stretchr/testify/require" +) + +func TestCheckpoint(t *testing.T) { + dbs := make(map[string]*DB) + defer func() { + for _, db := range dbs { + if db.closed.Load() == nil { + require.NoError(t, db.Close()) + } + } + }() + + mem := vfs.NewMem() + var memLog base.InMemLogger + opts := &Options{ + FS: vfs.WithLogging(mem, memLog.Infof), + FormatMajorVersion: internalFormatNewest, + L0CompactionThreshold: 10, + DisableAutomaticCompactions: true, + } + opts.private.disableTableStats = true + opts.private.testingAlwaysWaitForCleanup = true + + datadriven.RunTest(t, "testdata/checkpoint", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "batch": + if len(td.CmdArgs) != 1 { + return "batch " + } + memLog.Reset() + d := dbs[td.CmdArgs[0].String()] + b := d.NewBatch() + if err := runBatchDefineCmd(td, b); err != nil { + return err.Error() + } + if err := b.Commit(Sync); err != nil { + return err.Error() + } + return memLog.String() + + case "checkpoint": + if !(len(td.CmdArgs) == 2 || (len(td.CmdArgs) == 3 && td.CmdArgs[2].Key == "restrict")) { + return "checkpoint [restrict=(start-end, ...)]" + } + var opts []CheckpointOption + if len(td.CmdArgs) == 3 { + var spans []CheckpointSpan + for _, v := range td.CmdArgs[2].Vals { + splits := strings.SplitN(v, "-", 2) + if len(splits) != 2 { + return fmt.Sprintf("invalid restrict range %q", v) + } + spans = append(spans, CheckpointSpan{ + Start: []byte(splits[0]), + End: []byte(splits[1]), + }) + } + opts = append(opts, WithRestrictToSpans(spans)) + } + memLog.Reset() + d := dbs[td.CmdArgs[0].String()] + if err := d.Checkpoint(td.CmdArgs[1].String(), opts...); err != nil { + return err.Error() + } + return memLog.String() + + case 
"ingest-and-excise": + d := dbs[td.CmdArgs[0].String()] + + // Hacky but the command doesn't expect a db string. Get rid of it. + td.CmdArgs = td.CmdArgs[1:] + if err := runIngestAndExciseCmd(td, d, mem); err != nil { + return err.Error() + } + return "" + + case "build": + d := dbs[td.CmdArgs[0].String()] + + // Hacky but the command doesn't expect a db string. Get rid of it. + td.CmdArgs = td.CmdArgs[1:] + if err := runBuildCmd(td, d, mem); err != nil { + return err.Error() + } + return "" + + case "lsm": + d := dbs[td.CmdArgs[0].String()] + + // Hacky but the command doesn't expect a db string. Get rid of it. + td.CmdArgs = td.CmdArgs[1:] + return runLSMCmd(td, d) + + case "compact": + if len(td.CmdArgs) != 1 { + return "compact " + } + memLog.Reset() + d := dbs[td.CmdArgs[0].String()] + if err := d.Compact(nil, []byte("\xff"), false); err != nil { + return err.Error() + } + d.TestOnlyWaitForCleaning() + return memLog.String() + + case "print-backing": + // prints contents of the file backing map in the version. Used to + // test whether the checkpoint removed the filebackings correctly. 
+ if len(td.CmdArgs) != 1 { + return "print-backing " + } + d := dbs[td.CmdArgs[0].String()] + d.mu.Lock() + d.mu.versions.logLock() + var fileNums []base.DiskFileNum + for _, b := range d.mu.versions.backingState.fileBackingMap { + fileNums = append(fileNums, b.DiskFileNum) + } + d.mu.versions.logUnlock() + d.mu.Unlock() + + slices.Sort(fileNums) + var buf bytes.Buffer + for _, f := range fileNums { + buf.WriteString(fmt.Sprintf("%s\n", f.String())) + } + return buf.String() + + case "close": + if len(td.CmdArgs) != 1 { + return "close " + } + d := dbs[td.CmdArgs[0].String()] + require.NoError(t, d.Close()) + return "" + + case "flush": + if len(td.CmdArgs) != 1 { + return "flush " + } + memLog.Reset() + d := dbs[td.CmdArgs[0].String()] + if err := d.Flush(); err != nil { + return err.Error() + } + return memLog.String() + + case "list": + if len(td.CmdArgs) != 1 { + return "list " + } + paths, err := mem.List(td.CmdArgs[0].String()) + if err != nil { + return err.Error() + } + sort.Strings(paths) + return fmt.Sprintf("%s\n", strings.Join(paths, "\n")) + + case "open": + if len(td.CmdArgs) != 1 && len(td.CmdArgs) != 2 { + return "open [readonly]" + } + opts.ReadOnly = false + if len(td.CmdArgs) == 2 { + if td.CmdArgs[1].String() != "readonly" { + return "open [readonly]" + } + opts.ReadOnly = true + } + + memLog.Reset() + dir := td.CmdArgs[0].String() + d, err := Open(dir, opts) + if err != nil { + return err.Error() + } + dbs[dir] = d + return memLog.String() + + case "scan": + if len(td.CmdArgs) != 1 { + return "scan " + } + memLog.Reset() + d := dbs[td.CmdArgs[0].String()] + iter, _ := d.NewIter(nil) + for valid := iter.First(); valid; valid = iter.Next() { + memLog.Infof("%s %s", iter.Key(), iter.Value()) + } + memLog.Infof(".") + if err := iter.Close(); err != nil { + memLog.Infof("%v\n", err) + } + return memLog.String() + + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +func TestCheckpointCompaction(t *testing.T) { + fs := 
vfs.NewMem() + d, err := Open("", &Options{FS: fs}) + require.NoError(t, err) + + ctx, cancel := context.WithCancel(context.Background()) + + var wg sync.WaitGroup + wg.Add(4) + go func() { + defer cancel() + defer wg.Done() + for i := 0; ctx.Err() == nil; i++ { + if err := d.Set([]byte(fmt.Sprintf("key%06d", i)), nil, nil); err != nil { + t.Error(err) + return + } + } + }() + go func() { + defer cancel() + defer wg.Done() + for ctx.Err() == nil { + if err := d.Compact([]byte("key"), []byte("key999999"), false); err != nil { + t.Error(err) + return + } + } + }() + check := make(chan string, 100) + go func() { + defer cancel() + defer close(check) + defer wg.Done() + for i := 0; ctx.Err() == nil && i < 200; i++ { + dir := fmt.Sprintf("checkpoint%06d", i) + if err := d.Checkpoint(dir); err != nil { + t.Error(err) + return + } + select { + case <-ctx.Done(): + return + case check <- dir: + } + } + }() + go func() { + opts := &Options{FS: fs} + defer cancel() + defer wg.Done() + for dir := range check { + d2, err := Open(dir, opts) + if err != nil { + t.Error(err) + return + } + // Check the checkpoint has all the sstables that the manifest + // claims it has. + tableInfos, _ := d2.SSTables() + for _, tables := range tableInfos { + for _, tbl := range tables { + if tbl.Virtual { + continue + } + if _, err := fs.Stat(base.MakeFilepath(fs, dir, base.FileTypeTable, tbl.FileNum.DiskFileNum())); err != nil { + t.Error(err) + return + } + } + } + if err := d2.Close(); err != nil { + t.Error(err) + return + } + } + }() + <-ctx.Done() + wg.Wait() + require.NoError(t, d.Close()) +} + +func TestCheckpointFlushWAL(t *testing.T) { + const checkpointPath = "checkpoints/checkpoint" + fs := vfs.NewStrictMem() + opts := &Options{FS: fs} + key, value := []byte("key"), []byte("value") + + // Create a checkpoint from an unsynced DB. 
+ { + d, err := Open("", opts) + require.NoError(t, err) + { + wb := d.NewBatch() + err = wb.Set(key, value, nil) + require.NoError(t, err) + err = d.Apply(wb, NoSync) + require.NoError(t, err) + } + err = d.Checkpoint(checkpointPath, WithFlushedWAL()) + require.NoError(t, err) + require.NoError(t, d.Close()) + fs.ResetToSyncedState() + } + + // Check that the WAL has been flushed in the checkpoint. + { + files, err := fs.List(checkpointPath) + require.NoError(t, err) + hasLogFile := false + for _, f := range files { + info, err := fs.Stat(fs.PathJoin(checkpointPath, f)) + require.NoError(t, err) + if strings.HasSuffix(f, ".log") { + hasLogFile = true + require.NotZero(t, info.Size()) + } + } + require.True(t, hasLogFile) + } + + // Check that the checkpoint contains the expected data. + { + d, err := Open(checkpointPath, opts) + require.NoError(t, err) + iter, _ := d.NewIter(nil) + require.True(t, iter.First()) + require.Equal(t, key, iter.Key()) + require.Equal(t, value, iter.Value()) + require.False(t, iter.Next()) + require.NoError(t, iter.Close()) + require.NoError(t, d.Close()) + } +} + +func TestCheckpointManyFiles(t *testing.T) { + if testing.Short() { + t.Skip("skipping because of short flag") + } + const checkpointPath = "checkpoint" + opts := &Options{ + FS: vfs.NewMem(), + FormatMajorVersion: internalFormatNewest, + DisableAutomaticCompactions: true, + } + // Disable compression to speed up the test. + opts.EnsureDefaults() + for i := range opts.Levels { + opts.Levels[i].Compression = NoCompression + } + + d, err := Open("", opts) + require.NoError(t, err) + defer d.Close() + + mkKey := func(x int) []byte { + return []byte(fmt.Sprintf("key%06d", x)) + } + // We want to test the case where the appended record with the excluded files + // makes the manifest cross 32KB. This will happen for a range of values + // around 450. 
+ n := 400 + rand.Intn(100) + for i := 0; i < n; i++ { + err := d.Set(mkKey(i), nil, nil) + require.NoError(t, err) + err = d.Flush() + require.NoError(t, err) + } + err = d.Checkpoint(checkpointPath, WithRestrictToSpans([]CheckpointSpan{ + { + Start: mkKey(0), + End: mkKey(10), + }, + })) + require.NoError(t, err) + + // Open the checkpoint and iterate through all the keys. + { + d, err := Open(checkpointPath, opts) + require.NoError(t, err) + iter, _ := d.NewIter(nil) + require.True(t, iter.First()) + require.NoError(t, iter.Error()) + n := 1 + for iter.Next() { + n++ + } + require.NoError(t, iter.Error()) + require.NoError(t, iter.Close()) + require.NoError(t, d.Close()) + require.Equal(t, 10, n) + } +} diff --git a/pebble/cleaner.go b/pebble/cleaner.go new file mode 100644 index 0000000..f9fa43b --- /dev/null +++ b/pebble/cleaner.go @@ -0,0 +1,295 @@ +// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import ( + "context" + "runtime/pprof" + "sync" + "time" + + "github.com/cockroachdb/errors/oserror" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/invariants" + "github.com/cockroachdb/pebble/objstorage" + "github.com/cockroachdb/tokenbucket" +) + +// Cleaner exports the base.Cleaner type. +type Cleaner = base.Cleaner + +// DeleteCleaner exports the base.DeleteCleaner type. +type DeleteCleaner = base.DeleteCleaner + +// ArchiveCleaner exports the base.ArchiveCleaner type. +type ArchiveCleaner = base.ArchiveCleaner + +type cleanupManager struct { + opts *Options + objProvider objstorage.Provider + onTableDeleteFn func(fileSize uint64) + deletePacer *deletionPacer + + // jobsCh is used as the cleanup job queue. + jobsCh chan *cleanupJob + // waitGroup is used to wait for the background goroutine to exit. 
+ waitGroup sync.WaitGroup + + mu struct { + sync.Mutex + // totalJobs is the total number of enqueued jobs (completed or in progress). + totalJobs int + completedJobs int + completedJobsCond sync.Cond + jobsQueueWarningIssued bool + } +} + +// We can queue this many jobs before we have to block EnqueueJob. +const jobsQueueDepth = 1000 + +// obsoleteFile holds information about a file that needs to be deleted soon. +type obsoleteFile struct { + dir string + fileNum base.DiskFileNum + fileType fileType + fileSize uint64 +} + +type cleanupJob struct { + jobID int + obsoleteFiles []obsoleteFile +} + +// openCleanupManager creates a cleanupManager and starts its background goroutine. +// The cleanupManager must be Close()d. +func openCleanupManager( + opts *Options, + objProvider objstorage.Provider, + onTableDeleteFn func(fileSize uint64), + getDeletePacerInfo func() deletionPacerInfo, +) *cleanupManager { + cm := &cleanupManager{ + opts: opts, + objProvider: objProvider, + onTableDeleteFn: onTableDeleteFn, + deletePacer: newDeletionPacer(time.Now(), int64(opts.TargetByteDeletionRate), getDeletePacerInfo), + jobsCh: make(chan *cleanupJob, jobsQueueDepth), + } + cm.mu.completedJobsCond.L = &cm.mu.Mutex + cm.waitGroup.Add(1) + + go func() { + pprof.Do(context.Background(), gcLabels, func(context.Context) { + cm.mainLoop() + }) + }() + + return cm +} + +// Close stops the background goroutine, waiting until all queued jobs are completed. +// Delete pacing is disabled for the remaining jobs. +func (cm *cleanupManager) Close() { + close(cm.jobsCh) + cm.waitGroup.Wait() +} + +// EnqueueJob adds a cleanup job to the manager's queue. +func (cm *cleanupManager) EnqueueJob(jobID int, obsoleteFiles []obsoleteFile) { + job := &cleanupJob{ + jobID: jobID, + obsoleteFiles: obsoleteFiles, + } + + // Report deleted bytes to the pacer, which can use this data to potentially + // increase the deletion rate to keep up. 
We want to do this at enqueue time + // rather than when we get to the job, otherwise the reported bytes will be + // subject to the throttling rate which defeats the purpose. + var pacingBytes uint64 + for _, of := range obsoleteFiles { + if cm.needsPacing(of.fileType, of.fileNum) { + pacingBytes += of.fileSize + } + } + if pacingBytes > 0 { + cm.deletePacer.ReportDeletion(time.Now(), pacingBytes) + } + + cm.mu.Lock() + cm.mu.totalJobs++ + cm.maybeLogLocked() + cm.mu.Unlock() + + if invariants.Enabled && len(cm.jobsCh) >= cap(cm.jobsCh)-2 { + panic("cleanup jobs queue full") + } + + cm.jobsCh <- job +} + +// Wait until the completion of all jobs that were already queued. +// +// Does not wait for jobs that are enqueued during the call. +// +// Note that DB.mu should not be held while calling this method; the background +// goroutine needs to acquire DB.mu to update deleted table metrics. +func (cm *cleanupManager) Wait() { + cm.mu.Lock() + defer cm.mu.Unlock() + n := cm.mu.totalJobs + for cm.mu.completedJobs < n { + cm.mu.completedJobsCond.Wait() + } +} + +// mainLoop runs the manager's background goroutine. +func (cm *cleanupManager) mainLoop() { + defer cm.waitGroup.Done() + + var tb tokenbucket.TokenBucket + // Use a token bucket with 1 token / second refill rate and 1 token burst. 
+ tb.Init(1.0, 1.0) + for job := range cm.jobsCh { + for _, of := range job.obsoleteFiles { + if of.fileType != fileTypeTable { + path := base.MakeFilepath(cm.opts.FS, of.dir, of.fileType, of.fileNum) + cm.deleteObsoleteFile(of.fileType, job.jobID, path, of.fileNum, of.fileSize) + } else { + cm.maybePace(&tb, of.fileType, of.fileNum, of.fileSize) + cm.onTableDeleteFn(of.fileSize) + cm.deleteObsoleteObject(fileTypeTable, job.jobID, of.fileNum) + } + } + cm.mu.Lock() + cm.mu.completedJobs++ + cm.mu.completedJobsCond.Broadcast() + cm.maybeLogLocked() + cm.mu.Unlock() + } +} + +func (cm *cleanupManager) needsPacing(fileType base.FileType, fileNum base.DiskFileNum) bool { + if fileType != fileTypeTable { + return false + } + meta, err := cm.objProvider.Lookup(fileType, fileNum) + if err != nil { + // The object was already removed from the provider; we won't actually + // delete anything, so we don't need to pace. + return false + } + // Don't throttle deletion of remote objects. + return !meta.IsRemote() +} + +// maybePace sleeps before deleting an object if appropriate. It is always +// called from the background goroutine. +func (cm *cleanupManager) maybePace( + tb *tokenbucket.TokenBucket, fileType base.FileType, fileNum base.DiskFileNum, fileSize uint64, +) { + if !cm.needsPacing(fileType, fileNum) { + return + } + + tokens := cm.deletePacer.PacingDelay(time.Now(), fileSize) + if tokens == 0.0 { + // The token bucket might be in debt; it could make us wait even for 0 + // tokens. We don't want that if the pacer decided throttling should be + // disabled. + return + } + // Wait for tokens. We use a token bucket instead of sleeping outright because + // the token bucket accumulates up to one second of unused tokens. + for { + ok, d := tb.TryToFulfill(tokenbucket.Tokens(tokens)) + if ok { + break + } + time.Sleep(d) + } +} + +// deleteObsoleteFile deletes a (non-object) file that is no longer needed. 
+func (cm *cleanupManager) deleteObsoleteFile( + fileType fileType, jobID int, path string, fileNum base.DiskFileNum, fileSize uint64, +) { + // TODO(peter): need to handle this error, probably by re-adding the + // file that couldn't be deleted to one of the obsolete slices map. + err := cm.opts.Cleaner.Clean(cm.opts.FS, fileType, path) + if oserror.IsNotExist(err) { + return + } + + switch fileType { + case fileTypeLog: + cm.opts.EventListener.WALDeleted(WALDeleteInfo{ + JobID: jobID, + Path: path, + FileNum: fileNum.FileNum(), + Err: err, + }) + case fileTypeManifest: + cm.opts.EventListener.ManifestDeleted(ManifestDeleteInfo{ + JobID: jobID, + Path: path, + FileNum: fileNum.FileNum(), + Err: err, + }) + case fileTypeTable: + panic("invalid deletion of object file") + } +} + +func (cm *cleanupManager) deleteObsoleteObject( + fileType fileType, jobID int, fileNum base.DiskFileNum, +) { + if fileType != fileTypeTable { + panic("not an object") + } + + var path string + meta, err := cm.objProvider.Lookup(fileType, fileNum) + if err != nil { + path = "" + } else { + path = cm.objProvider.Path(meta) + err = cm.objProvider.Remove(fileType, fileNum) + } + if cm.objProvider.IsNotExistError(err) { + return + } + + switch fileType { + case fileTypeTable: + cm.opts.EventListener.TableDeleted(TableDeleteInfo{ + JobID: jobID, + Path: path, + FileNum: fileNum.FileNum(), + Err: err, + }) + } +} + +// maybeLogLocked issues a log if the job queue gets 75% full and issues a log +// when the job queue gets back to less than 10% full. +// +// Must be called with cm.mu locked. 
+func (cm *cleanupManager) maybeLogLocked() { + const highThreshold = jobsQueueDepth * 3 / 4 + const lowThreshold = jobsQueueDepth / 10 + + jobsInQueue := cm.mu.totalJobs - cm.mu.completedJobs + + if !cm.mu.jobsQueueWarningIssued && jobsInQueue > highThreshold { + cm.mu.jobsQueueWarningIssued = true + cm.opts.Logger.Infof("cleanup falling behind; job queue has over %d jobs", highThreshold) + } + + if cm.mu.jobsQueueWarningIssued && jobsInQueue < lowThreshold { + cm.mu.jobsQueueWarningIssued = false + cm.opts.Logger.Infof("cleanup back to normal; job queue has under %d jobs", lowThreshold) + } +} diff --git a/pebble/cleaner_test.go b/pebble/cleaner_test.go new file mode 100644 index 0000000..11d9ab9 --- /dev/null +++ b/pebble/cleaner_test.go @@ -0,0 +1,137 @@ +// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import ( + "fmt" + "sort" + "strings" + "testing" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/vfs" + "github.com/stretchr/testify/require" +) + +func TestCleaner(t *testing.T) { + dbs := make(map[string]*DB) + defer func() { + for _, db := range dbs { + require.NoError(t, db.Close()) + } + }() + + mem := vfs.NewMem() + var memLog base.InMemLogger + fs := vfs.WithLogging(mem, memLog.Infof) + datadriven.RunTest(t, "testdata/cleaner", func(t *testing.T, td *datadriven.TestData) string { + memLog.Reset() + switch td.Cmd { + case "batch": + if len(td.CmdArgs) != 1 { + return "batch " + } + d := dbs[td.CmdArgs[0].String()] + b := d.NewBatch() + if err := runBatchDefineCmd(td, b); err != nil { + return err.Error() + } + if err := b.Commit(Sync); err != nil { + return err.Error() + } + return memLog.String() + + case "compact": + if len(td.CmdArgs) != 1 { + return "compact " + } + d := dbs[td.CmdArgs[0].String()] + if err := d.Compact(nil, 
[]byte("\xff"), false); err != nil { + return err.Error() + } + return memLog.String() + + case "flush": + if len(td.CmdArgs) != 1 { + return "flush " + } + d := dbs[td.CmdArgs[0].String()] + if err := d.Flush(); err != nil { + return err.Error() + } + return memLog.String() + + case "close": + if len(td.CmdArgs) != 1 { + return "close " + } + dbDir := td.CmdArgs[0].String() + d := dbs[dbDir] + if err := d.Close(); err != nil { + return err.Error() + } + delete(dbs, dbDir) + return memLog.String() + + case "list": + if len(td.CmdArgs) != 1 { + return "list " + } + paths, err := mem.List(td.CmdArgs[0].String()) + if err != nil { + return err.Error() + } + sort.Strings(paths) + return fmt.Sprintf("%s\n", strings.Join(paths, "\n")) + + case "open": + if len(td.CmdArgs) < 1 || len(td.CmdArgs) > 3 { + return "open [archive] [readonly]" + } + dir := td.CmdArgs[0].String() + opts := (&Options{ + FS: fs, + WALDir: dir + "_wal", + }).WithFSDefaults() + + for i := 1; i < len(td.CmdArgs); i++ { + switch td.CmdArgs[i].String() { + case "readonly": + opts.ReadOnly = true + case "archive": + opts.Cleaner = ArchiveCleaner{} + default: + return "open [archive] [readonly]" + } + } + // Asynchronous table stats retrieval makes the output flaky. 
+ opts.private.disableTableStats = true + opts.private.testingAlwaysWaitForCleanup = true + d, err := Open(dir, opts) + if err != nil { + return err.Error() + } + d.TestOnlyWaitForCleaning() + dbs[dir] = d + return memLog.String() + + case "create-bogus-file": + if len(td.CmdArgs) != 1 { + return "create-bogus-file " + } + dst, err := fs.Create(td.CmdArgs[0].String()) + require.NoError(t, err) + _, err = dst.Write([]byte("bogus data")) + require.NoError(t, err) + require.NoError(t, dst.Sync()) + require.NoError(t, dst.Close()) + return memLog.String() + + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} diff --git a/pebble/cmd/pebble/.gitignore b/pebble/cmd/pebble/.gitignore new file mode 100644 index 0000000..812a2be --- /dev/null +++ b/pebble/cmd/pebble/.gitignore @@ -0,0 +1 @@ +pebble diff --git a/pebble/cmd/pebble/db.go b/pebble/cmd/pebble/db.go new file mode 100644 index 0000000..41c6e59 --- /dev/null +++ b/pebble/cmd/pebble/db.go @@ -0,0 +1,168 @@ +// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package main + +import ( + "log" + + "github.com/cockroachdb/pebble" + "github.com/cockroachdb/pebble/bloom" + "github.com/cockroachdb/pebble/internal/bytealloc" + "github.com/cockroachdb/pebble/objstorage/remote" + "github.com/cockroachdb/pebble/vfs" +) + +// DB specifies the minimal interfaces that need to be implemented to support +// the pebble command. 
+type DB interface { + NewIter(*pebble.IterOptions) iterator + NewBatch() batch + Scan(iter iterator, key []byte, count int64, reverse bool) error + Metrics() *pebble.Metrics + Flush() error +} + +type iterator interface { + SeekLT(key []byte) bool + SeekGE(key []byte) bool + Valid() bool + Key() []byte + Value() []byte + First() bool + Next() bool + Last() bool + Prev() bool + Close() error +} + +type batch interface { + Close() error + Commit(opts *pebble.WriteOptions) error + Set(key, value []byte, opts *pebble.WriteOptions) error + Delete(key []byte, opts *pebble.WriteOptions) error + LogData(data []byte, opts *pebble.WriteOptions) error +} + +// Adapters for Pebble. Since the interfaces above are based on Pebble's +// interfaces, it can simply forward calls for everything. +type pebbleDB struct { + d *pebble.DB + ballast []byte +} + +func newPebbleDB(dir string) DB { + cache := pebble.NewCache(cacheSize) + defer cache.Unref() + opts := &pebble.Options{ + Cache: cache, + Comparer: mvccComparer, + DisableWAL: disableWAL, + FormatMajorVersion: pebble.FormatNewest, + L0CompactionThreshold: 2, + L0StopWritesThreshold: 1000, + LBaseMaxBytes: 64 << 20, // 64 MB + Levels: make([]pebble.LevelOptions, 7), + MaxOpenFiles: 16384, + MemTableSize: 64 << 20, + MemTableStopWritesThreshold: 4, + Merger: &pebble.Merger{ + Name: "cockroach_merge_operator", + }, + MaxConcurrentCompactions: func() int { + return 3 + }, + } + + for i := 0; i < len(opts.Levels); i++ { + l := &opts.Levels[i] + l.BlockSize = 32 << 10 // 32 KB + l.IndexBlockSize = 256 << 10 // 256 KB + l.FilterPolicy = bloom.FilterPolicy(10) + l.FilterType = pebble.TableFilter + if i > 0 { + l.TargetFileSize = opts.Levels[i-1].TargetFileSize * 2 + } + l.EnsureDefaults() + } + opts.Levels[6].FilterPolicy = nil + opts.FlushSplitBytes = opts.Levels[0].TargetFileSize + + opts.EnsureDefaults() + + if verbose { + lel := pebble.MakeLoggingEventListener(nil) + opts.EventListener = &lel + opts.EventListener.TableDeleted = nil + 
opts.EventListener.TableIngested = nil + opts.EventListener.WALCreated = nil + opts.EventListener.WALDeleted = nil + } + + if pathToLocalSharedStorage != "" { + opts.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ + // Store all shared objects on local disk, for convenience. + "": remote.NewLocalFS(pathToLocalSharedStorage, vfs.Default), + }) + opts.Experimental.CreateOnShared = remote.CreateOnSharedAll + if secondaryCacheSize != 0 { + opts.Experimental.SecondaryCacheSizeBytes = secondaryCacheSize + } + } + + p, err := pebble.Open(dir, opts) + if err != nil { + log.Fatal(err) + } + if pathToLocalSharedStorage != "" { + if err := p.SetCreatorID(1); err != nil { + log.Fatal(err) + } + } + return pebbleDB{ + d: p, + ballast: make([]byte, 1<<30), + } +} + +func (p pebbleDB) Flush() error { + return p.d.Flush() +} + +func (p pebbleDB) NewIter(opts *pebble.IterOptions) iterator { + iter, _ := p.d.NewIter(opts) + return iter +} + +func (p pebbleDB) NewBatch() batch { + return p.d.NewBatch() +} + +func (p pebbleDB) Scan(iter iterator, key []byte, count int64, reverse bool) error { + var data bytealloc.A + if reverse { + for i, valid := 0, iter.SeekLT(key); valid; valid = iter.Prev() { + data, _ = data.Copy(iter.Key()) + data, _ = data.Copy(iter.Value()) + i++ + if i >= int(count) { + break + } + } + } else { + for i, valid := 0, iter.SeekGE(key); valid; valid = iter.Next() { + data, _ = data.Copy(iter.Key()) + data, _ = data.Copy(iter.Value()) + i++ + if i >= int(count) { + break + } + } + } + return nil +} + +func (p pebbleDB) Metrics() *pebble.Metrics { + return p.d.Metrics() +} diff --git a/pebble/cmd/pebble/fsbench.go b/pebble/cmd/pebble/fsbench.go new file mode 100644 index 0000000..94d437d --- /dev/null +++ b/pebble/cmd/pebble/fsbench.go @@ -0,0 +1,707 @@ +package main + +import ( + "bytes" + "fmt" + "log" + "os" + "path" + "sync" + "sync/atomic" + "time" + + "github.com/cockroachdb/errors" + 
"github.com/cockroachdb/pebble/vfs" + "github.com/spf13/cobra" +) + +var fsBenchCmd = &cobra.Command{ + Use: "fs ", + Short: "Run file system benchmarks.", + Long: ` +Run file system benchmarks. Each benchmark is predefined and can be +run using the command "bench fs --bench-name ". +Each possible which can be run is defined in the code. +Benchmarks may require the specification of a --duration or +--max-ops flag, to prevent the benchmark from running forever +or running out of memory. + +The --num-times flag can be used to run the entire benchmark, more than +once. If the flag isn't provided, then the benchmark is only run once. +`, + Args: cobra.ExactArgs(1), + RunE: runFsBench, +} + +const writeBatchSize = 1 << 10 + +var fsConfig struct { + // An upper limit on the number of ops which can be run. + maxOps int + + // Benchmark to run. + benchname string + + // Number of times each benchmark should be run. + numTimes int + + fs vfs.FS + + precomputedWriteBatch []byte +} + +func init() { + fsBenchCmd.Flags().IntVar( + &fsConfig.maxOps, "max-ops", 0, + "Maximum number of times the operation which is being benchmarked should be run.", + ) + + fsBenchCmd.Flags().StringVar( + &fsConfig.benchname, "bench-name", "", "The benchmark to run.") + fsBenchCmd.MarkFlagRequired("bench-name") + + fsBenchCmd.Flags().IntVar( + &fsConfig.numTimes, "num-times", 1, + "Number of times each benchmark should be run.") + + // Add subcommand to list + fsBenchCmd.AddCommand(listFsBench) + + // Just use the default vfs implementation for now. + fsConfig.fs = vfs.Default + + fsConfig.precomputedWriteBatch = bytes.Repeat([]byte("a"), writeBatchSize) +} + +// State relevant to a benchmark. +type fsBench struct { + // A short name for the benchmark. + name string + + // A one line description for the benchmark. + description string + + // numOps is the total number of ops which + // have been run for the benchmark. 
This is used + // to make sure that we don't benchmark the operation + // more than max-ops times. + numOps int + + // directory under which the benchmark is run. + dir vfs.File + dirName string + + // Stats associated with the benchmark. + reg *histogramRegistry + + // The operation which we're benchmarking. This + // will be called over and over again. + // Returns false if run should no longer be called. + run func(*namedHistogram) bool + + // Stop the benchmark from executing any further. + // Stop is safe to call concurrently with run. + stop func() + + // A cleanup func which must be called after + // the benchmark has finished running. + // Clean should be only called after making sure + // that the run function is no longer executing. + clean func() +} + +// createFile can be used to create an empty file. +// Invariant: File shouldn't already exist. +func createFile(filepath string) vfs.File { + fh, err := fsConfig.fs.Create(filepath) + if err != nil { + log.Fatalln(err) + } + return fh +} + +// Invariant: file with filepath should exist. +func deleteFile(filepath string) { + err := fsConfig.fs.Remove(filepath) + if err != nil { + log.Fatalln(err) + } +} + +// Write size bytes to the file in batches. 
+func writeToFile(fh vfs.File, size int64) { + for size > 0 { + var toWrite []byte + if size >= writeBatchSize { + toWrite = fsConfig.precomputedWriteBatch + } else { + toWrite = fsConfig.precomputedWriteBatch[:size] + } + written, err := fh.Write(toWrite) + if err != nil { + log.Fatalln(err) + } + if written != len(toWrite) { + log.Fatalf("Couldn't write %d bytes to file\n", size) + } + size -= int64(len(toWrite)) + } +} + +func syncFile(fh vfs.File) { + err := fh.Sync() + if err != nil { + log.Fatalln(err) + } +} + +func closeFile(fh vfs.File) { + err := fh.Close() + if err != nil { + log.Fatalln(err) + } +} + +func getDiskUsage(filepath string) { + _, err := fsConfig.fs.GetDiskUsage(filepath) + if err != nil { + log.Fatalln(err) + } +} + +func openDir(filepath string) vfs.File { + fh, err := fsConfig.fs.OpenDir(filepath) + if err != nil { + log.Fatalln(err) + } + return fh +} + +func mkDir(filepath string) { + err := fsConfig.fs.MkdirAll(filepath, 0755) + if err != nil { + log.Fatalln(err) + } +} + +func removeAllFiles(filepath string) { + err := fsConfig.fs.RemoveAll(filepath) + if err != nil { + log.Fatalln(err) + } +} + +// fileSize is in bytes. +func createBench(benchName string, benchDescription string) fsBenchmark { + createBench := func(dirpath string) *fsBench { + bench := &fsBench{} + mkDir(dirpath) + fh := openDir(dirpath) + + bench.dir = fh + bench.dirName = dirpath + bench.reg = newHistogramRegistry() + bench.numOps = 0 + bench.name = benchName + bench.description = benchDescription + + // setup the operation to benchmark, and the cleanup functions. 
+ pref := "temp_" + var numFiles int + var done atomic.Bool + + bench.run = func(hist *namedHistogram) bool { + if done.Load() { + return false + } + + start := time.Now() + fh := createFile(path.Join(dirpath, fmt.Sprintf("%s%d", pref, numFiles))) + syncFile(bench.dir) + hist.Record(time.Since(start)) + + closeFile(fh) + numFiles++ + return true + } + + bench.stop = func() { + done.Store(true) + } + + bench.clean = func() { + removeAllFiles(dirpath) + closeFile(bench.dir) + } + + return bench + } + + return fsBenchmark{ + createBench, + benchName, + benchDescription, + } +} + +// This benchmark prepopulates a directory with some files of a given size. Then, it creates and deletes +// a file of some size, while measuring only the performance of the delete. +func deleteBench( + benchName string, benchDescription string, preNumFiles int, preFileSize int64, fileSize int64, +) fsBenchmark { + + createBench := func(dirpath string) *fsBench { + bench := &fsBench{} + mkDir(dirpath) + fh := openDir(dirpath) + + bench.dir = fh + bench.dirName = dirpath + bench.reg = newHistogramRegistry() + bench.numOps = 0 + bench.name = benchName + bench.description = benchDescription + + // prepopulate the directory + prePref := "pre_temp_" + for i := 0; i < preNumFiles; i++ { + fh := createFile(path.Join(dirpath, fmt.Sprintf("%s%d", prePref, i))) + if preFileSize > 0 { + writeToFile(fh, preFileSize) + syncFile(fh) + } + closeFile(fh) + } + syncFile(bench.dir) + + var done atomic.Bool + bench.run = func(hist *namedHistogram) bool { + if done.Load() { + return false + } + + filename := "newfile" + fh := createFile(path.Join(dirpath, filename)) + writeToFile(fh, fileSize) + syncFile(fh) + + start := time.Now() + deleteFile(path.Join(dirpath, filename)) + hist.Record(time.Since(start)) + + return true + } + + bench.stop = func() { + done.Store(true) + } + + bench.clean = func() { + removeAllFiles(dirpath) + closeFile(bench.dir) + } + + return bench + } + + return fsBenchmark{ + createBench, 
+ benchName, + benchDescription, + } +} + +// This benchmark creates some files in a directory, and then measures the performance +// of the vfs.Remove function. +// fileSize is in bytes. +func deleteUniformBench( + benchName string, benchDescription string, numFiles int, fileSize int64, +) fsBenchmark { + createBench := func(dirpath string) *fsBench { + bench := &fsBench{} + mkDir(dirpath) + fh := openDir(dirpath) + + bench.dir = fh + bench.dirName = dirpath + bench.reg = newHistogramRegistry() + bench.numOps = 0 + bench.name = benchName + bench.description = benchDescription + + // setup the operation to benchmark, and the cleaup functions. + pref := "temp_" + for i := 0; i < numFiles; i++ { + fh := createFile(path.Join(dirpath, fmt.Sprintf("%s%d", pref, i))) + if fileSize > 0 { + writeToFile(fh, fileSize) + syncFile(fh) + } + closeFile(fh) + } + syncFile(bench.dir) + + var done atomic.Bool + bench.run = func(hist *namedHistogram) bool { + if done.Load() { + return false + } + + if numFiles == 0 { + return false + } + + start := time.Now() + deleteFile(path.Join(dirpath, fmt.Sprintf("%s%d", pref, numFiles-1))) + hist.Record(time.Since(start)) + + numFiles-- + return true + } + + bench.stop = func() { + done.Store(true) + } + + bench.clean = func() { + removeAll(dirpath) + closeFile(bench.dir) + } + + return bench + } + + return fsBenchmark{ + createBench, + benchName, + benchDescription, + } +} + +// Tests the performance of syncing data to disk. +// Only measures the sync performance. +// The writes will be synced after every writeSize bytes have been written. 
+func writeSyncBench( + benchName string, benchDescription string, maxFileSize int64, writeSize int64, +) fsBenchmark { + + if writeSize > maxFileSize { + log.Fatalln("File write threshold is greater than max file size.") + } + + createBench := func(dirpath string) *fsBench { + bench := &fsBench{} + mkDir(dirpath) + fh := openDir(dirpath) + + bench.dir = fh + bench.dirName = dirpath + bench.reg = newHistogramRegistry() + bench.numOps = 0 + bench.name = benchName + bench.description = benchDescription + + pref := "temp_" + var benchData struct { + done atomic.Bool + fh vfs.File + fileNum int + bytesWritten int64 + } + benchData.fh = createFile(path.Join(dirpath, fmt.Sprintf("%s%d", pref, benchData.fileNum))) + + bench.run = func(hist *namedHistogram) bool { + if benchData.done.Load() { + return false + } + + if benchData.bytesWritten+writeSize > maxFileSize { + closeFile(benchData.fh) + benchData.fileNum++ + benchData.bytesWritten = 0 + benchData.fh = createFile(path.Join(dirpath, fmt.Sprintf("%s%d", pref, benchData.fileNum))) + } + + benchData.bytesWritten += writeSize + writeToFile(benchData.fh, writeSize) + + start := time.Now() + syncFile(benchData.fh) + hist.Record(time.Since(start)) + + return true + } + + bench.stop = func() { + benchData.done.Store(true) + } + + bench.clean = func() { + closeFile(benchData.fh) + removeAllFiles(dirpath) + closeFile(bench.dir) + } + + return bench + } + + return fsBenchmark{ + createBench, + benchName, + benchDescription, + } +} + +// Tests the peformance of calling the vfs.GetDiskUsage call on a directory, +// as the number of files/total size of files in the directory grows. 
+func diskUsageBench( + benchName string, benchDescription string, maxFileSize int64, writeSize int64, +) fsBenchmark { + + if writeSize > maxFileSize { + log.Fatalln("File write threshold is greater than max file size.") + } + + createBench := func(dirpath string) *fsBench { + bench := &fsBench{} + mkDir(dirpath) + fh := openDir(dirpath) + + bench.dir = fh + bench.dirName = dirpath + bench.reg = newHistogramRegistry() + bench.numOps = 0 + bench.name = benchName + bench.description = benchDescription + + pref := "temp_" + var benchData struct { + done atomic.Bool + fh vfs.File + fileNum int + bytesWritten int64 + } + benchData.fh = createFile(path.Join(dirpath, fmt.Sprintf("%s%d", pref, benchData.fileNum))) + + bench.run = func(hist *namedHistogram) bool { + if benchData.done.Load() { + return false + } + + if benchData.bytesWritten+writeSize > maxFileSize { + closeFile(benchData.fh) + benchData.fileNum++ + benchData.bytesWritten = 0 + benchData.fh = createFile(path.Join(dirpath, fmt.Sprintf("%s%d", pref, benchData.fileNum))) + } + + benchData.bytesWritten += writeSize + writeToFile(benchData.fh, writeSize) + syncFile(benchData.fh) + + start := time.Now() + getDiskUsage(dirpath) + hist.Record(time.Since(start)) + + return true + } + + bench.stop = func() { + benchData.done.Store(true) + } + + bench.clean = func() { + closeFile(benchData.fh) + removeAllFiles(dirpath) + closeFile(bench.dir) + } + + return bench + } + + return fsBenchmark{ + createBench, + benchName, + benchDescription, + } +} + +// A benchmark is a function which takes a directory +// as input and returns the fsBench struct which has +// all the information required to run the benchmark. +type fsBenchmark struct { + createBench func(string) *fsBench + name string + description string +} + +// The various benchmarks which can be run. 
+var benchmarks = map[string]fsBenchmark{ + "create_empty": createBench("create_empty", "create empty file, sync par dir"), + "delete_10k_2MiB": deleteUniformBench( + "delete_10k_2MiB", "create 10k 2MiB size files, measure deletion times", 10_000, 2<<20, + ), + "delete_100k_2MiB": deleteUniformBench( + "delete_100k_2MiB", "create 100k 2MiB size files, measure deletion times", 100_000, 2<<20, + ), + "delete_200k_2MiB": deleteUniformBench( + "delete_200k_2MiB", "create 200k 2MiB size files, measure deletion times", 200_000, 2<<20, + ), + "write_sync_1MiB": writeSyncBench( + "write_sync_1MiB", "Write 1MiB to a file, then sync, while timing the sync.", 2<<30, 1<<20, + ), + "write_sync_16MiB": writeSyncBench( + "write_sync_16MiB", "Write 16MiB to a file, then sync, while timing the sync.", 2<<30, 16<<20, + ), + "write_sync_128MiB": writeSyncBench( + "write_sync_128MiB", "Write 128MiB to a file, then sync, while timing the sync.", 2<<30, 128<<20, + ), + "disk_usage_128MB": diskUsageBench( + "disk_usage_128MB", + "Write 128MiB to a file, measure GetDiskUsage call. 
Create a new file, when file size is 1GB.", + 1<<30, 128<<20, + ), + "disk_usage_many_files": diskUsageBench( + "disk_usage_many_files", + "Create new file, Write 128KiB to a file, measure GetDiskUsage call.", + 128<<10, 128<<10, + ), + "delete_large_dir_256MiB": deleteBench( + "delete_large_dir_256MiB", "Prepopulate directory with 100k 1MiB files, measure delete performance of 256MiB files", + 1e5, 1<<20, 256<<20, + ), + "delete_large_dir_2MiB": deleteBench( + "delete_large_dir_2MiB", "Prepopulate directory with 100k 1MiB files, measure delete performance of 2MiB files", + 1e5, 1<<20, 2<<20, + ), + "delete_small_dir_2GiB": deleteBench( + "delete_small_dir_2GiB", "Prepopulate directory with 1k 1MiB files, measure delete performance of 2GiB files", + 1e3, 1<<20, 2<<30, + ), + "delete_small_dir_256MiB": deleteBench( + "delete_small_dir_256MiB", "Prepopulate directory with 1k 1MiB files, measure delete performance of 256MiB files", + 1e3, 1<<20, 256<<20, + ), + "delete_small_dir_2MiB": deleteBench( + "delete_small_dir_2MiB", "Prepopulate directory with 1k 1MiB files, measure delete performance of 2MiB files", + 1e3, 1<<20, 2<<20, + ), +} + +func runFsBench(_ *cobra.Command, args []string) error { + benchmark, ok := benchmarks[fsConfig.benchname] + if !ok { + return errors.Errorf("trying to run an unknown benchmark: %s", fsConfig.benchname) + } + + // Run the benchmark a couple of times. 
+ fmt.Printf("The benchmark will be run %d time(s).\n", fsConfig.numTimes) + for i := 0; i < fsConfig.numTimes; i++ { + fmt.Println("Starting benchmark:", i) + benchStruct := benchmark.createBench(args[0]) + runTestWithoutDB(testWithoutDB{ + init: benchStruct.init, + tick: benchStruct.tick, + done: benchStruct.done, + }) + } + return nil +} + +func (bench *fsBench) init(wg *sync.WaitGroup) { + fmt.Println("Running benchmark:", bench.name) + fmt.Println("Description:", bench.description) + + wg.Add(1) + go bench.execute(wg) +} + +func (bench *fsBench) execute(wg *sync.WaitGroup) { + defer wg.Done() + + latencyHist := bench.reg.Register(bench.name) + + for { + // run the op which we're benchmarking. + bench.numOps++ + + // The running function will determine exactly what latency + // it wants to measure. + continueBench := bench.run(latencyHist) + if !continueBench || (fsConfig.maxOps > 0 && bench.numOps >= fsConfig.maxOps) { + break + } + } +} + +func (bench *fsBench) tick(elapsed time.Duration, i int) { + if i%20 == 0 { + fmt.Println("____optype__elapsed__ops/sec(inst)___ops/sec(cum)__p50(ms)__p95(ms)__p99(ms)__pMax(ms)") + } + bench.reg.Tick(func(tick histogramTick) { + h := tick.Hist + + fmt.Printf("%10s %8s %14.1f %14.1f %5.6f %5.6f %5.6f %5.6f\n", + tick.Name[:10], + time.Duration(elapsed.Seconds()+0.5)*time.Second, + float64(h.TotalCount())/tick.Elapsed.Seconds(), + float64(tick.Cumulative.TotalCount())/elapsed.Seconds(), + time.Duration(h.ValueAtQuantile(50)).Seconds()*1000, + time.Duration(h.ValueAtQuantile(95)).Seconds()*1000, + time.Duration(h.ValueAtQuantile(99)).Seconds()*1000, + time.Duration(h.ValueAtQuantile(100)).Seconds()*1000, + ) + }) +} + +func (bench *fsBench) done(wg *sync.WaitGroup, elapsed time.Duration) { + // Do the cleanup. 
+ bench.stop() + wg.Wait() + defer bench.clean() + + fmt.Println("\n____optype__elapsed_____ops(total)___ops/sec(cum)__avg(ms)__p50(ms)__p95(ms)__p99(ms)__pMax(ms)") + + resultTick := histogramTick{} + bench.reg.Tick(func(tick histogramTick) { + h := tick.Cumulative + if resultTick.Cumulative == nil { + resultTick.Now = tick.Now + resultTick.Cumulative = h + } else { + resultTick.Cumulative.Merge(h) + } + + fmt.Printf("%10s %7.1fs %14d %14.1f %5.6f %5.6f %5.6f %5.6f %5.6f\n", + tick.Name[:10], elapsed.Seconds(), h.TotalCount(), + float64(h.TotalCount())/elapsed.Seconds(), + time.Duration(h.Mean()).Seconds()*1000, + time.Duration(h.ValueAtQuantile(50)).Seconds()*1000, + time.Duration(h.ValueAtQuantile(95)).Seconds()*1000, + time.Duration(h.ValueAtQuantile(99)).Seconds()*1000, + time.Duration(h.ValueAtQuantile(100)).Seconds()*1000, + ) + }) + fmt.Println() + + resultHist := resultTick.Cumulative + + fmt.Printf("Benchmarkfsbench/%s %d %0.1f ops/sec\n\n", + bench.name, + resultHist.TotalCount(), + float64(resultHist.TotalCount())/elapsed.Seconds(), + ) +} + +func verbosef(fmtstr string, args ...interface{}) { + if verbose { + fmt.Printf(fmtstr, args...) + } +} + +func removeAll(dir string) { + verbosef("Removing %q.\n", dir) + if err := os.RemoveAll(dir); err != nil { + log.Fatal(err) + } +} diff --git a/pebble/cmd/pebble/fsbenchlist.go b/pebble/cmd/pebble/fsbenchlist.go new file mode 100644 index 0000000..467af81 --- /dev/null +++ b/pebble/cmd/pebble/fsbenchlist.go @@ -0,0 +1,39 @@ +package main + +import ( + "fmt" + + "github.com/cockroachdb/errors" + "github.com/spf13/cobra" +) + +var listFsBench = &cobra.Command{ + Use: "list [] [] ...", + Short: "List the available file system benchmarks.", + Long: ` +List the available file system benchmarks. If no is supplied +as an argument, then all the available benchmark names are printed. +If one or more s are supplied as arguments, then the benchmark +descriptions are printed out for those names. 
+`, + RunE: runListFsBench, +} + +func runListFsBench(_ *cobra.Command, args []string) error { + if len(args) == 0 { + fmt.Println("Available benchmarks:") + for name := range benchmarks { + fmt.Println(name) + } + } else { + for _, v := range args { + benchStruct, ok := benchmarks[v] + if !ok { + return errors.Errorf("trying to print out the description for unknown benchmark: %s", v) + } + fmt.Println("Name:", benchStruct.name) + fmt.Println("Description:", benchStruct.description) + } + } + return nil +} diff --git a/pebble/cmd/pebble/main.go b/pebble/cmd/pebble/main.go new file mode 100644 index 0000000..9417bfb --- /dev/null +++ b/pebble/cmd/pebble/main.go @@ -0,0 +1,99 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package main + +import ( + "log" + "os" + "time" + + "github.com/cockroachdb/pebble/internal/testkeys" + "github.com/cockroachdb/pebble/tool" + "github.com/spf13/cobra" +) + +var ( + cacheSize int64 + concurrency int + disableWAL bool + duration time.Duration + maxSize uint64 + maxOpsPerSec = newRateFlag("") + verbose bool + waitCompactions bool + wipe bool + pathToLocalSharedStorage string + // If zero, or if !sharedStorageEnabled, secondary cache is + // not used. + secondaryCacheSize int64 +) + +func main() { + log.SetFlags(0) + + cobra.EnableCommandSorting = false + + benchCmd := &cobra.Command{ + Use: "bench", + Short: "benchmarks", + } + + replayCmd := initReplayCmd() + benchCmd.AddCommand( + replayCmd, + scanCmd, + syncCmd, + tombstoneCmd, + ycsbCmd, + fsBenchCmd, + writeBenchCmd, + ) + + rootCmd := &cobra.Command{ + Use: "pebble [command] (flags)", + Short: "pebble benchmarking/introspection tool", + } + rootCmd.AddCommand(benchCmd) + + t := tool.New(tool.Comparers(mvccComparer, testkeys.Comparer), tool.Mergers(fauxMVCCMerger)) + rootCmd.AddCommand(t.Commands...) 
+ + for _, cmd := range []*cobra.Command{replayCmd, scanCmd, syncCmd, tombstoneCmd, writeBenchCmd, ycsbCmd} { + cmd.Flags().BoolVarP( + &verbose, "verbose", "v", false, "enable verbose event logging") + cmd.Flags().StringVar( + &pathToLocalSharedStorage, "shared-storage", "", "path to local shared storage (empty for no shared storage)") + cmd.Flags().Int64Var( + &secondaryCacheSize, "secondary-cache", 0, "secondary cache size in bytes") + } + for _, cmd := range []*cobra.Command{scanCmd, syncCmd, tombstoneCmd, ycsbCmd} { + cmd.Flags().Int64Var( + &cacheSize, "cache", 1<<30, "cache size") + } + for _, cmd := range []*cobra.Command{scanCmd, syncCmd, tombstoneCmd, ycsbCmd, fsBenchCmd, writeBenchCmd} { + cmd.Flags().DurationVarP( + &duration, "duration", "d", 10*time.Second, "the duration to run (0, run forever)") + } + for _, cmd := range []*cobra.Command{scanCmd, syncCmd, tombstoneCmd, ycsbCmd} { + cmd.Flags().IntVarP( + &concurrency, "concurrency", "c", 1, "number of concurrent workers") + cmd.Flags().BoolVar( + &disableWAL, "disable-wal", false, "disable the WAL (voiding persistence guarantees)") + cmd.Flags().VarP( + maxOpsPerSec, "rate", "m", "max ops per second [{zipf,uniform}:]min[-max][/period (sec)]") + cmd.Flags().BoolVar( + &waitCompactions, "wait-compactions", false, + "wait for background compactions to complete after load stops") + cmd.Flags().BoolVarP( + &wipe, "wipe", "w", false, "wipe the database before starting") + cmd.Flags().Uint64Var( + &maxSize, "max-size", 0, "maximum disk size, in MB (0, run forever)") + } + + if err := rootCmd.Execute(); err != nil { + // Cobra has already printed the error message. + os.Exit(1) + } +} diff --git a/pebble/cmd/pebble/mvcc.go b/pebble/cmd/pebble/mvcc.go new file mode 100644 index 0000000..0e388de --- /dev/null +++ b/pebble/cmd/pebble/mvcc.go @@ -0,0 +1,223 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. 
Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package main + +import ( + "bytes" + + "github.com/cockroachdb/pebble" + "github.com/cockroachdb/pebble/internal/bytealloc" +) + +// MVCC encoding and decoding routines adapted from CockroachDB sources. Used +// to perform apples-to-apples benchmarking for CockroachDB's usage of RocksDB. + +var mvccComparer = &pebble.Comparer{ + Compare: mvccCompare, + + AbbreviatedKey: func(k []byte) uint64 { + key, _, ok := mvccSplitKey(k) + if !ok { + return 0 + } + return pebble.DefaultComparer.AbbreviatedKey(key) + }, + + Equal: func(a, b []byte) bool { + return mvccCompare(a, b) == 0 + }, + + Separator: func(dst, a, b []byte) []byte { + aKey, _, ok := mvccSplitKey(a) + if !ok { + return append(dst, a...) + } + bKey, _, ok := mvccSplitKey(b) + if !ok { + return append(dst, a...) + } + // If the keys are the same just return a. + if bytes.Equal(aKey, bKey) { + return append(dst, a...) + } + n := len(dst) + // MVCC key comparison uses bytes.Compare on the roachpb.Key, which is the same semantics as + // pebble.DefaultComparer, so reuse the latter's Separator implementation. + dst = pebble.DefaultComparer.Separator(dst, aKey, bKey) + // Did it pick a separator different than aKey -- if it did not we can't do better than a. + buf := dst[n:] + if bytes.Equal(aKey, buf) { + return append(dst[:n], a...) + } + // The separator is > aKey, so we only need to add the timestamp sentinel. + return append(dst, 0) + }, + + Successor: func(dst, a []byte) []byte { + aKey, _, ok := mvccSplitKey(a) + if !ok { + return append(dst, a...) + } + n := len(dst) + // MVCC key comparison uses bytes.Compare on the roachpb.Key, which is the same semantics as + // pebble.DefaultComparer, so reuse the latter's Successor implementation. + dst = pebble.DefaultComparer.Successor(dst, aKey) + // Did it pick a successor different than aKey -- if it did not we can't do better than a. 
+ buf := dst[n:] + if bytes.Equal(aKey, buf) { + return append(dst[:n], a...) + } + // The successor is > aKey, so we only need to add the timestamp sentinel. + return append(dst, 0) + }, + + Split: func(k []byte) int { + key, _, ok := mvccSplitKey(k) + if !ok { + return len(k) + } + // This matches the behavior of libroach/KeyPrefix. RocksDB requires that + // keys generated via a SliceTransform be comparable with normal encoded + // MVCC keys. Encoded MVCC keys have a suffix indicating the number of + // bytes of timestamp data. MVCC keys without a timestamp have a suffix of + // 0. We're careful in EncodeKey to make sure that the user-key always has + // a trailing 0. If there is no timestamp this falls out naturally. If + // there is a timestamp we prepend a 0 to the encoded timestamp data. + return len(key) + 1 + }, + + Name: "cockroach_comparator", +} + +func mvccSplitKey(mvccKey []byte) (key []byte, ts []byte, ok bool) { + if len(mvccKey) == 0 { + return nil, nil, false + } + n := len(mvccKey) - 1 + tsLen := int(mvccKey[n]) + if n < tsLen { + return nil, nil, false + } + key = mvccKey[:n-tsLen] + if tsLen > 0 { + ts = mvccKey[n-tsLen+1 : len(mvccKey)-1] + } + return key, ts, true +} + +func mvccCompare(a, b []byte) int { + // NB: For performance, this routine manually splits the key into the + // user-key and timestamp components rather than using SplitMVCCKey. Don't + // try this at home kids: use SplitMVCCKey. + + aEnd := len(a) - 1 + bEnd := len(b) - 1 + if aEnd < 0 || bEnd < 0 { + // This should never happen unless there is some sort of corruption of + // the keys. This is a little bizarre, but the behavior exactly matches + // engine/db.cc:DBComparator. + return bytes.Compare(a, b) + } + + // Compute the index of the separator between the key and the timestamp. + aSep := aEnd - int(a[aEnd]) + bSep := bEnd - int(b[bEnd]) + if aSep < 0 || bSep < 0 { + // This should never happen unless there is some sort of corruption of + // the keys. 
This is a little bizarre, but the behavior exactly matches + // engine/db.cc:DBComparator. + return bytes.Compare(a, b) + } + + // Compare the "user key" part of the key. + if c := bytes.Compare(a[:aSep], b[:bSep]); c != 0 { + return c + } + + // Compare the timestamp part of the key. + aTS := a[aSep:aEnd] + bTS := b[bSep:bEnd] + if len(aTS) == 0 { + if len(bTS) == 0 { + return 0 + } + return -1 + } else if len(bTS) == 0 { + return 1 + } + return bytes.Compare(bTS, aTS) +} + +// \x00[[]]<#timestamp-bytes> +func mvccEncode(dst, key []byte, walltime uint64, logical uint32) []byte { + dst = append(dst, key...) + dst = append(dst, 0) + if walltime != 0 || logical != 0 { + extra := byte(1 + 8) + dst = encodeUint64Ascending(dst, walltime) + if logical != 0 { + dst = encodeUint32Ascending(dst, logical) + extra += 4 + } + dst = append(dst, extra) + } + return dst +} + +func mvccForwardScan(d DB, start, end, ts []byte) (int, int64) { + it := d.NewIter(&pebble.IterOptions{ + LowerBound: mvccEncode(nil, start, 0, 0), + UpperBound: mvccEncode(nil, end, 0, 0), + }) + defer it.Close() + + var data bytealloc.A + var count int + var nbytes int64 + + for valid := it.First(); valid; valid = it.Next() { + key, keyTS, _ := mvccSplitKey(it.Key()) + if bytes.Compare(keyTS, ts) <= 0 { + data, _ = data.Copy(key) + data, _ = data.Copy(it.Value()) + } + count++ + nbytes += int64(len(it.Key()) + len(it.Value())) + } + return count, nbytes +} + +func mvccReverseScan(d DB, start, end, ts []byte) (int, int64) { + it := d.NewIter(&pebble.IterOptions{ + LowerBound: mvccEncode(nil, start, 0, 0), + UpperBound: mvccEncode(nil, end, 0, 0), + }) + defer it.Close() + + var data bytealloc.A + var count int + var nbytes int64 + + for valid := it.Last(); valid; valid = it.Prev() { + key, keyTS, _ := mvccSplitKey(it.Key()) + if bytes.Compare(keyTS, ts) <= 0 { + data, _ = data.Copy(key) + data, _ = data.Copy(it.Value()) + } + count++ + nbytes += int64(len(it.Key()) + len(it.Value())) + } + return count, 
nbytes +} + +var fauxMVCCMerger = &pebble.Merger{ + Name: "cockroach_merge_operator", + Merge: func(key, value []byte) (pebble.ValueMerger, error) { + // This merger is used by the compact benchmark and use the + // pebble default value merger to concatenate values. + // It shouldn't materially affect the benchmarks. + return pebble.DefaultMerger.Merge(key, value) + }, +} diff --git a/pebble/cmd/pebble/queue.go b/pebble/cmd/pebble/queue.go new file mode 100644 index 0000000..7193741 --- /dev/null +++ b/pebble/cmd/pebble/queue.go @@ -0,0 +1,116 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package main + +import ( + "fmt" + "log" + "sync" + "sync/atomic" + "time" + + "github.com/cockroachdb/pebble" + "github.com/cockroachdb/pebble/internal/randvar" + "github.com/spf13/cobra" + "golang.org/x/exp/rand" +) + +var queueConfig struct { + size int + values *randvar.BytesFlag +} + +func initQueue(cmd *cobra.Command) { + cmd.Flags().IntVar( + &queueConfig.size, "queue-size", 256, + "size of the queue to maintain") + queueConfig.values = randvar.NewBytesFlag("16384") + cmd.Flags().Var( + queueConfig.values, "queue-values", + "queue value size distribution [{zipf,uniform}:]min[-max][/]") +} + +func queueTest() (test, *atomic.Int64) { + ops := new(atomic.Int64) // atomic + var ( + lastOps int64 + lastElapsed time.Duration + ) + return test{ + init: func(d DB, wg *sync.WaitGroup) { + var ( + value []byte + rng = rand.New(rand.NewSource(1449168817)) + queue = make([][]byte, queueConfig.size) + ) + for i := 0; i < queueConfig.size; i++ { + b := d.NewBatch() + queue[i] = mvccEncode(nil, encodeUint32Ascending([]byte("queue-"), uint32(i)), uint64(i+1), 0) + value = queueConfig.values.Bytes(rng, value) + b.Set(queue[i], value, pebble.NoSync) + if err := b.Commit(pebble.NoSync); err != nil { + log.Fatal(err) + } + } + if err := d.Flush(); err != 
nil { + log.Fatal(err) + } + + limiter := maxOpsPerSec.newRateLimiter() + wg.Add(1) + go func() { + defer wg.Done() + + for i := queueConfig.size; ; i++ { + idx := i % queueConfig.size + + // Delete the head. + b := d.NewBatch() + if err := b.Delete(queue[idx], pebble.Sync); err != nil { + log.Fatal(err) + } + if err := b.Commit(pebble.Sync); err != nil { + log.Fatal(err) + } + _ = b.Close() + wait(limiter) + + // Append to the tail. + b = d.NewBatch() + queue[idx] = mvccEncode(queue[idx][:0], encodeUint32Ascending([]byte("queue-"), uint32(i)), uint64(i+1), 0) + value = queueConfig.values.Bytes(rng, value) + b.Set(queue[idx], value, nil) + if err := b.Commit(pebble.Sync); err != nil { + log.Fatal(err) + } + _ = b.Close() + wait(limiter) + ops.Add(1) + } + }() + }, + tick: func(elapsed time.Duration, i int) { + if i%20 == 0 { + fmt.Println("Queue___elapsed_______ops/sec") + } + + curOps := ops.Load() + dur := elapsed - lastElapsed + fmt.Printf("%15s %13.1f\n", + time.Duration(elapsed.Seconds()+0.5)*time.Second, + float64(curOps-lastOps)/dur.Seconds(), + ) + lastOps = curOps + lastElapsed = elapsed + }, + done: func(elapsed time.Duration) { + curOps := ops.Load() + fmt.Println("\nQueue___elapsed___ops/sec(cum)") + fmt.Printf("%13.1fs %14.1f\n\n", + elapsed.Seconds(), + float64(curOps)/elapsed.Seconds()) + }, + }, ops +} diff --git a/pebble/cmd/pebble/random.go b/pebble/cmd/pebble/random.go new file mode 100644 index 0000000..c098b74 --- /dev/null +++ b/pebble/cmd/pebble/random.go @@ -0,0 +1,92 @@ +// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package main + +import ( + "strconv" + "strings" + "time" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/randvar" + "github.com/cockroachdb/pebble/internal/rate" +) + +type rateFlag struct { + randvar.Flag + fluctuateDuration time.Duration + spec string +} + +func newRateFlag(spec string) *rateFlag { + f := &rateFlag{} + if err := f.Set(spec); err != nil { + panic(err) + } + return f +} + +func (f *rateFlag) String() string { + return f.spec +} + +// Type implements the Flag.Value interface. +func (f *rateFlag) Type() string { + return "ratevar" +} + +// Set implements the Flag.Value interface. +func (f *rateFlag) Set(spec string) error { + if spec == "" { + if err := f.Flag.Set("0"); err != nil { + return err + } + f.fluctuateDuration = time.Duration(0) + f.spec = spec + return nil + } + + parts := strings.Split(spec, "/") + if len(parts) == 0 || len(parts) > 2 { + return errors.Errorf("invalid ratevar spec: %s", errors.Safe(spec)) + } + if err := f.Flag.Set(parts[0]); err != nil { + return err + } + // Don't fluctuate by default. 
+ f.fluctuateDuration = time.Duration(0) + if len(parts) == 2 { + fluctuateDurationFloat, err := strconv.ParseFloat(parts[1], 64) + if err != nil { + return err + } + f.fluctuateDuration = time.Duration(fluctuateDurationFloat) * time.Second + } + f.spec = spec + return nil +} + +func (f *rateFlag) newRateLimiter() *rate.Limiter { + if f.spec == "" { + return nil + } + rng := randvar.NewRand() + limiter := rate.NewLimiter(float64(f.Uint64(rng)), 1) + if f.fluctuateDuration != 0 { + go func(limiter *rate.Limiter) { + ticker := time.NewTicker(f.fluctuateDuration) + for range ticker.C { + limiter.SetRate(float64(f.Uint64(rng))) + } + }(limiter) + } + return limiter +} + +func wait(l *rate.Limiter) { + if l != nil { + l.Wait(1) + } +} diff --git a/pebble/cmd/pebble/replay.go b/pebble/cmd/pebble/replay.go new file mode 100644 index 0000000..7479769 --- /dev/null +++ b/pebble/cmd/pebble/replay.go @@ -0,0 +1,448 @@ +// Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package main + +import ( + "bytes" + "context" + "flag" + "fmt" + "io" + "os" + "path/filepath" + "sort" + "strconv" + "strings" + "syscall" + "time" + "unicode" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble" + "github.com/cockroachdb/pebble/bloom" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/cache" + "github.com/cockroachdb/pebble/replay" + "github.com/cockroachdb/pebble/vfs" + "github.com/spf13/cobra" +) + +func initReplayCmd() *cobra.Command { + c := replayConfig{ + pacer: pacerFlag{Pacer: replay.PaceByFixedReadAmp(10)}, + runDir: "", + count: 1, + streamLogs: false, + ignoreCheckpoint: false, + } + cmd := &cobra.Command{ + Use: "replay ", + Short: "run the provided captured write workload", + Args: cobra.ExactArgs(1), + RunE: c.runE, + } + cmd.Flags().IntVar( + &c.count, "count", 1, "the number of times to replay the workload") + cmd.Flags().StringVar( + &c.name, "name", "", "the name of the workload being replayed") + cmd.Flags().VarPF( + &c.pacer, "pacer", "p", "the pacer to use: unpaced, reference-ramp, or fixed-ramp=N") + cmd.Flags().Uint64Var( + &c.maxWritesMB, "max-writes", 0, "the maximum volume of writes (MB) to apply, with 0 denoting unlimited") + cmd.Flags().StringVar( + &c.optionsString, "options", "", "Pebble options to override, in the OPTIONS ini format but with any whitespace as field delimiters instead of newlines") + cmd.Flags().StringVar( + &c.runDir, "run-dir", c.runDir, "the directory to use for the replay data directory; defaults to a random dir in pwd") + cmd.Flags().Int64Var( + &c.maxCacheSize, "max-cache-size", c.maxCacheSize, "the max size of the block cache") + cmd.Flags().BoolVar( + &c.streamLogs, "stream-logs", c.streamLogs, "stream the Pebble logs to stdout during replay") + cmd.Flags().BoolVar( + &c.ignoreCheckpoint, "ignore-checkpoint", c.ignoreCheckpoint, "ignore the workload's initial checkpoint") + cmd.Flags().StringVar( + &c.checkpointDir, "checkpoint-dir", 
c.checkpointDir, "path to the checkpoint to use if not /checkpoint") + return cmd +} + +type replayConfig struct { + name string + pacer pacerFlag + runDir string + count int + maxWritesMB uint64 + streamLogs bool + checkpointDir string + ignoreCheckpoint bool + optionsString string + maxCacheSize int64 + + cleanUpFuncs []func() error +} + +func (c *replayConfig) args() (args []string) { + if c.name != "" { + args = append(args, "--name", c.name) + } + if c.pacer.spec != "" { + args = append(args, "--pacer", c.pacer.spec) + } + if c.runDir != "" { + args = append(args, "--run-dir", c.runDir) + } + if c.count != 0 { + args = append(args, "--count", fmt.Sprint(c.count)) + } + if c.maxWritesMB != 0 { + args = append(args, "--max-writes", fmt.Sprint(c.maxWritesMB)) + } + if c.maxCacheSize != 0 { + args = append(args, "--max-cache-size", fmt.Sprint(c.maxCacheSize)) + } + if c.streamLogs { + args = append(args, "--stream-logs") + } + if c.checkpointDir != "" { + args = append(args, "--checkpoint-dir", c.checkpointDir) + } + if c.ignoreCheckpoint { + args = append(args, "--ignore-checkpoint") + } + if c.optionsString != "" { + args = append(args, "--options", c.optionsString) + } + return args +} + +func (c *replayConfig) runE(cmd *cobra.Command, args []string) error { + if c.ignoreCheckpoint && c.checkpointDir != "" { + return errors.Newf("cannot provide both --checkpoint-dir and --ignore-checkpoint") + } + stdout := cmd.OutOrStdout() + + workloadPath := args[0] + if err := c.runOnce(stdout, workloadPath); err != nil { + return err + } + c.count-- + + // If necessary, run it again. We run again replacing our existing process + // with the next run so that we're truly starting over. This helps avoid the + // possibility of state within the Go runtime, the fragmentation of the + // heap, or global state within Pebble from interfering with the + // independence of individual runs. 
Previously we called runOnce multiple + // times without exec-ing, but we observed less variance between runs from + // within the same process. + if c.count > 0 { + fmt.Printf("%d runs remaining.", c.count) + executable, err := os.Executable() + if err != nil { + return err + } + execArgs := append(append([]string{executable, "bench", "replay"}, c.args()...), workloadPath) + syscall.Exec(executable, execArgs, os.Environ()) + } + return nil +} + +func (c *replayConfig) runOnce(stdout io.Writer, workloadPath string) error { + defer c.cleanUp() + if c.name == "" { + c.name = vfs.Default.PathBase(workloadPath) + } + + r := &replay.Runner{ + RunDir: c.runDir, + WorkloadFS: vfs.Default, + WorkloadPath: workloadPath, + Pacer: c.pacer, + Opts: &pebble.Options{}, + } + if c.maxWritesMB > 0 { + r.MaxWriteBytes = c.maxWritesMB * (1 << 20) + } + if err := c.initRunDir(r); err != nil { + return err + } + if err := c.initOptions(r); err != nil { + return err + } + if verbose { + fmt.Fprintln(stdout, "Options:") + fmt.Fprintln(stdout, r.Opts.String()) + } + + // Begin the workload. Run does not block. + ctx := context.Background() + if err := r.Run(ctx); err != nil { + return errors.Wrapf(err, "starting workload") + } + + // Wait blocks until the workload is complete. Once Wait returns, all of the + // workload's write operations have been replayed AND the database's + // compactions have quiesced. 
+ m, err := r.Wait() + if err != nil { + return errors.Wrapf(err, "waiting for workload to complete") + } + if err := r.Close(); err != nil { + return errors.Wrapf(err, "cleaning up") + } + fmt.Fprintln(stdout, "Workload complete.") + if err := m.WriteBenchmarkString(c.name, stdout); err != nil { + return err + } + for _, plot := range m.Plots(120 /* width */, 30 /* height */) { + fmt.Fprintln(stdout, plot.Name) + fmt.Fprintln(stdout, plot.Plot) + fmt.Fprintln(stdout) + } + fmt.Fprintln(stdout, m.Final.String()) + return nil +} + +func (c *replayConfig) initRunDir(r *replay.Runner) error { + if r.RunDir == "" { + // Default to replaying in a new directory within the current working + // directory. + wd, err := os.Getwd() + if err != nil { + return err + } + r.RunDir, err = os.MkdirTemp(wd, "replay-") + if err != nil { + return err + } + c.cleanUpFuncs = append(c.cleanUpFuncs, func() error { + return os.RemoveAll(r.RunDir) + }) + } + if !c.ignoreCheckpoint { + checkpointDir := c.getCheckpointDir(r) + fmt.Printf("%s: Attempting to initialize with checkpoint %q.\n", time.Now().Format(time.RFC3339), checkpointDir) + ok, err := vfs.Clone( + r.WorkloadFS, + vfs.Default, + checkpointDir, + filepath.Join(r.RunDir), + vfs.CloneTryLink) + if err != nil { + return err + } + if !ok { + return errors.Newf("no checkpoint %q exists; you may re-run with --ignore-checkpoint", checkpointDir) + } + fmt.Printf("%s: Run directory initialized with checkpoint %q.\n", time.Now().Format(time.RFC3339), checkpointDir) + } + return nil +} + +func (c *replayConfig) initOptions(r *replay.Runner) error { + // If using a workload checkpoint, load the Options from it. + // TODO(jackson): Allow overriding the OPTIONS. 
+ if !c.ignoreCheckpoint { + ls, err := r.WorkloadFS.List(c.getCheckpointDir(r)) + if err != nil { + return err + } + sort.Strings(ls) + var optionsFilepath string + for _, l := range ls { + path := r.WorkloadFS.PathJoin(r.WorkloadPath, "checkpoint", l) + typ, _, ok := base.ParseFilename(r.WorkloadFS, path) + if ok && typ == base.FileTypeOptions { + optionsFilepath = path + } + } + f, err := r.WorkloadFS.Open(optionsFilepath) + if err != nil { + return err + } + o, err := io.ReadAll(f) + if err != nil { + return err + } + if err := f.Close(); err != nil { + return err + } + if err := r.Opts.Parse(string(o), c.parseHooks()); err != nil { + return err + } + } + if err := c.parseCustomOptions(c.optionsString, r.Opts); err != nil { + return err + } + // TODO(jackson): If r.Opts.Comparer == nil, peek at the workload's + // manifests and pull the comparer out of them. + // + // r.Opts.Comparer can only be nil at this point if ignoreCheckpoint is + // set; otherwise we'll have already extracted the Comparer from the + // checkpoint's OPTIONS file. 
+ + if c.streamLogs { + r.Opts.AddEventListener(pebble.MakeLoggingEventListener(pebble.DefaultLogger)) + } + r.Opts.EnsureDefaults() + return nil +} + +func (c *replayConfig) getCheckpointDir(r *replay.Runner) string { + if c.checkpointDir != "" { + return c.checkpointDir + } + return r.WorkloadFS.PathJoin(r.WorkloadPath, `checkpoint`) +} + +func (c *replayConfig) parseHooks() *pebble.ParseHooks { + return &pebble.ParseHooks{ + NewCache: func(size int64) *cache.Cache { + if c.maxCacheSize != 0 && size > c.maxCacheSize { + size = c.maxCacheSize + } + return cache.New(size) + }, + NewComparer: makeComparer, + NewFilterPolicy: func(name string) (pebble.FilterPolicy, error) { + switch name { + case "none": + return nil, nil + case "rocksdb.BuiltinBloomFilter": + return bloom.FilterPolicy(10), nil + default: + return nil, errors.Errorf("invalid filter policy name %q", name) + } + }, + NewMerger: makeMerger, + } +} + +// parseCustomOptions parses Pebble Options passed through a CLI flag. +// Ordinarily Pebble Options are specified through an INI file with newlines +// delimiting fields. That doesn't translate well to a CLI interface, so this +// function accepts fields are that delimited by any whitespace. This is the +// same format that CockroachDB accepts Pebble Options through the --store flag, +// and this code is copied from there. +func (c *replayConfig) parseCustomOptions(optsStr string, opts *pebble.Options) error { + if optsStr == "" { + return nil + } + // Pebble options are supplied in the Pebble OPTIONS ini-like + // format, but allowing any whitespace to delimit lines. Convert + // the options to a newline-delimited format. This isn't a trivial + // character replacement because whitespace may appear within a + // stanza, eg ["Level 0"]. 
+ value := strings.TrimSpace(optsStr) + var buf bytes.Buffer + for len(value) > 0 { + i := strings.IndexFunc(value, func(r rune) bool { + return r == '[' || unicode.IsSpace(r) + }) + switch { + case i == -1: + buf.WriteString(value) + value = value[len(value):] + case value[i] == '[': + // If there's whitespace within [ ], we write it verbatim. + j := i + strings.IndexRune(value[i:], ']') + buf.WriteString(value[:j+1]) + value = value[j+1:] + case unicode.IsSpace(rune(value[i])): + // NB: This doesn't handle multibyte whitespace. + buf.WriteString(value[:i]) + buf.WriteRune('\n') + value = strings.TrimSpace(value[i+1:]) + } + } + return opts.Parse(buf.String(), c.parseHooks()) +} + +func (c *replayConfig) cleanUp() error { + for _, f := range c.cleanUpFuncs { + if err := f(); err != nil { + return err + } + } + return nil +} + +func makeComparer(name string) (*pebble.Comparer, error) { + switch name { + case base.DefaultComparer.Name: + return base.DefaultComparer, nil + case "cockroach_comparator": + return mvccComparer, nil + default: + return nil, errors.Newf("unrecognized comparer %q", name) + } +} + +func makeMerger(name string) (*pebble.Merger, error) { + switch name { + case base.DefaultMerger.Name: + return base.DefaultMerger, nil + case "cockroach_merge_operator": + // We don't want to reimplement the cockroach merger. Instead we + // implement this merger to return the newer of the two operands. This + // doesn't exactly model cockroach's true use but should be good enough. + // TODO(jackson): Consider lifting replay into a `cockroach debug` + // command so we can use the true merger and comparer. 
+ merger := new(pebble.Merger) + merger.Merge = func(key, value []byte) (pebble.ValueMerger, error) { + return &overwriteValueMerger{value: append([]byte{}, value...)}, nil + } + merger.Name = name + return merger, nil + default: + return nil, errors.Newf("unrecognized comparer %q", name) + } +} + +// pacerFlag provides a command line flag interface for specifying the pacer to +// use. It implements the flag.Value interface. +type pacerFlag struct { + replay.Pacer + spec string +} + +var _ flag.Value = (*pacerFlag)(nil) + +func (f *pacerFlag) String() string { return f.spec } +func (f *pacerFlag) Type() string { return "pacer" } + +// Set implements the Flag.Value interface. +func (f *pacerFlag) Set(spec string) error { + f.spec = spec + switch { + case spec == "unpaced": + f.Pacer = replay.Unpaced{} + case spec == "reference-ramp": + f.Pacer = replay.PaceByReferenceReadAmp{} + case strings.HasPrefix(spec, "fixed-ramp="): + rAmp, err := strconv.Atoi(strings.TrimPrefix(spec, "fixed-ramp=")) + if err != nil { + return errors.Newf("unable to parse fixed r-amp: %s", err) + } + f.Pacer = replay.PaceByFixedReadAmp(rAmp) + default: + return errors.Newf("unrecognized pacer spec: %q", errors.Safe(spec)) + } + return nil +} + +type overwriteValueMerger struct { + value []byte +} + +func (o *overwriteValueMerger) MergeNewer(value []byte) error { + o.value = append(o.value[:0], value...) + return nil +} + +func (o *overwriteValueMerger) MergeOlder(value []byte) error { + return nil +} + +func (o *overwriteValueMerger) Finish(includesBase bool) ([]byte, io.Closer, error) { + return o.value, nil, nil +} diff --git a/pebble/cmd/pebble/replay_test.go b/pebble/cmd/pebble/replay_test.go new file mode 100644 index 0000000..b3f6225 --- /dev/null +++ b/pebble/cmd/pebble/replay_test.go @@ -0,0 +1,77 @@ +// Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package main + +import ( + "fmt" + "testing" + + "github.com/cockroachdb/pebble" + "github.com/cockroachdb/pebble/internal/cache" + "github.com/stretchr/testify/require" +) + +func TestParseOptionsStr(t *testing.T) { + type testCase struct { + c replayConfig + options *pebble.Options + } + + testCases := []testCase{ + { + c: replayConfig{optionsString: `[Options] max_concurrent_compactions=9`}, + options: &pebble.Options{MaxConcurrentCompactions: func() int { return 9 }}, + }, + { + c: replayConfig{optionsString: `[Options] bytes_per_sync=90000`}, + options: &pebble.Options{BytesPerSync: 90000}, + }, + { + c: replayConfig{optionsString: fmt.Sprintf(`[Options] cache_size=%d`, 16<<20 /* 16MB */)}, + options: &pebble.Options{Cache: cache.New(16 << 20 /* 16 MB */)}, + }, + { + c: replayConfig{ + maxCacheSize: 16 << 20, /* 16 MB */ + optionsString: fmt.Sprintf(`[Options] cache_size=%d`, int64(10<<30 /* 10 GB */)), + }, + options: &pebble.Options{Cache: cache.New(16 << 20 /* 16 MB */)}, + }, + { + c: replayConfig{optionsString: `[Options] [Level "0"] target_file_size=222`}, + options: &pebble.Options{Levels: []pebble.LevelOptions{ + {TargetFileSize: 222}, + }}, + }, + { + c: replayConfig{optionsString: `[Options] lbase_max_bytes=10 max_open_files=20 [Level "0"] target_file_size=30 [Level "1"] index_block_size=40`}, + options: &pebble.Options{ + LBaseMaxBytes: 10, + MaxOpenFiles: 20, + Levels: []pebble.LevelOptions{ + {TargetFileSize: 30}, + {IndexBlockSize: 40}, + }, + }, + }, + } + + for _, tc := range testCases { + o := new(pebble.Options) + require.NoError(t, tc.c.parseCustomOptions(tc.c.optionsString, o)) + o.EnsureDefaults() + got := o.String() + + tc.options.EnsureDefaults() + want := tc.options.String() + require.Equal(t, want, got) + if o.Cache != nil { + o.Cache.Unref() + } + if tc.options.Cache != nil { + tc.options.Cache.Unref() + } + } +} diff --git a/pebble/cmd/pebble/scan.go b/pebble/cmd/pebble/scan.go new file mode 100644 index 0000000..0803501 --- 
/dev/null +++ b/pebble/cmd/pebble/scan.go @@ -0,0 +1,160 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package main + +import ( + "fmt" + "log" + "math" + "sync" + "sync/atomic" + "time" + + "github.com/cockroachdb/pebble" + "github.com/cockroachdb/pebble/internal/randvar" + "github.com/spf13/cobra" + "golang.org/x/exp/rand" +) + +var scanConfig struct { + reverse bool + rows *randvar.Flag + values *randvar.BytesFlag +} + +var scanCmd = &cobra.Command{ + Use: "scan ", + Short: "run the scan benchmark", + Long: ``, + Args: cobra.ExactArgs(1), + Run: runScan, +} + +func init() { + scanCmd.Flags().BoolVarP( + &scanConfig.reverse, "reverse", "r", false, "reverse scan") + scanConfig.rows = randvar.NewFlag("100") + scanCmd.Flags().Var( + scanConfig.rows, "rows", "number of rows to scan in each operation") + scanConfig.values = randvar.NewBytesFlag("8") + scanCmd.Flags().Var( + scanConfig.values, "values", + "value size distribution [{zipf,uniform}:]min[-max][/]") +} + +func runScan(cmd *cobra.Command, args []string) { + var ( + bytes atomic.Int64 + scanned atomic.Int64 + lastBytes int64 + lastScanned int64 + lastElapsed time.Duration + ) + + opts := pebble.Sync + if disableWAL { + opts = pebble.NoSync + } + + rowDist := scanConfig.rows + + runTest(args[0], test{ + init: func(d DB, wg *sync.WaitGroup) { + const count = 100000 + const batch = 1000 + + rng := rand.New(rand.NewSource(1449168817)) + keys := make([][]byte, count) + + for i := 0; i < count; { + b := d.NewBatch() + var value []byte + for end := i + batch; i < end; i++ { + keys[i] = mvccEncode(nil, encodeUint32Ascending([]byte("key-"), uint32(i)), uint64(i+1), 0) + value = scanConfig.values.Bytes(rng, value) + if err := b.Set(keys[i], value, nil); err != nil { + log.Fatal(err) + } + } + if err := b.Commit(opts); err != nil { + log.Fatal(err) + } + } + + if err := d.Flush(); 
err != nil { + log.Fatal(err) + } + + limiter := maxOpsPerSec.newRateLimiter() + + wg.Add(concurrency) + for i := 0; i < concurrency; i++ { + go func(i int) { + defer wg.Done() + + rng := rand.New(rand.NewSource(uint64(i))) + startKeyBuf := append(make([]byte, 0, 64), []byte("key-")...) + endKeyBuf := append(make([]byte, 0, 64), []byte("key-")...) + minTS := encodeUint64Ascending(nil, math.MaxUint64) + + for { + wait(limiter) + + rows := int(rowDist.Uint64(rng)) + startIdx := rng.Int31n(int32(len(keys) - rows)) + startKey := encodeUint32Ascending(startKeyBuf[:4], uint32(startIdx)) + endKey := encodeUint32Ascending(endKeyBuf[:4], uint32(startIdx+int32(rows))) + + var count int + var nbytes int64 + if scanConfig.reverse { + count, nbytes = mvccReverseScan(d, startKey, endKey, minTS) + } else { + count, nbytes = mvccForwardScan(d, startKey, endKey, minTS) + } + + if count != rows { + log.Fatalf("scanned %d, expected %d\n", count, rows) + } + + bytes.Add(nbytes) + scanned.Add(int64(count)) + } + }(i) + } + }, + + tick: func(elapsed time.Duration, i int) { + if i%20 == 0 { + fmt.Println("_elapsed_______rows/sec_______MB/sec_______ns/row") + } + + curBytes := bytes.Load() + curScanned := scanned.Load() + dur := elapsed - lastElapsed + fmt.Printf("%8s %14.1f %12.1f %12.1f\n", + time.Duration(elapsed.Seconds()+0.5)*time.Second, + float64(curScanned-lastScanned)/dur.Seconds(), + float64(curBytes-lastBytes)/(dur.Seconds()*(1<<20)), + float64(dur)/float64(curScanned-lastScanned), + ) + lastBytes = curBytes + lastScanned = curScanned + lastElapsed = elapsed + }, + + done: func(elapsed time.Duration) { + curBytes := bytes.Load() + curScanned := scanned.Load() + fmt.Println("\n_elapsed___ops/sec(cum)__MB/sec(cum)__ns/row(avg)") + fmt.Printf("%7.1fs %14.1f %12.1f %12.1f\n\n", + elapsed.Seconds(), + float64(curScanned)/elapsed.Seconds(), + float64(curBytes)/(elapsed.Seconds()*(1<<20)), + float64(elapsed)/float64(curScanned), + ) + }, + }) +} diff --git a/pebble/cmd/pebble/sync.go 
b/pebble/cmd/pebble/sync.go new file mode 100644 index 0000000..e2add26 --- /dev/null +++ b/pebble/cmd/pebble/sync.go @@ -0,0 +1,143 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package main + +import ( + "fmt" + "log" + "sync" + "sync/atomic" + "time" + + "github.com/cockroachdb/pebble" + "github.com/cockroachdb/pebble/internal/randvar" + "github.com/spf13/cobra" + "golang.org/x/exp/rand" +) + +var syncConfig struct { + batch *randvar.Flag + walOnly bool + values *randvar.BytesFlag +} + +var syncCmd = &cobra.Command{ + Use: "sync ", + Short: "run the sync benchmark", + Long: ``, + Args: cobra.ExactArgs(1), + Run: runSync, +} + +func init() { + syncConfig.batch = randvar.NewFlag("5") + syncCmd.Flags().Var( + syncConfig.batch, "batch", + "batch size distribution [{zipf,uniform}:]min[-max]") + syncCmd.Flags().BoolVar( + &syncConfig.walOnly, "wal-only", false, "write data only to the WAL") + syncConfig.values = randvar.NewBytesFlag("uniform:60-80/1.0") + syncCmd.Flags().Var( + syncConfig.values, "values", + "value size distribution [{zipf,uniform}:]min[-max][/]") +} + +func runSync(cmd *cobra.Command, args []string) { + reg := newHistogramRegistry() + var bytes atomic.Uint64 + var lastBytes uint64 + + opts := pebble.Sync + if disableWAL { + opts = pebble.NoSync + } + + batchDist := syncConfig.batch + + runTest(args[0], test{ + init: func(d DB, wg *sync.WaitGroup) { + limiter := maxOpsPerSec.newRateLimiter() + + wg.Add(concurrency) + for i := 0; i < concurrency; i++ { + latency := reg.Register("ops") + go func() { + defer wg.Done() + + rand := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) + var raw []byte + var buf []byte + var block []byte + for { + wait(limiter) + + start := time.Now() + b := d.NewBatch() + var n uint64 + count := int(batchDist.Uint64(rand)) + for j := 0; j < count; j++ { + block = 
syncConfig.values.Bytes(rand, block) + + if syncConfig.walOnly { + if err := b.LogData(block, nil); err != nil { + log.Fatal(err) + } + } else { + raw = encodeUint32Ascending(raw[:0], rand.Uint32()) + key := mvccEncode(buf[:0], raw, 0, 0) + buf = key[:0] + if err := b.Set(key, block, nil); err != nil { + log.Fatal(err) + } + } + n += uint64(len(block)) + } + if err := b.Commit(opts); err != nil { + log.Fatal(err) + } + latency.Record(time.Since(start)) + bytes.Add(n) + } + }() + } + }, + + tick: func(elapsed time.Duration, i int) { + if i%20 == 0 { + fmt.Println("_elapsed____ops/sec___mb/sec__p50(ms)__p95(ms)__p99(ms)_pMax(ms)") + } + reg.Tick(func(tick histogramTick) { + h := tick.Hist + n := bytes.Load() + fmt.Printf("%8s %10.1f %8.1f %8.1f %8.1f %8.1f %8.1f\n", + time.Duration(elapsed.Seconds()+0.5)*time.Second, + float64(h.TotalCount())/tick.Elapsed.Seconds(), + float64(n-lastBytes)/(1024.0*1024.0)/tick.Elapsed.Seconds(), + time.Duration(h.ValueAtQuantile(50)).Seconds()*1000, + time.Duration(h.ValueAtQuantile(95)).Seconds()*1000, + time.Duration(h.ValueAtQuantile(99)).Seconds()*1000, + time.Duration(h.ValueAtQuantile(100)).Seconds()*1000, + ) + lastBytes = n + }) + }, + + done: func(elapsed time.Duration) { + fmt.Println("\n_elapsed___ops(total)_ops/sec(cum)_mb/sec(cum)__avg(ms)__p50(ms)__p95(ms)__p99(ms)_pMax(ms)") + reg.Tick(func(tick histogramTick) { + h := tick.Cumulative + fmt.Printf("%7.1fs %12d %12.1f %11.1f %8.1f %8.1f %8.1f %8.1f %8.1f\n\n", + elapsed.Seconds(), h.TotalCount(), + float64(h.TotalCount())/elapsed.Seconds(), + float64(bytes.Load()/(1024.0*1024.0))/elapsed.Seconds(), + time.Duration(h.Mean()).Seconds()*1000, + time.Duration(h.ValueAtQuantile(50)).Seconds()*1000, + time.Duration(h.ValueAtQuantile(95)).Seconds()*1000, + time.Duration(h.ValueAtQuantile(99)).Seconds()*1000, + time.Duration(h.ValueAtQuantile(100)).Seconds()*1000) + }) + }, + }) +} diff --git a/pebble/cmd/pebble/test.go b/pebble/cmd/pebble/test.go new file mode 100644 index 
0000000..c8d707b --- /dev/null +++ b/pebble/cmd/pebble/test.go @@ -0,0 +1,400 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package main + +import ( + "fmt" + "io" + "log" + "os" + "os/signal" + "runtime" + "runtime/pprof" + "sort" + "sync" + "syscall" + "time" + + "github.com/HdrHistogram/hdrhistogram-go" + "github.com/cockroachdb/pebble" +) + +const ( + minLatency = 10 * time.Microsecond + maxLatency = 10 * time.Second +) + +func startCPUProfile() func() { + runtime.SetMutexProfileFraction(1000) + + done := startRecording("cpu.%04d.prof", pprof.StartCPUProfile, pprof.StopCPUProfile) + return func() { + done() + if p := pprof.Lookup("heap"); p != nil { + f, err := os.Create("heap.prof") + if err != nil { + log.Fatal(err) + } + if err := p.WriteTo(f, 0); err != nil { + log.Fatal(err) + } + f.Close() + } + if p := pprof.Lookup("mutex"); p != nil { + f, err := os.Create("mutex.prof") + if err != nil { + log.Fatal(err) + } + if err := p.WriteTo(f, 0); err != nil { + log.Fatal(err) + } + f.Close() + } + } +} + +func startRecording(fmtStr string, startFunc func(io.Writer) error, stopFunc func()) func() { + doneCh := make(chan struct{}) + var doneWG sync.WaitGroup + doneWG.Add(1) + + go func() { + defer doneWG.Done() + + start := time.Now() + t := time.NewTicker(10 * time.Second) + defer t.Stop() + + var current *os.File + defer func() { + if current != nil { + stopFunc() + current.Close() + } + }() + + for { + if current != nil { + stopFunc() + current.Close() + current = nil + } + path := fmt.Sprintf(fmtStr, int(time.Since(start).Seconds()+0.5)) + f, err := os.Create(path) + if err != nil { + log.Fatalf("unable to create cpu profile: %s", err) + return + } + if err := startFunc(f); err != nil { + log.Fatalf("unable to start cpu profile: %v", err) + f.Close() + return + } + current = f + + select { + case <-doneCh: + return + case 
<-t.C: + } + } + }() + + return func() { + close(doneCh) + doneWG.Wait() + } +} + +func newHistogram() *hdrhistogram.Histogram { + return hdrhistogram.New(minLatency.Nanoseconds(), maxLatency.Nanoseconds(), 1) +} + +type namedHistogram struct { + name string + mu struct { + sync.Mutex + current *hdrhistogram.Histogram + } +} + +func newNamedHistogram(name string) *namedHistogram { + w := &namedHistogram{name: name} + w.mu.current = newHistogram() + return w +} + +func (w *namedHistogram) Record(elapsed time.Duration) { + if elapsed < minLatency { + elapsed = minLatency + } else if elapsed > maxLatency { + elapsed = maxLatency + } + + w.mu.Lock() + err := w.mu.current.RecordValue(elapsed.Nanoseconds()) + w.mu.Unlock() + + if err != nil { + // Note that a histogram only drops recorded values that are out of range, + // but we clamp the latency value to the configured range to prevent such + // drops. This code path should never happen. + panic(fmt.Sprintf(`%s: recording value: %s`, w.name, err)) + } +} + +func (w *namedHistogram) tick(fn func(h *hdrhistogram.Histogram)) { + w.mu.Lock() + defer w.mu.Unlock() + h := w.mu.current + w.mu.current = newHistogram() + fn(h) +} + +type histogramTick struct { + // Name is the name given to the histograms represented by this tick. + Name string + // Hist is the merged result of the represented histograms for this tick. + // Hist.TotalCount() is the number of operations that occurred for this tick. + Hist *hdrhistogram.Histogram + // Cumulative is the merged result of the represented histograms for all + // time. Cumulative.TotalCount() is the total number of operations that have + // occurred over all time. + Cumulative *hdrhistogram.Histogram + // Elapsed is the amount of time since the last tick. + Elapsed time.Duration + // Now is the time at which the tick was gathered. It covers the period + // [Now-Elapsed,Now). 
+ Now time.Time +} + +type histogramRegistry struct { + mu struct { + sync.Mutex + registered []*namedHistogram + } + + start time.Time + cumulative map[string]*hdrhistogram.Histogram + prevTick map[string]time.Time +} + +func newHistogramRegistry() *histogramRegistry { + return &histogramRegistry{ + start: time.Now(), + cumulative: make(map[string]*hdrhistogram.Histogram), + prevTick: make(map[string]time.Time), + } +} + +func (w *histogramRegistry) Register(name string) *namedHistogram { + hist := newNamedHistogram(name) + + w.mu.Lock() + w.mu.registered = append(w.mu.registered, hist) + w.mu.Unlock() + + return hist +} + +func (w *histogramRegistry) Tick(fn func(histogramTick)) { + w.mu.Lock() + registered := append([]*namedHistogram(nil), w.mu.registered...) + w.mu.Unlock() + + merged := make(map[string]*hdrhistogram.Histogram) + var names []string + for _, hist := range registered { + hist.tick(func(h *hdrhistogram.Histogram) { + if p, ok := merged[hist.name]; ok { + p.Merge(h) + } else { + merged[hist.name] = h + names = append(names, hist.name) + } + }) + } + + now := time.Now() + sort.Strings(names) + for _, name := range names { + mergedHist := merged[name] + if _, ok := w.cumulative[name]; !ok { + w.cumulative[name] = newHistogram() + } + w.cumulative[name].Merge(mergedHist) + + prevTick, ok := w.prevTick[name] + if !ok { + prevTick = w.start + } + w.prevTick[name] = now + fn(histogramTick{ + Name: name, + Hist: merged[name], + Cumulative: w.cumulative[name], + Elapsed: now.Sub(prevTick), + Now: now, + }) + } +} + +type testWithoutDB struct { + init func(wg *sync.WaitGroup) + tick func(elapsed time.Duration, i int) + done func(wg *sync.WaitGroup, elapsed time.Duration) +} + +func runTestWithoutDB(t testWithoutDB) { + var wg sync.WaitGroup + t.init(&wg) + + ticker := time.NewTicker(time.Second) + defer ticker.Stop() + + done := make(chan os.Signal, 3) + workersDone := make(chan struct{}) + signal.Notify(done, os.Interrupt) + + go func() { + wg.Wait() + 
close(workersDone) + }() + + if duration > 0 { + go func() { + time.Sleep(duration) + done <- syscall.Signal(0) + }() + } + + stopProf := startCPUProfile() + defer stopProf() + + start := time.Now() + for i := 0; ; i++ { + select { + case <-ticker.C: + if workersDone != nil { + t.tick(time.Since(start), i) + } + + case <-workersDone: + workersDone = nil + t.done(&wg, time.Since(start)) + return + + case sig := <-done: + fmt.Println("operating system is killing the op.", sig) + if workersDone != nil { + t.done(&wg, time.Since(start)) + } + return + } + } +} + +type test struct { + init func(db DB, wg *sync.WaitGroup) + tick func(elapsed time.Duration, i int) + done func(elapsed time.Duration) +} + +func runTest(dir string, t test) { + // Check if the directory exists. + if wipe { + fmt.Printf("wiping %s\n", dir) + if err := os.RemoveAll(dir); err != nil { + log.Fatal(err) + } + } + + fmt.Printf("dir %s\nconcurrency %d\n", dir, concurrency) + + db := newPebbleDB(dir) + var wg sync.WaitGroup + t.init(db, &wg) + + ticker := time.NewTicker(time.Second) + defer ticker.Stop() + + done := make(chan os.Signal, 3) + workersDone := make(chan struct{}) + signal.Notify(done, os.Interrupt) + + go func() { + wg.Wait() + close(workersDone) + }() + + if maxSize > 0 { + go func() { + for { + time.Sleep(10 * time.Second) + if db.Metrics().DiskSpaceUsage() > maxSize*1e6 { + fmt.Println("max size reached") + done <- syscall.Signal(0) + } + } + }() + } + if duration > 0 { + go func() { + time.Sleep(duration) + done <- syscall.Signal(0) + }() + } + + stopProf := startCPUProfile() + defer stopProf() + + backgroundCompactions := func(p *pebble.Metrics) bool { + // The last level never gets selected as an input level for compaction, + // only as an output level, so ignore it for the purposes of determining if + // background compactions are still needed. 
+ for i := range p.Levels[:len(p.Levels)-1] { + if p.Levels[i].Score > 1 { + return true + } + } + return false + } + + start := time.Now() + for i := 0; ; i++ { + select { + case <-ticker.C: + if workersDone != nil { + t.tick(time.Since(start), i) + if verbose && (i%10) == 9 { + fmt.Printf("%s", db.Metrics()) + } + } else if waitCompactions { + p := db.Metrics() + fmt.Printf("%s", p) + if !backgroundCompactions(p) { + return + } + } + + case <-workersDone: + workersDone = nil + t.done(time.Since(start)) + p := db.Metrics() + fmt.Printf("%s", p) + if !waitCompactions || !backgroundCompactions(p) { + return + } + fmt.Printf("waiting for background compactions\n") + + case <-done: + if workersDone != nil { + t.done(time.Since(start)) + } + fmt.Printf("%s", db.Metrics()) + return + } + } +} diff --git a/pebble/cmd/pebble/tombstone.go b/pebble/cmd/pebble/tombstone.go new file mode 100644 index 0000000..bbe0e3b --- /dev/null +++ b/pebble/cmd/pebble/tombstone.go @@ -0,0 +1,134 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package main + +import ( + "fmt" + "log" + "sync" + "time" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/humanize" + "github.com/spf13/cobra" +) + +func init() { + // NB: the tombstone workload piggybacks off the existing flags and + // configs for the queue and ycsb workloads. + initQueue(tombstoneCmd) + initYCSB(tombstoneCmd) +} + +var tombstoneCmd = &cobra.Command{ + Use: "tombstone ", + Short: "run the mixed-workload point tombstone benchmark", + Long: ` +Run a customizable YCSB workload, alongside a single-writer, fixed-sized queue +workload. This command is intended for evaluating compaction heuristics +surrounding point tombstones. + +The queue workload writes a point tombstone with every operation. 
A compaction +strategy that does not account for point tombstones may accumulate many +uncompacted tombstones, causing steady growth of the disk space consumed by +the queue keyspace. + +The --queue-values flag controls the distribution of the queue value sizes. +Larger values are more likely to exhibit problematic point tombstone behavior +on a database using a min-overlapping ratio heuristic because the compact +point tombstones may overlap many tables in the next level. + +The --queue-size flag controls the fixed number of live keys in the queue. Low +queue sizes may not exercise problematic tombstone behavior if queue sets and +deletes get written to the same sstable. The large-valued sets can serve as a +counterweight to the point tombstones, narrowing the keyrange of the sstable +inflating its size relative to its overlap with the next level. + `, + Args: cobra.ExactArgs(1), + RunE: runTombstoneCmd, +} + +func runTombstoneCmd(cmd *cobra.Command, args []string) error { + if wipe && ycsbConfig.prepopulatedKeys > 0 { + return errors.New("--wipe and --prepopulated-keys both specified which is nonsensical") + } + + weights, err := ycsbParseWorkload(ycsbConfig.workload) + if err != nil { + return err + } + + keyDist, err := ycsbParseKeyDist(ycsbConfig.keys) + if err != nil { + return err + } + + batchDist := ycsbConfig.batch + scanDist := ycsbConfig.scans + if err != nil { + return err + } + + valueDist := ycsbConfig.values + y := newYcsb(weights, keyDist, batchDist, scanDist, valueDist) + q, queueOps := queueTest() + + queueStart := []byte("queue-") + queueEnd := append(append([]byte{}, queueStart...), 0xFF) + + var lastElapsed time.Duration + var lastQueueOps int64 + + var pdb pebbleDB + runTest(args[0], test{ + init: func(d DB, wg *sync.WaitGroup) { + pdb = d.(pebbleDB) + y.init(d, wg) + q.init(d, wg) + }, + tick: func(elapsed time.Duration, i int) { + if i%20 == 0 { + fmt.Println(" queue ycsb") + 
fmt.Println("________elapsed______queue_size__ops/sec(inst)___ops/sec(cum)__ops/sec(inst)___ops/sec(cum)") + } + + curQueueOps := queueOps.Load() + dur := elapsed - lastElapsed + queueOpsPerSec := float64(curQueueOps-lastQueueOps) / dur.Seconds() + queueCumOpsPerSec := float64(curQueueOps) / elapsed.Seconds() + + lastQueueOps = curQueueOps + lastElapsed = elapsed + + var ycsbOpsPerSec, ycsbCumOpsPerSec float64 + y.reg.Tick(func(tick histogramTick) { + h := tick.Hist + ycsbOpsPerSec = float64(h.TotalCount()) / tick.Elapsed.Seconds() + ycsbCumOpsPerSec = float64(tick.Cumulative.TotalCount()) / elapsed.Seconds() + }) + + queueSize, err := pdb.d.EstimateDiskUsage(queueStart, queueEnd) + if err != nil { + log.Fatal(err) + } + fmt.Printf("%15s %15s %14.1f %14.1f %14.1f %14.1f\n", + time.Duration(elapsed.Seconds()+0.5)*time.Second, + humanize.Bytes.Uint64(queueSize), + queueOpsPerSec, + queueCumOpsPerSec, + ycsbOpsPerSec, + ycsbCumOpsPerSec) + }, + done: func(elapsed time.Duration) { + fmt.Println("________elapsed______queue_size") + queueSize, err := pdb.d.EstimateDiskUsage(queueStart, queueEnd) + if err != nil { + log.Fatal(err) + } + fmt.Printf("%15s %15s\n", elapsed.Truncate(time.Second), humanize.Bytes.Uint64(queueSize)) + }, + }) + return nil +} diff --git a/pebble/cmd/pebble/util.go b/pebble/cmd/pebble/util.go new file mode 100644 index 0000000..2da4685 --- /dev/null +++ b/pebble/cmd/pebble/util.go @@ -0,0 +1,15 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package main + +func encodeUint32Ascending(b []byte, v uint32) []byte { + return append(b, byte(v>>24), byte(v>>16), byte(v>>8), byte(v)) +} + +func encodeUint64Ascending(b []byte, v uint64) []byte { + return append(b, + byte(v>>56), byte(v>>48), byte(v>>40), byte(v>>32), + byte(v>>24), byte(v>>16), byte(v>>8), byte(v)) +} diff --git a/pebble/cmd/pebble/write_bench.go b/pebble/cmd/pebble/write_bench.go new file mode 100644 index 0000000..397a536 --- /dev/null +++ b/pebble/cmd/pebble/write_bench.go @@ -0,0 +1,483 @@ +// Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package main + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/cockroachdb/pebble/internal/ackseq" + "github.com/cockroachdb/pebble/internal/randvar" + "github.com/cockroachdb/pebble/internal/rate" + "github.com/spf13/cobra" +) + +// The following constants match the values that Cockroach uses in Admission +// Control at the time of writing. +// See: https://github.com/cockroachdb/cockroach/blob/cb5d5108a7705eac7be82bc7f0f8b6f4dc825b96/pkg/util/admission/granter.go#L1212-L1229 +const ( + defaultL0FileLimit = 1000 + defaultL0SubLevelLimit = 20 +) + +var writeBenchConfig struct { + batch *randvar.Flag + keys string + values *randvar.BytesFlag + concurrency int + rateStart int + incBase int + testPeriod time.Duration + cooloffPeriod time.Duration + targetL0Files int + targetL0SubLevels int + maxRateDipFraction float64 + debug bool +} + +var writeBenchCmd = &cobra.Command{ + Use: "write ", + Short: "Run YCSB F to find an a sustainable write throughput", + Long: ` +Run YCSB F (100% writes) at varying levels of sustained write load (ops/sec) to +determine an optimal value of write throughput. + +The benchmark works by maintaining a fixed amount of write load on the DB for a +fixed amount of time. 
If the database can handle the sustained load - determined +by a heuristic that takes into account the number of files in L0 sub-levels, the +number of L0 sub-levels, and whether the DB has encountered a write stall (i.e. +measured load on the DB drops to zero) - the load is increased on the DB. + +Load increases exponentially from an initial load. If the DB fails the heuristic +at the given write load, the load on the DB is paused for a period of time (the +cool-off period) before returning to the last value at which the DB could handle +the load. The exponent is then reset and the process repeats from this new +initial value. This allows the benchmark to converge on and oscillate around the +optimal write load. + +The values of load at which the DB passes and fails the heuristic are maintained +over the duration of the benchmark. On completion of the benchmark, an "optimal" +value is computed. The optimal value is computed as the value that minimizes the +mis-classification of the recorded "passes" and "fails"". This can be visualized +as a point on the x-axis that separates the passes and fails into the left and +right half-planes, minimizing the number of fails that fall to the left of this +point (i.e. mis-classified fails) and the number of passes that fall to the +right (i.e. mis-classified passes). + +The resultant "optimal sustained write load" value provides an estimate of the +write load that the DB can sustain without failing the target heuristic. + +A typical invocation of the benchmark is as follows: + + pebble bench write [PATH] --wipe -c 1024 -d 8h --rate-start 30000 --debug +`, + Args: cobra.ExactArgs(1), + RunE: runWriteBenchmark, +} + +func init() { + initWriteBench(writeBenchCmd) +} + +func initWriteBench(cmd *cobra.Command) { + // Default values for custom flags. 
+ writeBenchConfig.batch = randvar.NewFlag("1") + writeBenchConfig.values = randvar.NewBytesFlag("1000") + + cmd.Flags().Var( + writeBenchConfig.batch, "batch", + "batch size distribution [{zipf,uniform}:]min[-max]") + cmd.Flags().StringVar( + &writeBenchConfig.keys, "keys", "zipf", "latest, uniform, or zipf") + cmd.Flags().Var( + writeBenchConfig.values, "values", + "value size distribution [{zipf,uniform}:]min[-max][/]") + cmd.Flags().IntVarP( + &writeBenchConfig.concurrency, "concurrency", "c", + 1, "number of concurrent workers") + cmd.Flags().IntVar( + &writeBenchConfig.rateStart, "rate-start", + 1000, "starting write load (ops/sec)") + cmd.Flags().IntVar( + &writeBenchConfig.incBase, "rate-inc-base", + 100, "increment / decrement base") + cmd.Flags().DurationVar( + &writeBenchConfig.testPeriod, "test-period", + 60*time.Second, "time to run at a given write load") + cmd.Flags().DurationVar( + &writeBenchConfig.cooloffPeriod, "cooloff-period", + 30*time.Second, "time to pause write load after a failure") + cmd.Flags().IntVar( + &writeBenchConfig.targetL0Files, "l0-files", + defaultL0FileLimit, "target L0 file count") + cmd.Flags().IntVar( + &writeBenchConfig.targetL0SubLevels, "l0-sublevels", + defaultL0SubLevelLimit, "target L0 sublevel count") + cmd.Flags().BoolVarP( + &wipe, "wipe", "w", false, "wipe the database before starting") + cmd.Flags().Float64Var( + &writeBenchConfig.maxRateDipFraction, "max-rate-dip-fraction", 0.1, + "fraction at which to mark a test-run as failed if the actual rate dips below (relative to the desired rate)") + cmd.Flags().BoolVar( + &writeBenchConfig.debug, "debug", false, "print benchmark debug information") +} + +// writeBenchResult contains the results of a test run at a given rate. The +// independent variable is the rate (in ops/sec) and the dependent variable is +// whether the test passed or failed. Additional metadata associated with the +// test run is also captured. 
+type writeBenchResult struct {
+	name     string
+	rate     int           // The rate at which the test is currently running.
+	passed   bool          // Was the test successful at this rate.
+	elapsed  time.Duration // The total elapsed time of the test.
+	bytes    uint64        // The size of the LSM.
+	levels   int           // The number of levels occupied in the LSM.
+	writeAmp float64       // The write amplification.
+}
+
+// String implements fmt.Stringer, printing a raw benchmark line. These lines
+// are used when performing analysis on a given benchmark run.
+func (r writeBenchResult) String() string {
+	return fmt.Sprintf("BenchmarkRaw%s %d ops/sec %v pass %s elapsed %d bytes %d levels %.2f writeAmp",
+		r.name,
+		r.rate,
+		r.passed,
+		r.elapsed,
+		r.bytes,
+		r.levels,
+		r.writeAmp,
+	)
+}
+
+// runWriteBenchmark searches for the maximum sustainable write throughput: it
+// ramps the desired write rate up exponentially while test runs pass, and on
+// failure backtracks to the last passing rate after a cool-off period.
+func runWriteBenchmark(_ *cobra.Command, args []string) error {
+	const workload = "F" // 100% inserts.
+	var (
+		writers      []*pauseWriter
+		writersWg    *sync.WaitGroup // Tracks completion of all pauseWriters.
+		cooloff      bool            // Is cool-off enabled.
+		streak       int             // The number of successive passes.
+		clockStart   time.Time       // Start time for current load.
+		cooloffStart time.Time       // When cool-off was enabled.
+		stack        []int           // Stack of passing load values.
+		pass, fail   []int           // Values of load that pass and fail, respectively.
+		rateAcc      float64         // Accumulator of measured rates for a single test run.
+	)
+
+	desiredRate := writeBenchConfig.rateStart
+	incBase := writeBenchConfig.incBase
+	weights, err := ycsbParseWorkload(workload)
+
+	if err != nil {
+		return err
+	}
+
+	keyDist, err := ycsbParseKeyDist(writeBenchConfig.keys)
+	if err != nil {
+		return err
+	}
+	batchDist := writeBenchConfig.batch
+	valueDist := writeBenchConfig.values
+
+	// Construct a new YCSB F benchmark with the configured values.
+	y := newYcsb(weights, keyDist, batchDist, nil /* scans */, valueDist)
+	y.keyNum = ackseq.New(0)
+
+	// setLimit splits the given aggregate rate evenly across all writers.
+	setLimit := func(l int) {
+		perWriterRate := float64(l) / float64(len(writers))
+		for _, w := range writers {
+			w.setRate(perWriterRate)
+		}
+	}
+
+	// Function closure to run on test-run failure.
+	onTestFail := func(r writeBenchResult, cancel func()) {
+		fail = append(fail, desiredRate)
+
+		// Emit a benchmark raw datapoint.
+		fmt.Println(r)
+
+		// We failed at the current load, we have two options:
+
+		// a) No room to backtrack. We're done.
+		if len(stack) == 0 {
+			debugPrint("no room to backtrack; exiting ...\n")
+			cancel()
+			writersWg.Wait()
+			return
+		}
+
+		// b) We still have room to backtrack. Reduce the load to the
+		// last known passing value.
+		desiredRate, stack = stack[len(stack)-1], stack[:len(stack)-1]
+		setLimit(desiredRate)
+
+		// Enter the cool-off period.
+		cooloff = true
+		var wg sync.WaitGroup
+		for _, w := range writers {
+			// With a large number of writers, pausing synchronously can
+			// take a material amount of time. Instead, pause the
+			// writers in parallel in the background, and wait for all
+			// to complete before continuing.
+			wg.Add(1)
+			go func(writer *pauseWriter) {
+				writer.pause()
+				wg.Done()
+			}(w)
+		}
+		wg.Wait()
+
+		// Reset the counters and clocks.
+		streak = 0
+		rateAcc = 0
+		cooloffStart = time.Now()
+		clockStart = time.Now()
+		debugPrint("Fail. Pausing writers for cool-off period.\n")
+		debugPrint(fmt.Sprintf("new rate=%d\npasses=%v\nfails=%v\nstack=%v\n",
+			desiredRate, pass, fail, stack))
+	}
+
+	// Function closure to run on test-run success.
+	onTestSuccess := func(r writeBenchResult) {
+		streak++
+		pass = append(pass, desiredRate)
+		stack = append(stack, desiredRate)
+
+		// Emit a benchmark raw datapoint.
+		r.passed = true
+		fmt.Println(r)
+
+		// Increase the rate (exponential in the length of the pass streak).
+		desiredRate = desiredRate + incBase*(1<<(streak-1))
+		setLimit(desiredRate)
+
+		// Restart the test.
+		rateAcc = 0
+		clockStart = time.Now()
+
+		debugPrint(fmt.Sprintf("Pass.\nnew rate=%d\npasses=%v\nfails=%v\nstreak=%d\nstack=%v\n",
+			desiredRate, pass, fail, streak, stack))
+	}
+
+	name := fmt.Sprintf("write/values=%s", writeBenchConfig.values)
+	ctx, cancel := context.WithCancel(context.Background())
+	runTest(args[0], test{
+		init: func(db DB, wg *sync.WaitGroup) {
+			y.db = db
+			writersWg = wg
+
+			// Spawn the writers.
+			for i := 0; i < writeBenchConfig.concurrency; i++ {
+				writer := newPauseWriter(y, float64(desiredRate))
+				writers = append(writers, writer)
+				writersWg.Add(1)
+				go writer.run(ctx, wg)
+			}
+			setLimit(desiredRate)
+
+			// Start the clock on the current load.
+			clockStart = time.Now()
+		},
+		tick: func(elapsed time.Duration, i int) {
+			m := y.db.Metrics()
+			if i%20 == 0 {
+				if writeBenchConfig.debug && i > 0 {
+					fmt.Printf("%s\n", m)
+				}
+				fmt.Println("___elapsed___clock___rate(desired)___rate(actual)___L0files___L0levels___levels______lsmBytes___writeAmp")
+			}
+
+			// Print the current stats.
+			l0Files := m.Levels[0].NumFiles
+			l0Sublevels := m.Levels[0].Sublevels
+			nLevels := 0
+			for _, l := range m.Levels {
+				if l.BytesIn > 0 {
+					nLevels++
+				}
+			}
+			lsmBytes := m.DiskSpaceUsage()
+			total := m.Total()
+			writeAmp := (&total).WriteAmp()
+
+			var currRate float64
+			var stalled bool
+			y.reg.Tick(func(tick histogramTick) {
+				h := tick.Hist
+				currRate = float64(h.TotalCount()) / tick.Elapsed.Seconds()
+				stalled = !cooloff && currRate == 0
+			})
+			rateAcc += currRate
+
+			// The heuristic by which the DB can sustain a given write load is
+			// determined by whether the DB, for the configured window of time:
+			//   1) did not encounter a write stall (i.e. write load fell to
+			//      zero),
+			//   2) number of files in L0 was at or below the target, and
+			//   3) number of L0 sub-levels is at or below the target.
+			failed := stalled ||
+				int(l0Files) > writeBenchConfig.targetL0Files ||
+				int(l0Sublevels) > writeBenchConfig.targetL0SubLevels
+
+			// Print the result for this tick.
+			fmt.Printf("%10s %7s %15d %14.1f %9d %10d %8d %13d %10.2f\n",
+				time.Duration(elapsed.Seconds()+0.5)*time.Second,
+				time.Duration(time.Since(clockStart).Seconds()+0.5)*time.Second,
+				desiredRate,
+				currRate,
+				l0Files,
+				l0Sublevels,
+				nLevels,
+				lsmBytes,
+				writeAmp,
+			)
+
+			// If we're in cool-off mode, allow it to complete before resuming
+			// writing.
+			if cooloff {
+				if time.Since(cooloffStart) < writeBenchConfig.cooloffPeriod {
+					return
+				}
+				// NB: trailing newline keeps this debug line from fusing with
+				// the next stats line (debugPrint does not append one).
+				debugPrint("ending cool-off\n")
+
+				// Else, resume writing.
+				cooloff = false
+				for _, w := range writers {
+					w.unpause()
+				}
+				clockStart = time.Now()
+
+				return
+			}
+
+			r := writeBenchResult{
+				name:     name,
+				rate:     desiredRate,
+				elapsed:  time.Duration(elapsed.Seconds()+0.5) * time.Second,
+				bytes:    lsmBytes,
+				levels:   nLevels,
+				writeAmp: writeAmp,
+			}
+
+			if failed {
+				onTestFail(r, cancel)
+				return
+			}
+
+			// Else, the DB could handle the current load. We only increase
+			// after a fixed amount of time at this load as elapsed.
+			testElapsed := time.Since(clockStart)
+			if testElapsed < writeBenchConfig.testPeriod {
+				// This test-run still has time on the clock.
+				return
+			}
+
+			// This test-run has completed.
+
+			// If the average rate over the test is less than the desired rate,
+			// we mark this test-run as a failure. This handles cases where we
+			// encounter a bottleneck that limits write throughput but
+			// incorrectly mark the test as passed.
+			diff := 1 - rateAcc/(float64(desiredRate)*testElapsed.Seconds())
+			if diff > writeBenchConfig.maxRateDipFraction {
+				if writeBenchConfig.debug {
+					debugPrint(fmt.Sprintf(
+						"difference in rates (%.2f) exceeded threshold (%.2f); marking test as failed\n",
+						diff, writeBenchConfig.maxRateDipFraction,
+					))
+				}
+				onTestFail(r, cancel)
+				return
+			}
+
+			// Mark this test-run as passed.
+			onTestSuccess(r)
+		},
+		done: func(elapsed time.Duration) {
+			// Print final analysis.
+			var total int64
+			y.reg.Tick(func(tick histogramTick) {
+				total = tick.Cumulative.TotalCount()
+			})
+			fmt.Println("___elapsed___ops(total)")
+			fmt.Printf("%10s %12d\n", elapsed.Truncate(time.Second), total)
+		},
+	})
+
+	return nil
+}
+
+// debugPrint prints a debug line to stdout if debug logging is enabled via the
+// --debug flag.
+func debugPrint(s string) {
+	if !writeBenchConfig.debug {
+		return
+	}
+	fmt.Print("DEBUG: " + s)
+}
+
+// pauseWriter issues load against a pebble instance, and can be paused on
+// demand to allow the DB to recover.
+type pauseWriter struct {
+	y        *ycsb
+	limiter  *rate.Limiter
+	pauseC   chan struct{}
+	unpauseC chan struct{}
+}
+
+// newPauseWriter returns a new pauseWriter.
+func newPauseWriter(y *ycsb, initialRate float64) *pauseWriter {
+	// Set the burst rate for the limiter to the lowest sensible value to
+	// prevent excessive bursting. Note that a burst of zero effectively
+	// disables the rate limiter, as a wait time of +Inf is returned from all
+	// calls, and `wait(l *rate.Limiter)` will not sleep in this case.
+	const burst = 1
+	return &pauseWriter{
+		y: y,
+		// NB: initialRate is already a float64; no conversion needed.
+		limiter:  rate.NewLimiter(initialRate, burst),
+		pauseC:   make(chan struct{}),
+		unpauseC: make(chan struct{}),
+	}
+}
+
+// run starts the pauseWriter, issuing load against the DB.
+func (w *pauseWriter) run(ctx context.Context, wg *sync.WaitGroup) {
+	defer wg.Done()
+
+	buf := &ycsbBuf{rng: randvar.NewRand()}
+	hist := w.y.reg.Register("insert")
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case <-w.pauseC:
+			// Hold the goroutine here until we unpause.
+			<-w.unpauseC
+		default:
+			wait(w.limiter)
+			start := time.Now()
+			w.y.insert(w.y.db, buf)
+			hist.Record(time.Since(start))
+		}
+	}
+}
+
+// pause signals that the writer should pause after the current operation.
+func (w *pauseWriter) pause() {
+	w.pauseC <- struct{}{}
+}
+
+// unpause unpauses the writer.
+func (w *pauseWriter) unpause() { + w.unpauseC <- struct{}{} +} + +// setRate sets the rate limit for this writer. +func (w *pauseWriter) setRate(r float64) { + w.limiter.SetRate(r) +} diff --git a/pebble/cmd/pebble/ycsb.go b/pebble/cmd/pebble/ycsb.go new file mode 100644 index 0000000..41de324 --- /dev/null +++ b/pebble/cmd/pebble/ycsb.go @@ -0,0 +1,609 @@ +// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package main + +import ( + "fmt" + "log" + "strconv" + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble" + "github.com/cockroachdb/pebble/internal/ackseq" + "github.com/cockroachdb/pebble/internal/randvar" + "github.com/cockroachdb/pebble/internal/rate" + "github.com/spf13/cobra" + "golang.org/x/exp/rand" +) + +const ( + ycsbInsert = iota + ycsbRead + ycsbScan + ycsbReverseScan + ycsbUpdate + ycsbNumOps +) + +var ycsbConfig struct { + batch *randvar.Flag + keys string + initialKeys int + prepopulatedKeys int + numOps uint64 + scans *randvar.Flag + values *randvar.BytesFlag + workload string +} + +var ycsbCmd = &cobra.Command{ + Use: "ycsb ", + Short: "run customizable YCSB benchmark", + Long: ` +Run a customizable YCSB workload. The workload is specified by the --workload +flag which can take either one of the standard workload mixes (A-F), or +customizable workload fixes specified as a command separated list of op=weight +pairs. For example, --workload=read=50,update=50 performs a workload composed +of 50% reads and 50% updates. This is identical to the standard workload A. + +The --batch, --scans, and --values flags take the specification for a random +variable: [:][-]. The parameter must be one of "uniform" +or "zipf". If is omitted, a uniform distribution is used. If is +omitted it is set to the same value as . The specification "1000" results +in a constant 1000. 
The specification "10-100" results in a uniformly random +variable in the range [10,100). The specification "zipf(10,100)" results in a +zipf distribution with a minimum value of 10 and a maximum value of 100. + +The --batch flag controls the size of batches used for insert and update +operations. The --scans flag controls the number of iterations performed by a +scan operation. Read operations always read a single key. + +The --values flag provides for an optional "/" +suffix. The default target compression ratio is 1.0 (i.e. incompressible random +data). A value of 2 will cause random data to be generated that should compress +to 50% of its uncompressed size. + +Standard workloads: + + A: 50% reads / 50% updates + B: 95% reads / 5% updates + C: 100% reads + D: 95% reads / 5% inserts + E: 95% scans / 5% inserts + F: 100% inserts +`, + Args: cobra.ExactArgs(1), + RunE: runYcsb, +} + +func init() { + initYCSB(ycsbCmd) +} + +func initYCSB(cmd *cobra.Command) { + ycsbConfig.batch = randvar.NewFlag("1") + cmd.Flags().Var( + ycsbConfig.batch, "batch", + "batch size distribution [{zipf,uniform}:]min[-max]") + cmd.Flags().StringVar( + &ycsbConfig.keys, "keys", "zipf", "latest, uniform, or zipf") + cmd.Flags().IntVar( + &ycsbConfig.initialKeys, "initial-keys", 10000, + "initial number of keys to insert before beginning workload") + cmd.Flags().IntVar( + &ycsbConfig.prepopulatedKeys, "prepopulated-keys", 0, + "number of keys that were previously inserted into the database") + cmd.Flags().Uint64VarP( + &ycsbConfig.numOps, "num-ops", "n", 0, + "maximum number of operations (0 means unlimited)") + ycsbConfig.scans = randvar.NewFlag("zipf:1-1000") + cmd.Flags().Var( + ycsbConfig.scans, "scans", + "scan length distribution [{zipf,uniform}:]min[-max]") + cmd.Flags().StringVar( + &ycsbConfig.workload, "workload", "B", + "workload type (A-F) or spec (read=X,update=Y,...)") + ycsbConfig.values = randvar.NewBytesFlag("1000") + cmd.Flags().Var( + ycsbConfig.values, "values", + "value 
size distribution [{zipf,uniform}:]min[-max][/]") +} + +type ycsbWeights []float64 + +func (w ycsbWeights) get(i int) float64 { + if i >= len(w) { + return 0 + } + return w[i] +} + +var ycsbWorkloads = map[string]ycsbWeights{ + "A": { + ycsbRead: 0.5, + ycsbUpdate: 0.5, + }, + "B": { + ycsbRead: 0.95, + ycsbUpdate: 0.05, + }, + "C": { + ycsbRead: 1.0, + }, + "D": { + ycsbInsert: 0.05, + ycsbRead: 0.95, + // TODO(peter): default to skewed-latest distribution. + }, + "E": { + ycsbInsert: 0.05, + ycsbScan: 0.95, + }, + "F": { + ycsbInsert: 1.0, + // TODO(peter): the real workload is read-modify-write. + }, +} + +func ycsbParseWorkload(w string) (ycsbWeights, error) { + if weights := ycsbWorkloads[w]; weights != nil { + return weights, nil + } + iWeights := make([]int, ycsbNumOps) + for _, p := range strings.Split(w, ",") { + parts := strings.Split(p, "=") + if len(parts) != 2 { + return nil, errors.Errorf("malformed weights: %s", errors.Safe(w)) + } + weight, err := strconv.Atoi(parts[1]) + if err != nil { + return nil, err + } + switch parts[0] { + case "insert": + iWeights[ycsbInsert] = weight + case "read": + iWeights[ycsbRead] = weight + case "scan": + iWeights[ycsbScan] = weight + case "rscan": + iWeights[ycsbReverseScan] = weight + case "update": + iWeights[ycsbUpdate] = weight + } + } + + var sum int + for _, w := range iWeights { + sum += w + } + if sum == 0 { + return nil, errors.Errorf("zero weight specified: %s", errors.Safe(w)) + } + + weights := make(ycsbWeights, ycsbNumOps) + for i := range weights { + weights[i] = float64(iWeights[i]) / float64(sum) + } + return weights, nil +} + +func ycsbParseKeyDist(d string) (randvar.Dynamic, error) { + totalKeys := uint64(ycsbConfig.initialKeys + ycsbConfig.prepopulatedKeys) + switch strings.ToLower(d) { + case "latest": + return randvar.NewDefaultSkewedLatest() + case "uniform": + return randvar.NewUniform(1, totalKeys), nil + case "zipf": + return randvar.NewZipf(1, totalKeys, 0.99) + default: + return nil, 
errors.Errorf("unknown distribution: %s", errors.Safe(d)) + } +} + +func runYcsb(cmd *cobra.Command, args []string) error { + if wipe && ycsbConfig.prepopulatedKeys > 0 { + return errors.New("--wipe and --prepopulated-keys both specified which is nonsensical") + } + + weights, err := ycsbParseWorkload(ycsbConfig.workload) + if err != nil { + return err + } + + keyDist, err := ycsbParseKeyDist(ycsbConfig.keys) + if err != nil { + return err + } + + batchDist := ycsbConfig.batch + scanDist := ycsbConfig.scans + if err != nil { + return err + } + + valueDist := ycsbConfig.values + y := newYcsb(weights, keyDist, batchDist, scanDist, valueDist) + runTest(args[0], test{ + init: y.init, + tick: y.tick, + done: y.done, + }) + return nil +} + +type ycsbBuf struct { + rng *rand.Rand + keyBuf []byte + valueBuf []byte + keyNums []uint64 +} + +type ycsb struct { + db DB + writeOpts *pebble.WriteOptions + weights ycsbWeights + reg *histogramRegistry + keyDist randvar.Dynamic + batchDist randvar.Static + scanDist randvar.Static + valueDist *randvar.BytesFlag + readAmpCount atomic.Uint64 + readAmpSum atomic.Uint64 + keyNum *ackseq.S + numOps atomic.Uint64 + limiter *rate.Limiter + opsMap map[string]int +} + +func newYcsb( + weights ycsbWeights, + keyDist randvar.Dynamic, + batchDist, scanDist randvar.Static, + valueDist *randvar.BytesFlag, +) *ycsb { + y := &ycsb{ + reg: newHistogramRegistry(), + weights: weights, + keyDist: keyDist, + batchDist: batchDist, + scanDist: scanDist, + valueDist: valueDist, + opsMap: make(map[string]int), + } + y.writeOpts = pebble.Sync + if disableWAL { + y.writeOpts = pebble.NoSync + } + + ops := map[string]int{ + "insert": ycsbInsert, + "read": ycsbRead, + "rscan": ycsbReverseScan, + "scan": ycsbScan, + "update": ycsbUpdate, + } + for name, op := range ops { + w := y.weights.get(op) + if w == 0 { + continue + } + wstr := fmt.Sprint(int(100 * w)) + fill := strings.Repeat("_", 3-len(wstr)) + if fill == "" { + fill = "_" + } + fullName := 
fmt.Sprintf("%s%s%s", name, fill, wstr) + y.opsMap[fullName] = op + } + return y +} + +func (y *ycsb) init(db DB, wg *sync.WaitGroup) { + y.db = db + + if ycsbConfig.initialKeys > 0 { + buf := &ycsbBuf{rng: randvar.NewRand()} + + b := db.NewBatch() + size := 0 + start := time.Now() + last := start + for i := 1; i <= ycsbConfig.initialKeys; i++ { + if now := time.Now(); now.Sub(last) >= time.Second { + fmt.Printf("%5s inserted %d keys (%0.1f%%)\n", + time.Duration(now.Sub(start).Seconds()+0.5)*time.Second, + i-1, 100*float64(i-1)/float64(ycsbConfig.initialKeys)) + last = now + } + if size >= 1<<20 { + if err := b.Commit(y.writeOpts); err != nil { + log.Fatal(err) + } + b = db.NewBatch() + size = 0 + } + key := y.makeKey(uint64(i+ycsbConfig.prepopulatedKeys), buf) + value := y.randBytes(buf) + if err := b.Set(key, value, nil); err != nil { + log.Fatal(err) + } + size += len(key) + len(value) + } + if err := b.Commit(y.writeOpts); err != nil { + log.Fatal(err) + } + _ = b.Close() + fmt.Printf("inserted keys [%d-%d)\n", + 1+ycsbConfig.prepopulatedKeys, + 1+ycsbConfig.prepopulatedKeys+ycsbConfig.initialKeys) + } + y.keyNum = ackseq.New(uint64(ycsbConfig.initialKeys + ycsbConfig.prepopulatedKeys)) + + y.limiter = maxOpsPerSec.newRateLimiter() + + wg.Add(concurrency) + + // If this workload doesn't produce reads, sample the worst case read-amp + // from Metrics() periodically. + if y.weights.get(ycsbRead) == 0 && y.weights.get(ycsbScan) == 0 && y.weights.get(ycsbReverseScan) == 0 { + wg.Add(1) + go y.sampleReadAmp(db, wg) + } + + for i := 0; i < concurrency; i++ { + go y.run(db, wg) + } +} + +func (y *ycsb) run(db DB, wg *sync.WaitGroup) { + defer wg.Done() + + var latency [ycsbNumOps]*namedHistogram + for name, op := range y.opsMap { + latency[op] = y.reg.Register(name) + } + + buf := &ycsbBuf{rng: randvar.NewRand()} + + ops := randvar.NewWeighted(nil, y.weights...) 
+ for { + wait(y.limiter) + + start := time.Now() + + op := ops.Int() + switch op { + case ycsbInsert: + y.insert(db, buf) + case ycsbRead: + y.read(db, buf) + case ycsbScan: + y.scan(db, buf, false /* reverse */) + case ycsbReverseScan: + y.scan(db, buf, true /* reverse */) + case ycsbUpdate: + y.update(db, buf) + default: + panic("not reached") + } + + latency[op].Record(time.Since(start)) + if ycsbConfig.numOps > 0 && y.numOps.Add(1) >= ycsbConfig.numOps { + break + } + } +} + +func (y *ycsb) sampleReadAmp(db DB, wg *sync.WaitGroup) { + defer wg.Done() + + ticker := time.NewTicker(time.Second) + defer ticker.Stop() + for range ticker.C { + m := db.Metrics() + y.readAmpCount.Add(1) + y.readAmpSum.Add(uint64(m.ReadAmp())) + if ycsbConfig.numOps > 0 && y.numOps.Load() >= ycsbConfig.numOps { + break + } + } +} + +func (y *ycsb) hashKey(key uint64) uint64 { + // Inlined version of fnv.New64 + Write. + const offset64 = 14695981039346656037 + const prime64 = 1099511628211 + + h := uint64(offset64) + for i := 0; i < 8; i++ { + h *= prime64 + h ^= uint64(key & 0xff) + key >>= 8 + } + return h +} + +func (y *ycsb) makeKey(keyNum uint64, buf *ycsbBuf) []byte { + const size = 24 + 10 + if cap(buf.keyBuf) < size { + buf.keyBuf = make([]byte, size) + } + key := buf.keyBuf[:4] + copy(key, "user") + key = strconv.AppendUint(key, y.hashKey(keyNum), 10) + // Use the MVCC encoding for keys. This appends a timestamp with + // walltime=1. That knowledge is utilized by rocksDB.Scan. + key = append(key, '\x00', '\x00', '\x00', '\x00', '\x00', + '\x00', '\x00', '\x00', '\x01', '\x09') + buf.keyBuf = key + return key +} + +func (y *ycsb) nextReadKey(buf *ycsbBuf) []byte { + // NB: the range of values returned by keyDist is tied to the range returned + // by keyNum.Base. See how these are both incremented by ycsb.insert(). 
+ keyNum := y.keyDist.Uint64(buf.rng) + return y.makeKey(keyNum, buf) +} + +func (y *ycsb) randBytes(buf *ycsbBuf) []byte { + buf.valueBuf = y.valueDist.Bytes(buf.rng, buf.valueBuf) + return buf.valueBuf +} + +func (y *ycsb) insert(db DB, buf *ycsbBuf) { + count := y.batchDist.Uint64(buf.rng) + if cap(buf.keyNums) < int(count) { + buf.keyNums = make([]uint64, count) + } + keyNums := buf.keyNums[:count] + + b := db.NewBatch() + for i := range keyNums { + keyNums[i] = y.keyNum.Next() + _ = b.Set(y.makeKey(keyNums[i], buf), y.randBytes(buf), nil) + } + if err := b.Commit(y.writeOpts); err != nil { + log.Fatal(err) + } + _ = b.Close() + + for i := range keyNums { + delta, err := y.keyNum.Ack(keyNums[i]) + if err != nil { + log.Fatal(err) + } + if delta > 0 { + y.keyDist.IncMax(delta) + } + } +} + +func (y *ycsb) read(db DB, buf *ycsbBuf) { + key := y.nextReadKey(buf) + iter := db.NewIter(nil) + iter.SeekGE(key) + if iter.Valid() { + _ = iter.Key() + _ = iter.Value() + } + + type metrics interface { + Metrics() pebble.IteratorMetrics + } + if m, ok := iter.(metrics); ok { + y.readAmpCount.Add(1) + y.readAmpSum.Add(uint64(m.Metrics().ReadAmp)) + } + + if err := iter.Close(); err != nil { + log.Fatal(err) + } +} + +func (y *ycsb) scan(db DB, buf *ycsbBuf, reverse bool) { + count := y.scanDist.Uint64(buf.rng) + key := y.nextReadKey(buf) + iter := db.NewIter(nil) + if err := db.Scan(iter, key, int64(count), reverse); err != nil { + log.Fatal(err) + } + + type metrics interface { + Metrics() pebble.IteratorMetrics + } + if m, ok := iter.(metrics); ok { + y.readAmpCount.Add(1) + y.readAmpSum.Add(uint64(m.Metrics().ReadAmp)) + } + + if err := iter.Close(); err != nil { + log.Fatal(err) + } +} + +func (y *ycsb) update(db DB, buf *ycsbBuf) { + count := int(y.batchDist.Uint64(buf.rng)) + b := db.NewBatch() + for i := 0; i < count; i++ { + _ = b.Set(y.nextReadKey(buf), y.randBytes(buf), nil) + } + if err := b.Commit(y.writeOpts); err != nil { + log.Fatal(err) + } + _ = b.Close() 
+} + +func (y *ycsb) tick(elapsed time.Duration, i int) { + if i%20 == 0 { + fmt.Println("____optype__elapsed__ops/sec(inst)___ops/sec(cum)__p50(ms)__p95(ms)__p99(ms)_pMax(ms)") + } + y.reg.Tick(func(tick histogramTick) { + h := tick.Hist + + fmt.Printf("%10s %8s %14.1f %14.1f %8.1f %8.1f %8.1f %8.1f\n", + tick.Name, + time.Duration(elapsed.Seconds()+0.5)*time.Second, + float64(h.TotalCount())/tick.Elapsed.Seconds(), + float64(tick.Cumulative.TotalCount())/elapsed.Seconds(), + time.Duration(h.ValueAtQuantile(50)).Seconds()*1000, + time.Duration(h.ValueAtQuantile(95)).Seconds()*1000, + time.Duration(h.ValueAtQuantile(99)).Seconds()*1000, + time.Duration(h.ValueAtQuantile(100)).Seconds()*1000, + ) + }) +} + +func (y *ycsb) done(elapsed time.Duration) { + fmt.Println("\n____optype__elapsed_____ops(total)___ops/sec(cum)__avg(ms)__p50(ms)__p95(ms)__p99(ms)_pMax(ms)") + + resultTick := histogramTick{} + y.reg.Tick(func(tick histogramTick) { + h := tick.Cumulative + if resultTick.Cumulative == nil { + resultTick.Now = tick.Now + resultTick.Cumulative = h + } else { + resultTick.Cumulative.Merge(h) + } + + fmt.Printf("%10s %7.1fs %14d %14.1f %8.1f %8.1f %8.1f %8.1f %8.1f\n", + tick.Name, elapsed.Seconds(), h.TotalCount(), + float64(h.TotalCount())/elapsed.Seconds(), + time.Duration(h.Mean()).Seconds()*1000, + time.Duration(h.ValueAtQuantile(50)).Seconds()*1000, + time.Duration(h.ValueAtQuantile(95)).Seconds()*1000, + time.Duration(h.ValueAtQuantile(99)).Seconds()*1000, + time.Duration(h.ValueAtQuantile(100)).Seconds()*1000) + }) + fmt.Println() + + resultHist := resultTick.Cumulative + m := y.db.Metrics() + total := m.Total() + + readAmpCount := y.readAmpCount.Load() + readAmpSum := y.readAmpSum.Load() + if readAmpCount == 0 { + readAmpSum = 0 + readAmpCount = 1 + } + + fmt.Printf("Benchmarkycsb/%s/values=%s %d %0.1f ops/sec %d read %d write %.2f r-amp %0.2f w-amp\n\n", + ycsbConfig.workload, ycsbConfig.values, + resultHist.TotalCount(), + 
float64(resultHist.TotalCount())/elapsed.Seconds(), + total.BytesRead, + total.BytesFlushed+total.BytesCompacted, + float64(readAmpSum)/float64(readAmpCount), + total.WriteAmp(), + ) +} diff --git a/pebble/commit.go b/pebble/commit.go new file mode 100644 index 0000000..38cdbb8 --- /dev/null +++ b/pebble/commit.go @@ -0,0 +1,517 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import ( + "runtime" + "sync" + "sync/atomic" + "time" + + "github.com/cockroachdb/pebble/record" +) + +// commitQueue is a lock-free fixed-size single-producer, multi-consumer +// queue. The single producer can enqueue (push) to the head, and consumers can +// dequeue (pop) from the tail. +// +// It has the added feature that it nils out unused slots to avoid unnecessary +// retention of objects. +type commitQueue struct { + // headTail packs together a 32-bit head index and a 32-bit tail index. Both + // are indexes into slots modulo len(slots)-1. + // + // tail = index of oldest data in queue + // head = index of next slot to fill + // + // Slots in the range [tail, head) are owned by consumers. A consumer + // continues to own a slot outside this range until it nils the slot, at + // which point ownership passes to the producer. + // + // The head index is stored in the most-significant bits so that we can + // atomically add to it and the overflow is harmless. + headTail atomic.Uint64 + + // slots is a ring buffer of values stored in this queue. The size must be a + // power of 2. A slot is in use until *both* the tail index has moved beyond + // it and the slot value has been set to nil. The slot value is set to nil + // atomically by the consumer and read atomically by the producer. 
+ slots [record.SyncConcurrency]atomic.Pointer[Batch] +} + +const dequeueBits = 32 + +func (q *commitQueue) unpack(ptrs uint64) (head, tail uint32) { + const mask = 1<> dequeueBits) & mask) + tail = uint32(ptrs & mask) + return +} + +func (q *commitQueue) pack(head, tail uint32) uint64 { + const mask = 1< syncWAL +func (p *commitPipeline) Commit(b *Batch, syncWAL bool, noSyncWait bool) error { + if b.Empty() { + return nil + } + + commitStartTime := time.Now() + // Acquire semaphores. + p.commitQueueSem <- struct{}{} + if syncWAL { + p.logSyncQSem <- struct{}{} + } + b.commitStats.SemaphoreWaitDuration = time.Since(commitStartTime) + + // Prepare the batch for committing: enqueuing the batch in the pending + // queue, determining the batch sequence number and writing the data to the + // WAL. + // + // NB: We set Batch.commitErr on error so that the batch won't be a candidate + // for reuse. See Batch.release(). + mem, err := p.prepare(b, syncWAL, noSyncWait) + if err != nil { + b.db = nil // prevent batch reuse on error + // NB: we are not doing <-p.commitQueueSem since the batch is still + // sitting in the pending queue. We should consider fixing this by also + // removing the batch from the pending queue. + return err + } + + // Apply the batch to the memtable. + if err := p.env.apply(b, mem); err != nil { + b.db = nil // prevent batch reuse on error + // NB: we are not doing <-p.commitQueueSem since the batch is still + // sitting in the pending queue. We should consider fixing this by also + // removing the batch from the pending queue. + return err + } + + // Publish the batch sequence number. + p.publish(b) + + <-p.commitQueueSem + + if !noSyncWait { + // Already waited for commit, so look at the error. + if b.commitErr != nil { + b.db = nil // prevent batch reuse on error + err = b.commitErr + } + } + // Else noSyncWait. The LogWriter can be concurrently writing to + // b.commitErr. 
We will read b.commitErr in Batch.SyncWait after the + // LogWriter is done writing. + + b.commitStats.TotalDuration = time.Since(commitStartTime) + + return err +} + +// AllocateSeqNum allocates count sequence numbers, invokes the prepare +// callback, then the apply callback, and then publishes the sequence +// numbers. AllocateSeqNum does not write to the WAL or add entries to the +// memtable. AllocateSeqNum can be used to sequence an operation such as +// sstable ingestion within the commit pipeline. The prepare callback is +// invoked with commitPipeline.mu held, but note that DB.mu is not held and +// must be locked if necessary. +func (p *commitPipeline) AllocateSeqNum( + count int, prepare func(seqNum uint64), apply func(seqNum uint64), +) { + // This method is similar to Commit and prepare. Be careful about trying to + // share additional code with those methods because Commit and prepare are + // performance critical code paths. + + b := newBatch(nil) + defer b.release() + + // Give the batch a count of 1 so that the log and visible sequence number + // are incremented correctly. + b.data = make([]byte, batchHeaderLen) + b.setCount(uint32(count)) + b.commit.Add(1) + + p.commitQueueSem <- struct{}{} + + p.mu.Lock() + + // Enqueue the batch in the pending queue. Note that while the pending queue + // is lock-free, we want the order of batches to be the same as the sequence + // number order. + p.pending.enqueue(b) + + // Assign the batch a sequence number. Note that we use atomic operations + // here to handle concurrent reads of logSeqNum. commitPipeline.mu provides + // mutual exclusion for other goroutines writing to logSeqNum. + logSeqNum := p.env.logSeqNum.Add(uint64(count)) - uint64(count) + seqNum := logSeqNum + if seqNum == 0 { + // We can't use the value 0 for the global seqnum during ingestion, because + // 0 indicates no global seqnum. So allocate one more seqnum. 
+ p.env.logSeqNum.Add(1) + seqNum++ + } + b.setSeqNum(seqNum) + + // Wait for any outstanding writes to the memtable to complete. This is + // necessary for ingestion so that the check for memtable overlap can see any + // writes that were sequenced before the ingestion. The spin loop is + // unfortunate, but obviates the need for additional synchronization. + for { + visibleSeqNum := p.env.visibleSeqNum.Load() + if visibleSeqNum == logSeqNum { + break + } + runtime.Gosched() + } + + // Invoke the prepare callback. Note the lack of error reporting. Even if the + // callback internally fails, the sequence number needs to be published in + // order to allow the commit pipeline to proceed. + prepare(b.SeqNum()) + + p.mu.Unlock() + + // Invoke the apply callback. + apply(b.SeqNum()) + + // Publish the sequence number. + p.publish(b) + + <-p.commitQueueSem +} + +func (p *commitPipeline) prepare(b *Batch, syncWAL bool, noSyncWait bool) (*memTable, error) { + n := uint64(b.Count()) + if n == invalidBatchCount { + return nil, ErrInvalidBatch + } + var syncWG *sync.WaitGroup + var syncErr *error + switch { + case !syncWAL: + // Only need to wait for the publish. + b.commit.Add(1) + // Remaining cases represent syncWAL=true. + case noSyncWait: + syncErr = &b.commitErr + syncWG = &b.fsyncWait + // Only need to wait synchronously for the publish. The user will + // (asynchronously) wait on the batch's fsyncWait. + b.commit.Add(1) + b.fsyncWait.Add(1) + case !noSyncWait: + syncErr = &b.commitErr + syncWG = &b.commit + // Must wait for both the publish and the WAL fsync. + b.commit.Add(2) + } + + p.mu.Lock() + + // Enqueue the batch in the pending queue. Note that while the pending queue + // is lock-free, we want the order of batches to be the same as the sequence + // number order. + p.pending.enqueue(b) + + // Assign the batch a sequence number. Note that we use atomic operations + // here to handle concurrent reads of logSeqNum. 
commitPipeline.mu provides + // mutual exclusion for other goroutines writing to logSeqNum. + b.setSeqNum(p.env.logSeqNum.Add(n) - n) + + // Write the data to the WAL. + mem, err := p.env.write(b, syncWG, syncErr) + + p.mu.Unlock() + + return mem, err +} + +func (p *commitPipeline) publish(b *Batch) { + // Mark the batch as applied. + b.applied.Store(true) + + // Loop dequeuing applied batches from the pending queue. If our batch was + // the head of the pending queue we are guaranteed that either we'll publish + // it or someone else will dequeueApplied and publish it. If our batch is not the + // head of the queue then either we'll dequeueApplied applied batches and reach our + // batch or there is an unapplied batch blocking us. When that unapplied + // batch applies it will go through the same process and publish our batch + // for us. + for { + t := p.pending.dequeueApplied() + if t == nil { + // Wait for another goroutine to publish us. We might also be waiting for + // the WAL sync to finish. + now := time.Now() + b.commit.Wait() + b.commitStats.CommitWaitDuration += time.Since(now) + break + } + if !t.applied.Load() { + panic("not reached") + } + + // We're responsible for publishing the sequence number for batch t, but + // another concurrent goroutine might sneak in and publish the sequence + // number for a subsequent batch. That's ok as all we're guaranteeing is + // that the sequence number ratchets up. + for { + curSeqNum := p.env.visibleSeqNum.Load() + newSeqNum := t.SeqNum() + uint64(t.Count()) + if newSeqNum <= curSeqNum { + // t's sequence number has already been published. + break + } + if p.env.visibleSeqNum.CompareAndSwap(curSeqNum, newSeqNum) { + // We successfully published t's sequence number. 
+ break + } + } + + t.commit.Done() + } +} diff --git a/pebble/commit_test.go b/pebble/commit_test.go new file mode 100644 index 0000000..51b618d --- /dev/null +++ b/pebble/commit_test.go @@ -0,0 +1,355 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import ( + "encoding/binary" + "fmt" + "io" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/cockroachdb/pebble/internal/arenaskl" + "github.com/cockroachdb/pebble/internal/invariants" + "github.com/cockroachdb/pebble/record" + "github.com/cockroachdb/pebble/vfs" + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/require" + "golang.org/x/exp/rand" +) + +type testCommitEnv struct { + logSeqNum atomic.Uint64 + visibleSeqNum atomic.Uint64 + writeCount atomic.Uint64 + applyBuf struct { + sync.Mutex + buf []uint64 + } + queueSemChan chan struct{} +} + +func (e *testCommitEnv) env() commitEnv { + return commitEnv{ + logSeqNum: &e.logSeqNum, + visibleSeqNum: &e.visibleSeqNum, + apply: e.apply, + write: e.write, + } +} + +func (e *testCommitEnv) apply(b *Batch, mem *memTable) error { + e.applyBuf.Lock() + e.applyBuf.buf = append(e.applyBuf.buf, b.SeqNum()) + e.applyBuf.Unlock() + return nil +} + +func (e *testCommitEnv) write(b *Batch, wg *sync.WaitGroup, _ *error) (*memTable, error) { + e.writeCount.Add(1) + if wg != nil { + wg.Done() + <-e.queueSemChan + } + return nil, nil +} + +func TestCommitQueue(t *testing.T) { + var q commitQueue + var batches [16]Batch + for i := range batches { + q.enqueue(&batches[i]) + } + if b := q.dequeueApplied(); b != nil { + t.Fatalf("unexpectedly dequeued batch: %p", b) + } + batches[1].applied.Store(true) + if b := q.dequeueApplied(); b != nil { + t.Fatalf("unexpectedly dequeued batch: %p", b) + } + for i := range batches { + batches[i].applied.Store(true) + if b := q.dequeueApplied(); b != 
&batches[i] { + t.Fatalf("%d: expected batch %p, but found %p", i, &batches[i], b) + } + } + if b := q.dequeueApplied(); b != nil { + t.Fatalf("unexpectedly dequeued batch: %p", b) + } +} + +func TestCommitPipeline(t *testing.T) { + var e testCommitEnv + p := newCommitPipeline(e.env()) + + n := 10000 + if invariants.RaceEnabled { + // Under race builds we have to limit the concurrency or we hit the + // following error: + // + // race: limit on 8128 simultaneously alive goroutines is exceeded, dying + n = 1000 + } + + var wg sync.WaitGroup + wg.Add(n) + for i := 0; i < n; i++ { + go func(i int) { + defer wg.Done() + var b Batch + _ = b.Set([]byte(fmt.Sprint(i)), nil, nil) + _ = p.Commit(&b, false, false) + }(i) + } + wg.Wait() + + if s := e.writeCount.Load(); uint64(n) != s { + t.Fatalf("expected %d written batches, but found %d", n, s) + } + if n != len(e.applyBuf.buf) { + t.Fatalf("expected %d written batches, but found %d", + n, len(e.applyBuf.buf)) + } + if s := e.logSeqNum.Load(); uint64(n) != s { + t.Fatalf("expected %d, but found %d", n, s) + } + if s := e.visibleSeqNum.Load(); uint64(n) != s { + t.Fatalf("expected %d, but found %d", n, s) + } +} + +func TestCommitPipelineSync(t *testing.T) { + n := 10000 + if invariants.RaceEnabled { + // Under race builds we have to limit the concurrency or we hit the + // following error: + // + // race: limit on 8128 simultaneously alive goroutines is exceeded, dying + n = 1000 + } + + for _, noSyncWait := range []bool{false, true} { + t.Run(fmt.Sprintf("no-sync-wait=%t", noSyncWait), func(t *testing.T) { + var e testCommitEnv + p := newCommitPipeline(e.env()) + e.queueSemChan = p.logSyncQSem + + var wg sync.WaitGroup + wg.Add(n) + for i := 0; i < n; i++ { + go func(i int) { + defer wg.Done() + var b Batch + require.NoError(t, b.Set([]byte(fmt.Sprint(i)), nil, nil)) + require.NoError(t, p.Commit(&b, true, noSyncWait)) + if noSyncWait { + require.NoError(t, b.SyncWait()) + } + }(i) + } + wg.Wait() + if s := 
e.writeCount.Load(); uint64(n) != s { + t.Fatalf("expected %d written batches, but found %d", n, s) + } + if n != len(e.applyBuf.buf) { + t.Fatalf("expected %d written batches, but found %d", + n, len(e.applyBuf.buf)) + } + if s := e.logSeqNum.Load(); uint64(n) != s { + t.Fatalf("expected %d, but found %d", n, s) + } + if s := e.visibleSeqNum.Load(); uint64(n) != s { + t.Fatalf("expected %d, but found %d", n, s) + } + }) + } +} + +func TestCommitPipelineAllocateSeqNum(t *testing.T) { + var e testCommitEnv + p := newCommitPipeline(e.env()) + + const n = 10 + var wg sync.WaitGroup + wg.Add(n) + var prepareCount atomic.Uint64 + var applyCount atomic.Uint64 + for i := 1; i <= n; i++ { + go func(i int) { + defer wg.Done() + p.AllocateSeqNum(i, func(_ uint64) { + prepareCount.Add(1) + }, func(seqNum uint64) { + applyCount.Add(1) + }) + }(i) + } + wg.Wait() + + if s := prepareCount.Load(); n != s { + t.Fatalf("expected %d prepares, but found %d", n, s) + } + if s := applyCount.Load(); n != s { + t.Fatalf("expected %d applies, but found %d", n, s) + } + // AllocateSeqNum always returns a non-zero sequence number causing the + // values we see to be offset from 1. + const total = 1 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + if s := e.logSeqNum.Load(); total != s { + t.Fatalf("expected %d, but found %d", total, s) + } + if s := e.visibleSeqNum.Load(); total != s { + t.Fatalf("expected %d, but found %d", total, s) + } +} + +type syncDelayFile struct { + vfs.File + done chan struct{} +} + +func (f *syncDelayFile) Sync() error { + <-f.done + return nil +} + +func TestCommitPipelineWALClose(t *testing.T) { + // This test stresses the edge case of N goroutines blocked in the + // commitPipeline waiting for the log to sync when we concurrently decide to + // rotate and close the log. + + mem := vfs.NewMem() + f, err := mem.Create("test-wal") + require.NoError(t, err) + + // syncDelayFile will block on the done channel before returning from Sync + // call. 
+ sf := &syncDelayFile{ + File: f, + done: make(chan struct{}), + } + + // A basic commitEnv which writes to a WAL. + var wal *record.LogWriter + var walDone sync.WaitGroup + testEnv := commitEnv{ + logSeqNum: new(atomic.Uint64), + visibleSeqNum: new(atomic.Uint64), + apply: func(b *Batch, mem *memTable) error { + // At this point, we've called SyncRecord but the sync is blocked. + walDone.Done() + return nil + }, + write: func(b *Batch, syncWG *sync.WaitGroup, syncErr *error) (*memTable, error) { + _, err := wal.SyncRecord(b.data, syncWG, syncErr) + return nil, err + }, + } + p := newCommitPipeline(testEnv) + wal = record.NewLogWriter(sf, 0 /* logNum */, record.LogWriterConfig{ + WALFsyncLatency: prometheus.NewHistogram(prometheus.HistogramOpts{}), + QueueSemChan: p.logSyncQSem, + }) + + // Launch N (commitConcurrency) goroutines which each create a batch and + // commit it with sync==true. Because of the syncDelayFile, none of these + // operations can complete until syncDelayFile.done is closed. + errCh := make(chan error, cap(p.commitQueueSem)) + walDone.Add(cap(errCh)) + for i := 0; i < cap(errCh); i++ { + go func(i int) { + b := &Batch{} + if err := b.LogData([]byte("foo"), nil); err != nil { + errCh <- err + return + } + errCh <- p.Commit(b, true /* sync */, false) + }(i) + } + + // Wait for all of the WAL writes to queue up. This ensures we don't violate + // the concurrency requirements of LogWriter, and also ensures all of the WAL + // writes are queued. + walDone.Wait() + close(sf.done) + + // Close the WAL. A "queue is full" panic means that something is broken. 
+ require.NoError(t, wal.Close()) + for i := 0; i < cap(errCh); i++ { + require.NoError(t, <-errCh) + } +} + +func BenchmarkCommitPipeline(b *testing.B) { + for _, noSyncWait := range []bool{false, true} { + for _, parallelism := range []int{1, 2, 4, 8, 16, 32, 64, 128} { + b.Run(fmt.Sprintf("no-sync-wait=%t/parallel=%d", noSyncWait, parallelism), + func(b *testing.B) { + b.SetParallelism(parallelism) + mem := newMemTable(memTableOptions{}) + var wal *record.LogWriter + nullCommitEnv := commitEnv{ + logSeqNum: new(atomic.Uint64), + visibleSeqNum: new(atomic.Uint64), + apply: func(b *Batch, mem *memTable) error { + err := mem.apply(b, b.SeqNum()) + if err != nil { + return err + } + mem.writerUnref() + return nil + }, + write: func(b *Batch, syncWG *sync.WaitGroup, syncErr *error) (*memTable, error) { + for { + err := mem.prepare(b) + if err == arenaskl.ErrArenaFull { + mem = newMemTable(memTableOptions{}) + continue + } + if err != nil { + return nil, err + } + break + } + + _, err := wal.SyncRecord(b.data, syncWG, syncErr) + return mem, err + }, + } + p := newCommitPipeline(nullCommitEnv) + wal = record.NewLogWriter(io.Discard, 0, /* logNum */ + record.LogWriterConfig{ + WALFsyncLatency: prometheus.NewHistogram(prometheus.HistogramOpts{}), + QueueSemChan: p.logSyncQSem, + }) + const keySize = 8 + b.SetBytes(2 * keySize) + b.ResetTimer() + + b.RunParallel(func(pb *testing.PB) { + rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) + buf := make([]byte, keySize) + + for pb.Next() { + batch := newBatch(nil) + binary.BigEndian.PutUint64(buf, rng.Uint64()) + batch.Set(buf, buf, nil) + if err := p.Commit(batch, true /* sync */, noSyncWait); err != nil { + b.Fatal(err) + } + if noSyncWait { + if err := batch.SyncWait(); err != nil { + b.Fatal(err) + } + } + batch.release() + } + }) + }) + } + } +} diff --git a/pebble/compaction.go b/pebble/compaction.go new file mode 100644 index 0000000..42a709f --- /dev/null +++ b/pebble/compaction.go @@ -0,0 +1,3924 @@ +// 
Copyright 2013 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import ( + "bytes" + "context" + "fmt" + "io" + "math" + "runtime/pprof" + "sort" + "sync/atomic" + "time" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/invalidating" + "github.com/cockroachdb/pebble/internal/invariants" + "github.com/cockroachdb/pebble/internal/keyspan" + "github.com/cockroachdb/pebble/internal/manifest" + "github.com/cockroachdb/pebble/internal/private" + "github.com/cockroachdb/pebble/internal/rangedel" + "github.com/cockroachdb/pebble/internal/rangekey" + "github.com/cockroachdb/pebble/objstorage" + "github.com/cockroachdb/pebble/objstorage/objstorageprovider/objiotracing" + "github.com/cockroachdb/pebble/objstorage/remote" + "github.com/cockroachdb/pebble/shims/cmp" + "github.com/cockroachdb/pebble/shims/slices" + "github.com/cockroachdb/pebble/sstable" + "github.com/cockroachdb/pebble/vfs" +) + +var errEmptyTable = errors.New("pebble: empty table") + +// ErrCancelledCompaction is returned if a compaction is cancelled by a +// concurrent excise or ingest-split operation. +var ErrCancelledCompaction = errors.New("pebble: compaction cancelled by a concurrent operation, will retry compaction") + +var compactLabels = pprof.Labels("pebble", "compact") +var flushLabels = pprof.Labels("pebble", "flush") +var gcLabels = pprof.Labels("pebble", "gc") + +// getInternalWriterProperties accesses a private variable (in the +// internal/private package) initialized by the sstable Writer. This indirection +// is necessary to ensure non-Pebble users constructing sstables for ingestion +// are unable to set internal-only properties. 
+var getInternalWriterProperties = private.SSTableInternalProperties.(func(*sstable.Writer) *sstable.Properties) + +// expandedCompactionByteSizeLimit is the maximum number of bytes in all +// compacted files. We avoid expanding the lower level file set of a compaction +// if it would make the total compaction cover more than this many bytes. +func expandedCompactionByteSizeLimit(opts *Options, level int, availBytes uint64) uint64 { + v := uint64(25 * opts.Level(level).TargetFileSize) + + // Never expand a compaction beyond half the available capacity, divided + // by the maximum number of concurrent compactions. Each of the concurrent + // compactions may expand up to this limit, so this attempts to limit + // compactions to half of available disk space. Note that this will not + // prevent compaction picking from pursuing compactions that are larger + // than this threshold before expansion. + diskMax := (availBytes / 2) / uint64(opts.MaxConcurrentCompactions()) + if v > diskMax { + v = diskMax + } + return v +} + +// maxGrandparentOverlapBytes is the maximum bytes of overlap with level+1 +// before we stop building a single file in a level-1 to level compaction. +func maxGrandparentOverlapBytes(opts *Options, level int) uint64 { + return uint64(10 * opts.Level(level).TargetFileSize) +} + +// maxReadCompactionBytes is used to prevent read compactions which +// are too wide. +func maxReadCompactionBytes(opts *Options, level int) uint64 { + return uint64(10 * opts.Level(level).TargetFileSize) +} + +// noCloseIter wraps around a FragmentIterator, intercepting and eliding +// calls to Close. It is used during compaction to ensure that rangeDelIters +// are not closed prematurely. +type noCloseIter struct { + keyspan.FragmentIterator +} + +func (i noCloseIter) Close() error { + return nil +} + +type compactionLevel struct { + level int + files manifest.LevelSlice + // l0SublevelInfo contains information about L0 sublevels being compacted. 
+ // It's only set for the start level of a compaction starting out of L0 and + // is nil for all other compactions. + l0SublevelInfo []sublevelInfo +} + +func (cl compactionLevel) Clone() compactionLevel { + newCL := compactionLevel{ + level: cl.level, + files: cl.files.Reslice(func(start, end *manifest.LevelIterator) {}), + } + return newCL +} +func (cl compactionLevel) String() string { + return fmt.Sprintf(`Level %d, Files %s`, cl.level, cl.files) +} + +// Return output from compactionOutputSplitters. See comment on +// compactionOutputSplitter.shouldSplitBefore() on how this value is used. +type maybeSplit int + +const ( + noSplit maybeSplit = iota + splitNow +) + +// String implements the Stringer interface. +func (c maybeSplit) String() string { + if c == noSplit { + return "no-split" + } + return "split-now" +} + +// compactionOutputSplitter is an interface for encapsulating logic around +// switching the output of a compaction to a new output file. Additional +// constraints around switching compaction outputs that are specific to that +// compaction type (eg. flush splits) are implemented in +// compactionOutputSplitters that compose other child compactionOutputSplitters. +type compactionOutputSplitter interface { + // shouldSplitBefore returns whether we should split outputs before the + // specified "current key". The return value is splitNow or noSplit. + // splitNow means a split is advised before the specified key, and noSplit + // means no split is advised. If shouldSplitBefore(a) advises a split then + // shouldSplitBefore(b) should also advise a split given b >= a, until + // onNewOutput is called. + shouldSplitBefore(key *InternalKey, tw *sstable.Writer) maybeSplit + // onNewOutput updates internal splitter state when the compaction switches + // to a new sstable, and returns the next limit for the new output which + // would get used to truncate range tombstones if the compaction iterator + // runs out of keys. 
The limit returned MUST be > key according to the + // compaction's comparator. The specified key is the first key in the new + // output, or nil if this sstable will only contain range tombstones already + // in the fragmenter. + onNewOutput(key []byte) []byte +} + +// fileSizeSplitter is a compactionOutputSplitter that enforces target file +// sizes. This splitter splits to a new output file when the estimated file size +// is 0.5x-2x the target file size. If there are overlapping grandparent files, +// this splitter will attempt to split at a grandparent boundary. For example, +// consider the example where a compaction wrote 'd' to the current output file, +// and the next key has a user key 'g': +// +// previous key next key +// | | +// | | +// +---------------|----+ +--|----------+ +// grandparents: | 000006 | | | | 000007 | +// +---------------|----+ +--|----------+ +// a b d e f g i +// +// Splitting the output file F before 'g' will ensure that the current output +// file F does not overlap the grandparent file 000007. Aligning sstable +// boundaries like this can significantly reduce write amplification, since a +// subsequent compaction of F into the grandparent level will avoid needlessly +// rewriting any keys within 000007 that do not overlap F's bounds. Consider the +// following compaction: +// +// +----------------------+ +// input | | +// level +----------------------+ +// \/ +// +---------------+ +---------------+ +// output |XXXXXXX| | | |XXXXXXXX| +// level +---------------+ +---------------+ +// +// The input-level file overlaps two files in the output level, but only +// partially. The beginning of the first output-level file and the end of the +// second output-level file will be rewritten verbatim. This write I/O is +// "wasted" in the sense that no merging is being performed. +// +// To prevent the above waste, this splitter attempts to split output files +// before the start key of grandparent files. 
It still strives to write output +// files of approximately the target file size, by constraining this splitting +// at grandparent points to apply only if the current output's file size is +// about the right order of magnitude. +// +// Note that, unlike most other splitters, this splitter does not guarantee that +// it will advise splits only at user key change boundaries. +type fileSizeSplitter struct { + frontier frontier + targetFileSize uint64 + atGrandparentBoundary bool + boundariesObserved uint64 + nextGrandparent *fileMetadata + grandparents manifest.LevelIterator +} + +func newFileSizeSplitter( + f *frontiers, targetFileSize uint64, grandparents manifest.LevelIterator, +) *fileSizeSplitter { + s := &fileSizeSplitter{targetFileSize: targetFileSize} + s.nextGrandparent = grandparents.First() + s.grandparents = grandparents + if s.nextGrandparent != nil { + s.frontier.Init(f, s.nextGrandparent.Smallest.UserKey, s.reached) + } + return s +} + +func (f *fileSizeSplitter) reached(nextKey []byte) []byte { + f.atGrandparentBoundary = true + f.boundariesObserved++ + // NB: f.grandparents is a bounded iterator, constrained to the compaction + // key range. + f.nextGrandparent = f.grandparents.Next() + if f.nextGrandparent == nil { + return nil + } + // TODO(jackson): Should we also split before or immediately after + // grandparents' largest keys? Splitting before the start boundary prevents + // overlap with the grandparent. Also splitting after the end boundary may + // increase the probability of move compactions. + return f.nextGrandparent.Smallest.UserKey +} + +func (f *fileSizeSplitter) shouldSplitBefore(key *InternalKey, tw *sstable.Writer) maybeSplit { + atGrandparentBoundary := f.atGrandparentBoundary + + // Clear f.atGrandparentBoundary unconditionally. + // + // This is a bit subtle. 
Even if we do decide to split, it's possible that a + // higher-level splitter will ignore our request (eg, because we're between + // two internal keys with the same user key). In this case, the next call to + // shouldSplitBefore will find atGrandparentBoundary=false. This is + // desirable, because in this case we would've already written the earlier + // key with the same user key to the output file. The current output file is + // already doomed to overlap the grandparent whose bound triggered + // atGrandparentBoundary=true. We should continue on, waiting for the next + // grandparent boundary. + f.atGrandparentBoundary = false + + // If the key is a range tombstone, the EstimatedSize may not grow right + // away when a range tombstone is added to the fragmenter: It's dependent on + // whether or not this new range deletion will start a new fragment. + // Range deletions are rare, so we choose to simply not split yet. + // TODO(jackson): Reconsider this, and consider range keys too as a part of + // #2321. + if key.Kind() == InternalKeyKindRangeDelete || tw == nil { + return noSplit + } + + estSize := tw.EstimatedSize() + switch { + case estSize < f.targetFileSize/2: + // The estimated file size is less than half the target file size. Don't + // split it, even if currently aligned with a grandparent file because + // it's too small. + return noSplit + case estSize >= 2*f.targetFileSize: + // The estimated file size is double the target file size. Split it even + // if we were not aligned with a grandparent file boundary to avoid + // excessively exceeding the target file size. + return splitNow + case !atGrandparentBoundary: + // Don't split if we're not at a grandparent, except if we've exhausted + // all the grandparents overlapping this compaction's key range. Then we + // may want to split purely based on file size. + if f.nextGrandparent == nil { + // There are no more grandparents. 
Optimize for the target file size + // and split as soon as we hit the target file size. + if estSize >= f.targetFileSize { + return splitNow + } + } + return noSplit + default: + // INVARIANT: atGrandparentBoundary + // INVARIANT: targetSize/2 < estSize < 2*targetSize + // + // The estimated file size is close enough to the target file size that + // we should consider splitting. + // + // Determine whether to split now based on how many grandparent + // boundaries we have already observed while building this output file. + // The intuition here is that if the grandparent level is dense in this + // part of the keyspace, we're likely to continue to have more + // opportunities to split this file aligned with a grandparent. If this + // is the first grandparent boundary observed, we split immediately + // (we're already at ≥50% the target file size). Otherwise, each + // overlapping grandparent we've observed increases the minimum file + // size by 5% of the target file size, up to at most 90% of the target + // file size. + // + // TODO(jackson): The particular thresholds are somewhat unprincipled. + // This is the same heuristic as RocksDB implements. Is there a more + // principled formulation that can further reduce w-amp, produce files + // closer to the target file size, or is more understandable? + + // NB: Subtract 1 from `boundariesObserved` to account for the current + // boundary we're considering splitting at. `reached` will have + // incremented it at the same time it set `atGrandparentBoundary`. 
+ minBoundaries := f.boundariesObserved-1 + if minBoundaries > 8 { + minBoundaries = 8 + } + minimumPctOfTargetSize := 50 + 5*minBoundaries + if estSize < (minimumPctOfTargetSize*f.targetFileSize)/100 { + return noSplit + } + return splitNow + } +} + +func (f *fileSizeSplitter) onNewOutput(key []byte) []byte { + f.boundariesObserved = 0 + return nil +} + +func newLimitFuncSplitter(f *frontiers, limitFunc func(userKey []byte) []byte) *limitFuncSplitter { + s := &limitFuncSplitter{limitFunc: limitFunc} + s.frontier.Init(f, nil, s.reached) + return s +} + +type limitFuncSplitter struct { + frontier frontier + limitFunc func(userKey []byte) []byte + split maybeSplit +} + +func (lf *limitFuncSplitter) shouldSplitBefore(key *InternalKey, tw *sstable.Writer) maybeSplit { + return lf.split +} + +func (lf *limitFuncSplitter) reached(nextKey []byte) []byte { + lf.split = splitNow + return nil +} + +func (lf *limitFuncSplitter) onNewOutput(key []byte) []byte { + lf.split = noSplit + if key != nil { + // TODO(jackson): For some users, like L0 flush splits, there's no need + // to binary search over all the flush splits every time. The next split + // point must be ahead of the previous flush split point. + limit := lf.limitFunc(key) + lf.frontier.Update(limit) + return limit + } + lf.frontier.Update(nil) + return nil +} + +// splitterGroup is a compactionOutputSplitter that splits whenever one of its +// child splitters advises a compaction split. 
+type splitterGroup struct { + cmp Compare + splitters []compactionOutputSplitter +} + +func (a *splitterGroup) shouldSplitBefore( + key *InternalKey, tw *sstable.Writer, +) (suggestion maybeSplit) { + for _, splitter := range a.splitters { + if splitter.shouldSplitBefore(key, tw) == splitNow { + return splitNow + } + } + return noSplit +} + +func (a *splitterGroup) onNewOutput(key []byte) []byte { + var earliestLimit []byte + for _, splitter := range a.splitters { + limit := splitter.onNewOutput(key) + if limit == nil { + continue + } + if earliestLimit == nil || a.cmp(limit, earliestLimit) < 0 { + earliestLimit = limit + } + } + return earliestLimit +} + +// userKeyChangeSplitter is a compactionOutputSplitter that takes in a child +// splitter, and splits when 1) that child splitter has advised a split, and 2) +// the compaction output is at the boundary between two user keys (also +// the boundary between atomic compaction units). Use this splitter to wrap +// any splitters that don't guarantee user key splits (i.e. splitters that make +// their determination in ways other than comparing the current key against a +// limit key.) If a wrapped splitter advises a split, it must continue +// to advise a split until a new output. +type userKeyChangeSplitter struct { + cmp Compare + splitter compactionOutputSplitter + unsafePrevUserKey func() []byte +} + +func (u *userKeyChangeSplitter) shouldSplitBefore(key *InternalKey, tw *sstable.Writer) maybeSplit { + // NB: The userKeyChangeSplitter only needs to suffer a key comparison if + // the wrapped splitter requests a split. + // + // We could implement this splitter using frontiers: When the inner splitter + // requests a split before key `k`, we'd update a frontier to be + // ImmediateSuccessor(k). Then on the next key greater than >k, the + // frontier's `reached` func would be called and we'd return splitNow. 
+ // This doesn't really save work since duplicate user keys are rare, and it + // requires us to materialize the ImmediateSuccessor key. It also prevents + // us from splitting on the same key that the inner splitter requested a + // split for—instead we need to wait until the next key. The current + // implementation uses `unsafePrevUserKey` to gain access to the previous + // key which allows it to immediately respect the inner splitter if + // possible. + if split := u.splitter.shouldSplitBefore(key, tw); split != splitNow { + return split + } + if u.cmp(key.UserKey, u.unsafePrevUserKey()) > 0 { + return splitNow + } + return noSplit +} + +func (u *userKeyChangeSplitter) onNewOutput(key []byte) []byte { + return u.splitter.onNewOutput(key) +} + +// compactionWritable is a objstorage.Writable wrapper that, on every write, +// updates a metric in `versions` on bytes written by in-progress compactions so +// far. It also increments a per-compaction `written` int. +type compactionWritable struct { + objstorage.Writable + + versions *versionSet + written *int64 +} + +// Write is part of the objstorage.Writable interface. +func (c *compactionWritable) Write(p []byte) error { + if err := c.Writable.Write(p); err != nil { + return err + } + + *c.written += int64(len(p)) + c.versions.incrementCompactionBytes(int64(len(p))) + return nil +} + +type compactionKind int + +const ( + compactionKindDefault compactionKind = iota + compactionKindFlush + // compactionKindMove denotes a move compaction where the input file is + // retained and linked in a new level without being obsoleted. + compactionKindMove + // compactionKindCopy denotes a copy compaction where the input file is + // copied byte-by-byte into a new file with a new FileNum in the output level. 
+ compactionKindCopy + compactionKindDeleteOnly + compactionKindElisionOnly + compactionKindRead + compactionKindRewrite + compactionKindIngestedFlushable +) + +func (k compactionKind) String() string { + switch k { + case compactionKindDefault: + return "default" + case compactionKindFlush: + return "flush" + case compactionKindMove: + return "move" + case compactionKindDeleteOnly: + return "delete-only" + case compactionKindElisionOnly: + return "elision-only" + case compactionKindRead: + return "read" + case compactionKindRewrite: + return "rewrite" + case compactionKindIngestedFlushable: + return "ingested-flushable" + case compactionKindCopy: + return "copy" + } + return "?" +} + +// rangeKeyCompactionTransform is used to transform range key spans as part of the +// keyspan.MergingIter. As part of this transformation step, we can elide range +// keys in the last snapshot stripe, as well as coalesce range keys within +// snapshot stripes. +func rangeKeyCompactionTransform( + eq base.Equal, snapshots []uint64, elideRangeKey func(start, end []byte) bool, +) keyspan.Transformer { + return keyspan.TransformerFunc(func(cmp base.Compare, s keyspan.Span, dst *keyspan.Span) error { + elideInLastStripe := func(keys []keyspan.Key) []keyspan.Key { + // Unsets and deletes in the last snapshot stripe can be elided. + k := 0 + for j := range keys { + if elideRangeKey(s.Start, s.End) && + (keys[j].Kind() == InternalKeyKindRangeKeyUnset || keys[j].Kind() == InternalKeyKindRangeKeyDelete) { + continue + } + keys[k] = keys[j] + k++ + } + keys = keys[:k] + return keys + } + // snapshots are in ascending order, while s.keys are in descending seqnum + // order. Partition s.keys by snapshot stripes, and call rangekey.Coalesce + // on each partition. 
+ dst.Start = s.Start + dst.End = s.End + dst.Keys = dst.Keys[:0] + i, j := len(snapshots)-1, 0 + usedLen := 0 + for i >= 0 { + start := j + for j < len(s.Keys) && !base.Visible(s.Keys[j].SeqNum(), snapshots[i], base.InternalKeySeqNumMax) { + // Include j in current partition. + j++ + } + if j > start { + keysDst := dst.Keys[usedLen:cap(dst.Keys)] + if err := rangekey.Coalesce(cmp, eq, s.Keys[start:j], &keysDst); err != nil { + return err + } + if j == len(s.Keys) { + // This is the last snapshot stripe. Unsets and deletes can be elided. + keysDst = elideInLastStripe(keysDst) + } + usedLen += len(keysDst) + dst.Keys = append(dst.Keys, keysDst...) + } + i-- + } + if j < len(s.Keys) { + keysDst := dst.Keys[usedLen:cap(dst.Keys)] + if err := rangekey.Coalesce(cmp, eq, s.Keys[j:], &keysDst); err != nil { + return err + } + keysDst = elideInLastStripe(keysDst) + usedLen += len(keysDst) + dst.Keys = append(dst.Keys, keysDst...) + } + return nil + }) +} + +// compaction is a table compaction from one level to the next, starting from a +// given version. +type compaction struct { + // cancel is a bool that can be used by other goroutines to signal a compaction + // to cancel, such as if a conflicting excise operation raced it to manifest + // application. Only holders of the manifest lock will write to this atomic. + cancel atomic.Bool + + kind compactionKind + cmp Compare + equal Equal + comparer *base.Comparer + formatKey base.FormatKey + logger Logger + version *version + stats base.InternalIteratorStats + beganAt time.Time + // versionEditApplied is set to true when a compaction has completed and the + // resulting version has been installed (if successful), but the compaction + // goroutine is still cleaning up (eg, deleting obsolete files). + versionEditApplied bool + bufferPool sstable.BufferPool + + // startLevel is the level that is being compacted. Inputs from startLevel + // and outputLevel will be merged to produce a set of outputLevel files. 
+ startLevel *compactionLevel + + // outputLevel is the level that files are being produced in. outputLevel is + // equal to startLevel+1 except when: + // - if startLevel is 0, the output level equals compactionPicker.baseLevel(). + // - in multilevel compaction, the output level is the lowest level involved in + // the compaction + // A compaction's outputLevel is nil for delete-only compactions. + outputLevel *compactionLevel + + // extraLevels point to additional levels in between the input and output + // levels that get compacted in multilevel compactions + extraLevels []*compactionLevel + + inputs []compactionLevel + + // maxOutputFileSize is the maximum size of an individual table created + // during compaction. + maxOutputFileSize uint64 + // maxOverlapBytes is the maximum number of bytes of overlap allowed for a + // single output table with the tables in the grandparent level. + maxOverlapBytes uint64 + // disableSpanElision disables elision of range tombstones and range keys. Used + // by tests to allow range tombstones or range keys to be added to tables where + // they would otherwise be elided. + disableSpanElision bool + + // flushing contains the flushables (aka memtables) that are being flushed. + flushing flushableList + // bytesIterated contains the number of bytes that have been flushed/compacted. + bytesIterated uint64 + // bytesWritten contains the number of bytes that have been written to outputs. + bytesWritten int64 + + // The boundaries of the input data. + smallest InternalKey + largest InternalKey + + // The range deletion tombstone fragmenter. Adds range tombstones as they are + // returned from `compactionIter` and fragments them for output to files. + // Referenced by `compactionIter` which uses it to check whether keys are deleted. + rangeDelFrag keyspan.Fragmenter + // The range key fragmenter. Similar to rangeDelFrag in that it gets range + // keys from the compaction iter and fragments them for output to files. 
+ rangeKeyFrag keyspan.Fragmenter + // The range deletion tombstone iterator, that merges and fragments + // tombstones across levels. This iterator is included within the compaction + // input iterator as a single level. + // TODO(jackson): Remove this when the refactor of FragmentIterator, + // InterleavingIterator, etc is complete. + rangeDelIter keyspan.InternalIteratorShim + // rangeKeyInterleaving is the interleaving iter for range keys. + rangeKeyInterleaving keyspan.InterleavingIter + + // A list of objects to close when the compaction finishes. Used by input + // iteration to keep rangeDelIters open for the lifetime of the compaction, + // and only close them when the compaction finishes. + closers []io.Closer + + // grandparents are the tables in level+2 that overlap with the files being + // compacted. Used to determine output table boundaries. Do not assume that the actual files + // in the grandparent when this compaction finishes will be the same. + grandparents manifest.LevelSlice + + // Boundaries at which flushes to L0 should be split. Determined by + // L0Sublevels. If nil, flushes aren't split. + l0Limits [][]byte + + // List of disjoint inuse key ranges the compaction overlaps with in + // grandparent and lower levels. See setupInuseKeyRanges() for the + // construction. Used by elideTombstone() and elideRangeTombstone() to + // determine if keys affected by a tombstone possibly exist at a lower level. + inuseKeyRanges []manifest.UserKeyRange + // inuseEntireRange is set if the above inuse key ranges wholly contain the + // compaction's key range. This allows compactions in higher levels to often + // elide key comparisons. + inuseEntireRange bool + elideTombstoneIndex int + + // allowedZeroSeqNum is true if seqnums can be zeroed if there are no + // snapshots requiring them to be kept. This determination is made by + // looking for an sstable which overlaps the bounds of the compaction at a + // lower level in the LSM during runCompaction. 
+ allowedZeroSeqNum bool + + metrics map[int]*LevelMetrics + + pickerMetrics compactionPickerMetrics +} + +func (c *compaction) makeInfo(jobID int) CompactionInfo { + info := CompactionInfo{ + JobID: jobID, + Reason: c.kind.String(), + Input: make([]LevelInfo, 0, len(c.inputs)), + Annotations: []string{}, + } + for _, cl := range c.inputs { + inputInfo := LevelInfo{Level: cl.level, Tables: nil} + iter := cl.files.Iter() + for m := iter.First(); m != nil; m = iter.Next() { + inputInfo.Tables = append(inputInfo.Tables, m.TableInfo()) + } + info.Input = append(info.Input, inputInfo) + } + if c.outputLevel != nil { + info.Output.Level = c.outputLevel.level + + // If there are no inputs from the output level (eg, a move + // compaction), add an empty LevelInfo to info.Input. + if len(c.inputs) > 0 && c.inputs[len(c.inputs)-1].level != c.outputLevel.level { + info.Input = append(info.Input, LevelInfo{Level: c.outputLevel.level}) + } + } else { + // For a delete-only compaction, set the output level to L6. The + // output level is not meaningful here, but complicating the + // info.Output interface with a pointer doesn't seem worth the + // semantic distinction. 
+ info.Output.Level = numLevels - 1 + } + + for i, score := range c.pickerMetrics.scores { + info.Input[i].Score = score + } + info.SingleLevelOverlappingRatio = c.pickerMetrics.singleLevelOverlappingRatio + info.MultiLevelOverlappingRatio = c.pickerMetrics.multiLevelOverlappingRatio + if len(info.Input) > 2 { + info.Annotations = append(info.Annotations, "multilevel") + } + return info +} + +func newCompaction( + pc *pickedCompaction, opts *Options, beganAt time.Time, provider objstorage.Provider, +) *compaction { + c := &compaction{ + kind: compactionKindDefault, + cmp: pc.cmp, + equal: opts.equal(), + comparer: opts.Comparer, + formatKey: opts.Comparer.FormatKey, + inputs: pc.inputs, + smallest: pc.smallest, + largest: pc.largest, + logger: opts.Logger, + version: pc.version, + beganAt: beganAt, + maxOutputFileSize: pc.maxOutputFileSize, + maxOverlapBytes: pc.maxOverlapBytes, + pickerMetrics: pc.pickerMetrics, + } + c.startLevel = &c.inputs[0] + if pc.startLevel.l0SublevelInfo != nil { + c.startLevel.l0SublevelInfo = pc.startLevel.l0SublevelInfo + } + c.outputLevel = &c.inputs[1] + + if len(pc.extraLevels) > 0 { + c.extraLevels = pc.extraLevels + c.outputLevel = &c.inputs[len(c.inputs)-1] + } + // Compute the set of outputLevel+1 files that overlap this compaction (these + // are the grandparent sstables). + if c.outputLevel.level+1 < numLevels { + c.grandparents = c.version.Overlaps(c.outputLevel.level+1, c.cmp, + c.smallest.UserKey, c.largest.UserKey, c.largest.IsExclusiveSentinel()) + } + c.setupInuseKeyRanges() + c.kind = pc.kind + + if c.kind == compactionKindDefault && c.outputLevel.files.Empty() && !c.hasExtraLevelData() && + c.startLevel.files.Len() == 1 && c.grandparents.SizeSum() <= c.maxOverlapBytes { + // This compaction can be converted into a move or copy from one level + // to the next. We avoid such a move if there is lots of overlapping + // grandparent data. 
Otherwise, the move could create a parent file + // that will require a very expensive merge later on. + iter := c.startLevel.files.Iter() + meta := iter.First() + isRemote := false + // We should always be passed a provider, except in some unit tests. + if provider != nil { + objMeta, err := provider.Lookup(fileTypeTable, meta.FileBacking.DiskFileNum) + if err != nil { + panic(errors.Wrapf(err, "cannot lookup table %s in provider", meta.FileBacking.DiskFileNum)) + } + isRemote = objMeta.IsRemote() + } + // Avoid a trivial move or copy if all of these are true, as rewriting a + // new file is better: + // + // 1) The source file is a virtual sstable + // 2) The existing file `meta` is on non-remote storage + // 3) The output level prefers shared storage + mustCopy := !isRemote && remote.ShouldCreateShared(opts.Experimental.CreateOnShared, c.outputLevel.level) + if mustCopy { + // If the source is virtual, it's best to just rewrite the file as all + // conditions in the above comment are met. + if !meta.Virtual { + c.kind = compactionKindCopy + } + } else { + c.kind = compactionKindMove + } + } + return c +} + +func newDeleteOnlyCompaction( + opts *Options, cur *version, inputs []compactionLevel, beganAt time.Time, +) *compaction { + c := &compaction{ + kind: compactionKindDeleteOnly, + cmp: opts.Comparer.Compare, + equal: opts.equal(), + comparer: opts.Comparer, + formatKey: opts.Comparer.FormatKey, + logger: opts.Logger, + version: cur, + beganAt: beganAt, + inputs: inputs, + } + + // Set c.smallest, c.largest. + files := make([]manifest.LevelIterator, 0, len(inputs)) + for _, in := range inputs { + files = append(files, in.files.Iter()) + } + c.smallest, c.largest = manifest.KeyRange(opts.Comparer.Compare, files...) + return c +} + +func adjustGrandparentOverlapBytesForFlush(c *compaction, flushingBytes uint64) { + // Heuristic to place a lower bound on compaction output file size + // caused by Lbase. 
Prior to this heuristic we have observed an L0 in + // production with 310K files of which 290K files were < 10KB in size. + // Our hypothesis is that it was caused by L1 having 2600 files and + // ~10GB, such that each flush got split into many tiny files due to + // overlapping with most of the files in Lbase. + // + // The computation below is general in that it accounts + // for flushing different volumes of data (e.g. we may be flushing + // many memtables). For illustration, we consider the typical + // example of flushing a 64MB memtable. So 12.8MB output, + // based on the compression guess below. If the compressed bytes + // guess is an over-estimate we will end up with smaller files, + // and if an under-estimate we will end up with larger files. + // With a 2MB target file size, 7 files. We are willing to accept + // 4x the number of files, if it results in better write amplification + // when later compacting to Lbase, i.e., ~450KB files (target file + // size / 4). + // + // Note that this is a pessimistic heuristic in that + // fileCountUpperBoundDueToGrandparents could be far from the actual + // number of files produced due to the grandparent limits. For + // example, in the extreme, consider a flush that overlaps with 1000 + // files in Lbase f0...f999, and the initially calculated value of + // maxOverlapBytes will cause splits at f10, f20,..., f990, which + // means an upper bound file count of 100 files. Say the input bytes + // in the flush are such that acceptableFileCount=10. We will fatten + // up maxOverlapBytes by 10x to ensure that the upper bound file count + // drops to 10. However, it is possible that in practice, even without + // this change, we would have produced no more than 10 files, and that + // this change makes the files unnecessarily wide. Say the input bytes + // are distributed such that 10% are in f0...f9, 10% in f10...f19, ... + // 10% in f80...f89 and 10% in f990...f999. 
The original value of + // maxOverlapBytes would have actually produced only 10 sstables. But + // by increasing maxOverlapBytes by 10x, we may produce 1 sstable that + // spans f0...f89, i.e., a much wider sstable than necessary. + // + // We could produce a tighter estimate of + // fileCountUpperBoundDueToGrandparents if we had knowledge of the key + // distribution of the flush. The 4x multiplier mentioned earlier is + // a way to try to compensate for this pessimism. + // + // TODO(sumeer): we don't have compression info for the data being + // flushed, but it is likely that existing files that overlap with + // this flush in Lbase are representative wrt compression ratio. We + // could store the uncompressed size in FileMetadata and estimate + // the compression ratio. + const approxCompressionRatio = 0.2 + approxOutputBytes := approxCompressionRatio * float64(flushingBytes) + approxNumFilesBasedOnTargetSize := + int(math.Ceil(approxOutputBytes / float64(c.maxOutputFileSize))) + acceptableFileCount := float64(4 * approxNumFilesBasedOnTargetSize) + // The byte calculation is linear in numGrandparentFiles, but we will + // incur this linear cost in findGrandparentLimit too, so we are also + // willing to pay it now. We could approximate this cheaply by using + // the mean file size of Lbase. 
+ grandparentFileBytes := c.grandparents.SizeSum() + fileCountUpperBoundDueToGrandparents := + float64(grandparentFileBytes) / float64(c.maxOverlapBytes) + if fileCountUpperBoundDueToGrandparents > acceptableFileCount { + c.maxOverlapBytes = uint64( + float64(c.maxOverlapBytes) * + (fileCountUpperBoundDueToGrandparents / acceptableFileCount)) + } +} + +func newFlush( + opts *Options, cur *version, baseLevel int, flushing flushableList, beganAt time.Time, +) *compaction { + c := &compaction{ + kind: compactionKindFlush, + cmp: opts.Comparer.Compare, + equal: opts.equal(), + comparer: opts.Comparer, + formatKey: opts.Comparer.FormatKey, + logger: opts.Logger, + version: cur, + beganAt: beganAt, + inputs: []compactionLevel{{level: -1}, {level: 0}}, + maxOutputFileSize: math.MaxUint64, + maxOverlapBytes: math.MaxUint64, + flushing: flushing, + } + c.startLevel = &c.inputs[0] + c.outputLevel = &c.inputs[1] + + if len(flushing) > 0 { + if _, ok := flushing[0].flushable.(*ingestedFlushable); ok { + if len(flushing) != 1 { + panic("pebble: ingestedFlushable must be flushed one at a time.") + } + c.kind = compactionKindIngestedFlushable + return c + } + } + + // Make sure there's no ingestedFlushable after the first flushable in the + // list. 
+ for _, f := range flushing { + if _, ok := f.flushable.(*ingestedFlushable); ok { + panic("pebble: flushing shouldn't contain ingestedFlushable flushable") + } + } + + if cur.L0Sublevels != nil { + c.l0Limits = cur.L0Sublevels.FlushSplitKeys() + } + + smallestSet, largestSet := false, false + updatePointBounds := func(iter internalIterator) { + if key, _ := iter.First(); key != nil { + if !smallestSet || + base.InternalCompare(c.cmp, c.smallest, *key) > 0 { + smallestSet = true + c.smallest = key.Clone() + } + } + if key, _ := iter.Last(); key != nil { + if !largestSet || + base.InternalCompare(c.cmp, c.largest, *key) < 0 { + largestSet = true + c.largest = key.Clone() + } + } + } + + updateRangeBounds := func(iter keyspan.FragmentIterator) { + // File bounds require s != nil && !s.Empty(). We only need to check for + // s != nil here, as the memtable's FragmentIterator would never surface + // empty spans. + if s := iter.First(); s != nil { + if key := s.SmallestKey(); !smallestSet || + base.InternalCompare(c.cmp, c.smallest, key) > 0 { + smallestSet = true + c.smallest = key.Clone() + } + } + if s := iter.Last(); s != nil { + if key := s.LargestKey(); !largestSet || + base.InternalCompare(c.cmp, c.largest, key) < 0 { + largestSet = true + c.largest = key.Clone() + } + } + } + + var flushingBytes uint64 + for i := range flushing { + f := flushing[i] + updatePointBounds(f.newIter(nil)) + if rangeDelIter := f.newRangeDelIter(nil); rangeDelIter != nil { + updateRangeBounds(rangeDelIter) + } + if rangeKeyIter := f.newRangeKeyIter(nil); rangeKeyIter != nil { + updateRangeBounds(rangeKeyIter) + } + flushingBytes += f.inuseBytes() + } + + if opts.FlushSplitBytes > 0 { + c.maxOutputFileSize = uint64(opts.Level(0).TargetFileSize) + c.maxOverlapBytes = maxGrandparentOverlapBytes(opts, 0) + c.grandparents = c.version.Overlaps(baseLevel, c.cmp, c.smallest.UserKey, + c.largest.UserKey, c.largest.IsExclusiveSentinel()) + adjustGrandparentOverlapBytesForFlush(c, flushingBytes) 
+ } + + c.setupInuseKeyRanges() + return c +} + +func (c *compaction) hasExtraLevelData() bool { + if len(c.extraLevels) == 0 { + // not a multi level compaction + return false + } else if c.extraLevels[0].files.Empty() { + // a multi level compaction without data in the intermediate input level; + // e.g. for a multi level compaction with levels 4,5, and 6, this could + // occur if there is no files to compact in 5, or in 5 and 6 (i.e. a move). + return false + } + return true +} + +func (c *compaction) setupInuseKeyRanges() { + level := c.outputLevel.level + 1 + if c.outputLevel.level == 0 { + level = 0 + } + // calculateInuseKeyRanges will return a series of sorted spans. Overlapping + // or abutting spans have already been merged. + c.inuseKeyRanges = calculateInuseKeyRanges( + c.version, c.cmp, level, numLevels-1, c.smallest.UserKey, c.largest.UserKey, + ) + // Check if there's a single in-use span that encompasses the entire key + // range of the compaction. This is an optimization to avoid key comparisons + // against inuseKeyRanges during the compaction when every key within the + // compaction overlaps with an in-use span. + if len(c.inuseKeyRanges) > 0 { + c.inuseEntireRange = c.cmp(c.inuseKeyRanges[0].Start, c.smallest.UserKey) <= 0 && + c.cmp(c.inuseKeyRanges[0].End, c.largest.UserKey) >= 0 + } +} + +func calculateInuseKeyRanges( + v *version, cmp base.Compare, level, maxLevel int, smallest, largest []byte, +) []manifest.UserKeyRange { + // Use two slices, alternating which one is input and which one is output + // as we descend the LSM. + var input, output []manifest.UserKeyRange + + // L0 requires special treatment, since sstables within L0 may overlap. + // We use the L0 Sublevels structure to efficiently calculate the merged + // in-use key ranges. 
+ if level == 0 { + output = v.L0Sublevels.InUseKeyRanges(smallest, largest) + level++ + } + + for ; level <= maxLevel; level++ { + // NB: We always treat `largest` as inclusive for simplicity, because + // there's little consequence to calculating slightly broader in-use key + // ranges. + overlaps := v.Overlaps(level, cmp, smallest, largest, false /* exclusiveEnd */) + iter := overlaps.Iter() + + // We may already have in-use key ranges from higher levels. Iterate + // through both our accumulated in-use key ranges and this level's + // files, merging the two. + // + // Tables higher within the LSM have broader key spaces. We use this + // when possible to seek past a level's files that are contained by + // our current accumulated in-use key ranges. This helps avoid + // per-sstable work during flushes or compactions in high levels which + // overlap the majority of the LSM's sstables. + input, output = output, input + output = output[:0] + + var currFile *fileMetadata + var currAccum *manifest.UserKeyRange + if len(input) > 0 { + currAccum, input = &input[0], input[1:] + } + + // If we have an accumulated key range and its start is ≤ smallest, + // we can seek to the accumulated range's end. Otherwise, we need to + // start at the first overlapping file within the level. + if currAccum != nil && cmp(currAccum.Start, smallest) <= 0 { + currFile = seekGT(&iter, cmp, currAccum.End) + } else { + currFile = iter.First() + } + + for currFile != nil || currAccum != nil { + // If we've exhausted either the files in the level or the + // accumulated key ranges, we just need to append the one we have. + // If we have both a currFile and a currAccum, they either overlap + // or they're disjoint. If they're disjoint, we append whichever + // one sorts first and move on to the next file or range. If they + // overlap, we merge them into currAccum and proceed to the next + // file. 
+ switch { + case currAccum == nil || (currFile != nil && cmp(currFile.Largest.UserKey, currAccum.Start) < 0): + // This file is strictly before the current accumulated range, + // or there are no more accumulated ranges. + output = append(output, manifest.UserKeyRange{ + Start: currFile.Smallest.UserKey, + End: currFile.Largest.UserKey, + }) + currFile = iter.Next() + case currFile == nil || (currAccum != nil && cmp(currAccum.End, currFile.Smallest.UserKey) < 0): + // The current accumulated key range is strictly before the + // current file, or there are no more files. + output = append(output, *currAccum) + currAccum = nil + if len(input) > 0 { + currAccum, input = &input[0], input[1:] + } + default: + // The current accumulated range and the current file overlap. + // Adjust the accumulated range to be the union. + if cmp(currFile.Smallest.UserKey, currAccum.Start) < 0 { + currAccum.Start = currFile.Smallest.UserKey + } + if cmp(currFile.Largest.UserKey, currAccum.End) > 0 { + currAccum.End = currFile.Largest.UserKey + } + + // Extending `currAccum`'s end boundary may have caused it to + // overlap with `input` key ranges that we haven't processed + // yet. Merge any such key ranges. + for len(input) > 0 && cmp(input[0].Start, currAccum.End) <= 0 { + if cmp(input[0].End, currAccum.End) > 0 { + currAccum.End = input[0].End + } + input = input[1:] + } + // Seek the level iterator past our current accumulated end. + currFile = seekGT(&iter, cmp, currAccum.End) + } + } + } + return output +} + +func seekGT(iter *manifest.LevelIterator, cmp base.Compare, key []byte) *manifest.FileMetadata { + f := iter.SeekGE(cmp, key) + for f != nil && cmp(f.Largest.UserKey, key) == 0 { + f = iter.Next() + } + return f +} + +// findGrandparentLimit takes the start user key for a table and returns the +// user key to which that table can extend without excessively overlapping +// the grandparent level. 
If no limit is needed considering the grandparent
+// files, this function returns nil. This is done in order to prevent a table
+// at level N from overlapping too much data at level N+1. We want to avoid
+// such large overlaps because they translate into large compactions. The
+// current heuristic stops output of a table if the addition of another key
+// would cause the table to overlap more than 10x the target file size at
+// level N. See maxGrandparentOverlapBytes.
+func (c *compaction) findGrandparentLimit(start []byte) []byte {
+ iter := c.grandparents.Iter()
+ var overlappedBytes uint64
+ var greater bool
+ for f := iter.SeekGE(c.cmp, start); f != nil; f = iter.Next() {
+ overlappedBytes += f.Size
+ // To ensure forward progress we always return a larger user
+ // key than where we started. See comments above clients of
+ // this function for how this is used.
+ greater = greater || c.cmp(f.Smallest.UserKey, start) > 0
+ if !greater {
+ continue
+ }
+
+ // We return the smallest bound of a sstable rather than the
+ // largest because the smallest is always inclusive, and limits
+ // are used exclusively when truncating range tombstones. If we
+ // truncated an output to the largest key while there's a
+ // pending tombstone, the next output file would also overlap
+ // the same grandparent f.
+ if overlappedBytes > c.maxOverlapBytes {
+ return f.Smallest.UserKey
+ }
+ }
+ return nil
+}
+
+// findL0Limit takes the start key for a table and returns the user key to which
+// that table can be extended without hitting the next l0Limit. Having flushed
+// sstables "bridging across" an l0Limit could lead to increased L0 -> LBase
+// compaction sizes as well as elevated read amplification. 
+func (c *compaction) findL0Limit(start []byte) []byte { + if c.startLevel.level > -1 || c.outputLevel.level != 0 || len(c.l0Limits) == 0 { + return nil + } + index := sort.Search(len(c.l0Limits), func(i int) bool { + return c.cmp(c.l0Limits[i], start) > 0 + }) + if index < len(c.l0Limits) { + return c.l0Limits[index] + } + return nil +} + +// errorOnUserKeyOverlap returns an error if the last two written sstables in +// this compaction have revisions of the same user key present in both sstables, +// when it shouldn't (eg. when splitting flushes). +func (c *compaction) errorOnUserKeyOverlap(ve *versionEdit) error { + if n := len(ve.NewFiles); n > 1 { + meta := ve.NewFiles[n-1].Meta + prevMeta := ve.NewFiles[n-2].Meta + if !prevMeta.Largest.IsExclusiveSentinel() && + c.cmp(prevMeta.Largest.UserKey, meta.Smallest.UserKey) >= 0 { + return errors.Errorf("pebble: compaction split user key across two sstables: %s in %s and %s", + prevMeta.Largest.Pretty(c.formatKey), + prevMeta.FileNum, + meta.FileNum) + } + } + return nil +} + +// allowZeroSeqNum returns true if seqnum's can be zeroed if there are no +// snapshots requiring them to be kept. It performs this determination by +// looking for an sstable which overlaps the bounds of the compaction at a +// lower level in the LSM. +func (c *compaction) allowZeroSeqNum() bool { + return c.elideRangeTombstone(c.smallest.UserKey, c.largest.UserKey) +} + +// elideTombstone returns true if it is ok to elide a tombstone for the +// specified key. A return value of true guarantees that there are no key/value +// pairs at c.level+2 or higher that possibly contain the specified user +// key. The keys in multiple invocations to elideTombstone must be supplied in +// order. 
+func (c *compaction) elideTombstone(key []byte) bool { + if c.inuseEntireRange || len(c.flushing) != 0 { + return false + } + + for ; c.elideTombstoneIndex < len(c.inuseKeyRanges); c.elideTombstoneIndex++ { + r := &c.inuseKeyRanges[c.elideTombstoneIndex] + if c.cmp(key, r.End) <= 0 { + if c.cmp(key, r.Start) >= 0 { + return false + } + break + } + } + return true +} + +// elideRangeTombstone returns true if it is ok to elide the specified range +// tombstone. A return value of true guarantees that there are no key/value +// pairs at c.outputLevel.level+1 or higher that possibly overlap the specified +// tombstone. +func (c *compaction) elideRangeTombstone(start, end []byte) bool { + // Disable range tombstone elision if the testing knob for that is enabled, + // or if we are flushing memtables. The latter requirement is due to + // inuseKeyRanges not accounting for key ranges in other memtables that are + // being flushed in the same compaction. It's possible for a range tombstone + // in one memtable to overlap keys in a preceding memtable in c.flushing. + // + // This function is also used in setting allowZeroSeqNum, so disabling + // elision of range tombstones also disables zeroing of SeqNums. + // + // TODO(peter): we disable zeroing of seqnums during flushing to match + // RocksDB behavior and to avoid generating overlapping sstables during + // DB.replayWAL. When replaying WAL files at startup, we flush after each + // WAL is replayed building up a single version edit that is + // applied. Because we don't apply the version edit after each flush, this + // code doesn't know that L0 contains files and zeroing of seqnums should + // be disabled. That is fixable, but it seems safer to just match the + // RocksDB behavior for now. 
+ if c.disableSpanElision || len(c.flushing) != 0 { + return false + } + + lower := sort.Search(len(c.inuseKeyRanges), func(i int) bool { + return c.cmp(c.inuseKeyRanges[i].End, start) >= 0 + }) + upper := sort.Search(len(c.inuseKeyRanges), func(i int) bool { + return c.cmp(c.inuseKeyRanges[i].Start, end) > 0 + }) + return lower >= upper +} + +// elideRangeKey returns true if it is ok to elide the specified range key. A +// return value of true guarantees that there are no key/value pairs at +// c.outputLevel.level+1 or higher that possibly overlap the specified range key. +func (c *compaction) elideRangeKey(start, end []byte) bool { + // TODO(bilal): Track inuseKeyRanges separately for the range keyspace as + // opposed to the point keyspace. Once that is done, elideRangeTombstone + // can just check in the point keyspace, and this function can check for + // inuseKeyRanges in the range keyspace. + return c.elideRangeTombstone(start, end) +} + +// newInputIter returns an iterator over all the input tables in a compaction. +func (c *compaction) newInputIter( + newIters tableNewIters, newRangeKeyIter keyspan.TableNewSpanIter, snapshots []uint64, +) (_ internalIterator, retErr error) { + // Validate the ordering of compaction input files for defense in depth. + // TODO(jackson): Some of the CheckOrdering calls may be adapted to pass + // ProhibitSplitUserKeys if we thread the active format major version in. Or + // if we remove support for earlier FMVs, we can remove the parameter + // altogether. 
+ if len(c.flushing) == 0 { + if c.startLevel.level >= 0 { + err := manifest.CheckOrdering(c.cmp, c.formatKey, + manifest.Level(c.startLevel.level), c.startLevel.files.Iter(), + manifest.AllowSplitUserKeys) + if err != nil { + return nil, err + } + } + err := manifest.CheckOrdering(c.cmp, c.formatKey, + manifest.Level(c.outputLevel.level), c.outputLevel.files.Iter(), + manifest.AllowSplitUserKeys) + if err != nil { + return nil, err + } + if c.startLevel.level == 0 { + if c.startLevel.l0SublevelInfo == nil { + panic("l0SublevelInfo not created for compaction out of L0") + } + for _, info := range c.startLevel.l0SublevelInfo { + err := manifest.CheckOrdering(c.cmp, c.formatKey, + info.sublevel, info.Iter(), + // NB: L0 sublevels have never allowed split user keys. + manifest.ProhibitSplitUserKeys) + if err != nil { + return nil, err + } + } + } + if len(c.extraLevels) > 0 { + if len(c.extraLevels) > 1 { + panic("n>2 multi level compaction not implemented yet") + } + interLevel := c.extraLevels[0] + err := manifest.CheckOrdering(c.cmp, c.formatKey, + manifest.Level(interLevel.level), interLevel.files.Iter(), + manifest.AllowSplitUserKeys) + if err != nil { + return nil, err + } + } + } + + // There are three classes of keys that a compaction needs to process: point + // keys, range deletion tombstones and range keys. Collect all iterators for + // all these classes of keys from all the levels. We'll aggregate them + // together farther below. + // + // numInputLevels is an approximation of the number of iterator levels. Due + // to idiosyncrasies in iterator construction, we may (rarely) exceed this + // initial capacity. 
+ numInputLevels := len(c.flushing)
+ if numInputLevels < len(c.inputs) {
+ numInputLevels = len(c.inputs)
+ }
+ iters := make([]internalIterator, 0, numInputLevels)
+ rangeDelIters := make([]keyspan.FragmentIterator, 0, numInputLevels)
+ rangeKeyIters := make([]keyspan.FragmentIterator, 0, numInputLevels)
+
+ // If construction of the iterator inputs fails, ensure that we close all
+ // the constituent iterators.
+ defer func() {
+ if retErr != nil {
+ for _, iter := range iters {
+ if iter != nil {
+ iter.Close()
+ }
+ }
+ for _, rangeDelIter := range rangeDelIters {
+ rangeDelIter.Close()
+ }
+ }
+ }()
+ iterOpts := IterOptions{
+ CategoryAndQoS: sstable.CategoryAndQoS{
+ Category: "pebble-compaction",
+ QoSLevel: sstable.NonLatencySensitiveQoSLevel,
+ },
+ logger: c.logger,
+ }
+
+ // Populate iters, rangeDelIters and rangeKeyIters with the appropriate
+ // constituent iterators. This depends on whether this is a flush or a
+ // compaction.
+ if len(c.flushing) != 0 {
+ // If flushing, we need to build the input iterators over the memtables
+ // stored in c.flushing.
+ for i := range c.flushing {
+ f := c.flushing[i]
+ iters = append(iters, f.newFlushIter(nil, &c.bytesIterated))
+ rangeDelIter := f.newRangeDelIter(nil)
+ if rangeDelIter != nil {
+ rangeDelIters = append(rangeDelIters, rangeDelIter)
+ }
+ if rangeKeyIter := f.newRangeKeyIter(nil); rangeKeyIter != nil {
+ rangeKeyIters = append(rangeKeyIters, rangeKeyIter)
+ }
+ }
+ } else {
+ addItersForLevel := func(level *compactionLevel, l manifest.Level) error {
+ // Add a *levelIter for point iterators. Because we don't call
+ // initRangeDel, the levelIter will close and forget the range
+ // deletion iterator when it steps on to a new file. Surfacing range
+ // deletions to compactions are handled below. 
+ iters = append(iters, newLevelIter(context.Background(), + iterOpts, c.comparer, newIters, level.files.Iter(), l, internalIterOpts{ + bytesIterated: &c.bytesIterated, + bufferPool: &c.bufferPool, + })) + // TODO(jackson): Use keyspan.LevelIter to avoid loading all the range + // deletions into memory upfront. (See #2015, which reverted this.) + // There will be no user keys that are split between sstables + // within a level in Cockroach 23.1, which unblocks this optimization. + + // Add the range deletion iterator for each file as an independent level + // in mergingIter, as opposed to making a levelIter out of those. This + // is safer as levelIter expects all keys coming from underlying + // iterators to be in order. Due to compaction / tombstone writing + // logic in finishOutput(), it is possible for range tombstones to not + // be strictly ordered across all files in one level. + // + // Consider this example from the metamorphic tests (also repeated in + // finishOutput()), consisting of three L3 files with their bounds + // specified in square brackets next to the file name: + // + // ./000240.sst [tmgc#391,MERGE-tmgc#391,MERGE] + // tmgc#391,MERGE [786e627a] + // tmgc-udkatvs#331,RANGEDEL + // + // ./000241.sst [tmgc#384,MERGE-tmgc#384,MERGE] + // tmgc#384,MERGE [666c7070] + // tmgc-tvsalezade#383,RANGEDEL + // tmgc-tvsalezade#331,RANGEDEL + // + // ./000242.sst [tmgc#383,RANGEDEL-tvsalezade#72057594037927935,RANGEDEL] + // tmgc-tvsalezade#383,RANGEDEL + // tmgc#375,SET [72646c78766965616c72776865676e79] + // tmgc-tvsalezade#356,RANGEDEL + // + // Here, the range tombstone in 000240.sst falls "after" one in + // 000241.sst, despite 000240.sst being ordered "before" 000241.sst for + // levelIter's purposes. While each file is still consistent before its + // bounds, it's safer to have all rangedel iterators be visible to + // mergingIter. 
+ iter := level.files.Iter() + for f := iter.First(); f != nil; f = iter.Next() { + rangeDelIter, closer, err := c.newRangeDelIter( + newIters, iter.Take(), iterOpts, l, &c.bytesIterated) + if err != nil { + // The error will already be annotated with the BackingFileNum, so + // we annotate it with the FileNum. + return errors.Wrapf(err, "pebble: could not open table %s", errors.Safe(f.FileNum)) + } + if rangeDelIter == nil { + continue + } + rangeDelIters = append(rangeDelIters, rangeDelIter) + c.closers = append(c.closers, closer) + } + + // Check if this level has any range keys. + hasRangeKeys := false + for f := iter.First(); f != nil; f = iter.Next() { + if f.HasRangeKeys { + hasRangeKeys = true + break + } + } + if hasRangeKeys { + li := &keyspan.LevelIter{} + newRangeKeyIterWrapper := func(file *manifest.FileMetadata, iterOptions keyspan.SpanIterOptions) (keyspan.FragmentIterator, error) { + iter, err := newRangeKeyIter(file, iterOptions) + if err != nil { + return nil, err + } else if iter == nil { + return emptyKeyspanIter, nil + } + // Ensure that the range key iter is not closed until the compaction is + // finished. This is necessary because range key processing + // requires the range keys to be held in memory for up to the + // lifetime of the compaction. + c.closers = append(c.closers, iter) + iter = noCloseIter{iter} + + // We do not need to truncate range keys to sstable boundaries, or + // only read within the file's atomic compaction units, unlike with + // range tombstones. This is because range keys were added after we + // stopped splitting user keys across sstables, so all the range keys + // in this sstable must wholly lie within the file's bounds. 
+ return iter, err + } + li.Init(keyspan.SpanIterOptions{}, c.cmp, newRangeKeyIterWrapper, level.files.Iter(), l, manifest.KeyTypeRange) + rangeKeyIters = append(rangeKeyIters, li) + } + return nil + } + + for i := range c.inputs { + // If the level is annotated with l0SublevelInfo, expand it into one + // level per sublevel. + // TODO(jackson): Perform this expansion even earlier when we pick the + // compaction? + if len(c.inputs[i].l0SublevelInfo) > 0 { + for _, info := range c.startLevel.l0SublevelInfo { + sublevelCompactionLevel := &compactionLevel{0, info.LevelSlice, nil} + if err := addItersForLevel(sublevelCompactionLevel, info.sublevel); err != nil { + return nil, err + } + } + continue + } + if err := addItersForLevel(&c.inputs[i], manifest.Level(c.inputs[i].level)); err != nil { + return nil, err + } + } + } + + // In normal operation, levelIter iterates over the point operations in a + // level, and initializes a rangeDelIter pointer for the range deletions in + // each table. During compaction, we want to iterate over the merged view of + // point operations and range deletions. In order to do this we create one + // levelIter per level to iterate over the point operations, and collect up + // all the range deletion files. + // + // The range deletion levels are first combined with a keyspan.MergingIter + // (currently wrapped by a keyspan.InternalIteratorShim to satisfy the + // internal iterator interface). The resulting merged rangedel iterator is + // then included with the point levels in a single mergingIter. + // + // Combine all the rangedel iterators using a keyspan.MergingIterator and a + // InternalIteratorShim so that the range deletions may be interleaved in + // the compaction input. + // TODO(jackson): Replace the InternalIteratorShim with an interleaving + // iterator. + if len(rangeDelIters) > 0 { + c.rangeDelIter.Init(c.cmp, rangeDelIters...) 
+ iters = append(iters, &c.rangeDelIter)
+ }
+
+ // If there's only one constituent point iterator, we can avoid the overhead
+ // of a *mergingIter. This is possible, for example, when performing a flush
+ // of a single memtable. Otherwise, combine all the iterators into a merging
+ // iter.
+ iter := iters[0]
+ if len(iters) > 1 {
+ iter = newMergingIter(c.logger, &c.stats, c.cmp, nil, iters...)
+ }
+ // If there are range key iterators, we need to combine them using
+ // keyspan.MergingIter, and then interleave them among the points.
+ if len(rangeKeyIters) > 0 {
+ mi := &keyspan.MergingIter{}
+ mi.Init(c.cmp, rangeKeyCompactionTransform(c.equal, snapshots, c.elideRangeKey), new(keyspan.MergingBuffers), rangeKeyIters...)
+ di := &keyspan.DefragmentingIter{}
+ di.Init(c.comparer, mi, keyspan.DefragmentInternal, keyspan.StaticDefragmentReducer, new(keyspan.DefragmentingBuffers))
+ c.rangeKeyInterleaving.Init(c.comparer, iter, di, keyspan.InterleavingIterOpts{})
+ iter = &c.rangeKeyInterleaving
+ }
+ return iter, nil
+}
+
+func (c *compaction) newRangeDelIter(
+ newIters tableNewIters,
+ f manifest.LevelFile,
+ opts IterOptions,
+ l manifest.Level,
+ bytesIterated *uint64,
+) (keyspan.FragmentIterator, io.Closer, error) {
+ opts.level = l
+ iter, rangeDelIter, err := newIters(context.Background(), f.FileMetadata,
+ &opts, internalIterOpts{
+ bytesIterated: &c.bytesIterated,
+ bufferPool: &c.bufferPool,
+ })
+ if err != nil {
+ return nil, nil, err
+ }
+ // TODO(peter): It is mildly wasteful to open the point iterator only to
+ // immediately close it. One way to solve this would be to add new
+ // methods to tableCache for creating point and range-deletion iterators
+ // independently. We'd only want to use those methods here,
+ // though. Doesn't seem worth the hassle in the near term.
+ if err = iter.Close(); err != nil { + if rangeDelIter != nil { + err = errors.CombineErrors(err, rangeDelIter.Close()) + } + return nil, nil, err + } + if rangeDelIter == nil { + // The file doesn't contain any range deletions. + return nil, nil, nil + } + + // Ensure that rangeDelIter is not closed until the compaction is + // finished. This is necessary because range tombstone processing + // requires the range tombstones to be held in memory for up to the + // lifetime of the compaction. + closer := rangeDelIter + rangeDelIter = noCloseIter{rangeDelIter} + + // Truncate the range tombstones returned by the iterator to the + // upper bound of the atomic compaction unit of the file. We want to + // truncate the range tombstone to the bounds of the file, but files + // with split user keys pose an obstacle: The file's largest bound + // is inclusive whereas the range tombstone's end is exclusive. + // + // Consider the example: + // + // 000001:[b-f#200] range del [c,k) + // 000002:[f#190-g#inf] range del [c,k) + // 000003:[g#500-i#3] + // + // Files 000001 and 000002 contain the untruncated range tombstones + // [c,k). While the keyspace covered by 000003 was at one point + // deleted by the tombstone [c,k), the tombstone may have already + // been compacted away and the file does not contain an untruncated + // range tombstone. We want to bound 000001's tombstone to the file + // bounds, but it's not possible to encode a range tombstone with an + // end boundary within a user key (eg, between sequence numbers + // f#200 and f#190). Instead, we expand 000001 to its atomic + // compaction unit (000001 and 000002) and truncate the tombstone to + // g#inf. + // + // NB: We must not use the atomic compaction unit of the entire + // compaction, because the [c,k) tombstone contained in the file + // 000001 ≥ g. If 000001, 000002 and 000003 are all included in the + // same compaction, the compaction's atomic compaction unit includes + // 000003. 
However 000003's keys must not be covered by 000001's
+ // untruncated range tombstone.
+ //
+ // Note that we need to do this truncation at read time in order to
+ // handle sstables generated by RocksDB and earlier versions of
+ // Pebble which do not truncate range tombstones to atomic
+ // compaction unit boundaries at write time.
+ //
+ // The current Pebble compaction logic DOES truncate tombstones to
+ // atomic unit boundaries at compaction time too.
+ atomicUnit, _ := expandToAtomicUnit(c.cmp, f.Slice(), true /* disableIsCompacting */)
+ lowerBound, upperBound := manifest.KeyRange(c.cmp, atomicUnit.Iter())
+ // Range deletion tombstones are often written to sstables
+ // untruncated on the end key side. However, they are still only
+ // valid within a given file's bounds. The logic for writing range
+ // tombstones to an output file sometimes has an incomplete view
+ // of range tombstones outside the file's internal key bounds. Skip
+ // any range tombstones completely outside file bounds.
+ rangeDelIter = keyspan.Truncate(
+ c.cmp, rangeDelIter, lowerBound.UserKey, upperBound.UserKey,
+ &f.Smallest, &f.Largest, false, /* panicOnUpperTruncate */
+ )
+ return rangeDelIter, closer, nil
+}
+
+func (c *compaction) String() string {
+ if len(c.flushing) != 0 {
+ return "flush\n"
+ }
+
+ var buf bytes.Buffer
+ for level := c.startLevel.level; level <= c.outputLevel.level; level++ {
+ i := level - c.startLevel.level
+ fmt.Fprintf(&buf, "%d:", level)
+ iter := c.inputs[i].files.Iter()
+ for f := iter.First(); f != nil; f = iter.Next() {
+ fmt.Fprintf(&buf, " %s:%s-%s", f.FileNum, f.Smallest, f.Largest)
+ }
+ fmt.Fprintf(&buf, "\n")
+ }
+ return buf.String()
+}
+
+type manualCompaction struct {
+ // Count of the retries either due to too many concurrent compactions, or a
+ // concurrent compaction to overlapping levels.
+ retries int + level int + outputLevel int + done chan error + start []byte + end []byte + split bool +} + +type readCompaction struct { + level int + // [start, end] key ranges are used for de-duping. + start []byte + end []byte + + // The file associated with the compaction. + // If the file no longer belongs in the same + // level, then we skip the compaction. + fileNum base.FileNum +} + +func (d *DB) addInProgressCompaction(c *compaction) { + d.mu.compact.inProgress[c] = struct{}{} + var isBase, isIntraL0 bool + for _, cl := range c.inputs { + iter := cl.files.Iter() + for f := iter.First(); f != nil; f = iter.Next() { + if f.IsCompacting() { + d.opts.Logger.Fatalf("L%d->L%d: %s already being compacted", c.startLevel.level, c.outputLevel.level, f.FileNum) + } + f.SetCompactionState(manifest.CompactionStateCompacting) + if c.startLevel != nil && c.outputLevel != nil && c.startLevel.level == 0 { + if c.outputLevel.level == 0 { + f.IsIntraL0Compacting = true + isIntraL0 = true + } else { + isBase = true + } + } + } + } + + if (isIntraL0 || isBase) && c.version.L0Sublevels != nil { + l0Inputs := []manifest.LevelSlice{c.startLevel.files} + if isIntraL0 { + l0Inputs = append(l0Inputs, c.outputLevel.files) + } + if err := c.version.L0Sublevels.UpdateStateForStartedCompaction(l0Inputs, isBase); err != nil { + d.opts.Logger.Fatalf("could not update state for compaction: %s", err) + } + } +} + +// Removes compaction markers from files in a compaction. The rollback parameter +// indicates whether the compaction state should be rolled back to its original +// state in the case of an unsuccessful compaction. +// +// DB.mu must be held when calling this method, however this method can drop and +// re-acquire that mutex. All writes to the manifest for this compaction should +// have completed by this point. 
+func (d *DB) clearCompactingState(c *compaction, rollback bool) { + c.versionEditApplied = true + for _, cl := range c.inputs { + iter := cl.files.Iter() + for f := iter.First(); f != nil; f = iter.Next() { + if !f.IsCompacting() { + d.opts.Logger.Fatalf("L%d->L%d: %s not being compacted", c.startLevel.level, c.outputLevel.level, f.FileNum) + } + if !rollback { + // On success all compactions other than move-compactions transition the + // file into the Compacted state. Move-compacted files become eligible + // for compaction again and transition back to NotCompacting. + if c.kind != compactionKindMove { + f.SetCompactionState(manifest.CompactionStateCompacted) + } else { + f.SetCompactionState(manifest.CompactionStateNotCompacting) + } + } else { + // Else, on rollback, all input files unconditionally transition back to + // NotCompacting. + f.SetCompactionState(manifest.CompactionStateNotCompacting) + } + f.IsIntraL0Compacting = false + } + } + l0InProgress := inProgressL0Compactions(d.getInProgressCompactionInfoLocked(c)) + func() { + // InitCompactingFileInfo requires that no other manifest writes be + // happening in parallel with it, i.e. we're not in the midst of installing + // another version. Otherwise, it's possible that we've created another + // L0Sublevels instance, but not added it to the versions list, causing + // all the indices in FileMetadata to be inaccurate. To ensure this, + // grab the manifest lock. 
+ d.mu.versions.logLock() + defer d.mu.versions.logUnlock() + d.mu.versions.currentVersion().L0Sublevels.InitCompactingFileInfo(l0InProgress) + }() +} + +func (d *DB) calculateDiskAvailableBytes() uint64 { + if space, err := d.opts.FS.GetDiskUsage(d.dirname); err == nil { + d.diskAvailBytes.Store(space.AvailBytes) + return space.AvailBytes + } else if !errors.Is(err, vfs.ErrUnsupported) { + d.opts.EventListener.BackgroundError(err) + } + return d.diskAvailBytes.Load() +} + +func (d *DB) getDeletionPacerInfo() deletionPacerInfo { + var pacerInfo deletionPacerInfo + // Call GetDiskUsage after every file deletion. This may seem inefficient, + // but in practice this was observed to take constant time, regardless of + // volume size used, at least on linux with ext4 and zfs. All invocations + // take 10 microseconds or less. + pacerInfo.freeBytes = d.calculateDiskAvailableBytes() + d.mu.Lock() + pacerInfo.obsoleteBytes = d.mu.versions.metrics.Table.ObsoleteSize + pacerInfo.liveBytes = uint64(d.mu.versions.metrics.Total().Size) + d.mu.Unlock() + return pacerInfo +} + +// onObsoleteTableDelete is called to update metrics when an sstable is deleted. +func (d *DB) onObsoleteTableDelete(fileSize uint64) { + d.mu.Lock() + d.mu.versions.metrics.Table.ObsoleteCount-- + d.mu.versions.metrics.Table.ObsoleteSize -= fileSize + d.mu.Unlock() +} + +// maybeScheduleFlush schedules a flush if necessary. +// +// d.mu must be held when calling this. +func (d *DB) maybeScheduleFlush() { + if d.mu.compact.flushing || d.closed.Load() != nil || d.opts.ReadOnly { + return + } + if len(d.mu.mem.queue) <= 1 { + return + } + + if !d.passedFlushThreshold() { + return + } + + d.mu.compact.flushing = true + go d.flush() +} + +func (d *DB) passedFlushThreshold() bool { + var n int + var size uint64 + for ; n < len(d.mu.mem.queue)-1; n++ { + if !d.mu.mem.queue[n].readyForFlush() { + break + } + if d.mu.mem.queue[n].flushForced { + // A flush was forced. 
Pretend the memtable size is the configured + // size. See minFlushSize below. + size += d.opts.MemTableSize + } else { + size += d.mu.mem.queue[n].totalBytes() + } + } + if n == 0 { + // None of the immutable memtables are ready for flushing. + return false + } + + // Only flush once the sum of the queued memtable sizes exceeds half the + // configured memtable size. This prevents flushing of memtables at startup + // while we're undergoing the ramp period on the memtable size. See + // DB.newMemTable(). + minFlushSize := d.opts.MemTableSize / 2 + return size >= minFlushSize +} + +func (d *DB) maybeScheduleDelayedFlush(tbl *memTable, dur time.Duration) { + var mem *flushableEntry + for _, m := range d.mu.mem.queue { + if m.flushable == tbl { + mem = m + break + } + } + if mem == nil || mem.flushForced { + return + } + deadline := d.timeNow().Add(dur) + if !mem.delayedFlushForcedAt.IsZero() && deadline.After(mem.delayedFlushForcedAt) { + // Already scheduled to flush sooner than within `dur`. + return + } + mem.delayedFlushForcedAt = deadline + go func() { + timer := time.NewTimer(dur) + defer timer.Stop() + + select { + case <-d.closedCh: + return + case <-mem.flushed: + return + case <-timer.C: + d.commit.mu.Lock() + defer d.commit.mu.Unlock() + d.mu.Lock() + defer d.mu.Unlock() + + // NB: The timer may fire concurrently with a call to Close. If a + // Close call beat us to acquiring d.mu, d.closed holds ErrClosed, + // and it's too late to flush anything. Otherwise, the Close call + // will block on locking d.mu until we've finished scheduling the + // flush and set `d.mu.compact.flushing` to true. Close will wait + // for the current flush to complete. 
+ if d.closed.Load() != nil { + return + } + + if d.mu.mem.mutable == tbl { + d.makeRoomForWrite(nil) + } else { + mem.flushForced = true + } + d.maybeScheduleFlush() + } + }() +} + +func (d *DB) flush() { + pprof.Do(context.Background(), flushLabels, func(context.Context) { + flushingWorkStart := time.Now() + d.mu.Lock() + defer d.mu.Unlock() + idleDuration := flushingWorkStart.Sub(d.mu.compact.noOngoingFlushStartTime) + var bytesFlushed uint64 + var err error + if bytesFlushed, err = d.flush1(); err != nil { + // TODO(peter): count consecutive flush errors and backoff. + d.opts.EventListener.BackgroundError(err) + } + d.mu.compact.flushing = false + d.mu.compact.noOngoingFlushStartTime = time.Now() + workDuration := d.mu.compact.noOngoingFlushStartTime.Sub(flushingWorkStart) + d.mu.compact.flushWriteThroughput.Bytes += int64(bytesFlushed) + d.mu.compact.flushWriteThroughput.WorkDuration += workDuration + d.mu.compact.flushWriteThroughput.IdleDuration += idleDuration + // More flush work may have arrived while we were flushing, so schedule + // another flush if needed. + d.maybeScheduleFlush() + // The flush may have produced too many files in a level, so schedule a + // compaction if needed. + d.maybeScheduleCompaction() + d.mu.compact.cond.Broadcast() + }) +} + +// runIngestFlush is used to generate a flush version edit for sstables which +// were ingested as flushables. Both DB.mu and the manifest lock must be held +// while runIngestFlush is called. +func (d *DB) runIngestFlush(c *compaction) (*manifest.VersionEdit, error) { + if len(c.flushing) != 1 { + panic("pebble: ingestedFlushable must be flushed one at a time.") + } + + // Construct the VersionEdit, levelMetrics etc. + c.metrics = make(map[int]*LevelMetrics, numLevels) + // Finding the target level for ingestion must use the latest version + // after the logLock has been acquired. 
+ c.version = d.mu.versions.currentVersion() + + baseLevel := d.mu.versions.picker.getBaseLevel() + iterOpts := IterOptions{logger: d.opts.Logger} + ve := &versionEdit{} + var level int + var err error + var fileToSplit *fileMetadata + var ingestSplitFiles []ingestSplitFile + for _, file := range c.flushing[0].flushable.(*ingestedFlushable).files { + suggestSplit := d.opts.Experimental.IngestSplit != nil && d.opts.Experimental.IngestSplit() && + d.FormatMajorVersion() >= FormatVirtualSSTables + level, fileToSplit, err = ingestTargetLevel( + d.newIters, d.tableNewRangeKeyIter, iterOpts, d.opts.Comparer, + c.version, baseLevel, d.mu.compact.inProgress, file.FileMetadata, + suggestSplit, + ) + if err != nil { + return nil, err + } + ve.NewFiles = append(ve.NewFiles, newFileEntry{Level: level, Meta: file.FileMetadata}) + if fileToSplit != nil { + ingestSplitFiles = append(ingestSplitFiles, ingestSplitFile{ + ingestFile: file.FileMetadata, + splitFile: fileToSplit, + level: level, + }) + } + levelMetrics := c.metrics[level] + if levelMetrics == nil { + levelMetrics = &LevelMetrics{} + c.metrics[level] = levelMetrics + } + levelMetrics.BytesIngested += file.Size + levelMetrics.TablesIngested++ + } + + updateLevelMetricsOnExcise := func(m *fileMetadata, level int, added []newFileEntry) { + levelMetrics := c.metrics[level] + if levelMetrics == nil { + levelMetrics = &LevelMetrics{} + c.metrics[level] = levelMetrics + } + levelMetrics.NumFiles-- + levelMetrics.Size -= int64(m.Size) + for i := range added { + levelMetrics.NumFiles++ + levelMetrics.Size += int64(added[i].Meta.Size) + } + } + + if len(ingestSplitFiles) > 0 { + ve.DeletedFiles = make(map[manifest.DeletedFileEntry]*manifest.FileMetadata) + replacedFiles := make(map[base.FileNum][]newFileEntry) + if err := d.ingestSplit(ve, updateLevelMetricsOnExcise, ingestSplitFiles, replacedFiles); err != nil { + return nil, err + } + } + + return ve, nil +} + +// flush runs a compaction that copies the immutable memtables 
from memory to +// disk. +// +// d.mu must be held when calling this, but the mutex may be dropped and +// re-acquired during the course of this method. +func (d *DB) flush1() (bytesFlushed uint64, err error) { + // NB: The flushable queue can contain flushables of type ingestedFlushable. + // The sstables in ingestedFlushable.files must be placed into the appropriate + // level in the lsm. Let's say the flushable queue contains a prefix of + // regular immutable memtables, then an ingestedFlushable, and then the + // mutable memtable. When the flush of the ingestedFlushable is performed, + // it needs an updated view of the lsm. That is, the prefix of immutable + // memtables must have already been flushed. Similarly, if there are two + // contiguous ingestedFlushables in the queue, then the first flushable must + // be flushed, so that the second flushable can see an updated view of the + // lsm. + // + // Given the above, we restrict flushes to either some prefix of regular + // memtables, or a single flushable of type ingestedFlushable. The DB.flush + // function will call DB.maybeScheduleFlush again, so a new flush to finish + // the remaining flush work should be scheduled right away. + // + // NB: Large batches placed in the flushable queue share the WAL with the + // previous memtable in the queue. We must ensure the property that both the + // large batch and the memtable with which it shares a WAL are flushed + // together. The property ensures that the minimum unflushed log number + // isn't incremented incorrectly. Since a flushableBatch.readyToFlush always + // returns true, and since the large batch will always be placed right after + // the memtable with which it shares a WAL, the property is naturally + // ensured. The large batch will always be placed after the memtable with + // which it shares a WAL because we ensure it in DB.commitWrite by holding + // the commitPipeline.mu and then holding DB.mu. 
As an extra defensive + // measure, if we try to flush the memtable without also flushing the + // flushable batch in the same flush, since the memtable and flushableBatch + // have the same logNum, the logNum invariant check below will trigger. + var n, inputs int + var inputBytes uint64 + var ingest bool + for ; n < len(d.mu.mem.queue)-1; n++ { + if f, ok := d.mu.mem.queue[n].flushable.(*ingestedFlushable); ok { + if n == 0 { + // The first flushable is of type ingestedFlushable. Since these + // must be flushed individually, we perform a flush for just + // this. + if !f.readyForFlush() { + // This check is almost unnecessary, but we guard against it + // just in case this invariant changes in the future. + panic("pebble: ingestedFlushable should always be ready to flush.") + } + // By setting n = 1, we ensure that the first flushable(n == 0) + // is scheduled for a flush. The number of tables added is equal to the + // number of files in the ingest operation. + n = 1 + inputs = len(f.files) + ingest = true + break + } else { + // There was some prefix of flushables which weren't of type + // ingestedFlushable. So, perform a flush for those. + break + } + } + if !d.mu.mem.queue[n].readyForFlush() { + break + } + inputBytes += d.mu.mem.queue[n].inuseBytes() + } + if n == 0 { + // None of the immutable memtables are ready for flushing. + return 0, nil + } + if !ingest { + // Flushes of memtables add the prefix of n memtables from the flushable + // queue. + inputs = n + } + + // Require that every memtable being flushed has a log number less than the + // new minimum unflushed log number. 
+ minUnflushedLogNum := d.mu.mem.queue[n].logNum + if !d.opts.DisableWAL { + for i := 0; i < n; i++ { + if logNum := d.mu.mem.queue[i].logNum; logNum >= minUnflushedLogNum { + panic(errors.AssertionFailedf("logNum invariant violated: flushing %d items; %d:type=%T,logNum=%d; %d:type=%T,logNum=%d", + n, + i, d.mu.mem.queue[i].flushable, logNum, + n, d.mu.mem.queue[n].flushable, minUnflushedLogNum)) + } + } + } + + c := newFlush(d.opts, d.mu.versions.currentVersion(), + d.mu.versions.picker.getBaseLevel(), d.mu.mem.queue[:n], d.timeNow()) + d.addInProgressCompaction(c) + + jobID := d.mu.nextJobID + d.mu.nextJobID++ + d.opts.EventListener.FlushBegin(FlushInfo{ + JobID: jobID, + Input: inputs, + InputBytes: inputBytes, + Ingest: ingest, + }) + startTime := d.timeNow() + + var ve *manifest.VersionEdit + var pendingOutputs []physicalMeta + var stats compactStats + // To determine the target level of the files in the ingestedFlushable, we + // need to acquire the logLock, and not release it for that duration. Since, + // we need to acquire the logLock below to perform the logAndApply step + // anyway, we create the VersionEdit for ingestedFlushable outside of + // runCompaction. For all other flush cases, we construct the VersionEdit + // inside runCompaction. + if c.kind != compactionKindIngestedFlushable { + ve, pendingOutputs, stats, err = d.runCompaction(jobID, c) + } + + // Acquire logLock. This will be released either on an error, by way of + // logUnlock, or through a call to logAndApply if there is no error. 
+ d.mu.versions.logLock() + + if c.kind == compactionKindIngestedFlushable { + ve, err = d.runIngestFlush(c) + } + + info := FlushInfo{ + JobID: jobID, + Input: inputs, + InputBytes: inputBytes, + Duration: d.timeNow().Sub(startTime), + Done: true, + Ingest: ingest, + Err: err, + } + if err == nil { + for i := range ve.NewFiles { + e := &ve.NewFiles[i] + info.Output = append(info.Output, e.Meta.TableInfo()) + // Ingested tables are not necessarily flushed to L0. Record the level of + // each ingested file explicitly. + if ingest { + info.IngestLevels = append(info.IngestLevels, e.Level) + } + } + if len(ve.NewFiles) == 0 { + info.Err = errEmptyTable + } + + // The flush succeeded or it produced an empty sstable. In either case we + // want to bump the minimum unflushed log number to the log number of the + // oldest unflushed memtable. + ve.MinUnflushedLogNum = minUnflushedLogNum + if c.kind != compactionKindIngestedFlushable { + metrics := c.metrics[0] + if d.opts.DisableWAL { + // If the WAL is disabled, every flushable has a zero [logSize], + // resulting in zero bytes in. Instead, use the number of bytes we + // flushed as the BytesIn. This ensures we get a reasonable w-amp + // calculation even when the WAL is disabled. + metrics.BytesIn = metrics.BytesFlushed + } else { + metrics := c.metrics[0] + for i := 0; i < n; i++ { + metrics.BytesIn += d.mu.mem.queue[i].logSize + } + } + } else if len(ve.DeletedFiles) > 0 { + // c.kind == compactionKindIngestedFlushable && we have deleted files due + // to ingest-time splits. + // + // Iterate through all other compactions, and check if their inputs have + // been replaced due to an ingest-time split. In that case, cancel the + // compaction. 
+ for c2 := range d.mu.compact.inProgress { + for i := range c2.inputs { + iter := c2.inputs[i].files.Iter() + for f := iter.First(); f != nil; f = iter.Next() { + if _, ok := ve.DeletedFiles[deletedFileEntry{FileNum: f.FileNum, Level: c2.inputs[i].level}]; ok { + c2.cancel.Store(true) + break + } + } + } + } + } + err = d.mu.versions.logAndApply(jobID, ve, c.metrics, false, /* forceRotation */ + func() []compactionInfo { return d.getInProgressCompactionInfoLocked(c) }) + if err != nil { + info.Err = err + // TODO(peter): untested. + for _, f := range pendingOutputs { + // Note that the FileBacking for the file metadata might not have + // been set yet. So, we directly use the FileNum. Since these + // files were generated as compaction outputs, these must be + // physical files on disk. This property might not hold once + // https://github.com/cockroachdb/pebble/issues/389 is + // implemented if #389 creates virtual sstables as output files. + d.mu.versions.obsoleteTables = append( + d.mu.versions.obsoleteTables, + fileInfo{f.FileNum.DiskFileNum(), f.Size}, + ) + } + d.mu.versions.updateObsoleteTableMetricsLocked() + } + } else { + // We won't be performing the logAndApply step because of the error, + // so logUnlock. + d.mu.versions.logUnlock() + } + + bytesFlushed = c.bytesIterated + + // If err != nil, then the flush will be retried, and we will recalculate + // these metrics. 
+ if err == nil { + d.mu.snapshots.cumulativePinnedCount += stats.cumulativePinnedKeys + d.mu.snapshots.cumulativePinnedSize += stats.cumulativePinnedSize + d.mu.versions.metrics.Keys.MissizedTombstonesCount += stats.countMissizedDels + d.maybeUpdateDeleteCompactionHints(c) + } + + d.clearCompactingState(c, err != nil) + delete(d.mu.compact.inProgress, c) + d.mu.versions.incrementCompactions(c.kind, c.extraLevels, c.pickerMetrics) + + var flushed flushableList + if err == nil { + flushed = d.mu.mem.queue[:n] + d.mu.mem.queue = d.mu.mem.queue[n:] + d.updateReadStateLocked(d.opts.DebugCheck) + d.updateTableStatsLocked(ve.NewFiles) + if ingest { + d.mu.versions.metrics.Flush.AsIngestCount++ + for _, l := range c.metrics { + d.mu.versions.metrics.Flush.AsIngestBytes += l.BytesIngested + d.mu.versions.metrics.Flush.AsIngestTableCount += l.TablesIngested + } + } + + // Update if any eventually file-only snapshots have now transitioned to + // being file-only. + earliestUnflushedSeqNum := d.getEarliestUnflushedSeqNumLocked() + currentVersion := d.mu.versions.currentVersion() + for s := d.mu.snapshots.root.next; s != &d.mu.snapshots.root; { + if s.efos == nil { + s = s.next + continue + } + if base.Visible(earliestUnflushedSeqNum, s.efos.seqNum, InternalKeySeqNumMax) { + s = s.next + continue + } + if s.efos.excised.Load() { + // If a concurrent excise has happened that overlaps with one of the key + // ranges this snapshot is interested in, this EFOS cannot transition to + // a file-only snapshot as keys in that range could now be deleted. Move + // onto the next snapshot. + s = s.next + continue + } + currentVersion.Ref() + + // NB: s.efos.transitionToFileOnlySnapshot could close s, in which + // case s.next would be nil. Save it before calling it. + next := s.next + _ = s.efos.transitionToFileOnlySnapshot(currentVersion) + s = next + } + } + // Signal FlushEnd after installing the new readState. 
This helps for unit + // tests that use the callback to trigger a read using an iterator with + // IterOptions.OnlyReadGuaranteedDurable. + info.TotalDuration = d.timeNow().Sub(startTime) + d.opts.EventListener.FlushEnd(info) + + // The order of these operations matters here for ease of testing. + // Removing the reader reference first allows tests to be guaranteed that + // the memtable reservation has been released by the time a synchronous + // flush returns. readerUnrefLocked may also produce obsolete files so the + // call to deleteObsoleteFiles must happen after it. + for i := range flushed { + flushed[i].readerUnrefLocked(true) + } + + d.deleteObsoleteFiles(jobID) + + // Mark all the memtables we flushed as flushed. + for i := range flushed { + close(flushed[i].flushed) + } + + return bytesFlushed, err +} + +// maybeScheduleCompactionAsync should be used when +// we want to possibly schedule a compaction, but don't +// want to eat the cost of running maybeScheduleCompaction. +// This method should be launched in a separate goroutine. +// d.mu must not be held when this is called. +func (d *DB) maybeScheduleCompactionAsync() { + defer d.compactionSchedulers.Done() + + d.mu.Lock() + d.maybeScheduleCompaction() + d.mu.Unlock() +} + +// maybeScheduleCompaction schedules a compaction if necessary. +// +// d.mu must be held when calling this. +func (d *DB) maybeScheduleCompaction() { + d.maybeScheduleCompactionPicker(pickAuto) +} + +func pickAuto(picker compactionPicker, env compactionEnv) *pickedCompaction { + return picker.pickAuto(env) +} + +func pickElisionOnly(picker compactionPicker, env compactionEnv) *pickedCompaction { + return picker.pickElisionOnlyCompaction(env) +} + +// maybeScheduleCompactionPicker schedules a compaction if necessary, +// calling `pickFunc` to pick automatic compactions. +// +// d.mu must be held when calling this. 
+func (d *DB) maybeScheduleCompactionPicker(
+ pickFunc func(compactionPicker, compactionEnv) *pickedCompaction,
+) {
+ if d.closed.Load() != nil || d.opts.ReadOnly {
+ return
+ }
+ maxConcurrentCompactions := d.opts.MaxConcurrentCompactions()
+ if d.mu.compact.compactingCount >= maxConcurrentCompactions {
+ if len(d.mu.compact.manual) > 0 {
+ // Inability to run head blocks later manual compactions.
+ d.mu.compact.manual[0].retries++
+ }
+ return
+ }
+
+ // Compaction picking needs a coherent view of a Version. In particular, we
+ // need to exclude concurrent ingestions from making a decision on which level
+ // to ingest into that conflicts with our compaction
+ // decision. versionSet.logLock provides the necessary mutual exclusion.
+ d.mu.versions.logLock()
+ defer d.mu.versions.logUnlock()
+
+ // Check for the closed flag again, in case the DB was closed while we were
+ // waiting for logLock().
+ if d.closed.Load() != nil {
+ return
+ }
+
+ env := compactionEnv{
+ diskAvailBytes: d.diskAvailBytes.Load(),
+ earliestSnapshotSeqNum: d.mu.snapshots.earliest(),
+ earliestUnflushedSeqNum: d.getEarliestUnflushedSeqNumLocked(),
+ }
+
+ // Check for delete-only compactions first, because they're expected to be
+ // cheap and reduce future compaction work.
+ if !d.opts.private.disableDeleteOnlyCompactions && + len(d.mu.compact.deletionHints) > 0 && + !d.opts.DisableAutomaticCompactions { + v := d.mu.versions.currentVersion() + snapshots := d.mu.snapshots.toSlice() + inputs, unresolvedHints := checkDeleteCompactionHints(d.cmp, v, d.mu.compact.deletionHints, snapshots) + d.mu.compact.deletionHints = unresolvedHints + + if len(inputs) > 0 { + c := newDeleteOnlyCompaction(d.opts, v, inputs, d.timeNow()) + d.mu.compact.compactingCount++ + d.addInProgressCompaction(c) + go d.compact(c, nil) + } + } + + for len(d.mu.compact.manual) > 0 && d.mu.compact.compactingCount < maxConcurrentCompactions { + v := d.mu.versions.currentVersion() + manual := d.mu.compact.manual[0] + env.inProgressCompactions = d.getInProgressCompactionInfoLocked(nil) + pc, retryLater := pickManualCompaction(v, d.opts, env, d.mu.versions.picker.getBaseLevel(), manual) + if pc != nil { + c := newCompaction(pc, d.opts, d.timeNow(), d.ObjProvider()) + d.mu.compact.manual = d.mu.compact.manual[1:] + d.mu.compact.compactingCount++ + d.addInProgressCompaction(c) + go d.compact(c, manual.done) + } else if !retryLater { + // Noop + d.mu.compact.manual = d.mu.compact.manual[1:] + manual.done <- nil + } else { + // Inability to run head blocks later manual compactions. 
+ manual.retries++ + break + } + } + + for !d.opts.DisableAutomaticCompactions && d.mu.compact.compactingCount < maxConcurrentCompactions { + env.inProgressCompactions = d.getInProgressCompactionInfoLocked(nil) + env.readCompactionEnv = readCompactionEnv{ + readCompactions: &d.mu.compact.readCompactions, + flushing: d.mu.compact.flushing || d.passedFlushThreshold(), + rescheduleReadCompaction: &d.mu.compact.rescheduleReadCompaction, + } + pc := pickFunc(d.mu.versions.picker, env) + if pc == nil { + break + } + c := newCompaction(pc, d.opts, d.timeNow(), d.ObjProvider()) + d.mu.compact.compactingCount++ + d.addInProgressCompaction(c) + go d.compact(c, nil) + } +} + +// deleteCompactionHintType indicates whether the deleteCompactionHint was +// generated from a span containing a range del (point key only), a range key +// delete (range key only), or both a point and range key. +type deleteCompactionHintType uint8 + +const ( + // NOTE: While these are primarily used as enumeration types, they are also + // used for some bitwise operations. Care should be taken when updating. + deleteCompactionHintTypeUnknown deleteCompactionHintType = iota + deleteCompactionHintTypePointKeyOnly + deleteCompactionHintTypeRangeKeyOnly + deleteCompactionHintTypePointAndRangeKey +) + +// String implements fmt.Stringer. +func (h deleteCompactionHintType) String() string { + switch h { + case deleteCompactionHintTypeUnknown: + return "unknown" + case deleteCompactionHintTypePointKeyOnly: + return "point-key-only" + case deleteCompactionHintTypeRangeKeyOnly: + return "range-key-only" + case deleteCompactionHintTypePointAndRangeKey: + return "point-and-range-key" + default: + panic(fmt.Sprintf("unknown hint type: %d", h)) + } +} + +// compactionHintFromKeys returns a deleteCompactionHintType given a slice of +// keyspan.Keys. 
+func compactionHintFromKeys(keys []keyspan.Key) deleteCompactionHintType {
+	var hintType deleteCompactionHintType
+	for _, k := range keys {
+		switch k.Kind() {
+		case base.InternalKeyKindRangeDelete:
+			hintType |= deleteCompactionHintTypePointKeyOnly
+		case base.InternalKeyKindRangeKeyDelete:
+			hintType |= deleteCompactionHintTypeRangeKeyOnly
+		default:
+			panic(fmt.Sprintf("unsupported key kind: %s", k.Kind()))
+		}
+	}
+	return hintType
+}
+
+// A deleteCompactionHint records a user key and sequence number span that has been
+// deleted by a range tombstone. A hint is recorded if at least one sstable
+// falls completely within both the user key and sequence number spans.
+// Once the tombstones and the observed completely-contained sstables fall
+// into the same snapshot stripe, a delete-only compaction may delete any
+// sstables within the range.
+type deleteCompactionHint struct {
+	// The type of key span that generated this hint (point key, range key, or
+	// both).
+	hintType deleteCompactionHintType
+	// start and end are user keys specifying a key range [start, end) of
+	// deleted keys.
+	start []byte
+	end   []byte
+	// The level of the file containing the range tombstone(s) when the hint
+	// was created. Only lower levels need to be searched for files that may
+	// be deleted.
+	tombstoneLevel int
+	// The file containing the range tombstone(s) that created the hint.
+	tombstoneFile *fileMetadata
+	// The smallest and largest sequence numbers of the abutting tombstones
+	// merged to form this hint. All of a table's keys must be less than the
+	// tombstone smallest sequence number to be deleted. All of a table's
+	// sequence numbers must fall into the same snapshot stripe as the
+	// tombstone largest sequence number to be deleted.
+	tombstoneLargestSeqNum  uint64
+	tombstoneSmallestSeqNum uint64
+	// The smallest sequence number of a sstable that was found to be covered
+	// by this hint. The hint cannot be resolved until this sequence number is
+	// in the same snapshot stripe as the largest tombstone sequence number.
+	// This is set when a hint is created, so the LSM may look different and
+	// notably no longer contain the sstable that contained the key at this
+	// sequence number.
+	fileSmallestSeqNum uint64
+}
+
+// String implements fmt.Stringer.
+func (h deleteCompactionHint) String() string {
+	return fmt.Sprintf(
+		"L%d.%s %s-%s seqnums(tombstone=%d-%d, file-smallest=%d, type=%s)",
+		h.tombstoneLevel, h.tombstoneFile.FileNum, h.start, h.end,
+		h.tombstoneSmallestSeqNum, h.tombstoneLargestSeqNum, h.fileSmallestSeqNum,
+		h.hintType,
+	)
+}
+
+// canDelete reports whether the sstable m may be deleted under this hint:
+// m's keys must all be older than the hint's oldest tombstone, m's oldest
+// key must be in the same snapshot stripe as the hint's newest tombstone,
+// m's key kinds must be compatible with the hint's type, and m's key range
+// must be completely contained within [h.start, h.end).
+func (h *deleteCompactionHint) canDelete(cmp Compare, m *fileMetadata, snapshots []uint64) bool {
+	// The file can only be deleted if all of its keys are older than the
+	// earliest tombstone aggregated into the hint.
+	if m.LargestSeqNum >= h.tombstoneSmallestSeqNum || m.SmallestSeqNum < h.fileSmallestSeqNum {
+		return false
+	}
+
+	// The file's oldest key must be in the same snapshot stripe as the
+	// newest tombstone. NB: We already checked the hint's sequence numbers,
+	// but this file's oldest sequence number might be lower than the hint's
+	// smallest sequence number despite the file falling within the key range
+	// if this file was constructed after the hint by a compaction.
+	ti, _ := snapshotIndex(h.tombstoneLargestSeqNum, snapshots)
+	fi, _ := snapshotIndex(m.SmallestSeqNum, snapshots)
+	if ti != fi {
+		return false
+	}
+
+	switch h.hintType {
+	case deleteCompactionHintTypePointKeyOnly:
+		// A hint generated by a range del span cannot delete tables that contain
+		// range keys.
+		if m.HasRangeKeys {
+			return false
+		}
+	case deleteCompactionHintTypeRangeKeyOnly:
+		// A hint generated by a range key del span cannot delete tables that
+		// contain point keys.
+		if m.HasPointKeys {
+			return false
+		}
+	case deleteCompactionHintTypePointAndRangeKey:
+		// A hint from a span that contains both range dels *and* range keys can
+		// only be deleted if both bounds fall within the hint. The next check takes
+		// care of this.
+	default:
+		panic(fmt.Sprintf("pebble: unknown delete compaction hint type: %d", h.hintType))
+	}
+
+	// The file's keys must be completely contained within the hint range.
+	return cmp(h.start, m.Smallest.UserKey) <= 0 && cmp(m.Largest.UserKey, h.end) < 0
+}
+
+// maybeUpdateDeleteCompactionHints removes any deletion hints that may have
+// been invalidated by the completed compaction c (which can zero sequence
+// numbers). Called with d.mu held (it mutates d.mu.compact.deletionHints).
+func (d *DB) maybeUpdateDeleteCompactionHints(c *compaction) {
+	// Compactions that zero sequence numbers can interfere with compaction
+	// deletion hints. Deletion hints apply to tables containing keys older
+	// than a threshold. If a key more recent than the threshold is zeroed in
+	// a compaction, a delete-only compaction may mistake it as meeting the
+	// threshold and drop a table containing live data.
+	//
+	// To avoid this scenario, compactions that zero sequence numbers remove
+	// any conflicting deletion hints. A deletion hint is conflicting if both
+	// of the following conditions apply:
+	// * its key space overlaps with the compaction
+	// * at least one of its inputs contains a key as recent as one of the
+	//   hint's tombstones.
+	//
+	if !c.allowedZeroSeqNum {
+		return
+	}
+
+	// Filter in place, reusing the slice's backing array.
+	updatedHints := d.mu.compact.deletionHints[:0]
+	for _, h := range d.mu.compact.deletionHints {
+		// If the compaction's key space is disjoint from the hint's key
+		// space, the zeroing of sequence numbers won't affect the hint. Keep
+		// the hint.
+		keysDisjoint := d.cmp(h.end, c.smallest.UserKey) < 0 || d.cmp(h.start, c.largest.UserKey) > 0
+		if keysDisjoint {
+			updatedHints = append(updatedHints, h)
+			continue
+		}
+
+		// All of the compaction's inputs must be older than the hint's
+		// tombstones.
+		inputsOlder := true
+		for _, in := range c.inputs {
+			iter := in.files.Iter()
+			for f := iter.First(); f != nil; f = iter.Next() {
+				inputsOlder = inputsOlder && f.LargestSeqNum < h.tombstoneSmallestSeqNum
+			}
+		}
+		if inputsOlder {
+			updatedHints = append(updatedHints, h)
+			continue
+		}
+
+		// Drop h, because the compaction c may have zeroed sequence numbers
+		// of keys more recent than some of h's tombstones.
+	}
+	d.mu.compact.deletionHints = updatedHints
+}
+
+// checkDeleteCompactionHints examines hints against the version v and the
+// open snapshots, returning (1) the per-level sets of not-yet-compacting
+// files that can be deleted outright, and (2) the hints that cannot be
+// resolved yet. Resolvable hints are consumed whether or not they yield any
+// deletable files.
+func checkDeleteCompactionHints(
+	cmp Compare, v *version, hints []deleteCompactionHint, snapshots []uint64,
+) ([]compactionLevel, []deleteCompactionHint) {
+	var files map[*fileMetadata]bool
+	var byLevel [numLevels][]*fileMetadata
+
+	unresolvedHints := hints[:0]
+	for _, h := range hints {
+		// Check each compaction hint to see if it's resolvable. Resolvable
+		// hints are removed and trigger a delete-only compaction if any files
+		// in the current LSM still meet their criteria. Unresolvable hints
+		// are saved and don't trigger a delete-only compaction.
+		//
+		// When a compaction hint is created, the sequence numbers of the
+		// range tombstones and the covered file with the oldest key are
+		// recorded. The largest tombstone sequence number and the smallest
+		// file sequence number must be in the same snapshot stripe for the
+		// hint to be resolved. The below graphic models a compaction hint
+		// covering the keyspace [b, r). The hint completely contains two
+		// files, 000002 and 000003. The file 000003 contains the lowest
+		// covered sequence number at #90. The tombstone b.RANGEDEL.230:h has
+		// the highest tombstone sequence number incorporated into the hint.
+		// The hint may be resolved only once the snapshots at #100, #180 and
+		// #210 are all closed. File 000001 is not included within the hint
+		// because it extends beyond the range tombstones in user key space.
+		//
+		// 250
+		//
+		//       |-b...230:h-|
+		// _____________________________________________________ snapshot #210
+		// 200               |--h.RANGEDEL.200:r--|
+		//
+		// _____________________________________________________ snapshot #180
+		//
+		// 150                     +--------+
+		//           +---------+   | 000003 |
+		//           | 000002  |   |        |
+		//           +_________+   |        |
+		// 100_____________________|________|___________________ snapshot #100
+		//                         +--------+
+		// _____________________________________________________ snapshot #70
+		//                             +---------------+
+		//  50                         | 000001        |
+		//                             |               |
+		//                             +---------------+
+		// ______________________________________________________________
+		//     a b c d e f g h i j k l m n o p q r s t u v w x y z
+
+		ti, _ := snapshotIndex(h.tombstoneLargestSeqNum, snapshots)
+		fi, _ := snapshotIndex(h.fileSmallestSeqNum, snapshots)
+		if ti != fi {
+			// Cannot resolve yet.
+			unresolvedHints = append(unresolvedHints, h)
+			continue
+		}
+
+		// The hint h will be resolved and dropped, regardless of whether
+		// there are any tables that can be deleted.
+		for l := h.tombstoneLevel + 1; l < numLevels; l++ {
+			overlaps := v.Overlaps(l, cmp, h.start, h.end, true /* exclusiveEnd */)
+			iter := overlaps.Iter()
+			for m := iter.First(); m != nil; m = iter.Next() {
+				if m.IsCompacting() || !h.canDelete(cmp, m, snapshots) || files[m] {
+					continue
+				}
+				if files == nil {
+					// Construct files lazily, assuming most calls will not
+					// produce delete-only compactions.
+					files = make(map[*fileMetadata]bool)
+				}
+				files[m] = true
+				byLevel[l] = append(byLevel[l], m)
+			}
+		}
+	}
+
+	var compactLevels []compactionLevel
+	for l, files := range byLevel {
+		if len(files) == 0 {
+			continue
+		}
+		compactLevels = append(compactLevels, compactionLevel{
+			level: l,
+			files: manifest.NewLevelSliceKeySorted(cmp, files),
+		})
+	}
+	return compactLevels, unresolvedHints
+}
+
+// compact runs one compaction and maybe schedules another call to compact.
+func (d *DB) compact(c *compaction, errChannel chan error) {
+	pprof.Do(context.Background(), compactLabels, func(context.Context) {
+		// d.mu is held for the duration; compact1 may drop and re-acquire it.
+		d.mu.Lock()
+		defer d.mu.Unlock()
+		if err := d.compact1(c, errChannel); err != nil {
+			// TODO(peter): count consecutive compaction errors and backoff.
+			d.opts.EventListener.BackgroundError(err)
+		}
+		d.mu.compact.compactingCount--
+		delete(d.mu.compact.inProgress, c)
+		// Add this compaction's duration to the cumulative duration. NB: This
+		// must be atomic with the above removal of c from
+		// d.mu.compact.InProgress to ensure Metrics.Compact.Duration does not
+		// miss or double count a completing compaction's duration.
+		d.mu.compact.duration += d.timeNow().Sub(c.beganAt)
+
+		// The previous compaction may have produced too many files in a
+		// level, so reschedule another compaction if needed.
+		d.maybeScheduleCompaction()
+		d.mu.compact.cond.Broadcast()
+	})
+}
+
+// compact1 runs one compaction.
+//
+// d.mu must be held when calling this, but the mutex may be dropped and
+// re-acquired during the course of this method.
+func (d *DB) compact1(c *compaction, errChannel chan error) (err error) {
+	// If the caller supplied a channel, report the final error on it (even on
+	// panic-free early returns) via the named result parameter.
+	if errChannel != nil {
+		defer func() {
+			errChannel <- err
+		}()
+	}
+
+	jobID := d.mu.nextJobID
+	d.mu.nextJobID++
+	info := c.makeInfo(jobID)
+	d.opts.EventListener.CompactionBegin(info)
+	startTime := d.timeNow()
+
+	ve, pendingOutputs, stats, err := d.runCompaction(jobID, c)
+
+	info.Duration = d.timeNow().Sub(startTime)
+	if err == nil {
+		err = func() error {
+			var err error
+			d.mu.versions.logLock()
+			// Check if this compaction had a conflicting operation (eg. a d.excise())
+			// that necessitates it restarting from scratch. Note that since we hold
+			// the manifest lock, we don't expect this bool to change its value
+			// as only the holder of the manifest lock will ever write to it.
+			if c.cancel.Load() {
+				err = firstError(err, ErrCancelledCompaction)
+			}
+			if err != nil {
+				// logAndApply calls logUnlock. If we didn't call it, we need to call
+				// logUnlock ourselves.
+				d.mu.versions.logUnlock()
+				return err
+			}
+			return d.mu.versions.logAndApply(jobID, ve, c.metrics, false /* forceRotation */, func() []compactionInfo {
+				return d.getInProgressCompactionInfoLocked(c)
+			})
+		}()
+		if err != nil {
+			// The version edit was not applied: the outputs we produced are
+			// garbage and must be queued for deletion.
+			// TODO(peter): untested.
+			for _, f := range pendingOutputs {
+				// Note that the FileBacking for the file metadata might not have
+				// been set yet. So, we directly use the FileNum. Since these
+				// files were generated as compaction outputs, these must be
+				// physical files on disk. This property might not hold once
+				// https://github.com/cockroachdb/pebble/issues/389 is
+				// implemented if #389 creates virtual sstables as output files.
+				d.mu.versions.obsoleteTables = append(
+					d.mu.versions.obsoleteTables,
+					fileInfo{f.FileNum.DiskFileNum(), f.Size},
+				)
+			}
+			d.mu.versions.updateObsoleteTableMetricsLocked()
+		}
+	}
+
+	info.Done = true
+	info.Err = err
+	if err == nil {
+		for i := range ve.NewFiles {
+			e := &ve.NewFiles[i]
+			info.Output.Tables = append(info.Output.Tables, e.Meta.TableInfo())
+		}
+		d.mu.snapshots.cumulativePinnedCount += stats.cumulativePinnedKeys
+		d.mu.snapshots.cumulativePinnedSize += stats.cumulativePinnedSize
+		d.mu.versions.metrics.Keys.MissizedTombstonesCount += stats.countMissizedDels
+		d.maybeUpdateDeleteCompactionHints(c)
+	}
+
+	// NB: clearing compacting state must occur before updating the read state;
+	// L0Sublevels initialization depends on it.
+	d.clearCompactingState(c, err != nil)
+	d.mu.versions.incrementCompactions(c.kind, c.extraLevels, c.pickerMetrics)
+	d.mu.versions.incrementCompactionBytes(-c.bytesWritten)
+
+	info.TotalDuration = d.timeNow().Sub(c.beganAt)
+	d.opts.EventListener.CompactionEnd(info)
+
+	// Update the read state before deleting obsolete files because the
+	// read-state update will cause the previous version to be unref'd and if
+	// there are no references obsolete tables will be added to the obsolete
+	// table list.
+	if err == nil {
+		d.updateReadStateLocked(d.opts.DebugCheck)
+		d.updateTableStatsLocked(ve.NewFiles)
+	}
+	d.deleteObsoleteFiles(jobID)
+
+	return err
+}
+
+// compactStats accumulates statistics gathered over the course of a single
+// compaction run; see the snapshot/tombstone accounting in compact1.
+type compactStats struct {
+	cumulativePinnedKeys uint64
+	cumulativePinnedSize uint64
+	countMissizedDels    uint64
+}
+
+// runCopyCompaction runs a copy compaction where a new FileNum is created that
+// is a byte-for-byte copy of the input file. This is used in lieu of a move
+// compaction when a file is being moved across the local/remote storage
+// boundary.
+//
+// d.mu must be held when calling this method.
+func (d *DB) runCopyCompaction(
+	jobID int,
+	c *compaction,
+	meta *fileMetadata,
+	objMeta objstorage.ObjectMetadata,
+	versionEdit *versionEdit,
+) (ve *versionEdit, pendingOutputs []physicalMeta, retErr error) {
+	ve = versionEdit
+	if objMeta.IsRemote() || !remote.ShouldCreateShared(d.opts.Experimental.CreateOnShared, c.outputLevel.level) {
+		panic("pebble: scheduled a copy compaction that is not actually moving files to shared storage")
+	}
+	// Note that based on logic in the compaction picker, we're guaranteed
+	// meta.Virtual is false.
+	if meta.Virtual {
+		panic(errors.AssertionFailedf("cannot do a copy compaction of a virtual sstable across local/remote storage"))
+	}
+	// We are in the relatively more complex case where we need to copy this
+	// file to remote/shared storage. Drop the db mutex while we do the
+	// copy.
+	//
+	// To ease up cleanup of the local file and tracking of refs, we create
+	// a new FileNum. This has the potential of making the block cache less
+	// effective, however.
+	metaCopy := new(fileMetadata)
+	*metaCopy = fileMetadata{
+		Size:           meta.Size,
+		CreationTime:   meta.CreationTime,
+		SmallestSeqNum: meta.SmallestSeqNum,
+		LargestSeqNum:  meta.LargestSeqNum,
+		Stats:          meta.Stats,
+		Virtual:        meta.Virtual,
+	}
+	if meta.HasPointKeys {
+		metaCopy.ExtendPointKeyBounds(c.cmp, meta.SmallestPointKey, meta.LargestPointKey)
+	}
+	if meta.HasRangeKeys {
+		metaCopy.ExtendRangeKeyBounds(c.cmp, meta.SmallestRangeKey, meta.LargestRangeKey)
+	}
+	metaCopy.FileNum = d.mu.versions.getNextFileNum()
+	metaCopy.InitPhysicalBacking()
+	c.metrics = map[int]*LevelMetrics{
+		c.outputLevel.level: {
+			BytesIn:         meta.Size,
+			BytesCompacted:  meta.Size,
+			TablesCompacted: 1,
+		},
+	}
+	pendingOutputs = append(pendingOutputs, metaCopy.PhysicalMeta())
+	// Before dropping the db mutex, grab a ref to the current version. This
+	// prevents any concurrent excises from deleting files that this compaction
+	// needs to read/maintain a reference to.
+	vers := d.mu.versions.currentVersion()
+	vers.Ref()
+	defer vers.UnrefLocked()
+
+	// NB: unusual order — d.mu is held on entry, dropped for the copy I/O,
+	// and re-acquired before returning.
+	d.mu.Unlock()
+	defer d.mu.Lock()
+	_, err := d.objProvider.LinkOrCopyFromLocal(context.TODO(), d.opts.FS,
+		d.objProvider.Path(objMeta), fileTypeTable, metaCopy.FileBacking.DiskFileNum,
+		objstorage.CreateOptions{PreferSharedStorage: true})
+	if err != nil {
+		return ve, pendingOutputs, err
+	}
+	ve.NewFiles[0].Meta = metaCopy
+
+	if err := d.objProvider.Sync(); err != nil {
+		return nil, pendingOutputs, err
+	}
+	return ve, pendingOutputs, nil
+}
+
+// runCompaction runs a compaction that produces new on-disk tables from
+// memtables or old on-disk tables.
+//
+// d.mu must be held when calling this, but the mutex may be dropped and
+// re-acquired during the course of this method.
+func (d *DB) runCompaction( + jobID int, c *compaction, +) (ve *versionEdit, pendingOutputs []physicalMeta, stats compactStats, retErr error) { + // As a sanity check, confirm that the smallest / largest keys for new and + // deleted files in the new versionEdit pass a validation function before + // returning the edit. + defer func() { + // If we're handling a panic, don't expect the version edit to validate. + if r := recover(); r != nil { + panic(r) + } else if ve != nil { + err := validateVersionEdit(ve, d.opts.Experimental.KeyValidationFunc, d.opts.Comparer.FormatKey) + if err != nil { + d.opts.Logger.Fatalf("pebble: version edit validation failed: %s", err) + } + } + }() + + // Check for a delete-only compaction. This can occur when wide range + // tombstones completely contain sstables. + if c.kind == compactionKindDeleteOnly { + c.metrics = make(map[int]*LevelMetrics, len(c.inputs)) + ve := &versionEdit{ + DeletedFiles: map[deletedFileEntry]*fileMetadata{}, + } + for _, cl := range c.inputs { + levelMetrics := &LevelMetrics{} + iter := cl.files.Iter() + for f := iter.First(); f != nil; f = iter.Next() { + ve.DeletedFiles[deletedFileEntry{ + Level: cl.level, + FileNum: f.FileNum, + }] = f + } + c.metrics[cl.level] = levelMetrics + } + return ve, nil, stats, nil + } + + if c.kind == compactionKindIngestedFlushable { + panic("pebble: runCompaction cannot handle compactionKindIngestedFlushable.") + } + + // Check for a move or copy of one table from one level to the next. We avoid + // such a move if there is lots of overlapping grandparent data. Otherwise, + // the move could create a parent file that will require a very expensive + // merge later on. 
+ if c.kind == compactionKindMove || c.kind == compactionKindCopy { + iter := c.startLevel.files.Iter() + meta := iter.First() + if invariants.Enabled { + if iter.Next() != nil { + panic("got more than one file for a move or copy compaction") + } + } + if c.cancel.Load() { + return ve, nil, stats, ErrCancelledCompaction + } + objMeta, err := d.objProvider.Lookup(fileTypeTable, meta.FileBacking.DiskFileNum) + if err != nil { + return ve, pendingOutputs, stats, err + } + c.metrics = map[int]*LevelMetrics{ + c.outputLevel.level: { + BytesMoved: meta.Size, + TablesMoved: 1, + }, + } + ve := &versionEdit{ + DeletedFiles: map[deletedFileEntry]*fileMetadata{ + {Level: c.startLevel.level, FileNum: meta.FileNum}: meta, + }, + NewFiles: []newFileEntry{ + {Level: c.outputLevel.level, Meta: meta}, + }, + } + if c.kind == compactionKindCopy { + ve, pendingOutputs, retErr = d.runCopyCompaction(jobID, c, meta, objMeta, ve) + if retErr != nil { + return ve, pendingOutputs, stats, retErr + } + } + return ve, nil, stats, nil + } + + defer func() { + if retErr != nil { + pendingOutputs = nil + } + }() + + snapshots := d.mu.snapshots.toSlice() + formatVers := d.FormatMajorVersion() + + if c.flushing == nil { + // Before dropping the db mutex, grab a ref to the current version. This + // prevents any concurrent excises from deleting files that this compaction + // needs to read/maintain a reference to. + // + // Note that unlike user iterators, compactionIter does not maintain a ref + // of the version or read state. + vers := d.mu.versions.currentVersion() + vers.Ref() + defer vers.UnrefLocked() + } + + if c.cancel.Load() { + return ve, nil, stats, ErrCancelledCompaction + } + + // Release the d.mu lock while doing I/O. + // Note the unusual order: Unlock and then Lock. + d.mu.Unlock() + defer d.mu.Lock() + + // Compactions use a pool of buffers to read blocks, avoiding polluting the + // block cache with blocks that will not be read again. 
We initialize the + // buffer pool with a size 12. This initial size does not need to be + // accurate, because the pool will grow to accommodate the maximum number of + // blocks allocated at a given time over the course of the compaction. But + // choosing a size larger than that working set avoids any additional + // allocations to grow the size of the pool over the course of iteration. + // + // Justification for initial size 12: In a two-level compaction, at any + // given moment we'll have 2 index blocks in-use and 2 data blocks in-use. + // Additionally, when decoding a compressed block, we'll temporarily + // allocate 1 additional block to hold the compressed buffer. In the worst + // case that all input sstables have two-level index blocks (+2), value + // blocks (+2), range deletion blocks (+n) and range key blocks (+n), we'll + // additionally require 2n+4 blocks where n is the number of input sstables. + // Range deletion and range key blocks are relatively rare, and the cost of + // an additional allocation or two over the course of the compaction is + // considered to be okay. A larger initial size would cause the pool to hold + // on to more memory, even when it's not in-use because the pool will + // recycle buffers up to the current capacity of the pool. The memory use of + // a 12-buffer pool is expected to be within reason, even if all the buffers + // grow to the typical size of an index block (256 KiB) which would + // translate to 3 MiB per compaction. 
+ c.bufferPool.Init(12) + defer c.bufferPool.Release() + + iiter, err := c.newInputIter(d.newIters, d.tableNewRangeKeyIter, snapshots) + if err != nil { + return nil, pendingOutputs, stats, err + } + c.allowedZeroSeqNum = c.allowZeroSeqNum() + iiter = invalidating.MaybeWrapIfInvariants(iiter) + iter := newCompactionIter(c.cmp, c.equal, c.formatKey, d.merge, iiter, snapshots, + &c.rangeDelFrag, &c.rangeKeyFrag, c.allowedZeroSeqNum, c.elideTombstone, + c.elideRangeTombstone, d.FormatMajorVersion()) + + var ( + createdFiles []base.DiskFileNum + tw *sstable.Writer + pinnedKeySize uint64 + pinnedValueSize uint64 + pinnedCount uint64 + ) + defer func() { + if iter != nil { + retErr = firstError(retErr, iter.Close()) + } + if tw != nil { + retErr = firstError(retErr, tw.Close()) + } + if retErr != nil { + for _, fileNum := range createdFiles { + _ = d.objProvider.Remove(fileTypeTable, fileNum) + } + } + for _, closer := range c.closers { + retErr = firstError(retErr, closer.Close()) + } + }() + + ve = &versionEdit{ + DeletedFiles: map[deletedFileEntry]*fileMetadata{}, + } + + startLevelBytes := c.startLevel.files.SizeSum() + outputMetrics := &LevelMetrics{ + BytesIn: startLevelBytes, + BytesRead: c.outputLevel.files.SizeSum(), + } + if len(c.extraLevels) > 0 { + outputMetrics.BytesIn += c.extraLevels[0].files.SizeSum() + } + outputMetrics.BytesRead += outputMetrics.BytesIn + + c.metrics = map[int]*LevelMetrics{ + c.outputLevel.level: outputMetrics, + } + if len(c.flushing) == 0 && c.metrics[c.startLevel.level] == nil { + c.metrics[c.startLevel.level] = &LevelMetrics{} + } + if len(c.extraLevels) > 0 { + c.metrics[c.extraLevels[0].level] = &LevelMetrics{} + outputMetrics.MultiLevel.BytesInTop = startLevelBytes + outputMetrics.MultiLevel.BytesIn = outputMetrics.BytesIn + outputMetrics.MultiLevel.BytesRead = outputMetrics.BytesRead + } + + // The table is typically written at the maximum allowable format implied by + // the current format major version of the DB. 
+ tableFormat := formatVers.MaxTableFormat() + + // In format major versions with maximum table formats of Pebblev3, value + // blocks were conditional on an experimental setting. In format major + // versions with maximum table formats of Pebblev4 and higher, value blocks + // are always enabled. + if tableFormat == sstable.TableFormatPebblev3 && + (d.opts.Experimental.EnableValueBlocks == nil || !d.opts.Experimental.EnableValueBlocks()) { + tableFormat = sstable.TableFormatPebblev2 + } + + writerOpts := d.opts.MakeWriterOptions(c.outputLevel.level, tableFormat) + if formatVers < FormatBlockPropertyCollector { + // Cannot yet write block properties. + writerOpts.BlockPropertyCollectors = nil + } + + // prevPointKey is a sstable.WriterOption that provides access to + // the last point key written to a writer's sstable. When a new + // output begins in newOutput, prevPointKey is updated to point to + // the new output's sstable.Writer. This allows the compaction loop + // to access the last written point key without requiring the + // compaction loop to make a copy of each key ahead of time. Users + // must be careful, because the byte slice returned by UnsafeKey + // points directly into the Writer's block buffer. + var prevPointKey sstable.PreviousPointKeyOpt + var cpuWorkHandle CPUWorkHandle + defer func() { + if cpuWorkHandle != nil { + d.opts.Experimental.CPUWorkPermissionGranter.CPUWorkDone(cpuWorkHandle) + } + }() + + newOutput := func() error { + // Check if we've been cancelled by a concurrent operation. 
+ if c.cancel.Load() { + return ErrCancelledCompaction + } + fileMeta := &fileMetadata{} + d.mu.Lock() + fileNum := d.mu.versions.getNextFileNum() + fileMeta.FileNum = fileNum + pendingOutputs = append(pendingOutputs, fileMeta.PhysicalMeta()) + d.mu.Unlock() + + ctx := context.TODO() + if objiotracing.Enabled { + ctx = objiotracing.WithLevel(ctx, c.outputLevel.level) + switch c.kind { + case compactionKindFlush: + ctx = objiotracing.WithReason(ctx, objiotracing.ForFlush) + case compactionKindIngestedFlushable: + ctx = objiotracing.WithReason(ctx, objiotracing.ForIngestion) + default: + ctx = objiotracing.WithReason(ctx, objiotracing.ForCompaction) + } + } + // Prefer shared storage if present. + createOpts := objstorage.CreateOptions{ + PreferSharedStorage: remote.ShouldCreateShared(d.opts.Experimental.CreateOnShared, c.outputLevel.level), + } + writable, objMeta, err := d.objProvider.Create(ctx, fileTypeTable, fileNum.DiskFileNum(), createOpts) + if err != nil { + return err + } + + reason := "flushing" + if c.flushing == nil { + reason = "compacting" + } + d.opts.EventListener.TableCreated(TableCreateInfo{ + JobID: jobID, + Reason: reason, + Path: d.objProvider.Path(objMeta), + FileNum: fileNum, + }) + if c.kind != compactionKindFlush { + writable = &compactionWritable{ + Writable: writable, + versions: d.mu.versions, + written: &c.bytesWritten, + } + } + createdFiles = append(createdFiles, fileNum.DiskFileNum()) + cacheOpts := private.SSTableCacheOpts(d.cacheID, fileNum.DiskFileNum()).(sstable.WriterOption) + + const MaxFileWriteAdditionalCPUTime = time.Millisecond * 100 + cpuWorkHandle = d.opts.Experimental.CPUWorkPermissionGranter.GetPermission( + MaxFileWriteAdditionalCPUTime, + ) + writerOpts.Parallelism = + d.opts.Experimental.MaxWriterConcurrency > 0 && + (cpuWorkHandle.Permitted() || d.opts.Experimental.ForceWriterParallelism) + + tw = sstable.NewWriter(writable, writerOpts, cacheOpts, &prevPointKey) + + fileMeta.CreationTime = time.Now().Unix() + 
ve.NewFiles = append(ve.NewFiles, newFileEntry{ + Level: c.outputLevel.level, + Meta: fileMeta, + }) + return nil + } + + // splitL0Outputs is true during flushes and intra-L0 compactions with flush + // splits enabled. + splitL0Outputs := c.outputLevel.level == 0 && d.opts.FlushSplitBytes > 0 + + // finishOutput is called with the a user key up to which all tombstones + // should be flushed. Typically, this is the first key of the next + // sstable or an empty key if this output is the final sstable. + finishOutput := func(splitKey []byte) error { + // If we haven't output any point records to the sstable (tw == nil) then the + // sstable will only contain range tombstones and/or range keys. The smallest + // key in the sstable will be the start key of the first range tombstone or + // range key added. We need to ensure that this start key is distinct from + // the splitKey passed to finishOutput (if set), otherwise we would generate + // an sstable where the largest key is smaller than the smallest key due to + // how the largest key boundary is set below. NB: It is permissible for the + // range tombstone / range key start key to be the empty string. + // + // TODO: It is unfortunate that we have to do this check here rather than + // when we decide to finish the sstable in the runCompaction loop. A better + // structure currently eludes us. + if tw == nil { + startKey := c.rangeDelFrag.Start() + if len(iter.tombstones) > 0 { + startKey = iter.tombstones[0].Start + } + if startKey == nil { + startKey = c.rangeKeyFrag.Start() + if len(iter.rangeKeys) > 0 { + startKey = iter.rangeKeys[0].Start + } + } + if splitKey != nil && d.cmp(startKey, splitKey) == 0 { + return nil + } + } + + // NB: clone the key because the data can be held on to by the call to + // compactionIter.Tombstones via keyspan.Fragmenter.FlushTo, and by the + // WriterMetadata.LargestRangeDel.UserKey. + splitKey = append([]byte(nil), splitKey...) 
+ for _, v := range iter.Tombstones(splitKey) { + if tw == nil { + if err := newOutput(); err != nil { + return err + } + } + // The tombstone being added could be completely outside the + // eventual bounds of the sstable. Consider this example (bounds + // in square brackets next to table filename): + // + // ./000240.sst [tmgc#391,MERGE-tmgc#391,MERGE] + // tmgc#391,MERGE [786e627a] + // tmgc-udkatvs#331,RANGEDEL + // + // ./000241.sst [tmgc#384,MERGE-tmgc#384,MERGE] + // tmgc#384,MERGE [666c7070] + // tmgc-tvsalezade#383,RANGEDEL + // tmgc-tvsalezade#331,RANGEDEL + // + // ./000242.sst [tmgc#383,RANGEDEL-tvsalezade#72057594037927935,RANGEDEL] + // tmgc-tvsalezade#383,RANGEDEL + // tmgc#375,SET [72646c78766965616c72776865676e79] + // tmgc-tvsalezade#356,RANGEDEL + // + // Note that both of the top two SSTables have range tombstones + // that start after the file's end keys. Since the file bound + // computation happens well after all range tombstones have been + // added to the writer, eliding out-of-file range tombstones based + // on sequence number at this stage is difficult, and necessitates + // read-time logic to ignore range tombstones outside file bounds. + if err := rangedel.Encode(&v, tw.Add); err != nil { + return err + } + } + for _, v := range iter.RangeKeys(splitKey) { + // Same logic as for range tombstones, except added using tw.AddRangeKey. + if tw == nil { + if err := newOutput(); err != nil { + return err + } + } + if err := rangekey.Encode(&v, tw.AddRangeKey); err != nil { + return err + } + } + + if tw == nil { + return nil + } + { + // Set internal sstable properties. + p := getInternalWriterProperties(tw) + // Set the external sst version to 0. This is what RocksDB expects for + // db-internal sstables; otherwise, it could apply a global sequence number. + p.ExternalFormatVersion = 0 + // Set the snapshot pinned totals. 
+ p.SnapshotPinnedKeys = pinnedCount + p.SnapshotPinnedKeySize = pinnedKeySize + p.SnapshotPinnedValueSize = pinnedValueSize + stats.cumulativePinnedKeys += pinnedCount + stats.cumulativePinnedSize += pinnedKeySize + pinnedValueSize + pinnedCount = 0 + pinnedKeySize = 0 + pinnedValueSize = 0 + } + if err := tw.Close(); err != nil { + tw = nil + return err + } + d.opts.Experimental.CPUWorkPermissionGranter.CPUWorkDone(cpuWorkHandle) + cpuWorkHandle = nil + writerMeta, err := tw.Metadata() + if err != nil { + tw = nil + return err + } + tw = nil + meta := ve.NewFiles[len(ve.NewFiles)-1].Meta + meta.Size = writerMeta.Size + meta.SmallestSeqNum = writerMeta.SmallestSeqNum + meta.LargestSeqNum = writerMeta.LargestSeqNum + meta.InitPhysicalBacking() + + // If the file didn't contain any range deletions, we can fill its + // table stats now, avoiding unnecessarily loading the table later. + maybeSetStatsFromProperties( + meta.PhysicalMeta(), &writerMeta.Properties, + ) + + if c.flushing == nil { + outputMetrics.TablesCompacted++ + outputMetrics.BytesCompacted += meta.Size + } else { + outputMetrics.TablesFlushed++ + outputMetrics.BytesFlushed += meta.Size + } + outputMetrics.Size += int64(meta.Size) + outputMetrics.NumFiles++ + outputMetrics.Additional.BytesWrittenDataBlocks += writerMeta.Properties.DataSize + outputMetrics.Additional.BytesWrittenValueBlocks += writerMeta.Properties.ValueBlocksSize + + if n := len(ve.NewFiles); n > 1 { + // This is not the first output file. Ensure the sstable boundaries + // are nonoverlapping. 
+ prevMeta := ve.NewFiles[n-2].Meta + if writerMeta.SmallestRangeDel.UserKey != nil { + c := d.cmp(writerMeta.SmallestRangeDel.UserKey, prevMeta.Largest.UserKey) + if c < 0 { + return errors.Errorf( + "pebble: smallest range tombstone start key is less than previous sstable largest key: %s < %s", + writerMeta.SmallestRangeDel.Pretty(d.opts.Comparer.FormatKey), + prevMeta.Largest.Pretty(d.opts.Comparer.FormatKey)) + } else if c == 0 && !prevMeta.Largest.IsExclusiveSentinel() { + // The user key portion of the range boundary start key is + // equal to the previous table's largest key user key, and + // the previous table's largest key is not exclusive. This + // violates the invariant that tables are key-space + // partitioned. + return errors.Errorf( + "pebble: invariant violation: previous sstable largest key %s, current sstable smallest rangedel: %s", + prevMeta.Largest.Pretty(d.opts.Comparer.FormatKey), + writerMeta.SmallestRangeDel.Pretty(d.opts.Comparer.FormatKey), + ) + } + } + } + + // Verify that all range deletions outputted to the sstable are + // truncated to split key. + if splitKey != nil && writerMeta.LargestRangeDel.UserKey != nil && + d.cmp(writerMeta.LargestRangeDel.UserKey, splitKey) > 0 { + return errors.Errorf( + "pebble: invariant violation: rangedel largest key %q extends beyond split key %q", + writerMeta.LargestRangeDel.Pretty(d.opts.Comparer.FormatKey), + d.opts.Comparer.FormatKey(splitKey), + ) + } + + if writerMeta.HasPointKeys { + meta.ExtendPointKeyBounds(d.cmp, writerMeta.SmallestPoint, writerMeta.LargestPoint) + } + if writerMeta.HasRangeDelKeys { + meta.ExtendPointKeyBounds(d.cmp, writerMeta.SmallestRangeDel, writerMeta.LargestRangeDel) + } + if writerMeta.HasRangeKeys { + meta.ExtendRangeKeyBounds(d.cmp, writerMeta.SmallestRangeKey, writerMeta.LargestRangeKey) + } + + // Verify that the sstable bounds fall within the compaction input + // bounds. 
This is a sanity check that we don't have a logic error
+ elsewhere that causes the sstable bounds to accidentally expand past the
+ compaction input bounds as doing so could lead to various badness such
+ as keys being deleted by a range tombstone incorrectly.
+ if c.smallest.UserKey != nil {
+ switch v := d.cmp(meta.Smallest.UserKey, c.smallest.UserKey); {
+ case v >= 0:
+ // Nothing to do.
+ case v < 0:
+ return errors.Errorf("pebble: compaction output grew beyond bounds of input: %s < %s",
+ meta.Smallest.Pretty(d.opts.Comparer.FormatKey),
+ c.smallest.Pretty(d.opts.Comparer.FormatKey))
+ }
+ }
+ if c.largest.UserKey != nil {
+ switch v := d.cmp(meta.Largest.UserKey, c.largest.UserKey); {
+ case v <= 0:
+ // Nothing to do.
+ case v > 0:
+ return errors.Errorf("pebble: compaction output grew beyond bounds of input: %s > %s",
+ meta.Largest.Pretty(d.opts.Comparer.FormatKey),
+ c.largest.Pretty(d.opts.Comparer.FormatKey))
+ }
+ }
+ // Verify that we never split different revisions of the same user key
+ // across two different sstables.
+ if err := c.errorOnUserKeyOverlap(ve); err != nil {
+ return err
+ }
+ if err := meta.Validate(d.cmp, d.opts.Comparer.FormatKey); err != nil {
+ return err
+ }
+ return nil
+ }
+
+ // Build a compactionOutputSplitter that contains all logic to determine
+ // whether the compaction loop should stop writing to one output sstable and
+ // switch to a new one. Some splitters can wrap other splitters, and the
+ // splitterGroup can be composed of multiple splitters. In this case, we
+ // start off with splitters for file sizes, grandparent limits, and (for L0
+ // splits) L0 limits, before wrapping them in a splitterGroup.
+ sizeSplitter := newFileSizeSplitter(&iter.frontiers, c.maxOutputFileSize, c.grandparents.Iter())
+ unsafePrevUserKey := func() []byte {
+ // Return the largest point key written to tw or the start of
+ // the current range deletion in the fragmenter, whichever is
+ // greater.
+ prevPoint := prevPointKey.UnsafeKey() + if c.cmp(prevPoint.UserKey, c.rangeDelFrag.Start()) > 0 { + return prevPoint.UserKey + } + return c.rangeDelFrag.Start() + } + outputSplitters := []compactionOutputSplitter{ + // We do not split the same user key across different sstables within + // one flush or compaction. The fileSizeSplitter may request a split in + // the middle of a user key, so the userKeyChangeSplitter ensures we are + // at a user key change boundary when doing a split. + &userKeyChangeSplitter{ + cmp: c.cmp, + splitter: sizeSplitter, + unsafePrevUserKey: unsafePrevUserKey, + }, + newLimitFuncSplitter(&iter.frontiers, c.findGrandparentLimit), + } + if splitL0Outputs { + outputSplitters = append(outputSplitters, newLimitFuncSplitter(&iter.frontiers, c.findL0Limit)) + } + splitter := &splitterGroup{cmp: c.cmp, splitters: outputSplitters} + + // Each outer loop iteration produces one output file. An iteration that + // produces a file containing point keys (and optionally range tombstones) + // guarantees that the input iterator advanced. An iteration that produces + // a file containing only range tombstones guarantees the limit passed to + // `finishOutput()` advanced to a strictly greater user key corresponding + // to a grandparent file largest key, or nil. Taken together, these + // progress guarantees ensure that eventually the input iterator will be + // exhausted and the range tombstone fragments will all be flushed. + for key, val := iter.First(); key != nil || !c.rangeDelFrag.Empty() || !c.rangeKeyFrag.Empty(); { + var firstKey []byte + if key != nil { + firstKey = key.UserKey + } else if startKey := c.rangeDelFrag.Start(); startKey != nil { + // Pass the start key of the first pending tombstone to find the + // next limit. All pending tombstones have the same start key. 
We
+ // use this as opposed to the end key of the last written sstable to
+ // effectively handle cases like these:
+ //
+ // a.SET.3
+ // (lf.limit at b)
+ // d.RANGEDEL.4:f
+ //
+ // In this case, the partition after b has only range deletions, so
+ // if we were to find the limit after the last written key at the
+ // split point (key a), we'd get the limit b again, and
+ // finishOutput() would not advance any further because the next
+ // range tombstone to write does not start until after the L0 split
+ // point.
+ firstKey = startKey
+ }
+ splitterSuggestion := splitter.onNewOutput(firstKey)
+
+ // Each inner loop iteration processes one key from the input iterator.
+ for ; key != nil; key, val = iter.Next() {
+ if split := splitter.shouldSplitBefore(key, tw); split == splitNow {
+ break
+ }
+
+ switch key.Kind() {
+ case InternalKeyKindRangeDelete:
+ // Range tombstones are handled specially. They are fragmented,
+ // and they're not written until later during `finishOutput()`.
+ // We add them to the `Fragmenter` now to make them visible to
+ // `compactionIter` so covered keys in the same snapshot stripe
+ // can be elided.
+
+ // The interleaved range deletion might only be one of many with
+ // these bounds. Some fragmenting is performed ahead of time by
+ // keyspan.MergingIter.
+ if s := c.rangeDelIter.Span(); !s.Empty() {
+ // The memory management here is subtle. Range deletions
+ // blocks do NOT use prefix compression, which ensures that
+ // range deletion spans' memory is available as long as we keep
+ // the iterator open. However, the keyspan.MergingIter that
+ // merges spans across levels only guarantees the lifetime
+ // of the [start, end) bounds until the next positioning
+ // method is called.
+ //
+ // Additionally, the Span.Keys slice is owned by the
+ // range deletion iterator stack, and it may be overwritten
+ // when we advance.
+ //
+ // Clone the Keys slice and the start and end keys.
+ // + // TODO(jackson): Avoid the clone by removing c.rangeDelFrag + // and performing explicit truncation of the pending + // rangedel span as necessary. + clone := keyspan.Span{ + Start: iter.cloneKey(s.Start), + End: iter.cloneKey(s.End), + Keys: make([]keyspan.Key, len(s.Keys)), + } + copy(clone.Keys, s.Keys) + c.rangeDelFrag.Add(clone) + } + continue + case InternalKeyKindRangeKeySet, InternalKeyKindRangeKeyUnset, InternalKeyKindRangeKeyDelete: + // Range keys are handled in the same way as range tombstones, except + // with a dedicated fragmenter. + if s := c.rangeKeyInterleaving.Span(); !s.Empty() { + clone := keyspan.Span{ + Start: iter.cloneKey(s.Start), + End: iter.cloneKey(s.End), + Keys: make([]keyspan.Key, len(s.Keys)), + } + // Since the keys' Suffix and Value fields are not deep cloned, the + // underlying blockIter must be kept open for the lifetime of the + // compaction. + copy(clone.Keys, s.Keys) + c.rangeKeyFrag.Add(clone) + } + continue + } + if tw == nil { + if err := newOutput(); err != nil { + return nil, pendingOutputs, stats, err + } + } + if err := tw.AddWithForceObsolete(*key, val, iter.forceObsoleteDueToRangeDel); err != nil { + return nil, pendingOutputs, stats, err + } + if iter.snapshotPinned { + // The kv pair we just added to the sstable was only surfaced by + // the compaction iterator because an open snapshot prevented + // its elision. Increment the stats. + pinnedCount++ + pinnedKeySize += uint64(len(key.UserKey)) + base.InternalTrailerLen + pinnedValueSize += uint64(len(val)) + } + } + + // A splitter requested a split, and we're ready to finish the output. + // We need to choose the key at which to split any pending range + // tombstones. There are two options: + // 1. splitterSuggestion — The key suggested by the splitter. This key + // is guaranteed to be greater than the last key written to the + // current output. + // 2. key.UserKey — the first key of the next sstable output. 
This user + // key is also guaranteed to be greater than the last user key + // written to the current output (see userKeyChangeSplitter). + // + // Use whichever is smaller. Using the smaller of the two limits + // overlap with grandparents. Consider the case where the + // grandparent limit is calculated to be 'b', key is 'x', and + // there exist many sstables between 'b' and 'x'. If the range + // deletion fragmenter has a pending tombstone [a,x), splitting + // at 'x' would cause the output table to overlap many + // grandparents well beyond the calculated grandparent limit + // 'b'. Splitting at the smaller `splitterSuggestion` avoids + // this unbounded overlap with grandparent tables. + splitKey := splitterSuggestion + if key != nil && (splitKey == nil || c.cmp(splitKey, key.UserKey) > 0) { + splitKey = key.UserKey + } + if err := finishOutput(splitKey); err != nil { + return nil, pendingOutputs, stats, err + } + } + + for _, cl := range c.inputs { + iter := cl.files.Iter() + for f := iter.First(); f != nil; f = iter.Next() { + ve.DeletedFiles[deletedFileEntry{ + Level: cl.level, + FileNum: f.FileNum, + }] = f + } + } + + // The compaction iterator keeps track of a count of the number of DELSIZED + // keys that encoded an incorrect size. Propagate it up as a part of + // compactStats. + stats.countMissizedDels = iter.stats.countMissizedDels + + if err := d.objProvider.Sync(); err != nil { + return nil, pendingOutputs, stats, err + } + + // Refresh the disk available statistic whenever a compaction/flush + // completes, before re-acquiring the mutex. + _ = d.calculateDiskAvailableBytes() + + return ve, pendingOutputs, stats, nil +} + +// validateVersionEdit validates that start and end keys across new and deleted +// files in a versionEdit pass the given validation function. 
+func validateVersionEdit(
+ ve *versionEdit, validateFn func([]byte) error, format base.FormatKey,
+) error {
+ validateMetaFn := func(f *manifest.FileMetadata) error {
+ for _, key := range []InternalKey{f.Smallest, f.Largest} {
+ if err := validateFn(key.UserKey); err != nil {
+ return errors.Wrapf(err, "key=%q; file=%s", format(key.UserKey), f)
+ }
+ }
+ return nil
+ }
+
+ // Validate both new and deleted files.
+ for _, f := range ve.NewFiles {
+ if err := validateMetaFn(f.Meta); err != nil {
+ return err
+ }
+ }
+ for _, m := range ve.DeletedFiles {
+ if err := validateMetaFn(m); err != nil {
+ return err
+ }
+ }
+
+ return nil
+}
+
+// scanObsoleteFiles scans the filesystem for files that are no longer needed
+// and adds those to the internal lists of obsolete files. Note that the files
+// are not actually deleted by this method. A subsequent call to
+// deleteObsoleteFiles must be performed. Must not be called concurrently
+// with compactions and flushes. db.mu must be held when calling this function.
+func (d *DB) scanObsoleteFiles(list []string) {
+ // Disable automatic compactions temporarily to avoid concurrent compactions /
+ // flushes from interfering. The original value is restored on completion.
+ disabledPrev := d.opts.DisableAutomaticCompactions
+ defer func() {
+ d.opts.DisableAutomaticCompactions = disabledPrev
+ }()
+ d.opts.DisableAutomaticCompactions = true
+
+ // Wait for any ongoing compaction to complete before continuing.
+ for d.mu.compact.compactingCount > 0 || d.mu.compact.flushing {
+ d.mu.compact.cond.Wait()
+ }
+
+ liveFileNums := make(map[base.DiskFileNum]struct{})
+ d.mu.versions.addLiveFileNums(liveFileNums)
+ // Protect against files which are only referred to by the ingestedFlushable
+ // from being deleted. These are added to the flushable queue on WAL replay
+ // during read only mode and aren't part of the Version.
Note that if + // !d.opts.ReadOnly, then all flushables of type ingestedFlushable have + // already been flushed. + for _, fEntry := range d.mu.mem.queue { + if f, ok := fEntry.flushable.(*ingestedFlushable); ok { + for _, file := range f.files { + liveFileNums[file.FileBacking.DiskFileNum] = struct{}{} + } + } + } + + minUnflushedLogNum := d.mu.versions.minUnflushedLogNum + manifestFileNum := d.mu.versions.manifestFileNum + + var obsoleteLogs []fileInfo + var obsoleteTables []fileInfo + var obsoleteManifests []fileInfo + var obsoleteOptions []fileInfo + + for _, filename := range list { + fileType, diskFileNum, ok := base.ParseFilename(d.opts.FS, filename) + if !ok { + continue + } + switch fileType { + case fileTypeLog: + if diskFileNum >= minUnflushedLogNum { + continue + } + fi := fileInfo{fileNum: diskFileNum} + if stat, err := d.opts.FS.Stat(filename); err == nil { + fi.fileSize = uint64(stat.Size()) + } + obsoleteLogs = append(obsoleteLogs, fi) + case fileTypeManifest: + if diskFileNum >= manifestFileNum { + continue + } + fi := fileInfo{fileNum: diskFileNum} + if stat, err := d.opts.FS.Stat(filename); err == nil { + fi.fileSize = uint64(stat.Size()) + } + obsoleteManifests = append(obsoleteManifests, fi) + case fileTypeOptions: + if diskFileNum.FileNum() >= d.optionsFileNum.FileNum() { + continue + } + fi := fileInfo{fileNum: diskFileNum} + if stat, err := d.opts.FS.Stat(filename); err == nil { + fi.fileSize = uint64(stat.Size()) + } + obsoleteOptions = append(obsoleteOptions, fi) + case fileTypeTable: + // Objects are handled through the objstorage provider below. + default: + // Don't delete files we don't know about. 
+ } + } + + objects := d.objProvider.List() + for _, obj := range objects { + switch obj.FileType { + case fileTypeTable: + if _, ok := liveFileNums[obj.DiskFileNum]; ok { + continue + } + fileInfo := fileInfo{ + fileNum: obj.DiskFileNum, + } + if size, err := d.objProvider.Size(obj); err == nil { + fileInfo.fileSize = uint64(size) + } + obsoleteTables = append(obsoleteTables, fileInfo) + + default: + // Ignore object types we don't know about. + } + } + + d.mu.log.queue = merge(d.mu.log.queue, obsoleteLogs) + d.mu.versions.metrics.WAL.Files = int64(len(d.mu.log.queue)) + d.mu.versions.obsoleteTables = merge(d.mu.versions.obsoleteTables, obsoleteTables) + d.mu.versions.updateObsoleteTableMetricsLocked() + d.mu.versions.obsoleteManifests = merge(d.mu.versions.obsoleteManifests, obsoleteManifests) + d.mu.versions.obsoleteOptions = merge(d.mu.versions.obsoleteOptions, obsoleteOptions) +} + +// disableFileDeletions disables file deletions and then waits for any +// in-progress deletion to finish. The caller is required to call +// enableFileDeletions in order to enable file deletions again. It is ok for +// multiple callers to disable file deletions simultaneously, though they must +// all invoke enableFileDeletions in order for file deletions to be re-enabled +// (there is an internal reference count on file deletion disablement). +// +// d.mu must be held when calling this method. +func (d *DB) disableFileDeletions() { + d.mu.disableFileDeletions++ + d.mu.Unlock() + defer d.mu.Lock() + d.cleanupManager.Wait() +} + +// enableFileDeletions enables previously disabled file deletions. A cleanup job +// is queued if necessary. +// +// d.mu must be held when calling this method. 
+func (d *DB) enableFileDeletions() {
+ if d.mu.disableFileDeletions <= 0 {
+ panic("pebble: file deletion disablement invariant violated")
+ }
+ d.mu.disableFileDeletions--
+ if d.mu.disableFileDeletions > 0 {
+ return
+ }
+ jobID := d.mu.nextJobID
+ d.mu.nextJobID++
+ d.deleteObsoleteFiles(jobID)
+}
+
+type fileInfo struct {
+ fileNum base.DiskFileNum
+ fileSize uint64
+}
+
+// deleteObsoleteFiles enqueues a cleanup job to the cleanup manager, if necessary.
+//
+// d.mu must be held when calling this. The function will release and re-acquire the mutex.
+//
+// Does nothing if file deletions are disabled (see disableFileDeletions). A
+// cleanup job will be scheduled when file deletions are re-enabled.
+func (d *DB) deleteObsoleteFiles(jobID int) {
+ if d.mu.disableFileDeletions > 0 {
+ return
+ }
+
+ var obsoleteLogs []fileInfo
+ for i := range d.mu.log.queue {
+ // NB: d.mu.versions.minUnflushedLogNum is the log number of the earliest
+ // log that has not had its contents flushed to an sstable. We can recycle
+ // the prefix of d.mu.log.queue with log numbers less than
+ // minUnflushedLogNum.
+ if d.mu.log.queue[i].fileNum >= d.mu.versions.minUnflushedLogNum {
+ obsoleteLogs = d.mu.log.queue[:i]
+ d.mu.log.queue = d.mu.log.queue[i:]
+ d.mu.versions.metrics.WAL.Files -= int64(len(obsoleteLogs))
+ break
+ }
+ }
+
+ obsoleteTables := append([]fileInfo(nil), d.mu.versions.obsoleteTables...)
+ d.mu.versions.obsoleteTables = nil
+
+ for _, tbl := range obsoleteTables {
+ delete(d.mu.versions.zombieTables, tbl.fileNum)
+ }
+
+ // Sort the manifests because we want to delete some contiguous prefix
+ // of the older manifests.
+ slices.SortFunc(d.mu.versions.obsoleteManifests, func(a, b fileInfo) int { + return cmp.Compare(a.fileNum, b.fileNum) + }) + + var obsoleteManifests []fileInfo + manifestsToDelete := len(d.mu.versions.obsoleteManifests) - d.opts.NumPrevManifest + if manifestsToDelete > 0 { + obsoleteManifests = d.mu.versions.obsoleteManifests[:manifestsToDelete] + d.mu.versions.obsoleteManifests = d.mu.versions.obsoleteManifests[manifestsToDelete:] + if len(d.mu.versions.obsoleteManifests) == 0 { + d.mu.versions.obsoleteManifests = nil + } + } + + obsoleteOptions := d.mu.versions.obsoleteOptions + d.mu.versions.obsoleteOptions = nil + + // Release d.mu while preparing the cleanup job and possibly waiting. + // Note the unusual order: Unlock and then Lock. + d.mu.Unlock() + defer d.mu.Lock() + + files := [4]struct { + fileType fileType + obsolete []fileInfo + }{ + {fileTypeLog, obsoleteLogs}, + {fileTypeTable, obsoleteTables}, + {fileTypeManifest, obsoleteManifests}, + {fileTypeOptions, obsoleteOptions}, + } + _, noRecycle := d.opts.Cleaner.(base.NeedsFileContents) + filesToDelete := make([]obsoleteFile, 0, len(obsoleteLogs)+len(obsoleteTables)+len(obsoleteManifests)+len(obsoleteOptions)) + for _, f := range files { + // We sort to make the order of deletions deterministic, which is nice for + // tests. 
+ slices.SortFunc(f.obsolete, func(a, b fileInfo) int { + return cmp.Compare(a.fileNum, b.fileNum) + }) + for _, fi := range f.obsolete { + dir := d.dirname + switch f.fileType { + case fileTypeLog: + if !noRecycle && d.logRecycler.add(fi) { + continue + } + dir = d.walDirname + case fileTypeTable: + d.tableCache.evict(fi.fileNum) + } + + filesToDelete = append(filesToDelete, obsoleteFile{ + dir: dir, + fileNum: fi.fileNum, + fileType: f.fileType, + fileSize: fi.fileSize, + }) + } + } + if len(filesToDelete) > 0 { + d.cleanupManager.EnqueueJob(jobID, filesToDelete) + } + if d.opts.private.testingAlwaysWaitForCleanup { + d.cleanupManager.Wait() + } +} + +func (d *DB) maybeScheduleObsoleteTableDeletion() { + d.mu.Lock() + defer d.mu.Unlock() + d.maybeScheduleObsoleteTableDeletionLocked() +} + +func (d *DB) maybeScheduleObsoleteTableDeletionLocked() { + if len(d.mu.versions.obsoleteTables) > 0 { + jobID := d.mu.nextJobID + d.mu.nextJobID++ + d.deleteObsoleteFiles(jobID) + } +} + +func merge(a, b []fileInfo) []fileInfo { + if len(b) == 0 { + return a + } + + a = append(a, b...) + slices.SortFunc(a, func(a, b fileInfo) int { + return cmp.Compare(a.fileNum, b.fileNum) + }) + return slices.CompactFunc(a, func(a, b fileInfo) bool { + return a.fileNum == b.fileNum + }) +} diff --git a/pebble/compaction_iter.go b/pebble/compaction_iter.go new file mode 100644 index 0000000..0fb9e45 --- /dev/null +++ b/pebble/compaction_iter.go @@ -0,0 +1,1473 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package pebble + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "sort" + "strconv" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/bytealloc" + "github.com/cockroachdb/pebble/internal/keyspan" + "github.com/cockroachdb/pebble/internal/rangekey" + "github.com/cockroachdb/redact" +) + +// compactionIter provides a forward-only iterator that encapsulates the logic +// for collapsing entries during compaction. It wraps an internal iterator and +// collapses entries that are no longer necessary because they are shadowed by +// newer entries. The simplest example of this is when the internal iterator +// contains two keys: a.PUT.2 and a.PUT.1. Instead of returning both entries, +// compactionIter collapses the second entry because it is no longer +// necessary. The high-level structure for compactionIter is to iterate over +// its internal iterator and output 1 entry for every user-key. There are four +// complications to this story. +// +// 1. Eliding Deletion Tombstones +// +// Consider the entries a.DEL.2 and a.PUT.1. These entries collapse to +// a.DEL.2. Do we have to output the entry a.DEL.2? Only if a.DEL.2 possibly +// shadows an entry at a lower level. If we're compacting to the base-level in +// the LSM tree then a.DEL.2 is definitely not shadowing an entry at a lower +// level and can be elided. +// +// We can do slightly better than only eliding deletion tombstones at the base +// level by observing that we can elide a deletion tombstone if there are no +// sstables that contain the entry's key. This check is performed by +// elideTombstone. +// +// 2. Merges +// +// The MERGE operation merges the value for an entry with the existing value +// for an entry. The logical value of an entry can be composed of a series of +// merge operations. 
When compactionIter sees a MERGE, it scans forward in its +// internal iterator collapsing MERGE operations for the same key until it +// encounters a SET or DELETE operation. For example, the keys a.MERGE.4, +// a.MERGE.3, a.MERGE.2 will be collapsed to a.MERGE.4 and the values will be +// merged using the specified Merger. +// +// An interesting case here occurs when MERGE is combined with SET. Consider +// the entries a.MERGE.3 and a.SET.2. The collapsed key will be a.SET.3. The +// reason that the kind is changed to SET is because the SET operation acts as +// a barrier preventing further merging. This can be seen better in the +// scenario a.MERGE.3, a.SET.2, a.MERGE.1. The entry a.MERGE.1 may be at lower +// (older) level and not involved in the compaction. If the compaction of +// a.MERGE.3 and a.SET.2 produced a.MERGE.3, a subsequent compaction with +// a.MERGE.1 would merge the values together incorrectly. +// +// 3. Snapshots +// +// Snapshots are lightweight point-in-time views of the DB state. At its core, +// a snapshot is a sequence number along with a guarantee from Pebble that it +// will maintain the view of the database at that sequence number. Part of this +// guarantee is relatively straightforward to achieve. When reading from the +// database Pebble will ignore sequence numbers that are larger than the +// snapshot sequence number. The primary complexity with snapshots occurs +// during compaction: the collapsing of entries that are shadowed by newer +// entries is at odds with the guarantee that Pebble will maintain the view of +// the database at the snapshot sequence number. Rather than collapsing entries +// up to the next user key, compactionIter can only collapse entries up to the +// next snapshot boundary. That is, every snapshot boundary potentially causes +// another entry for the same user-key to be emitted. 
Another way to view this +// is that snapshots define stripes and entries are collapsed within stripes, +// but not across stripes. Consider the following scenario: +// +// a.PUT.9 +// a.DEL.8 +// a.PUT.7 +// a.DEL.6 +// a.PUT.5 +// +// In the absence of snapshots these entries would be collapsed to +// a.PUT.9. What if there is a snapshot at sequence number 7? The entries can +// be divided into two stripes and collapsed within the stripes: +// +// a.PUT.9 a.PUT.9 +// a.DEL.8 ---> +// a.PUT.7 +// -- -- +// a.DEL.6 ---> a.DEL.6 +// a.PUT.5 +// +// All of the rules described earlier still apply, but they are confined to +// operate within a snapshot stripe. Snapshots only affect compaction when the +// snapshot sequence number lies within the range of sequence numbers being +// compacted. In the above example, a snapshot at sequence number 10 or at +// sequence number 5 would not have any effect. +// +// 4. Range Deletions +// +// Range deletions provide the ability to delete all of the keys (and values) +// in a contiguous range. Range deletions are stored indexed by their start +// key. The end key of the range is stored in the value. In order to support +// lookup of the range deletions which overlap with a particular key, the range +// deletion tombstones need to be fragmented whenever they overlap. This +// fragmentation is performed by keyspan.Fragmenter. The fragments are then +// subject to the rules for snapshots. For example, consider the two range +// tombstones [a,e)#1 and [c,g)#2: +// +// 2: c-------g +// 1: a-------e +// +// These tombstones will be fragmented into: +// +// 2: c---e---g +// 1: a---c---e +// +// Do we output the fragment [c,e)#1? Since it is covered by [c-e]#2 the answer +// depends on whether it is in a new snapshot stripe. +// +// In addition to the fragmentation of range tombstones, compaction also needs +// to take the range tombstones into consideration when outputting normal +// keys. 
Just as with point deletions, a range deletion covering an entry can +// cause the entry to be elided. +// +// A note on the stability of keys and values. +// +// The stability guarantees of keys and values returned by the iterator tree +// that backs a compactionIter is nuanced and care must be taken when +// referencing any returned items. +// +// Keys and values returned by exported functions (i.e. First, Next, etc.) have +// lifetimes that fall into two categories: +// +// Lifetime valid for duration of compaction. Range deletion keys and values are +// stable for the duration of the compaction, due to way in which a +// compactionIter is typically constructed (i.e. via (*compaction).newInputIter, +// which wraps the iterator over the range deletion block in a noCloseIter, +// preventing the release of the backing memory until the compaction is +// finished). +// +// Lifetime limited to duration of sstable block liveness. Point keys (SET, DEL, +// etc.) and values must be cloned / copied following the return from the +// exported function, and before a subsequent call to Next advances the iterator +// and mutates the contents of the returned key and value. +type compactionIter struct { + equal Equal + merge Merge + iter internalIterator + err error + // `key.UserKey` is set to `keyBuf` caused by saving `i.iterKey.UserKey` + // and `key.Trailer` is set to `i.iterKey.Trailer`. This is the + // case on return from all public methods -- these methods return `key`. + // Additionally, it is the internal state when the code is moving to the + // next key so it can determine whether the user key has changed from + // the previous key. + key InternalKey + // keyTrailer is updated when `i.key` is updated and holds the key's + // original trailer (eg, before any sequence-number zeroing or changes to + // key kind). 
+ keyTrailer uint64 + value []byte + valueCloser io.Closer + // Temporary buffer used for storing the previous user key in order to + // determine when iteration has advanced to a new user key and thus a new + // snapshot stripe. + keyBuf []byte + // Temporary buffer used for storing the previous value, which may be an + // unsafe, i.iter-owned slice that could be altered when the iterator is + // advanced. + valueBuf []byte + // Is the current entry valid? + valid bool + iterKey *InternalKey + iterValue []byte + iterStripeChange stripeChangeType + // `skip` indicates whether the remaining skippable entries in the current + // snapshot stripe should be skipped or processed. An example of a non- + // skippable entry is a range tombstone as we need to return it from the + // `compactionIter`, even if a key covering its start key has already been + // seen in the same stripe. `skip` has no effect when `pos == iterPosNext`. + // + // TODO(jackson): If we use keyspan.InterleavingIter for range deletions, + // like we do for range keys, the only remaining 'non-skippable' key is + // the invalid key. We should be able to simplify this logic and remove this + // field. + skip bool + // `pos` indicates the iterator position at the top of `Next()`. Its type's + // (`iterPos`) values take on the following meanings in the context of + // `compactionIter`. + // + // - `iterPosCur`: the iterator is at the last key returned. + // - `iterPosNext`: the iterator has already been advanced to the next + // candidate key. For example, this happens when processing merge operands, + // where we advance the iterator all the way into the next stripe or next + // user key to ensure we've seen all mergeable operands. + // - `iterPosPrev`: this is invalid as compactionIter is forward-only. + pos iterPos + // `snapshotPinned` indicates whether the last point key returned by the + // compaction iterator was only returned because an open snapshot prevents + // its elision. 
This field only applies to point keys, and not to range + // deletions or range keys. + // + // For MERGE, it is possible that doing the merge is interrupted even when + // the next point key is in the same stripe. This can happen if the loop in + // mergeNext gets interrupted by sameStripeNonSkippable. + // sameStripeNonSkippable occurs due to RANGEDELs that sort before + // SET/MERGE/DEL with the same seqnum, so the RANGEDEL does not necessarily + // delete the subsequent SET/MERGE/DEL keys. + snapshotPinned bool + // forceObsoleteDueToRangeDel is set to true in a subset of the cases that + // snapshotPinned is true. This value is true when the point is obsolete due + // to a RANGEDEL but could not be deleted due to a snapshot. + // + // NB: it may seem that the additional cases that snapshotPinned captures + // are harmless in that they can also be used to mark a point as obsolete + // (it is merely a duplication of some logic that happens in + // Writer.AddWithForceObsolete), but that is not quite accurate as of this + // writing -- snapshotPinned originated in stats collection and for a + // sequence MERGE, SET, where the MERGE cannot merge with the (older) SET + // due to a snapshot, the snapshotPinned value for the SET is true. + // + // TODO(sumeer,jackson): improve the logic of snapshotPinned and reconsider + // whether we need forceObsoleteDueToRangeDel. + forceObsoleteDueToRangeDel bool + // The index of the snapshot for the current key within the snapshots slice. + curSnapshotIdx int + curSnapshotSeqNum uint64 + // The snapshot sequence numbers that need to be maintained. These sequence + // numbers define the snapshot stripes (see the Snapshots description + // above). The sequence numbers are in ascending order. + snapshots []uint64 + // frontiers holds a heap of user keys that affect compaction behavior when + // they're exceeded. 
Before a new key is returned, the compaction iterator + // advances the frontier, notifying any code that subscribed to be notified + // when a key was reached. The primary use today is within the + // implementation of compactionOutputSplitters in compaction.go. Many of + // these splitters wait for the compaction iterator to call Advance(k) when + // it's returning a new key. If the key that they're waiting for is + // surpassed, these splitters update internal state recording that they + // should request a compaction split next time they're asked in + // [shouldSplitBefore]. + frontiers frontiers + // Reference to the range deletion tombstone fragmenter (e.g., + // `compaction.rangeDelFrag`). + rangeDelFrag *keyspan.Fragmenter + rangeKeyFrag *keyspan.Fragmenter + // The fragmented tombstones. + tombstones []keyspan.Span + // The fragmented range keys. + rangeKeys []keyspan.Span + // Byte allocator for the tombstone keys. + alloc bytealloc.A + allowZeroSeqNum bool + elideTombstone func(key []byte) bool + elideRangeTombstone func(start, end []byte) bool + // The on-disk format major version. This informs the types of keys that + // may be written to disk during a compaction. + formatVersion FormatMajorVersion + stats struct { + // count of DELSIZED keys that were missized. 
+ countMissizedDels uint64 + } +} + +func newCompactionIter( + cmp Compare, + equal Equal, + formatKey base.FormatKey, + merge Merge, + iter internalIterator, + snapshots []uint64, + rangeDelFrag *keyspan.Fragmenter, + rangeKeyFrag *keyspan.Fragmenter, + allowZeroSeqNum bool, + elideTombstone func(key []byte) bool, + elideRangeTombstone func(start, end []byte) bool, + formatVersion FormatMajorVersion, +) *compactionIter { + i := &compactionIter{ + equal: equal, + merge: merge, + iter: iter, + snapshots: snapshots, + frontiers: frontiers{cmp: cmp}, + rangeDelFrag: rangeDelFrag, + rangeKeyFrag: rangeKeyFrag, + allowZeroSeqNum: allowZeroSeqNum, + elideTombstone: elideTombstone, + elideRangeTombstone: elideRangeTombstone, + formatVersion: formatVersion, + } + i.rangeDelFrag.Cmp = cmp + i.rangeDelFrag.Format = formatKey + i.rangeDelFrag.Emit = i.emitRangeDelChunk + i.rangeKeyFrag.Cmp = cmp + i.rangeKeyFrag.Format = formatKey + i.rangeKeyFrag.Emit = i.emitRangeKeyChunk + return i +} + +func (i *compactionIter) First() (*InternalKey, []byte) { + if i.err != nil { + return nil, nil + } + var iterValue LazyValue + i.iterKey, iterValue = i.iter.First() + i.iterValue, _, i.err = iterValue.Value(nil) + if i.err != nil { + return nil, nil + } + if i.iterKey != nil { + i.curSnapshotIdx, i.curSnapshotSeqNum = snapshotIndex(i.iterKey.SeqNum(), i.snapshots) + } + i.pos = iterPosNext + i.iterStripeChange = newStripeNewKey + return i.Next() +} + +func (i *compactionIter) Next() (*InternalKey, []byte) { + if i.err != nil { + return nil, nil + } + + // Close the closer for the current value if one was open. + if i.closeValueCloser() != nil { + return nil, nil + } + + // Prior to this call to `Next()` we are in one of four situations with + // respect to `iterKey` and related state: + // + // - `!skip && pos == iterPosNext`: `iterKey` is already at the next key. + // - `!skip && pos == iterPosCurForward`: We are at the key that has been returned. 
+ // To move forward we advance by one key, even if that lands us in the same + // snapshot stripe. + // - `skip && pos == iterPosCurForward`: We are at the key that has been returned. + // To move forward we skip skippable entries in the stripe. + // - `skip && pos == iterPosNext && i.iterStripeChange == sameStripeNonSkippable`: + // This case may occur when skipping within a snapshot stripe and we + // encounter either: + // a) an invalid key kind; The previous call will have returned + // whatever key it was processing and deferred handling of the + // invalid key to this invocation of Next(). We're responsible for + // ignoring skip=true and falling into the invalid key kind case + // down below. + // b) an interleaved range delete; This is a wart of the current code + // structure. While skipping within a snapshot stripe, a range + // delete interleaved at its start key and sequence number + // interrupts the sequence of point keys. After we return the range + // delete to the caller, we need to pick up skipping at where we + // left off, so we preserve skip=true. + // TODO(jackson): This last case is confusing and can be removed if we + // interleave range deletions at the maximal sequence number using the + // keyspan interleaving iterator. This is the treatment given to range + // keys today. + if i.pos == iterPosCurForward { + if i.skip { + i.skipInStripe() + } else { + i.nextInStripe() + } + } else if i.skip { + if i.iterStripeChange != sameStripeNonSkippable { + panic(errors.AssertionFailedf("compaction iterator has skip=true, but iterator is at iterPosNext")) + } + } + + i.pos = iterPosCurForward + i.valid = false + + for i.iterKey != nil { + // If we entered a new snapshot stripe with the same key, any key we + // return on this iteration is only returned because the open snapshot + // prevented it from being elided or merged with the key returned for + // the previous stripe. 
Mark it as pinned so that the compaction loop + // can correctly populate output tables' pinned statistics. We might + // also set snapshotPinned=true down below if we observe that the key is + // deleted by a range deletion in a higher stripe or that this key is a + // tombstone that could be elided if only it were in the last snapshot + // stripe. + i.snapshotPinned = i.iterStripeChange == newStripeSameKey + + if i.iterKey.Kind() == InternalKeyKindRangeDelete || rangekey.IsRangeKey(i.iterKey.Kind()) { + // Return the span so the compaction can use it for file truncation and add + // it to the relevant fragmenter. We do not set `skip` to true before + // returning as there may be a forthcoming point key with the same user key + // and sequence number. Such a point key must be visible (i.e., not skipped + // over) since we promise point keys are not deleted by range tombstones at + // the same sequence number. + // + // Although, note that `skip` may already be true before reaching here + // due to an earlier key in the stripe. Then it is fine to leave it set + // to true, as the earlier key must have had a higher sequence number. + // + // NOTE: there is a subtle invariant violation here in that calling + // saveKey and returning a reference to the temporary slice violates + // the stability guarantee for range deletion keys. A potential + // mediation could return the original iterKey and iterValue + // directly, as the backing memory is guaranteed to be stable until + // the compaction completes. The violation here is only minor in + // that the caller immediately clones the range deletion InternalKey + // when passing the key to the deletion fragmenter (see the + // call-site in compaction.go). + // TODO(travers): address this violation by removing the call to + // saveKey and instead return the original iterKey and iterValue. + // This goes against the comment on i.key in the struct, and + // therefore warrants some investigation. 
+ i.saveKey() + // TODO(jackson): Handle tracking pinned statistics for range keys + // and range deletions. This would require updating + // emitRangeDelChunk and rangeKeyCompactionTransform to update + // statistics when they apply their own snapshot striping logic. + i.snapshotPinned = false + i.value = i.iterValue + i.valid = true + return &i.key, i.value + } + + if cover := i.rangeDelFrag.Covers(*i.iterKey, i.curSnapshotSeqNum); cover == keyspan.CoversVisibly { + // A pending range deletion deletes this key. Skip it. + i.saveKey() + i.skipInStripe() + continue + } else if cover == keyspan.CoversInvisibly { + // i.iterKey would be deleted by a range deletion if there weren't + // any open snapshots. Mark it as pinned. + // + // NB: there are multiple places in this file where we call + // i.rangeDelFrag.Covers and this is the only one where we are writing + // to i.snapshotPinned. Those other cases occur in mergeNext where the + // caller is deciding whether the value should be merged or not, and the + // key is in the same snapshot stripe. Hence, snapshotPinned is by + // definition false in those cases. + i.snapshotPinned = true + i.forceObsoleteDueToRangeDel = true + } else { + i.forceObsoleteDueToRangeDel = false + } + + switch i.iterKey.Kind() { + case InternalKeyKindDelete, InternalKeyKindSingleDelete, InternalKeyKindDeleteSized: + if i.elideTombstone(i.iterKey.UserKey) { + if i.curSnapshotIdx == 0 { + // If we're at the last snapshot stripe and the tombstone + // can be elided skip skippable keys in the same stripe. 
+ i.saveKey() + i.skipInStripe() + if i.iterStripeChange == newStripeSameKey { + panic(errors.AssertionFailedf("pebble: skipInStripe in last stripe found a new stripe within the same key")) + } + if !i.skip && i.iterStripeChange != newStripeNewKey { + panic(errors.AssertionFailedf("pebble: skipInStripe in last stripe disabled skip without advancing to new key")) + } + continue + } else { + // We're not at the last snapshot stripe, so the tombstone + // can NOT yet be elided. Mark it as pinned, so that it's + // included in table statistics appropriately. + i.snapshotPinned = true + } + } + + switch i.iterKey.Kind() { + case InternalKeyKindDelete: + i.saveKey() + i.value = i.iterValue + i.valid = true + i.skip = true + return &i.key, i.value + + case InternalKeyKindDeleteSized: + // We may skip subsequent keys because of this tombstone. Scan + // ahead to see just how much data this tombstone drops and if + // the tombstone's value should be updated accordingly. + return i.deleteSizedNext() + + case InternalKeyKindSingleDelete: + if i.singleDeleteNext() { + return &i.key, i.value + } else if i.err != nil { + return nil, nil + } + continue + + default: + panic(errors.AssertionFailedf( + "unexpected kind %s", redact.SafeString(i.iterKey.Kind().String()))) + } + + case InternalKeyKindSet, InternalKeyKindSetWithDelete: + // The key we emit for this entry is a function of the current key + // kind, and whether this entry is followed by a DEL/SINGLEDEL + // entry. setNext() does the work to move the iterator forward, + // preserving the original value, and potentially mutating the key + // kind. + i.setNext() + if i.err != nil { + return nil, nil + } + return &i.key, i.value + + case InternalKeyKindMerge: + // Record the snapshot index before mergeNext as merging + // advances the iterator, adjusting curSnapshotIdx. 
+ origSnapshotIdx := i.curSnapshotIdx + var valueMerger ValueMerger + valueMerger, i.err = i.merge(i.iterKey.UserKey, i.iterValue) + var change stripeChangeType + if i.err == nil { + change = i.mergeNext(valueMerger) + } + var needDelete bool + if i.err == nil { + // includesBase is true whenever we've transformed the MERGE record + // into a SET. + var includesBase bool + switch i.key.Kind() { + case InternalKeyKindSet, InternalKeyKindSetWithDelete: + includesBase = true + case InternalKeyKindMerge: + default: + panic(errors.AssertionFailedf( + "unexpected kind %s", redact.SafeString(i.key.Kind().String()))) + } + i.value, needDelete, i.valueCloser, i.err = finishValueMerger(valueMerger, includesBase) + } + if i.err == nil { + if needDelete { + i.valid = false + if i.closeValueCloser() != nil { + return nil, nil + } + continue + } + // A non-skippable entry does not necessarily cover later merge + // operands, so we must not zero the current merge result's seqnum. + // + // For example, suppose the forthcoming two keys are a range + // tombstone, `[a, b)#3`, and a merge operand, `a#3`. Recall that + // range tombstones do not cover point keys at the same seqnum, so + // `a#3` is not deleted. The range tombstone will be seen first due + // to its larger value type. Since it is a non-skippable key, the + // current merge will not include `a#3`. If we zeroed the current + // merge result's seqnum, then it would conflict with the upcoming + // merge including `a#3`, whose seqnum will also be zeroed. + if change != sameStripeNonSkippable { + i.maybeZeroSeqnum(origSnapshotIdx) + } + return &i.key, i.value + } + if i.err != nil { + i.valid = false + // TODO(sumeer): why is MarkCorruptionError only being called for + // MERGE? 
+ i.err = base.MarkCorruptionError(i.err) + } + return nil, nil + + default: + i.err = base.CorruptionErrorf("invalid internal key kind: %d", errors.Safe(i.iterKey.Kind())) + i.valid = false + return nil, nil + } + } + + return nil, nil +} + +func (i *compactionIter) closeValueCloser() error { + if i.valueCloser == nil { + return nil + } + + i.err = i.valueCloser.Close() + i.valueCloser = nil + if i.err != nil { + i.valid = false + } + return i.err +} + +// snapshotIndex returns the index of the first sequence number in snapshots +// which is greater than or equal to seq. +func snapshotIndex(seq uint64, snapshots []uint64) (int, uint64) { + index := sort.Search(len(snapshots), func(i int) bool { + return snapshots[i] > seq + }) + if index >= len(snapshots) { + return index, InternalKeySeqNumMax + } + return index, snapshots[index] +} + +// skipInStripe skips over skippable keys in the same stripe and user key. It +// may set i.err, in which case i.iterKey will be nil. +func (i *compactionIter) skipInStripe() { + i.skip = true + for i.nextInStripe() == sameStripeSkippable { + if i.err != nil { + panic(i.err) + } + } + // Reset skip if we landed outside the original stripe. Otherwise, we landed + // in the same stripe on a non-skippable key. In that case we should preserve + // `i.skip == true` such that later keys in the stripe will continue to be + // skipped. + if i.iterStripeChange == newStripeNewKey || i.iterStripeChange == newStripeSameKey { + i.skip = false + } +} + +func (i *compactionIter) iterNext() bool { + var iterValue LazyValue + i.iterKey, iterValue = i.iter.Next() + i.iterValue, _, i.err = iterValue.Value(nil) + if i.err != nil { + i.iterKey = nil + } + return i.iterKey != nil +} + +// stripeChangeType indicates how the snapshot stripe changed relative to the +// previous key. If no change, it also indicates whether the current entry is +// skippable. 
If the snapshot stripe changed, it also indicates whether the new +// stripe was entered because the iterator progressed onto an entirely new key +// or entered a new stripe within the same key. +type stripeChangeType int + +const ( + newStripeNewKey stripeChangeType = iota + newStripeSameKey + sameStripeSkippable + sameStripeNonSkippable +) + +// nextInStripe advances the iterator and returns one of the above const ints +// indicating how its state changed. +// +// Calls to nextInStripe must be preceded by a call to saveKey to retain a +// temporary reference to the original key, so that forward iteration can +// proceed with a reference to the original key. Care should be taken to avoid +// overwriting or mutating the saved key or value before they have been returned +// to the caller of the exported function (i.e. the caller of Next, First, etc.) +// +// nextInStripe may set i.err, in which case the return value will be +// newStripeNewKey, and i.iterKey will be nil. +func (i *compactionIter) nextInStripe() stripeChangeType { + i.iterStripeChange = i.nextInStripeHelper() + return i.iterStripeChange +} + +// nextInStripeHelper is an internal helper for nextInStripe; callers should use +// nextInStripe and not call nextInStripeHelper. +func (i *compactionIter) nextInStripeHelper() stripeChangeType { + if !i.iterNext() { + return newStripeNewKey + } + key := i.iterKey + + if !i.equal(i.key.UserKey, key.UserKey) { + i.curSnapshotIdx, i.curSnapshotSeqNum = snapshotIndex(key.SeqNum(), i.snapshots) + return newStripeNewKey + } + + // If i.key and key have the same user key, then + // 1. i.key must not have had a zero sequence number (or it would've been the last + // key with its user key). + // 2. i.key must have a strictly larger sequence number + // There's an exception in that either key may be a range delete. Range + // deletes may share a sequence number with a point key if the keys were + // ingested together.
Range keys may also share the sequence number if they + // were ingested, but range keys are interleaved into the compaction + // iterator's input iterator at the maximal sequence number so their + // original sequence number will not be observed here. + if prevSeqNum := base.SeqNumFromTrailer(i.keyTrailer); (prevSeqNum == 0 || prevSeqNum <= key.SeqNum()) && + i.key.Kind() != InternalKeyKindRangeDelete && key.Kind() != InternalKeyKindRangeDelete { + prevKey := i.key + prevKey.Trailer = i.keyTrailer + panic(errors.AssertionFailedf("pebble: invariant violation: %s and %s out of order", prevKey, key)) + } + + origSnapshotIdx := i.curSnapshotIdx + i.curSnapshotIdx, i.curSnapshotSeqNum = snapshotIndex(key.SeqNum(), i.snapshots) + switch key.Kind() { + case InternalKeyKindRangeDelete: + // Range tombstones need to be exposed by the compactionIter to the upper level + // `compaction` object, so return them regardless of whether they are in the same + // snapshot stripe. + if i.curSnapshotIdx == origSnapshotIdx { + return sameStripeNonSkippable + } + return newStripeSameKey + case InternalKeyKindRangeKeySet, InternalKeyKindRangeKeyUnset, InternalKeyKindRangeKeyDelete: + // Range keys are interleaved at the max sequence number for a given user + // key, so we should not see any more range keys in this stripe. 
+ panic("unreachable") + case InternalKeyKindInvalid: + if i.curSnapshotIdx == origSnapshotIdx { + return sameStripeNonSkippable + } + return newStripeSameKey + case InternalKeyKindDelete, InternalKeyKindSet, InternalKeyKindMerge, InternalKeyKindSingleDelete, + InternalKeyKindSetWithDelete, InternalKeyKindDeleteSized: + // Fall through + default: + i.iterKey = nil + i.err = base.CorruptionErrorf("invalid internal key kind: %d", errors.Safe(i.iterKey.Kind())) + i.valid = false + return newStripeNewKey + } + if i.curSnapshotIdx == origSnapshotIdx { + return sameStripeSkippable + } + return newStripeSameKey +} + +func (i *compactionIter) setNext() { + // Save the current key. + i.saveKey() + i.value = i.iterValue + i.valid = true + i.maybeZeroSeqnum(i.curSnapshotIdx) + + // There are two cases where we can early return and skip the remaining + // records in the stripe: + // - If the DB does not SETWITHDEL. + // - If this key is already a SETWITHDEL. + if i.formatVersion < FormatSetWithDelete || + i.iterKey.Kind() == InternalKeyKindSetWithDelete { + i.skip = true + return + } + + // We are iterating forward. Save the current value. + i.valueBuf = append(i.valueBuf[:0], i.iterValue...) + i.value = i.valueBuf + + // Else, we continue to loop through entries in the stripe looking for a + // DEL. Note that we may stop *before* encountering a DEL, if one exists. + for { + switch i.nextInStripe() { + case newStripeNewKey, newStripeSameKey: + i.pos = iterPosNext + return + case sameStripeNonSkippable: + i.pos = iterPosNext + // We iterated onto a key that we cannot skip. We can + // conservatively transform the original SET into a SETWITHDEL + // as an indication that there *may* still be a DEL/SINGLEDEL + // under this SET, even if we did not actually encounter one. 
+ // + // This is safe to do, as: + // + // - in the case that there *is not* actually a DEL/SINGLEDEL + // under this entry, any SINGLEDEL above this now-transformed + // SETWITHDEL will become a DEL when the two encounter in a + // compaction. The DEL will eventually be elided in a + // subsequent compaction. The cost for ensuring correctness is + // that this entry is kept around for an additional compaction + // cycle(s). + // + // - in the case there *is* indeed a DEL/SINGLEDEL under us + // (but in a different stripe or sstable), then we will have + // already done the work to transform the SET into a + // SETWITHDEL, and we will skip any additional iteration when + // this entry is encountered again in a subsequent compaction. + // + // Ideally, this codepath would be smart enough to handle the + // case of SET <- RANGEDEL <- ... <- DEL/SINGLEDEL <- .... + // This requires preserving any RANGEDEL entries we encounter + // along the way, then emitting the original (possibly + // transformed) key, followed by the RANGEDELs. This requires + // a sizable refactoring of the existing code, as nextInStripe + // currently returns a sameStripeNonSkippable when it + // encounters a RANGEDEL. + // TODO(travers): optimize to handle the RANGEDEL case if it + // turns out to be a performance problem. + i.key.SetKind(InternalKeyKindSetWithDelete) + + // By setting i.skip=true, we are saying that after the + // non-skippable key is emitted (which is likely a RANGEDEL), + // the remaining point keys that share the same user key as this + // saved key should be skipped. + i.skip = true + return + case sameStripeSkippable: + // We're still in the same stripe. If this is a + // DEL/SINGLEDEL/DELSIZED, we stop looking and emit a SETWITHDEL. + // Subsequent keys are eligible for skipping. 
+ switch i.iterKey.Kind() { + case InternalKeyKindDelete, InternalKeyKindSingleDelete, InternalKeyKindDeleteSized: + i.key.SetKind(InternalKeyKindSetWithDelete) + i.skip = true + return + case InternalKeyKindSet, InternalKeyKindMerge, InternalKeyKindSetWithDelete: + // Do nothing + default: + i.err = base.CorruptionErrorf("invalid internal key kind: %d", errors.Safe(i.iterKey.Kind())) + i.valid = false + } + default: + panic("pebble: unexpected stripeChangeType: " + strconv.Itoa(int(i.iterStripeChange))) + } + } +} + +func (i *compactionIter) mergeNext(valueMerger ValueMerger) stripeChangeType { + // Save the current key. + i.saveKey() + i.valid = true + + // Loop looking for older values in the current snapshot stripe and merge + // them. + for { + if i.nextInStripe() != sameStripeSkippable { + i.pos = iterPosNext + return i.iterStripeChange + } + if i.err != nil { + panic(i.err) + } + key := i.iterKey + switch key.Kind() { + case InternalKeyKindDelete, InternalKeyKindSingleDelete, InternalKeyKindDeleteSized: + // We've hit a deletion tombstone. Return everything up to this point and + // then skip entries until the next snapshot stripe. We change the kind + // of the result key to a Set so that it shadows keys in lower + // levels. That is, MERGE+DEL -> SETWITHDEL. + // + // We do the same for SingleDelete since SingleDelete is only + // permitted (with deterministic behavior) for keys that have been + // set once since the last SingleDelete/Delete, so everything + // older is acceptable to shadow. Note that this is slightly + // different from singleDeleteNext() which implements stricter + // semantics in terms of applying the SingleDelete to the single + // next Set. But those stricter semantics are not observable to + // the end-user since Iterator interprets SingleDelete as Delete. 
+ // We could do something more complicated here and consume only a + // single Set, and then merge in any following Sets, but that is + // complicated wrt code and unnecessary given the narrow permitted + // use of SingleDelete. + i.key.SetKind(InternalKeyKindSetWithDelete) + i.skip = true + return sameStripeSkippable + + case InternalKeyKindSet, InternalKeyKindSetWithDelete: + if i.rangeDelFrag.Covers(*key, i.curSnapshotSeqNum) == keyspan.CoversVisibly { + // We change the kind of the result key to a Set so that it shadows + // keys in lower levels. That is, MERGE+RANGEDEL -> SET. This isn't + // strictly necessary, but provides consistency with the behavior of + // MERGE+DEL. + i.key.SetKind(InternalKeyKindSet) + i.skip = true + return sameStripeSkippable + } + + // We've hit a Set or SetWithDel value. Merge with the existing + // value and return. We change the kind of the resulting key to a + // Set so that it shadows keys in lower levels. That is: + // MERGE + (SET*) -> SET. + i.err = valueMerger.MergeOlder(i.iterValue) + if i.err != nil { + i.valid = false + return sameStripeSkippable + } + i.key.SetKind(InternalKeyKindSet) + i.skip = true + return sameStripeSkippable + + case InternalKeyKindMerge: + if i.rangeDelFrag.Covers(*key, i.curSnapshotSeqNum) == keyspan.CoversVisibly { + // We change the kind of the result key to a Set so that it shadows + // keys in lower levels. That is, MERGE+RANGEDEL -> SET. This isn't + // strictly necessary, but provides consistency with the behavior of + // MERGE+DEL. + i.key.SetKind(InternalKeyKindSet) + i.skip = true + return sameStripeSkippable + } + + // We've hit another Merge value. Merge with the existing value and + // continue looping. 
+ i.err = valueMerger.MergeOlder(i.iterValue) + if i.err != nil { + i.valid = false + return sameStripeSkippable + } + + default: + i.err = base.CorruptionErrorf("invalid internal key kind: %d", errors.Safe(i.iterKey.Kind())) + i.valid = false + return sameStripeSkippable + } + } +} + +// singleDeleteNext processes a SingleDelete point tombstone. A SingleDelete, or +// SINGLEDEL, is unique in that it deletes exactly 1 internal key. It's a +// performance optimization when the client knows a user key has not been +// overwritten, allowing the elision of the tombstone earlier, avoiding write +// amplification. +// +// singleDeleteNext returns a boolean indicating whether or not the caller +// should yield the SingleDelete key to the consumer of the compactionIter. If +// singleDeleteNext returns false, the caller may consume/elide the +// SingleDelete. +func (i *compactionIter) singleDeleteNext() bool { + // Save the current key. + i.saveKey() + i.value = i.iterValue + i.valid = true + + // Loop until finds a key to be passed to the next level. + for { + // If we find a key that can't be skipped, return true so that the + // caller yields the SingleDelete to the caller. + if i.nextInStripe() != sameStripeSkippable { + i.pos = iterPosNext + return i.err == nil + } + if i.err != nil { + panic(i.err) + } + key := i.iterKey + switch key.Kind() { + case InternalKeyKindDelete, InternalKeyKindMerge, InternalKeyKindSetWithDelete, InternalKeyKindDeleteSized: + // We've hit a Delete, DeleteSized, Merge, SetWithDelete, transform + // the SingleDelete into a full Delete. + i.key.SetKind(InternalKeyKindDelete) + i.skip = true + return true + + case InternalKeyKindSet: + // This SingleDelete deletes the Set, and we can now elide the + // SingleDel as well. We advance past the Set and return false to + // indicate to the main compaction loop that we should NOT yield the + // current SingleDel key to the compaction loop. 
+ i.nextInStripe() + // TODO(jackson): We could assert that nextInStripe either a) + // stepped onto a new key, or b) stepped on to a Delete, DeleteSized + // or SingleDel key. This would detect improper uses of SingleDel, + // but only when all three internal keys meet in the same compaction + // which is not likely. + i.valid = false + return false + + case InternalKeyKindSingleDelete: + // Two single deletes met in a compaction. With proper deterministic + // use of SingleDelete, this should never happen. The expectation is + // that there's exactly 1 set beneath a single delete. Currently, we + // opt to skip it. + // TODO(jackson): Should we make this an error? This would also + // allow us to simplify the code a bit by removing the for loop. + continue + + default: + i.err = base.CorruptionErrorf("invalid internal key kind: %d", errors.Safe(i.iterKey.Kind())) + i.valid = false + return false + } + } +} + +// deleteSizedNext processes a DELSIZED point tombstone. Unlike ordinary DELs, +// these tombstones carry a value that's a varint indicating the size of the +// entry (len(key)+len(value)) that the tombstone is expected to delete. +// +// When a deleteSizedNext is encountered, we skip ahead to see which keys, if +// any, are elided as a result of the tombstone. +func (i *compactionIter) deleteSizedNext() (*base.InternalKey, []byte) { + i.saveKey() + i.valid = true + i.skip = true + + // The DELSIZED tombstone may have no value at all. This happens when the + // tombstone has already deleted the key that the user originally predicted. + // In this case, we still peek forward in case there's another DELSIZED key + // with a lower sequence number, in which case we'll adopt its value. + if len(i.iterValue) == 0 { + i.value = i.valueBuf[:0] + } else { + i.valueBuf = append(i.valueBuf[:0], i.iterValue...) + i.value = i.valueBuf + } + + // Loop through all the keys within this stripe that are skippable. 
+ i.pos = iterPosNext + for i.nextInStripe() == sameStripeSkippable { + if i.err != nil { + panic(i.err) + } + switch i.iterKey.Kind() { + case InternalKeyKindDelete, InternalKeyKindDeleteSized, InternalKeyKindSingleDelete: + // We encountered a tombstone (DEL, or DELSIZED) that's deleted by + // the original DELSIZED tombstone. This can happen in two cases: + // + // (1) These tombstones were intended to delete two distinct values, + // and this DELSIZED has already dropped the relevant key. For + // example: + // + // a.DELSIZED.9 a.SET.7 a.DELSIZED.5 a.SET.4 + // + // If a.DELSIZED.9 has already deleted a.SET.7, its size has + // already been zeroed out. In this case, we want to adopt the + // value of the DELSIZED with the lower sequence number, in + // case the a.SET.4 key has not yet been elided. + // + // (2) This DELSIZED was missized. The user thought they were + // deleting a key with this user key, but this user key had + // already been deleted. + // + // We can differentiate these two cases by examining the length of + // the DELSIZED's value. A DELSIZED's value holds the size of both + // the user key and value that it intends to delete. For any user + // key with a length > 1, a DELSIZED that has not deleted a key must + // have a value with a length > 1. + // + // We treat both cases the same functionally, adopting the identity + // of the lower-sequence numbered tombstone. However in the second + // case, we also increment the stat counting missized tombstones. + if len(i.value) > 0 { + // The original DELSIZED key was missized. The key that the user + // thought they were deleting does not exist. + i.stats.countMissizedDels++ + } + i.valueBuf = append(i.valueBuf[:0], i.iterValue...) + i.value = i.valueBuf + if i.iterKey.Kind() != InternalKeyKindDeleteSized { + // Convert the DELSIZED to a DEL—The DEL/SINGLEDEL we're eliding + // may not have deleted the key(s) it was intended to yet. 
The + // ordinary DEL compaction heuristics are better suited at that, + // plus we don't want to count it as a missized DEL. We early + // exit in this case, after skipping the remainder of the + // snapshot stripe. + i.key.SetKind(InternalKeyKindDelete) + // NB: We skipInStripe now, rather than returning leaving + // i.skip=true and returning early, because Next() requires + // that i.skip=true only if i.iterPos = iterPosCurForward. + // + // Ignore any error caused by skipInStripe since it does not affect + // the key/value being returned here, and the next call to Next() will + // expose it. + i.skipInStripe() + return &i.key, i.value + } + // Continue, in case we uncover another DELSIZED or a key this + // DELSIZED deletes. + + case InternalKeyKindSet, InternalKeyKindMerge, InternalKeyKindSetWithDelete: + // If the DELSIZED is value-less, it already deleted the key that it + // was intended to delete. This is possible with a sequence like: + // + // DELSIZED.8 SET.7 SET.3 + // + // The DELSIZED only describes the size of the SET.7, which in this + // case has already been elided. We don't count it as a missizing, + // instead converting the DELSIZED to a DEL. Skip the remainder of + // the snapshot stripe and return. + if len(i.value) == 0 { + i.key.SetKind(InternalKeyKindDelete) + // NB: We skipInStripe now, rather than returning leaving + // i.skip=true and returning early, because Next() requires + // that i.skip=true only if i.iterPos = iterPosCurForward. + // + // Ignore any error caused by skipInStripe since it does not affect + // the key/value being returned here, and the next call to Next() will + // expose it. + i.skipInStripe() + return &i.key, i.value + } + // The deleted key is not a DEL, DELSIZED, and the DELSIZED in i.key + // has a positive size. 
+			expectedSize, n := binary.Uvarint(i.value)
+			if n != len(i.value) {
+				i.err = base.CorruptionErrorf("DELSIZED holds invalid value: %x", errors.Safe(i.value))
+				i.valid = false
+				return nil, nil
+			}
+			elidedSize := uint64(len(i.iterKey.UserKey)) + uint64(len(i.iterValue))
+			if elidedSize != expectedSize {
+				// The original DELSIZED key was missized. It's unclear what to
+				// do. The user-provided size was wrong, so it's unlikely to be
+				// accurate or meaningful. We could:
+				//
+				//   1. return the DELSIZED with the original user-provided size unmodified
+				//   2. return the DELSIZED with a zeroed size to reflect that a key was
+				//      elided, even if it wasn't the anticipated size.
+				//   3. subtract the elided size from the estimate and re-encode.
+				//   4. convert the DELSIZED into a value-less DEL, so that
+				//      ordinary DEL heuristics apply.
+				//
+				// We opt for (4) under the rationale that we can't rely on the
+				// user-provided size for accuracy, so ordinary DEL heuristics
+				// are safer.
+				i.stats.countMissizedDels++
+				i.key.SetKind(InternalKeyKindDelete)
+				i.value = i.valueBuf[:0]
+				// NB: We skipInStripe now, rather than returning leaving
+				// i.skip=true and returning early, because Next() requires
+				// that i.skip=true only if i.iterPos = iterPosCurForward.
+				//
+				// Ignore any error caused by skipInStripe since it does not affect
+				// the key/value being returned here, and the next call to Next() will
+				// expose it.
+				i.skipInStripe()
+				return &i.key, i.value
+			}
+			// NB: We remove the value regardless of whether the key was sized
+			// appropriately. The size encoded is 'consumed' the first time it
+			// meets a key that it deletes.
+			i.value = i.valueBuf[:0]
+
+		default:
+			i.err = base.CorruptionErrorf("invalid internal key kind: %d", errors.Safe(i.iterKey.Kind()))
+			i.valid = false
+			return nil, nil
+		}
+	}
+	// Reset skip if we landed outside the original stripe. Otherwise, we landed
+	// in the same stripe on a non-skippable key. 
In that case we should preserve + // `i.skip == true` such that later keys in the stripe will continue to be + // skipped. + if i.iterStripeChange == newStripeNewKey || i.iterStripeChange == newStripeSameKey { + i.skip = false + } + if i.err != nil { + return nil, nil + } + return &i.key, i.value +} + +func (i *compactionIter) saveKey() { + i.keyBuf = append(i.keyBuf[:0], i.iterKey.UserKey...) + i.key.UserKey = i.keyBuf + i.key.Trailer = i.iterKey.Trailer + i.keyTrailer = i.iterKey.Trailer + i.frontiers.Advance(i.key.UserKey) +} + +func (i *compactionIter) cloneKey(key []byte) []byte { + i.alloc, key = i.alloc.Copy(key) + return key +} + +func (i *compactionIter) Key() InternalKey { + return i.key +} + +func (i *compactionIter) Value() []byte { + return i.value +} + +func (i *compactionIter) Valid() bool { + return i.valid +} + +func (i *compactionIter) Error() error { + return i.err +} + +func (i *compactionIter) Close() error { + err := i.iter.Close() + if i.err == nil { + i.err = err + } + + // Close the closer for the current value if one was open. + if i.valueCloser != nil { + i.err = firstError(i.err, i.valueCloser.Close()) + i.valueCloser = nil + } + + return i.err +} + +// Tombstones returns a list of pending range tombstones in the fragmenter +// up to the specified key, or all pending range tombstones if key = nil. +func (i *compactionIter) Tombstones(key []byte) []keyspan.Span { + if key == nil { + i.rangeDelFrag.Finish() + } else { + // The specified end key is exclusive; no versions of the specified + // user key (including range tombstones covering that key) should + // be flushed yet. + i.rangeDelFrag.TruncateAndFlushTo(key) + } + tombstones := i.tombstones + i.tombstones = nil + return tombstones +} + +// RangeKeys returns a list of pending fragmented range keys up to the specified +// key, or all pending range keys if key = nil. 
+func (i *compactionIter) RangeKeys(key []byte) []keyspan.Span { + if key == nil { + i.rangeKeyFrag.Finish() + } else { + // The specified end key is exclusive; no versions of the specified + // user key (including range tombstones covering that key) should + // be flushed yet. + i.rangeKeyFrag.TruncateAndFlushTo(key) + } + rangeKeys := i.rangeKeys + i.rangeKeys = nil + return rangeKeys +} + +func (i *compactionIter) emitRangeDelChunk(fragmented keyspan.Span) { + // Apply the snapshot stripe rules, keeping only the latest tombstone for + // each snapshot stripe. + currentIdx := -1 + keys := fragmented.Keys[:0] + for _, k := range fragmented.Keys { + idx, _ := snapshotIndex(k.SeqNum(), i.snapshots) + if currentIdx == idx { + continue + } + if idx == 0 && i.elideRangeTombstone(fragmented.Start, fragmented.End) { + // This is the last snapshot stripe and the range tombstone + // can be elided. + break + } + + keys = append(keys, k) + if idx == 0 { + // This is the last snapshot stripe. + break + } + currentIdx = idx + } + if len(keys) > 0 { + i.tombstones = append(i.tombstones, keyspan.Span{ + Start: fragmented.Start, + End: fragmented.End, + Keys: keys, + }) + } +} + +func (i *compactionIter) emitRangeKeyChunk(fragmented keyspan.Span) { + // Elision of snapshot stripes happens in rangeKeyCompactionTransform, so no need to + // do that here. + if len(fragmented.Keys) > 0 { + i.rangeKeys = append(i.rangeKeys, fragmented) + } +} + +// maybeZeroSeqnum attempts to set the seqnum for the current key to 0. Doing +// so improves compression and enables an optimization during forward iteration +// to skip some key comparisons. The seqnum for an entry can be zeroed if the +// entry is on the bottom snapshot stripe and on the bottom level of the LSM. +func (i *compactionIter) maybeZeroSeqnum(snapshotIdx int) { + if !i.allowZeroSeqNum { + // TODO(peter): allowZeroSeqNum applies to the entire compaction. 
We could
+		// make the determination on a key by key basis, similar to what is done
+		// for elideTombstone. Need to add a benchmark for compactionIter to verify
+		// that isn't too expensive.
+		return
+	}
+	if snapshotIdx > 0 {
+		// This is not the last snapshot
+		return
+	}
+	i.key.SetSeqNum(base.SeqNumZero)
+}
+
+// A frontier is used to monitor a compaction's progression across the user
+// keyspace.
+//
+// A frontier holds a user key boundary that it's concerned with in its `key`
+// field. If/when the compaction iterator returns an InternalKey with a user key
+// _k_ such that k ≥ frontier.key, the compaction iterator invokes the
+// frontier's `reached` function, passing _k_ as its argument.
+//
+// The `reached` function returns a new value to use as the key. If `reached`
+// returns nil, the frontier is forgotten and its `reached` method will not be
+// invoked again, unless the user calls [Update] to set a new key.
+//
+// A frontier's key may be updated outside the context of a `reached`
+// invocation at any time, through its Update method.
+type frontier struct {
+	// container points to the containing *frontiers that was passed to Init
+	// when the frontier was initialized.
+	container *frontiers
+
+	// key holds the frontier's current key. If nil, this frontier is inactive
+	// and its reached func will not be invoked. The value of this key may only
+	// be updated by the `frontiers` type, or the Update method.
+	key []byte
+
+	// reached is invoked to inform a frontier that its key has been reached.
+	// It's invoked with the user key that reached the limit. The `key` argument
+	// is guaranteed to be ≥ the frontier's key.
+	//
+	// After reached is invoked, the frontier's key is updated to the return
+	// value of `reached`. Nota bene, the frontier is permitted to update its
+	// key to a user key ≤ the argument `key`. 
+ // + // If a frontier is set to key k1, and reached(k2) is invoked (k2 ≥ k1), the + // frontier will receive reached(k2) calls until it returns nil or a key + // `k3` such that k2 < k3. This property is useful for frontiers that use + // `reached` invocations to drive iteration through collections of keys that + // may contain multiple keys that are both < k2 and ≥ k1. + reached func(key []byte) (next []byte) +} + +// Init initializes the frontier with the provided key and reached callback. +// The frontier is attached to the provided *frontiers and the provided reached +// func will be invoked when the *frontiers is advanced to a key ≥ this +// frontier's key. +func (f *frontier) Init( + frontiers *frontiers, initialKey []byte, reached func(key []byte) (next []byte), +) { + *f = frontier{ + container: frontiers, + key: initialKey, + reached: reached, + } + if initialKey != nil { + f.container.push(f) + } +} + +// String implements fmt.Stringer. +func (f *frontier) String() string { + return string(f.key) +} + +// Update replaces the existing frontier's key with the provided key. The +// frontier's reached func will be invoked when the new key is reached. +func (f *frontier) Update(key []byte) { + c := f.container + prevKeyIsNil := f.key == nil + f.key = key + if prevKeyIsNil { + if key != nil { + c.push(f) + } + return + } + + // Find the frontier within the heap (it must exist within the heap because + // f.key was != nil). If the frontier key is now nil, remove it from the + // heap. Otherwise, fix up its position. + for i := 0; i < len(c.items); i++ { + if c.items[i] == f { + if key != nil { + c.fix(i) + } else { + n := c.len() - 1 + c.swap(i, n) + c.down(i, n) + c.items = c.items[:n] + } + return + } + } + panic("unreachable") +} + +// frontiers is used to track progression of a task (eg, compaction) across the +// keyspace. Clients that want to be informed when the task advances to a key ≥ +// some frontier may register a frontier, providing a callback. 
The task calls +// `Advance(k)` with each user key encountered, which invokes the `reached` func +// on all tracked frontiers with `key`s ≤ k. +// +// Internally, frontiers is implemented as a simple heap. +type frontiers struct { + cmp Compare + items []*frontier +} + +// String implements fmt.Stringer. +func (f *frontiers) String() string { + var buf bytes.Buffer + for i := 0; i < len(f.items); i++ { + if i > 0 { + fmt.Fprint(&buf, ", ") + } + fmt.Fprintf(&buf, "%s: %q", f.items[i], f.items[i].key) + } + return buf.String() +} + +// Advance notifies all member frontiers with keys ≤ k. +func (f *frontiers) Advance(k []byte) { + for len(f.items) > 0 && f.cmp(k, f.items[0].key) >= 0 { + // This frontier has been reached. Invoke the closure and update with + // the next frontier. + f.items[0].key = f.items[0].reached(k) + if f.items[0].key == nil { + // This was the final frontier that this user was concerned with. + // Remove it from the heap. + f.pop() + } else { + // Fix up the heap root. + f.fix(0) + } + } +} + +func (f *frontiers) len() int { + return len(f.items) +} + +func (f *frontiers) less(i, j int) bool { + return f.cmp(f.items[i].key, f.items[j].key) < 0 +} + +func (f *frontiers) swap(i, j int) { + f.items[i], f.items[j] = f.items[j], f.items[i] +} + +// fix, up and down are copied from the go stdlib. 
+ +func (f *frontiers) fix(i int) { + if !f.down(i, f.len()) { + f.up(i) + } +} + +func (f *frontiers) push(ff *frontier) { + n := len(f.items) + f.items = append(f.items, ff) + f.up(n) +} + +func (f *frontiers) pop() *frontier { + n := f.len() - 1 + f.swap(0, n) + f.down(0, n) + item := f.items[n] + f.items = f.items[:n] + return item +} + +func (f *frontiers) up(j int) { + for { + i := (j - 1) / 2 // parent + if i == j || !f.less(j, i) { + break + } + f.swap(i, j) + j = i + } +} + +func (f *frontiers) down(i0, n int) bool { + i := i0 + for { + j1 := 2*i + 1 + if j1 >= n || j1 < 0 { // j1 < 0 after int overflow + break + } + j := j1 // left child + if j2 := j1 + 1; j2 < n && f.less(j2, j1) { + j = j2 // = 2*i + 2 // right child + } + if !f.less(j, i) { + break + } + f.swap(i, j) + i = j + } + return i > i0 +} diff --git a/pebble/compaction_iter_test.go b/pebble/compaction_iter_test.go new file mode 100644 index 0000000..07c489c --- /dev/null +++ b/pebble/compaction_iter_test.go @@ -0,0 +1,382 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package pebble + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "slices" + "strconv" + "strings" + "testing" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/invalidating" + "github.com/cockroachdb/pebble/internal/keyspan" + "github.com/cockroachdb/pebble/internal/rangekey" + "github.com/cockroachdb/pebble/internal/testkeys" + "github.com/stretchr/testify/require" +) + +func TestSnapshotIndex(t *testing.T) { + testCases := []struct { + snapshots []uint64 + seq uint64 + expectedIndex int + expectedSeqNum uint64 + }{ + {[]uint64{}, 1, 0, InternalKeySeqNumMax}, + {[]uint64{1}, 0, 0, 1}, + {[]uint64{1}, 1, 1, InternalKeySeqNumMax}, + {[]uint64{1}, 2, 1, InternalKeySeqNumMax}, + {[]uint64{1, 3}, 1, 1, 3}, + {[]uint64{1, 3}, 2, 1, 3}, + {[]uint64{1, 3}, 3, 2, InternalKeySeqNumMax}, + {[]uint64{1, 3}, 4, 2, InternalKeySeqNumMax}, + {[]uint64{1, 3, 3}, 2, 1, 3}, + } + for _, c := range testCases { + t.Run("", func(t *testing.T) { + idx, seqNum := snapshotIndex(c.seq, c.snapshots) + if c.expectedIndex != idx { + t.Fatalf("expected %d, but got %d", c.expectedIndex, idx) + } + if c.expectedSeqNum != seqNum { + t.Fatalf("expected %d, but got %d", c.expectedSeqNum, seqNum) + } + }) + } +} + +type debugMerger struct { + buf []byte +} + +func (m *debugMerger) MergeNewer(value []byte) error { + m.buf = append(m.buf, value...) + return nil +} + +func (m *debugMerger) MergeOlder(value []byte) error { + buf := make([]byte, 0, len(m.buf)+len(value)) + buf = append(buf, value...) + buf = append(buf, m.buf...) + m.buf = buf + return nil +} + +func (m *debugMerger) Finish(includesBase bool) ([]byte, io.Closer, error) { + if includesBase { + m.buf = append(m.buf, []byte("[base]")...) 
+ } + return m.buf, nil, nil +} + +func TestCompactionIter(t *testing.T) { + var merge Merge + var keys []InternalKey + var rangeKeys []keyspan.Span + var vals [][]byte + var snapshots []uint64 + var elideTombstones bool + var allowZeroSeqnum bool + var interleavingIter *keyspan.InterleavingIter + + // The input to the data-driven test is dependent on the format major + // version we are testing against. + fileFunc := func(formatVersion FormatMajorVersion) string { + if formatVersion < FormatSetWithDelete { + return "testdata/compaction_iter" + } + if formatVersion < FormatDeleteSizedAndObsolete { + return "testdata/compaction_iter_set_with_del" + } + return "testdata/compaction_iter_delete_sized" + } + + newIter := func(formatVersion FormatMajorVersion) *compactionIter { + // To adhere to the existing assumption that range deletion blocks in + // SSTables are not released while iterating, and therefore not + // susceptible to use-after-free bugs, we skip the zeroing of + // RangeDelete keys. + fi := &fakeIter{keys: keys, vals: vals} + interleavingIter = &keyspan.InterleavingIter{} + interleavingIter.Init( + base.DefaultComparer, + fi, + keyspan.NewIter(base.DefaultComparer.Compare, rangeKeys), + keyspan.InterleavingIterOpts{}) + iter := invalidating.NewIter(interleavingIter, invalidating.IgnoreKinds(InternalKeyKindRangeDelete)) + if merge == nil { + merge = func(key, value []byte) (base.ValueMerger, error) { + m := &debugMerger{} + m.buf = append(m.buf, value...) 
+ return m, nil + } + } + + return newCompactionIter( + DefaultComparer.Compare, + DefaultComparer.Equal, + DefaultComparer.FormatKey, + merge, + iter, + snapshots, + &keyspan.Fragmenter{}, + &keyspan.Fragmenter{}, + allowZeroSeqnum, + func([]byte) bool { + return elideTombstones + }, + func(_, _ []byte) bool { + return elideTombstones + }, + formatVersion, + ) + } + + runTest := func(t *testing.T, formatVersion FormatMajorVersion) { + datadriven.RunTest(t, fileFunc(formatVersion), func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "define": + merge = nil + if len(d.CmdArgs) > 0 && d.CmdArgs[0].Key == "merger" && + len(d.CmdArgs[0].Vals) > 0 && d.CmdArgs[0].Vals[0] == "deletable" { + merge = newDeletableSumValueMerger + } + keys = keys[:0] + vals = vals[:0] + rangeKeys = rangeKeys[:0] + for _, key := range strings.Split(d.Input, "\n") { + j := strings.Index(key, ":") + keys = append(keys, base.ParseInternalKey(key[:j])) + + if strings.HasPrefix(key[j+1:], "varint(") { + valueStr := strings.TrimSuffix(strings.TrimPrefix(key[j+1:], "varint("), ")") + v, err := strconv.ParseUint(valueStr, 10, 64) + require.NoError(t, err) + encodedValue := binary.AppendUvarint([]byte(nil), v) + vals = append(vals, encodedValue) + } else { + vals = append(vals, []byte(key[j+1:])) + } + } + return "" + + case "define-range-keys": + for _, key := range strings.Split(d.Input, "\n") { + s := keyspan.ParseSpan(strings.TrimSpace(key)) + rangeKeys = append(rangeKeys, s) + } + return "" + + case "iter": + snapshots = snapshots[:0] + elideTombstones = false + allowZeroSeqnum = false + printSnapshotPinned := false + printMissizedDels := false + printForceObsolete := false + for _, arg := range d.CmdArgs { + switch arg.Key { + case "snapshots": + for _, val := range arg.Vals { + seqNum, err := strconv.Atoi(val) + if err != nil { + return err.Error() + } + snapshots = append(snapshots, uint64(seqNum)) + } + case "elide-tombstones": + var err error + elideTombstones, err = 
strconv.ParseBool(arg.Vals[0]) + if err != nil { + return err.Error() + } + case "allow-zero-seqnum": + var err error + allowZeroSeqnum, err = strconv.ParseBool(arg.Vals[0]) + if err != nil { + return err.Error() + } + case "print-snapshot-pinned": + printSnapshotPinned = true + case "print-missized-dels": + printMissizedDels = true + case "print-force-obsolete": + printForceObsolete = true + default: + return fmt.Sprintf("%s: unknown arg: %s", d.Cmd, arg.Key) + } + } + slices.Sort(snapshots) + + iter := newIter(formatVersion) + var b bytes.Buffer + for _, line := range strings.Split(d.Input, "\n") { + parts := strings.Fields(line) + if len(parts) == 0 { + continue + } + switch parts[0] { + case "first": + iter.First() + case "next": + iter.Next() + case "tombstones": + var key []byte + if len(parts) == 2 { + key = []byte(parts[1]) + } + for _, v := range iter.Tombstones(key) { + for _, k := range v.Keys { + fmt.Fprintf(&b, "%s-%s#%d\n", v.Start, v.End, k.SeqNum()) + } + } + fmt.Fprintf(&b, ".\n") + continue + case "range-keys": + var key []byte + if len(parts) == 2 { + key = []byte(parts[1]) + } + for _, v := range iter.RangeKeys(key) { + fmt.Fprintf(&b, "%s\n", v) + } + fmt.Fprintf(&b, ".\n") + continue + default: + return fmt.Sprintf("unknown op: %s", parts[0]) + } + if iter.Valid() { + snapshotPinned := "" + if printSnapshotPinned { + snapshotPinned = " (not pinned)" + if iter.snapshotPinned { + snapshotPinned = " (pinned)" + } + } + forceObsolete := "" + if printForceObsolete { + forceObsolete = " (not force obsolete)" + if iter.forceObsoleteDueToRangeDel { + forceObsolete = " (force obsolete)" + } + } + v := string(iter.Value()) + if iter.Key().Kind() == base.InternalKeyKindDeleteSized && len(iter.Value()) > 0 { + vn, n := binary.Uvarint(iter.Value()) + if n != len(iter.Value()) { + v = fmt.Sprintf("err: %0x value not a uvarint", iter.Value()) + } else { + v = fmt.Sprintf("varint(%d)", vn) + } + } + fmt.Fprintf(&b, "%s:%s%s%s\n", iter.Key(), v, 
snapshotPinned, forceObsolete) + if iter.Key().Kind() == InternalKeyKindRangeDelete { + iter.rangeDelFrag.Add(keyspan.Span{ + Start: append([]byte{}, iter.Key().UserKey...), + End: append([]byte{}, iter.Value()...), + Keys: []keyspan.Key{ + {Trailer: iter.Key().Trailer}, + }, + }) + } + if rangekey.IsRangeKey(iter.Key().Kind()) { + iter.rangeKeyFrag.Add(*interleavingIter.Span()) + } + } else if err := iter.Error(); err != nil { + fmt.Fprintf(&b, "err=%v\n", err) + } else { + fmt.Fprintf(&b, ".\n") + } + } + if printMissizedDels { + fmt.Fprintf(&b, "missized-dels=%d\n", iter.stats.countMissizedDels) + } + return b.String() + + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) + } + + // Rather than testing against all format version, we test against the + // significant boundaries. + formatVersions := []FormatMajorVersion{ + FormatMostCompatible, + FormatSetWithDelete - 1, + FormatSetWithDelete, + internalFormatNewest, + } + for _, formatVersion := range formatVersions { + t.Run(fmt.Sprintf("version-%s", formatVersion), func(t *testing.T) { + runTest(t, formatVersion) + }) + } +} + +func TestFrontiers(t *testing.T) { + cmp := testkeys.Comparer.Compare + var keySets [][][]byte + datadriven.RunTest(t, "testdata/frontiers", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "init": + // Init configures a frontier per line of input. Each line should + // contain a sorted whitespace-separated list of keys that the + // frontier will use. + // + // For example, the following input creates two separate monitored + // frontiers: one that sets its key successively to 'd', 'e', 'j' + // and one that sets its key to 'a', 'p', 'n', 'z': + // + // init + // b e j + // a p n z + + keySets = keySets[:0] + for _, line := range strings.Split(td.Input, "\n") { + keySets = append(keySets, bytes.Fields([]byte(line))) + } + return "" + case "scan": + f := &frontiers{cmp: cmp} + for _, keys := range keySets { + initTestFrontier(f, keys...) 
+			}
+			var buf bytes.Buffer
+			for _, kStr := range strings.Fields(td.Input) {
+				k := []byte(kStr)
+				f.Advance(k)
+				fmt.Fprintf(&buf, "%s : { %s }\n", kStr, f.String())
+			}
+			return buf.String()
+		default:
+			return fmt.Sprintf("unrecognized command %q", td.Cmd)
+		}
+	})
+}
+
+// initTestFrontier adds a new frontier to f that iterates through the provided
+// keys. The keys slice must be sorted.
+func initTestFrontier(f *frontiers, keys ...[]byte) *frontier {
+	ff := &frontier{}
+	var key []byte
+	if len(keys) > 0 {
+		key, keys = keys[0], keys[1:]
+	}
+	reached := func(k []byte) (nextKey []byte) {
+		if len(keys) > 0 {
+			nextKey, keys = keys[0], keys[1:]
+		}
+		return nextKey
+	}
+	ff.Init(f, key, reached)
+	return ff
+}
diff --git a/pebble/compaction_picker.go b/pebble/compaction_picker.go
new file mode 100644
index 0000000..6567391
--- /dev/null
+++ b/pebble/compaction_picker.go
@@ -0,0 +1,2068 @@
+// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use
+// of this source code is governed by a BSD-style license that can be found in
+// the LICENSE file.
+
+package pebble
+
+import (
+	"bytes"
+	"fmt"
+	"math"
+	"sort"
+	"strings"
+
+	"github.com/cockroachdb/pebble/internal/base"
+	"github.com/cockroachdb/pebble/internal/humanize"
+	"github.com/cockroachdb/pebble/internal/manifest"
+)
+
+// The minimum count for an intra-L0 compaction. This matches the RocksDB
+// heuristic.
+const minIntraL0Count = 4
+
+type compactionEnv struct {
+	// diskAvailBytes holds a statistic on the number of bytes available on
+	// disk, as reported by the filesystem. It's used to be more restrictive in
+	// expanding compactions if available disk space is limited.
+	//
+	// The cached value (d.diskAvailBytes) is updated whenever a file is deleted
+	// and whenever a compaction or flush completes. Since file removal is the
+	// primary means of reclaiming space, there is a rough bound on the
+	// statistic's staleness when available bytes is growing. 
Compactions and + // flushes are longer, slower operations and provide a much looser bound + // when available bytes is decreasing. + diskAvailBytes uint64 + earliestUnflushedSeqNum uint64 + earliestSnapshotSeqNum uint64 + inProgressCompactions []compactionInfo + readCompactionEnv readCompactionEnv +} + +type compactionPicker interface { + getScores([]compactionInfo) [numLevels]float64 + getBaseLevel() int + estimatedCompactionDebt(l0ExtraSize uint64) uint64 + pickAuto(env compactionEnv) (pc *pickedCompaction) + pickElisionOnlyCompaction(env compactionEnv) (pc *pickedCompaction) + pickRewriteCompaction(env compactionEnv) (pc *pickedCompaction) + pickReadTriggeredCompaction(env compactionEnv) (pc *pickedCompaction) + forceBaseLevel1() +} + +// readCompactionEnv is used to hold data required to perform read compactions +type readCompactionEnv struct { + rescheduleReadCompaction *bool + readCompactions *readCompactionQueue + flushing bool +} + +// Information about in-progress compactions provided to the compaction picker. +// These are used to constrain the new compactions that will be picked. +type compactionInfo struct { + // versionEditApplied is true if this compaction's version edit has already + // been committed. The compaction may still be in-progress deleting newly + // obsolete files. 
+ versionEditApplied bool + inputs []compactionLevel + outputLevel int + smallest InternalKey + largest InternalKey +} + +func (info compactionInfo) String() string { + var buf bytes.Buffer + var largest int + for i, in := range info.inputs { + if i > 0 { + fmt.Fprintf(&buf, " -> ") + } + fmt.Fprintf(&buf, "L%d", in.level) + in.files.Each(func(m *fileMetadata) { + fmt.Fprintf(&buf, " %s", m.FileNum) + }) + if largest < in.level { + largest = in.level + } + } + if largest != info.outputLevel || len(info.inputs) == 1 { + fmt.Fprintf(&buf, " -> L%d", info.outputLevel) + } + return buf.String() +} + +type sortCompactionLevelsByPriority []candidateLevelInfo + +func (s sortCompactionLevelsByPriority) Len() int { + return len(s) +} + +// A level should be picked for compaction if the compensatedScoreRatio is >= the +// compactionScoreThreshold. +const compactionScoreThreshold = 1 + +// Less should return true if s[i] must be placed earlier than s[j] in the final +// sorted list. The candidateLevelInfo for the level placed earlier is more likely +// to be picked for a compaction. +func (s sortCompactionLevelsByPriority) Less(i, j int) bool { + iShouldCompact := s[i].compensatedScoreRatio >= compactionScoreThreshold + jShouldCompact := s[j].compensatedScoreRatio >= compactionScoreThreshold + // Ordering is defined as decreasing on (shouldCompact, uncompensatedScoreRatio) + // where shouldCompact is 1 for true and 0 for false. + if iShouldCompact && !jShouldCompact { + return true + } + if !iShouldCompact && jShouldCompact { + return false + } + + if s[i].uncompensatedScoreRatio != s[j].uncompensatedScoreRatio { + return s[i].uncompensatedScoreRatio > s[j].uncompensatedScoreRatio + } + return s[i].level < s[j].level +} + +func (s sortCompactionLevelsByPriority) Swap(i, j int) { + s[i], s[j] = s[j], s[i] +} + +// sublevelInfo is used to tag a LevelSlice for an L0 sublevel with the +// sublevel. 
+type sublevelInfo struct { + manifest.LevelSlice + sublevel manifest.Level +} + +func (cl sublevelInfo) Clone() sublevelInfo { + return sublevelInfo{ + sublevel: cl.sublevel, + LevelSlice: cl.LevelSlice.Reslice(func(start, end *manifest.LevelIterator) {}), + } +} +func (cl sublevelInfo) String() string { + return fmt.Sprintf(`Sublevel %s; Levels %s`, cl.sublevel, cl.LevelSlice) +} + +// generateSublevelInfo will generate the level slices for each of the sublevels +// from the level slice for all of L0. +func generateSublevelInfo(cmp base.Compare, levelFiles manifest.LevelSlice) []sublevelInfo { + sublevelMap := make(map[uint64][]*fileMetadata) + it := levelFiles.Iter() + for f := it.First(); f != nil; f = it.Next() { + sublevelMap[uint64(f.SubLevel)] = append(sublevelMap[uint64(f.SubLevel)], f) + } + + var sublevels []int + for level := range sublevelMap { + sublevels = append(sublevels, int(level)) + } + sort.Ints(sublevels) + + var levelSlices []sublevelInfo + for _, sublevel := range sublevels { + metas := sublevelMap[uint64(sublevel)] + levelSlices = append( + levelSlices, + sublevelInfo{ + manifest.NewLevelSliceKeySorted(cmp, metas), + manifest.L0Sublevel(sublevel), + }, + ) + } + return levelSlices +} + +// compactionPickerMetrics holds metrics related to the compaction picking process +type compactionPickerMetrics struct { + // scores contains the compensatedScoreRatio from the candidateLevelInfo. + scores []float64 + singleLevelOverlappingRatio float64 + multiLevelOverlappingRatio float64 +} + +// pickedCompaction contains information about a compaction that has already +// been chosen, and is being constructed. Compaction construction info lives in +// this struct, and is copied over into the compaction struct when that's +// created. +type pickedCompaction struct { + cmp Compare + // score of the chosen compaction. This is the same as the + // compensatedScoreRatio in the candidateLevelInfo. + score float64 + // kind indicates the kind of compaction. 
+ kind compactionKind + // startLevel is the level that is being compacted. Inputs from startLevel + // and outputLevel will be merged to produce a set of outputLevel files. + startLevel *compactionLevel + // outputLevel is the level that files are being produced in. outputLevel is + // equal to startLevel+1 except when: + // - if startLevel is 0, the output level equals compactionPicker.baseLevel(). + // - in multilevel compaction, the output level is the lowest level involved in + // the compaction + outputLevel *compactionLevel + // extraLevels contain additional levels in between the input and output + // levels that get compacted in multi level compactions + extraLevels []*compactionLevel + inputs []compactionLevel + // LBase at the time of compaction picking. + baseLevel int + // L0-specific compaction info. Set to a non-nil value for all compactions + // where startLevel == 0 that were generated by L0Sublevels. + lcf *manifest.L0CompactionFiles + // maxOutputFileSize is the maximum size of an individual table created + // during compaction. + maxOutputFileSize uint64 + // maxOverlapBytes is the maximum number of bytes of overlap allowed for a + // single output table with the tables in the grandparent level. + maxOverlapBytes uint64 + // maxReadCompactionBytes is the maximum bytes a read compaction is allowed to + // overlap in its output level with. If the overlap is greater than + // maxReadCompaction bytes, then we don't proceed with the compaction. + maxReadCompactionBytes uint64 + // The boundaries of the input data. 
+ smallest InternalKey + largest InternalKey + version *version + pickerMetrics compactionPickerMetrics +} + +func defaultOutputLevel(startLevel, baseLevel int) int { + outputLevel := startLevel + 1 + if startLevel == 0 { + outputLevel = baseLevel + } + if outputLevel >= numLevels-1 { + outputLevel = numLevels - 1 + } + return outputLevel +} + +func newPickedCompaction( + opts *Options, cur *version, startLevel, outputLevel, baseLevel int, +) *pickedCompaction { + if startLevel > 0 && startLevel < baseLevel { + panic(fmt.Sprintf("invalid compaction: start level %d should not be empty (base level %d)", + startLevel, baseLevel)) + } + + adjustedLevel := adjustedOutputLevel(outputLevel, baseLevel) + pc := &pickedCompaction{ + cmp: opts.Comparer.Compare, + version: cur, + baseLevel: baseLevel, + inputs: []compactionLevel{{level: startLevel}, {level: outputLevel}}, + maxOutputFileSize: uint64(opts.Level(adjustedLevel).TargetFileSize), + maxOverlapBytes: maxGrandparentOverlapBytes(opts, adjustedLevel), + maxReadCompactionBytes: maxReadCompactionBytes(opts, adjustedLevel), + } + pc.startLevel = &pc.inputs[0] + pc.outputLevel = &pc.inputs[1] + return pc +} + +// adjustedOutputLevel is the output level used for the purpose of +// determining the target output file size, overlap bytes, and expanded +// bytes, taking into account the base level. +func adjustedOutputLevel(outputLevel int, baseLevel int) int { + adjustedOutputLevel := outputLevel + if adjustedOutputLevel > 0 { + // Output level is in the range [baseLevel, numLevels]. For the purpose of + // determining the target output file size, overlap bytes, and expanded + // bytes, we want to adjust the range to [1,numLevels]. 
+ adjustedOutputLevel = 1 + outputLevel - baseLevel + } + return adjustedOutputLevel +} + +func newPickedCompactionFromL0( + lcf *manifest.L0CompactionFiles, opts *Options, vers *version, baseLevel int, isBase bool, +) *pickedCompaction { + outputLevel := baseLevel + if !isBase { + outputLevel = 0 // Intra L0 + } + + pc := newPickedCompaction(opts, vers, 0, outputLevel, baseLevel) + pc.lcf = lcf + pc.outputLevel.level = outputLevel + + // Manually build the compaction as opposed to calling + // pickAutoHelper. This is because L0Sublevels has already added + // any overlapping L0 SSTables that need to be added, and + // because compactions built by L0SSTables do not necessarily + // pick contiguous sequences of files in pc.version.Levels[0]. + files := make([]*manifest.FileMetadata, 0, len(lcf.Files)) + iter := vers.Levels[0].Iter() + for f := iter.First(); f != nil; f = iter.Next() { + if lcf.FilesIncluded[f.L0Index] { + files = append(files, f) + } + } + pc.startLevel.files = manifest.NewLevelSliceSeqSorted(files) + return pc +} + +func (pc *pickedCompaction) String() string { + var builder strings.Builder + builder.WriteString(fmt.Sprintf(`Score=%f, `, pc.score)) + builder.WriteString(fmt.Sprintf(`Kind=%s, `, pc.kind)) + builder.WriteString(fmt.Sprintf(`AdjustedOutputLevel=%d, `, adjustedOutputLevel(pc.outputLevel.level, pc.baseLevel))) + builder.WriteString(fmt.Sprintf(`maxOutputFileSize=%d, `, pc.maxOutputFileSize)) + builder.WriteString(fmt.Sprintf(`maxReadCompactionBytes=%d, `, pc.maxReadCompactionBytes)) + builder.WriteString(fmt.Sprintf(`smallest=%s, `, pc.smallest)) + builder.WriteString(fmt.Sprintf(`largest=%s, `, pc.largest)) + builder.WriteString(fmt.Sprintf(`version=%s, `, pc.version)) + builder.WriteString(fmt.Sprintf(`inputs=%s, `, pc.inputs)) + builder.WriteString(fmt.Sprintf(`startlevel=%s, `, pc.startLevel)) + builder.WriteString(fmt.Sprintf(`outputLevel=%s, `, pc.outputLevel)) + builder.WriteString(fmt.Sprintf(`extraLevels=%s, `, pc.extraLevels)) 
+ builder.WriteString(fmt.Sprintf(`l0SublevelInfo=%s, `, pc.startLevel.l0SublevelInfo)) + builder.WriteString(fmt.Sprintf(`lcf=%s`, pc.lcf)) + return builder.String() +} + +// Clone creates a deep copy of the pickedCompaction +func (pc *pickedCompaction) clone() *pickedCompaction { + + // Quickly copy over fields that do not require special deep copy care, and + // set all fields that will require a deep copy to nil. + newPC := &pickedCompaction{ + cmp: pc.cmp, + score: pc.score, + kind: pc.kind, + baseLevel: pc.baseLevel, + maxOutputFileSize: pc.maxOutputFileSize, + maxOverlapBytes: pc.maxOverlapBytes, + maxReadCompactionBytes: pc.maxReadCompactionBytes, + smallest: pc.smallest.Clone(), + largest: pc.largest.Clone(), + + // TODO(msbutler): properly clone picker metrics + pickerMetrics: pc.pickerMetrics, + + // Both copies see the same manifest, therefore, it's ok for them to se + // share the same pc. version. + version: pc.version, + } + + newPC.inputs = make([]compactionLevel, len(pc.inputs)) + newPC.extraLevels = make([]*compactionLevel, 0, len(pc.extraLevels)) + for i := range pc.inputs { + newPC.inputs[i] = pc.inputs[i].Clone() + if i == 0 { + newPC.startLevel = &newPC.inputs[i] + } else if i == len(pc.inputs)-1 { + newPC.outputLevel = &newPC.inputs[i] + } else { + newPC.extraLevels = append(newPC.extraLevels, &newPC.inputs[i]) + } + } + + if len(pc.startLevel.l0SublevelInfo) > 0 { + newPC.startLevel.l0SublevelInfo = make([]sublevelInfo, len(pc.startLevel.l0SublevelInfo)) + for i := range pc.startLevel.l0SublevelInfo { + newPC.startLevel.l0SublevelInfo[i] = pc.startLevel.l0SublevelInfo[i].Clone() + } + } + if pc.lcf != nil { + newPC.lcf = pc.lcf.Clone() + } + return newPC +} + +// maybeExpandedBounds is a helper function for setupInputs which ensures the +// pickedCompaction's smallest and largest internal keys are updated iff +// the candidate keys expand the key span. 
This avoids a bug for multi-level +// compactions: during the second call to setupInputs, the picked compaction's +// smallest and largest keys should not decrease the key span. +func (pc *pickedCompaction) maybeExpandBounds(smallest InternalKey, largest InternalKey) { + emptyKey := InternalKey{} + if base.InternalCompare(pc.cmp, smallest, emptyKey) == 0 { + if base.InternalCompare(pc.cmp, largest, emptyKey) != 0 { + panic("either both candidate keys are empty or neither are empty") + } + return + } + if base.InternalCompare(pc.cmp, pc.smallest, emptyKey) == 0 { + if base.InternalCompare(pc.cmp, pc.largest, emptyKey) != 0 { + panic("either both pc keys are empty or neither are empty") + } + pc.smallest = smallest + pc.largest = largest + return + } + if base.InternalCompare(pc.cmp, pc.smallest, smallest) >= 0 { + pc.smallest = smallest + } + if base.InternalCompare(pc.cmp, pc.largest, largest) <= 0 { + pc.largest = largest + } +} + +// setupInputs returns true if a compaction has been set up. It returns false if +// a concurrent compaction is occurring on the start or output level files. +func (pc *pickedCompaction) setupInputs( + opts *Options, diskAvailBytes uint64, startLevel *compactionLevel, +) bool { + // maxExpandedBytes is the maximum size of an expanded compaction. If + // growing a compaction results in a larger size, the original compaction + // is used instead. + maxExpandedBytes := expandedCompactionByteSizeLimit( + opts, adjustedOutputLevel(pc.outputLevel.level, pc.baseLevel), diskAvailBytes, + ) + + // Expand the initial inputs to a clean cut. + var isCompacting bool + startLevel.files, isCompacting = expandToAtomicUnit(pc.cmp, startLevel.files, false /* disableIsCompacting */) + if isCompacting { + return false + } + pc.maybeExpandBounds(manifest.KeyRange(pc.cmp, startLevel.files.Iter())) + + // Determine the sstables in the output level which overlap with the input + // sstables, and then expand those tables to a clean cut. 
No need to do + // this for intra-L0 compactions; outputLevel.files is left empty for those. + if startLevel.level != pc.outputLevel.level { + pc.outputLevel.files = pc.version.Overlaps(pc.outputLevel.level, pc.cmp, pc.smallest.UserKey, + pc.largest.UserKey, pc.largest.IsExclusiveSentinel()) + pc.outputLevel.files, isCompacting = expandToAtomicUnit(pc.cmp, pc.outputLevel.files, + false /* disableIsCompacting */) + if isCompacting { + return false + } + pc.maybeExpandBounds(manifest.KeyRange(pc.cmp, + startLevel.files.Iter(), pc.outputLevel.files.Iter())) + } + + // Grow the sstables in startLevel.level as long as it doesn't affect the number + // of sstables included from pc.outputLevel.level. + if pc.lcf != nil && startLevel.level == 0 && pc.outputLevel.level != 0 { + // Call the L0-specific compaction extension method. Similar logic as + // pc.grow. Additional L0 files are optionally added to the compaction at + // this step. Note that the bounds passed in are not the bounds of the + // compaction, but rather the smallest and largest internal keys that + // the compaction cannot include from L0 without pulling in more Lbase + // files. Consider this example: + // + // L0: c-d e+f g-h + // Lbase: a-b e+f i-j + // a b c d e f g h i j + // + // The e-f files have already been chosen in the compaction. As pulling + // in more LBase files is undesirable, the logic below will pass in + // smallest = b and largest = i to ExtendL0ForBaseCompactionTo, which + // will expand the compaction to include c-d and g-h from L0. The + // bounds passed in are exclusive; the compaction cannot be expanded + // to include files that "touch" it. 
+ smallestBaseKey := base.InvalidInternalKey + largestBaseKey := base.InvalidInternalKey + if pc.outputLevel.files.Empty() { + baseIter := pc.version.Levels[pc.outputLevel.level].Iter() + if sm := baseIter.SeekLT(pc.cmp, pc.smallest.UserKey); sm != nil { + smallestBaseKey = sm.Largest + } + if la := baseIter.SeekGE(pc.cmp, pc.largest.UserKey); la != nil { + largestBaseKey = la.Smallest + } + } else { + // NB: We use Reslice to access the underlying level's files, but + // we discard the returned slice. The pc.outputLevel.files slice + // is not modified. + _ = pc.outputLevel.files.Reslice(func(start, end *manifest.LevelIterator) { + if sm := start.Prev(); sm != nil { + smallestBaseKey = sm.Largest + } + if la := end.Next(); la != nil { + largestBaseKey = la.Smallest + } + }) + } + oldLcf := pc.lcf.Clone() + if pc.version.L0Sublevels.ExtendL0ForBaseCompactionTo(smallestBaseKey, largestBaseKey, pc.lcf) { + var newStartLevelFiles []*fileMetadata + iter := pc.version.Levels[0].Iter() + var sizeSum uint64 + for j, f := 0, iter.First(); f != nil; j, f = j+1, iter.Next() { + if pc.lcf.FilesIncluded[f.L0Index] { + newStartLevelFiles = append(newStartLevelFiles, f) + sizeSum += f.Size + } + } + if sizeSum+pc.outputLevel.files.SizeSum() < maxExpandedBytes { + startLevel.files = manifest.NewLevelSliceSeqSorted(newStartLevelFiles) + pc.smallest, pc.largest = manifest.KeyRange(pc.cmp, + startLevel.files.Iter(), pc.outputLevel.files.Iter()) + } else { + *pc.lcf = *oldLcf + } + } + } else if pc.grow(pc.smallest, pc.largest, maxExpandedBytes, startLevel) { + pc.maybeExpandBounds(manifest.KeyRange(pc.cmp, + startLevel.files.Iter(), pc.outputLevel.files.Iter())) + } + + if pc.startLevel.level == 0 { + // We don't change the input files for the compaction beyond this point. 
+		pc.startLevel.l0SublevelInfo = generateSublevelInfo(pc.cmp, pc.startLevel.files)
+	}
+
+	return true
+}
+
+// grow grows the number of inputs at c.level without changing the number of
+// c.level+1 files in the compaction, and returns whether the inputs grew. sm
+// and la are the smallest and largest InternalKeys in all of the inputs.
+func (pc *pickedCompaction) grow(
+	sm, la InternalKey, maxExpandedBytes uint64, startLevel *compactionLevel,
+) bool {
+	if pc.outputLevel.files.Empty() {
+		return false
+	}
+	grow0 := pc.version.Overlaps(startLevel.level, pc.cmp, sm.UserKey,
+		la.UserKey, la.IsExclusiveSentinel())
+	grow0, isCompacting := expandToAtomicUnit(pc.cmp, grow0, false /* disableIsCompacting */)
+	if isCompacting {
+		return false
+	}
+	if grow0.Len() <= startLevel.files.Len() {
+		return false
+	}
+	if grow0.SizeSum()+pc.outputLevel.files.SizeSum() >= maxExpandedBytes {
+		return false
+	}
+	// We need to include the outputLevel iter because without it, in a multiLevel scenario,
+	// sm1 and la1 could shift the output level keyspace when pc.outputLevel.files is set to grow1.
+	sm1, la1 := manifest.KeyRange(pc.cmp, grow0.Iter(), pc.outputLevel.files.Iter())
+	grow1 := pc.version.Overlaps(pc.outputLevel.level, pc.cmp, sm1.UserKey,
+		la1.UserKey, la1.IsExclusiveSentinel())
+	grow1, isCompacting = expandToAtomicUnit(pc.cmp, grow1, false /* disableIsCompacting */)
+	if isCompacting {
+		return false
+	}
+	if grow1.Len() != pc.outputLevel.files.Len() {
+		return false
+	}
+	startLevel.files = grow0
+	pc.outputLevel.files = grow1
+	return true
+}
+
+func (pc *pickedCompaction) compactionSize() uint64 {
+	var bytesToCompact uint64
+	for i := range pc.inputs {
+		bytesToCompact += pc.inputs[i].files.SizeSum()
+	}
+	return bytesToCompact
+}
+
+// setupMultiLevelCandidate returns true if it successfully added another level
+// to the compaction.
+func (pc *pickedCompaction) setupMultiLevelCandidate(opts *Options, diskAvailBytes uint64) bool { + pc.inputs = append(pc.inputs, compactionLevel{level: pc.outputLevel.level + 1}) + + // Recalibrate startLevel and outputLevel: + // - startLevel and outputLevel pointers may be obsolete after appending to pc.inputs. + // - push outputLevel to extraLevels and move the new level to outputLevel + pc.startLevel = &pc.inputs[0] + pc.extraLevels = []*compactionLevel{&pc.inputs[1]} + pc.outputLevel = &pc.inputs[2] + return pc.setupInputs(opts, diskAvailBytes, pc.extraLevels[len(pc.extraLevels)-1]) +} + +// expandToAtomicUnit expands the provided level slice within its level both +// forwards and backwards to its "atomic compaction unit" boundaries, if +// necessary. +// +// While picking compaction inputs, this is required to maintain the invariant +// that the versions of keys at level+1 are older than the versions of keys at +// level. Tables are added to the right of the current slice tables such that +// the rightmost table has a "clean cut". A clean cut is either a change in +// user keys, or when the largest key in the left sstable is a range tombstone +// sentinel key (InternalKeyRangeDeleteSentinel). +// +// In addition to maintaining the seqnum invariant, expandToAtomicUnit is used +// to provide clean boundaries for range tombstone truncation during +// compaction. In order to achieve these clean boundaries, expandToAtomicUnit +// needs to find a "clean cut" on the left edge of the compaction as well. +// This is necessary in order for "atomic compaction units" to always be +// compacted as a unit. Failure to do this leads to a subtle bug with +// truncation of range tombstones to atomic compaction unit boundaries. +// Consider the scenario: +// +// L3: +// 12:[a#2,15-b#1,1] +// 13:[b#0,15-d#72057594037927935,15] +// +// These sstables contain a range tombstone [a-d)#2 which spans the two +// sstables. The two sstables need to always be kept together. 
Compacting +// sstable 13 independently of sstable 12 would result in: +// +// L3: +// 12:[a#2,15-b#1,1] +// L4: +// 14:[b#0,15-d#72057594037927935,15] +// +// This state is still ok, but when sstable 12 is next compacted, its range +// tombstones will be truncated at "b" (the largest key in its atomic +// compaction unit). In the scenario here, that could result in b#1 becoming +// visible when it should be deleted. +// +// isCompacting is returned true for any atomic units that contain files that +// have in-progress compactions, i.e. FileMetadata.Compacting == true. If +// disableIsCompacting is true, isCompacting always returns false. This helps +// avoid spurious races from being detected when this method is used outside +// of compaction picking code. +// +// TODO(jackson): Compactions and flushes no longer split a user key between two +// sstables. We could perform a migration, re-compacting any sstables with split +// user keys, which would allow us to remove atomic compaction unit expansion +// code. +func expandToAtomicUnit( + cmp Compare, inputs manifest.LevelSlice, disableIsCompacting bool, +) (slice manifest.LevelSlice, isCompacting bool) { + // NB: Inputs for L0 can't be expanded and *version.Overlaps guarantees + // that we get a 'clean cut.' For L0, Overlaps will return a slice without + // access to the rest of the L0 files, so it's OK to try to reslice. + if inputs.Empty() { + // Nothing to expand. + return inputs, false + } + + // TODO(jackson): Update to avoid use of LevelIterator.Current(). The + // Reslice interface will require some tweaking, because we currently rely + // on Reslice having already positioned the LevelIterator appropriately. 
+ + inputs = inputs.Reslice(func(start, end *manifest.LevelIterator) { + iter := start.Clone() + iter.Prev() + for cur, prev := start.Current(), iter.Current(); prev != nil; cur, prev = start.Prev(), iter.Prev() { + if cur.IsCompacting() { + isCompacting = true + } + if cmp(prev.Largest.UserKey, cur.Smallest.UserKey) < 0 { + break + } + if prev.Largest.IsExclusiveSentinel() { + // The table prev has a largest key indicating that the user key + // prev.largest.UserKey doesn't actually exist in the table. + break + } + // prev.Largest.UserKey == cur.Smallest.UserKey, so we need to + // include prev in the compaction. + } + + iter = end.Clone() + iter.Next() + for cur, next := end.Current(), iter.Current(); next != nil; cur, next = end.Next(), iter.Next() { + if cur.IsCompacting() { + isCompacting = true + } + if cmp(cur.Largest.UserKey, next.Smallest.UserKey) < 0 { + break + } + if cur.Largest.IsExclusiveSentinel() { + // The table cur has a largest key indicating that the user key + // cur.largest.UserKey doesn't actually exist in the table. + break + } + // cur.Largest.UserKey == next.Smallest.UserKey, so we need to + // include next in the compaction. + } + }) + inputIter := inputs.Iter() + isCompacting = !disableIsCompacting && + (isCompacting || inputIter.First().IsCompacting() || inputIter.Last().IsCompacting()) + return inputs, isCompacting +} + +func newCompactionPicker( + v *version, opts *Options, inProgressCompactions []compactionInfo, +) compactionPicker { + p := &compactionPickerByScore{ + opts: opts, + vers: v, + } + p.initLevelMaxBytes(inProgressCompactions) + return p +} + +// Information about a candidate compaction level that has been identified by +// the compaction picker. +type candidateLevelInfo struct { + // The compensatedScore of the level after adjusting according to the other + // levels' sizes. For L0, the compensatedScoreRatio is equivalent to the + // uncompensatedScoreRatio as we don't account for level size compensation in + // L0. 
+ compensatedScoreRatio float64 + // The score of the level after accounting for level size compensation before + // adjusting according to other levels' sizes. For L0, the compensatedScore + // is equivalent to the uncompensatedScore as we don't account for level + // size compensation in L0. + compensatedScore float64 + // The score of the level to be compacted, calculated using uncompensated file + // sizes and without any adjustments. + uncompensatedScore float64 + // uncompensatedScoreRatio is the uncompensatedScore adjusted according to + // the other levels' sizes. + uncompensatedScoreRatio float64 + level int + // The level to compact to. + outputLevel int + // The file in level that will be compacted. Additional files may be + // picked by the compaction, and a pickedCompaction created for the + // compaction. + file manifest.LevelFile +} + +func (c *candidateLevelInfo) shouldCompact() bool { + return c.compensatedScoreRatio >= compactionScoreThreshold +} + +func fileCompensation(f *fileMetadata) uint64 { + return uint64(f.Stats.PointDeletionsBytesEstimate) + f.Stats.RangeDeletionsBytesEstimate +} + +// compensatedSize returns f's file size, inflated according to compaction +// priorities. +func compensatedSize(f *fileMetadata) uint64 { + // Add in the estimate of disk space that may be reclaimed by compacting the + // file's tombstones. + return f.Size + fileCompensation(f) +} + +// compensatedSizeAnnotator implements manifest.Annotator, annotating B-Tree +// nodes with the sum of the files' compensated sizes. Its annotation type is +// a *uint64. Compensated sizes may change once a table's stats are loaded +// asynchronously, so its values are marked as cacheable only if a file's +// stats have been loaded. 
+type compensatedSizeAnnotator struct { +} + +var _ manifest.Annotator = compensatedSizeAnnotator{} + +func (a compensatedSizeAnnotator) Zero(dst interface{}) interface{} { + if dst == nil { + return new(uint64) + } + v := dst.(*uint64) + *v = 0 + return v +} + +func (a compensatedSizeAnnotator) Accumulate( + f *fileMetadata, dst interface{}, +) (v interface{}, cacheOK bool) { + vptr := dst.(*uint64) + *vptr = *vptr + compensatedSize(f) + return vptr, f.StatsValid() +} + +func (a compensatedSizeAnnotator) Merge(src interface{}, dst interface{}) interface{} { + srcV := src.(*uint64) + dstV := dst.(*uint64) + *dstV = *dstV + *srcV + return dstV +} + +// totalCompensatedSize computes the compensated size over a file metadata +// iterator. Note that this function is linear in the files available to the +// iterator. Use the compensatedSizeAnnotator if querying the total +// compensated size of a level. +func totalCompensatedSize(iter manifest.LevelIterator) uint64 { + var sz uint64 + for f := iter.First(); f != nil; f = iter.Next() { + sz += compensatedSize(f) + } + return sz +} + +// compactionPickerByScore holds the state and logic for picking a compaction. A +// compaction picker is associated with a single version. A new compaction +// picker is created and initialized every time a new version is installed. +type compactionPickerByScore struct { + opts *Options + vers *version + // The level to target for L0 compactions. Levels L1 to baseLevel must be + // empty. + baseLevel int + // levelMaxBytes holds the dynamically adjusted max bytes setting for each + // level. 
+ levelMaxBytes [numLevels]int64 +} + +var _ compactionPicker = &compactionPickerByScore{} + +func (p *compactionPickerByScore) getScores(inProgress []compactionInfo) [numLevels]float64 { + var scores [numLevels]float64 + for _, info := range p.calculateLevelScores(inProgress) { + scores[info.level] = info.compensatedScoreRatio + } + return scores +} + +func (p *compactionPickerByScore) getBaseLevel() int { + if p == nil { + return 1 + } + return p.baseLevel +} + +// estimatedCompactionDebt estimates the number of bytes which need to be +// compacted before the LSM tree becomes stable. +func (p *compactionPickerByScore) estimatedCompactionDebt(l0ExtraSize uint64) uint64 { + if p == nil { + return 0 + } + + // We assume that all the bytes in L0 need to be compacted to Lbase. This is + // unlike the RocksDB logic that figures out whether L0 needs compaction. + bytesAddedToNextLevel := l0ExtraSize + p.vers.Levels[0].Size() + lbaseSize := p.vers.Levels[p.baseLevel].Size() + + var compactionDebt uint64 + if bytesAddedToNextLevel > 0 && lbaseSize > 0 { + // We only incur compaction debt if both L0 and Lbase contain data. If L0 + // is empty, no compaction is necessary. If Lbase is empty, a move-based + // compaction from L0 would occur. + compactionDebt += bytesAddedToNextLevel + lbaseSize + } + + // loop invariant: At the beginning of the loop, bytesAddedToNextLevel is the + // bytes added to `level` in the loop. + for level := p.baseLevel; level < numLevels-1; level++ { + levelSize := p.vers.Levels[level].Size() + bytesAddedToNextLevel + nextLevelSize := p.vers.Levels[level+1].Size() + if levelSize > uint64(p.levelMaxBytes[level]) { + bytesAddedToNextLevel = levelSize - uint64(p.levelMaxBytes[level]) + if nextLevelSize > 0 { + // We only incur compaction debt if the next level contains data. If the + // next level is empty, a move-based compaction would be used. 
+ levelRatio := float64(nextLevelSize) / float64(levelSize) + // The current level contributes bytesAddedToNextLevel to compactions. + // The next level contributes levelRatio * bytesAddedToNextLevel. + compactionDebt += uint64(float64(bytesAddedToNextLevel) * (levelRatio + 1)) + } + } else { + // We're not moving any bytes to the next level. + bytesAddedToNextLevel = 0 + } + } + return compactionDebt +} + +func (p *compactionPickerByScore) initLevelMaxBytes(inProgressCompactions []compactionInfo) { + // The levelMaxBytes calculations here differ from RocksDB in two ways: + // + // 1. The use of dbSize vs maxLevelSize. RocksDB uses the size of the maximum + // level in L1-L6, rather than determining the size of the bottom level + // based on the total amount of data in the dB. The RocksDB calculation is + // problematic if L0 contains a significant fraction of data, or if the + // level sizes are roughly equal and thus there is a significant fraction + // of data outside of the largest level. + // + // 2. Not adjusting the size of Lbase based on L0. RocksDB computes + // baseBytesMax as the maximum of the configured LBaseMaxBytes and the + // size of L0. This is problematic because baseBytesMax is used to compute + // the max size of lower levels. A very large baseBytesMax will result in + // an overly large value for the size of lower levels which will caused + // those levels not to be compacted even when they should be + // compacted. This often results in "inverted" LSM shapes where Ln is + // larger than Ln+1. + + // Determine the first non-empty level and the total DB size. 
+ firstNonEmptyLevel := -1 + var dbSize uint64 + for level := 1; level < numLevels; level++ { + if p.vers.Levels[level].Size() > 0 { + if firstNonEmptyLevel == -1 { + firstNonEmptyLevel = level + } + dbSize += p.vers.Levels[level].Size() + } + } + for _, c := range inProgressCompactions { + if c.outputLevel == 0 || c.outputLevel == -1 { + continue + } + if c.inputs[0].level == 0 && (firstNonEmptyLevel == -1 || c.outputLevel < firstNonEmptyLevel) { + firstNonEmptyLevel = c.outputLevel + } + } + + // Initialize the max-bytes setting for each level to "infinity" which will + // disallow compaction for that level. We'll fill in the actual value below + // for levels we want to allow compactions from. + for level := 0; level < numLevels; level++ { + p.levelMaxBytes[level] = math.MaxInt64 + } + + if dbSize == 0 { + // No levels for L1 and up contain any data. Target L0 compactions for the + // last level or to the level to which there is an ongoing L0 compaction. + p.baseLevel = numLevels - 1 + if firstNonEmptyLevel >= 0 { + p.baseLevel = firstNonEmptyLevel + } + return + } + + dbSize += p.vers.Levels[0].Size() + bottomLevelSize := dbSize - dbSize/uint64(p.opts.Experimental.LevelMultiplier) + + curLevelSize := bottomLevelSize + for level := numLevels - 2; level >= firstNonEmptyLevel; level-- { + curLevelSize = uint64(float64(curLevelSize) / float64(p.opts.Experimental.LevelMultiplier)) + } + + // Compute base level (where L0 data is compacted to). 
+ baseBytesMax := uint64(p.opts.LBaseMaxBytes) + p.baseLevel = firstNonEmptyLevel + for p.baseLevel > 1 && curLevelSize > baseBytesMax { + p.baseLevel-- + curLevelSize = uint64(float64(curLevelSize) / float64(p.opts.Experimental.LevelMultiplier)) + } + + smoothedLevelMultiplier := 1.0 + if p.baseLevel < numLevels-1 { + smoothedLevelMultiplier = math.Pow( + float64(bottomLevelSize)/float64(baseBytesMax), + 1.0/float64(numLevels-p.baseLevel-1)) + } + + levelSize := float64(baseBytesMax) + for level := p.baseLevel; level < numLevels; level++ { + if level > p.baseLevel && levelSize > 0 { + levelSize *= smoothedLevelMultiplier + } + // Round the result since test cases use small target level sizes, which + // can be impacted by floating-point imprecision + integer truncation. + roundedLevelSize := math.Round(levelSize) + if roundedLevelSize > float64(math.MaxInt64) { + p.levelMaxBytes[level] = math.MaxInt64 + } else { + p.levelMaxBytes[level] = int64(roundedLevelSize) + } + } +} + +type levelSizeAdjust struct { + incomingActualBytes uint64 + outgoingActualBytes uint64 + outgoingCompensatedBytes uint64 +} + +func (a levelSizeAdjust) compensated() uint64 { + return a.incomingActualBytes - a.outgoingCompensatedBytes +} + +func (a levelSizeAdjust) actual() uint64 { + return a.incomingActualBytes - a.outgoingActualBytes +} + +func calculateSizeAdjust(inProgressCompactions []compactionInfo) [numLevels]levelSizeAdjust { + // Compute size adjustments for each level based on the in-progress + // compactions. We sum the file sizes of all files leaving and entering each + // level in in-progress compactions. For outgoing files, we also sum a + // separate sum of 'compensated file sizes', which are inflated according + // to deletion estimates. + // + // When we adjust a level's size according to these values during score + // calculation, we subtract the compensated size of start level inputs to + // account for the fact that score calculation uses compensated sizes. 
+ // + // Since compensated file sizes may be compensated because they reclaim + // space from the output level's files, we only add the real file size to + // the output level. + // + // This is slightly different from RocksDB's behavior, which simply elides + // compacting files from the level size calculation. + var sizeAdjust [numLevels]levelSizeAdjust + for i := range inProgressCompactions { + c := &inProgressCompactions[i] + // If this compaction's version edit has already been applied, there's + // no need to adjust: The LSM we'll examine will already reflect the + // new LSM state. + if c.versionEditApplied { + continue + } + + for _, input := range c.inputs { + actualSize := input.files.SizeSum() + compensatedSize := totalCompensatedSize(input.files.Iter()) + + if input.level != c.outputLevel { + sizeAdjust[input.level].outgoingCompensatedBytes += compensatedSize + sizeAdjust[input.level].outgoingActualBytes += actualSize + if c.outputLevel != -1 { + sizeAdjust[c.outputLevel].incomingActualBytes += actualSize + } + } + } + } + return sizeAdjust +} + +func levelCompensatedSize(lm manifest.LevelMetadata) uint64 { + return *lm.Annotation(compensatedSizeAnnotator{}).(*uint64) +} + +func (p *compactionPickerByScore) calculateLevelScores( + inProgressCompactions []compactionInfo, +) [numLevels]candidateLevelInfo { + var scores [numLevels]candidateLevelInfo + for i := range scores { + scores[i].level = i + scores[i].outputLevel = i + 1 + } + l0UncompensatedScore := calculateL0UncompensatedScore(p.vers, p.opts, inProgressCompactions) + scores[0] = candidateLevelInfo{ + outputLevel: p.baseLevel, + uncompensatedScore: l0UncompensatedScore, + compensatedScore: l0UncompensatedScore, /* No level size compensation for L0 */ + } + sizeAdjust := calculateSizeAdjust(inProgressCompactions) + for level := 1; level < numLevels; level++ { + compensatedLevelSize := levelCompensatedSize(p.vers.Levels[level]) + sizeAdjust[level].compensated() + scores[level].compensatedScore = 
float64(compensatedLevelSize) / float64(p.levelMaxBytes[level]) + scores[level].uncompensatedScore = float64(p.vers.Levels[level].Size()+sizeAdjust[level].actual()) / float64(p.levelMaxBytes[level]) + } + + // Adjust each level's {compensated, uncompensated}Score by the uncompensatedScore + // of the next level to get a {compensated, uncompensated}ScoreRatio. If the + // next level has a high uncompensatedScore, and is thus a priority for compaction, + // this reduces the priority for compacting the current level. If the next level + // has a low uncompensatedScore (i.e. it is below its target size), this increases + // the priority for compacting the current level. + // + // The effect of this adjustment is to help prioritize compactions in lower + // levels. The following example shows the compensatedScoreRatio and the + // compensatedScore. In this scenario, L0 has 68 sublevels. L3 (a.k.a. Lbase) + // is significantly above its target size. The original score prioritizes + // compactions from those two levels, but doing so ends up causing a future + // problem: data piles up in the higher levels, starving L5->L6 compactions, + // and to a lesser degree starving L4->L5 compactions. + // + // Note that in the example shown there is no level size compensation so the + // compensatedScore and the uncompensatedScore is the same for each level. + // + // compensatedScoreRatio compensatedScore uncompensatedScore size max-size + // L0 3.2 68.0 68.0 2.2 G - + // L3 3.2 21.1 21.1 1.3 G 64 M + // L4 3.4 6.7 6.7 3.1 G 467 M + // L5 3.4 2.0 2.0 6.6 G 3.3 G + // L6 0.6 0.6 0.6 14 G 24 G + var prevLevel int + for level := p.baseLevel; level < numLevels; level++ { + // The compensated scores, and uncompensated scores will be turned into + // ratios as they're adjusted according to other levels' sizes. 
+ scores[prevLevel].compensatedScoreRatio = scores[prevLevel].compensatedScore + scores[prevLevel].uncompensatedScoreRatio = scores[prevLevel].uncompensatedScore + + // Avoid absurdly large scores by placing a floor on the score that we'll + // adjust a level by. The value of 0.01 was chosen somewhat arbitrarily. + const minScore = 0.01 + if scores[prevLevel].compensatedScoreRatio >= compactionScoreThreshold { + if scores[level].uncompensatedScore >= minScore { + scores[prevLevel].compensatedScoreRatio /= scores[level].uncompensatedScore + } else { + scores[prevLevel].compensatedScoreRatio /= minScore + } + } + if scores[prevLevel].uncompensatedScoreRatio >= compactionScoreThreshold { + if scores[level].uncompensatedScore >= minScore { + scores[prevLevel].uncompensatedScoreRatio /= scores[level].uncompensatedScore + } else { + scores[prevLevel].uncompensatedScoreRatio /= minScore + } + } + prevLevel = level + } + // Set the score ratios for the lowest level. + // INVARIANT: prevLevel == numLevels-1 + scores[prevLevel].compensatedScoreRatio = scores[prevLevel].compensatedScore + scores[prevLevel].uncompensatedScoreRatio = scores[prevLevel].uncompensatedScore + + sort.Sort(sortCompactionLevelsByPriority(scores[:])) + return scores +} + +// calculateL0UncompensatedScore calculates a float score representing the +// relative priority of compacting L0. Level L0 is special in that files within +// L0 may overlap one another, so a different set of heuristics that take into +// account read amplification apply. +func calculateL0UncompensatedScore( + vers *version, opts *Options, inProgressCompactions []compactionInfo, +) float64 { + // Use the sublevel count to calculate the score. The base vs intra-L0 + // compaction determination happens in pickAuto, not here. 
+ score := float64(2*vers.L0Sublevels.MaxDepthAfterOngoingCompactions()) / + float64(opts.L0CompactionThreshold) + + // Also calculate a score based on the file count but use it only if it + // produces a higher score than the sublevel-based one. This heuristic is + // designed to accommodate cases where L0 is accumulating non-overlapping + // files in L0. Letting too many non-overlapping files accumulate in few + // sublevels is undesirable, because: + // 1) we can produce a massive backlog to compact once files do overlap. + // 2) constructing L0 sublevels has a runtime that grows superlinearly with + // the number of files in L0 and must be done while holding D.mu. + noncompactingFiles := vers.Levels[0].Len() + for _, c := range inProgressCompactions { + for _, cl := range c.inputs { + if cl.level == 0 { + noncompactingFiles -= cl.files.Len() + } + } + } + fileScore := float64(noncompactingFiles) / float64(opts.L0CompactionFileThreshold) + if score < fileScore { + score = fileScore + } + return score +} + +// pickCompactionSeedFile picks a file from `level` in the `vers` to build a +// compaction around. Currently, this function implements a heuristic similar to +// RocksDB's kMinOverlappingRatio, seeking to minimize write amplification. This +// function is linear with respect to the number of files in `level` and +// `outputLevel`. +func pickCompactionSeedFile( + vers *version, opts *Options, level, outputLevel int, earliestSnapshotSeqNum uint64, +) (manifest.LevelFile, bool) { + // Select the file within the level to compact. We want to minimize write + // amplification, but also ensure that deletes are propagated to the + // bottom level in a timely fashion so as to reclaim disk space. A table's + // smallest sequence number provides a measure of its age. The ratio of + // overlapping-bytes / table-size gives an indication of write + // amplification (a smaller ratio is preferable). 
+ // + // The current heuristic is based off the RocksDB kMinOverlappingRatio + // heuristic. It chooses the file with the minimum overlapping ratio with + // the target level, which minimizes write amplification. + // + // It uses a "compensated size" for the denominator, which is the file + // size but artificially inflated by an estimate of the space that may be + // reclaimed through compaction. Currently, we only compensate for range + // deletions and only with a rough estimate of the reclaimable bytes. This + // differs from RocksDB which only compensates for point tombstones and + // only if they exceed the number of non-deletion entries in table. + // + // TODO(peter): For concurrent compactions, we may want to try harder to + // pick a seed file whose resulting compaction bounds do not overlap with + // an in-progress compaction. + + cmp := opts.Comparer.Compare + startIter := vers.Levels[level].Iter() + outputIter := vers.Levels[outputLevel].Iter() + + var file manifest.LevelFile + smallestRatio := uint64(math.MaxUint64) + + outputFile := outputIter.First() + + for f := startIter.First(); f != nil; f = startIter.Next() { + var overlappingBytes uint64 + compacting := f.IsCompacting() + if compacting { + // Move on if this file is already being compacted. We'll likely + // still need to move past the overlapping output files regardless, + // but in cases where all start-level files are compacting we won't. + continue + } + + // Trim any output-level files smaller than f. 
+ for outputFile != nil && sstableKeyCompare(cmp, outputFile.Largest, f.Smallest) < 0 { + outputFile = outputIter.Next() + } + + for outputFile != nil && sstableKeyCompare(cmp, outputFile.Smallest, f.Largest) <= 0 && !compacting { + overlappingBytes += outputFile.Size + compacting = compacting || outputFile.IsCompacting() + + // For files in the bottommost level of the LSM, the + // Stats.RangeDeletionsBytesEstimate field is set to the estimate + // of bytes /within/ the file itself that may be dropped by + // recompacting the file. These bytes from obsolete keys would not + // need to be rewritten if we compacted `f` into `outputFile`, so + // they don't contribute to write amplification. Subtracting them + // out of the overlapping bytes helps prioritize these compactions + // that are cheaper than their file sizes suggest. + if outputLevel == numLevels-1 && outputFile.LargestSeqNum < earliestSnapshotSeqNum { + overlappingBytes -= outputFile.Stats.RangeDeletionsBytesEstimate + } + + // If the file in the next level extends beyond f's largest key, + // break out and don't advance outputIter because f's successor + // might also overlap. + // + // Note, we stop as soon as we encounter an output-level file with a + // largest key beyond the input-level file's largest bound. We + // perform a simple user key comparison here using sstableKeyCompare + // which handles the potential for exclusive largest key bounds. + // There's some subtlety when the bounds are equal (eg, equal and + // inclusive, or equal and exclusive). Current Pebble doesn't split + // user keys across sstables within a level (and in format versions + // FormatSplitUserKeysMarkedCompacted and later we guarantee no + // split user keys exist within the entire LSM). In that case, we're + // assured that neither the input level nor the output level's next + // file shares the same user key, so compaction expansion will not + // include them in any compaction compacting `f`. 
+ // + // NB: If we /did/ allow split user keys, or we're running on an + // old database with an earlier format major version where there are + // existing split user keys, this logic would be incorrect. Consider + // L1: [a#120,a#100] [a#80,a#60] + // L2: [a#55,a#45] [a#35,a#25] [a#15,a#5] + // While considering the first file in L1, [a#120,a#100], we'd skip + // past all of the files in L2. When considering the second file in + // L1, we'd improperly conclude that the second file overlaps + // nothing in the second level and is cheap to compact, when in + // reality we'd need to expand the compaction to include all 5 + // files. + if sstableKeyCompare(cmp, outputFile.Largest, f.Largest) > 0 { + break + } + outputFile = outputIter.Next() + } + + // If the input level file or one of the overlapping files is + // compacting, we're not going to be able to compact this file + // anyways, so skip it. + if compacting { + continue + } + + compSz := compensatedSize(f) + scaledRatio := overlappingBytes * 1024 / compSz + if scaledRatio < smallestRatio { + smallestRatio = scaledRatio + file = startIter.Take() + } + } + return file, file.FileMetadata != nil +} + +// pickAuto picks the best compaction, if any. +// +// On each call, pickAuto computes per-level size adjustments based on +// in-progress compactions, and computes a per-level score. The levels are +// iterated over in decreasing score order trying to find a valid compaction +// anchored at that level. +// +// If a score-based compaction cannot be found, pickAuto falls back to looking +// for an elision-only compaction to remove obsolete keys. +func (p *compactionPickerByScore) pickAuto(env compactionEnv) (pc *pickedCompaction) { + // Compaction concurrency is controlled by L0 read-amp. We allow one + // additional compaction per L0CompactionConcurrency sublevels, as well as + // one additional compaction per CompactionDebtConcurrency bytes of + // compaction debt. 
Compaction concurrency is tied to L0 sublevels as that + // signal is independent of the database size. We tack on the compaction + // debt as a second signal to prevent compaction concurrency from dropping + // significantly right after a base compaction finishes, and before those + // bytes have been compacted further down the LSM. + if n := len(env.inProgressCompactions); n > 0 { + l0ReadAmp := p.vers.L0Sublevels.MaxDepthAfterOngoingCompactions() + compactionDebt := p.estimatedCompactionDebt(0) + ccSignal1 := n * p.opts.Experimental.L0CompactionConcurrency + ccSignal2 := uint64(n) * p.opts.Experimental.CompactionDebtConcurrency + if l0ReadAmp < ccSignal1 && compactionDebt < ccSignal2 { + return nil + } + } + + scores := p.calculateLevelScores(env.inProgressCompactions) + + // TODO(bananabrick): Either remove, or change this into an event sent to the + // EventListener. + logCompaction := func(pc *pickedCompaction) { + var buf bytes.Buffer + for i := 0; i < numLevels; i++ { + if i != 0 && i < p.baseLevel { + continue + } + + var info *candidateLevelInfo + for j := range scores { + if scores[j].level == i { + info = &scores[j] + break + } + } + + marker := " " + if pc.startLevel.level == info.level { + marker = "*" + } + fmt.Fprintf(&buf, " %sL%d: %5.1f %5.1f %5.1f %5.1f %8s %8s", + marker, info.level, info.compensatedScoreRatio, info.compensatedScore, + info.uncompensatedScoreRatio, info.uncompensatedScore, + humanize.Bytes.Int64(int64(totalCompensatedSize( + p.vers.Levels[info.level].Iter(), + ))), + humanize.Bytes.Int64(p.levelMaxBytes[info.level]), + ) + + count := 0 + for i := range env.inProgressCompactions { + c := &env.inProgressCompactions[i] + if c.inputs[0].level != info.level { + continue + } + count++ + if count == 1 { + fmt.Fprintf(&buf, " [") + } else { + fmt.Fprintf(&buf, " ") + } + fmt.Fprintf(&buf, "L%d->L%d", c.inputs[0].level, c.outputLevel) + } + if count > 0 { + fmt.Fprintf(&buf, "]") + } + fmt.Fprintf(&buf, "\n") + } + 
p.opts.Logger.Infof("pickAuto: L%d->L%d\n%s", + pc.startLevel.level, pc.outputLevel.level, buf.String()) + } + + // Check for a score-based compaction. candidateLevelInfos are first sorted + // by whether they should be compacted, so if we find a level which shouldn't + // be compacted, we can break early. + for i := range scores { + info := &scores[i] + if !info.shouldCompact() { + break + } + if info.level == numLevels-1 { + continue + } + + if info.level == 0 { + pc = pickL0(env, p.opts, p.vers, p.baseLevel) + // Fail-safe to protect against compacting the same sstable + // concurrently. + if pc != nil && !inputRangeAlreadyCompacting(env, pc) { + p.addScoresToPickedCompactionMetrics(pc, scores) + pc.score = info.compensatedScoreRatio + // TODO(bananabrick): Create an EventListener for logCompaction. + if false { + logCompaction(pc) + } + return pc + } + continue + } + + // info.level > 0 + var ok bool + info.file, ok = pickCompactionSeedFile(p.vers, p.opts, info.level, info.outputLevel, env.earliestSnapshotSeqNum) + if !ok { + continue + } + + pc := pickAutoLPositive(env, p.opts, p.vers, *info, p.baseLevel, p.levelMaxBytes) + // Fail-safe to protect against compacting the same sstable concurrently. + if pc != nil && !inputRangeAlreadyCompacting(env, pc) { + p.addScoresToPickedCompactionMetrics(pc, scores) + pc.score = info.compensatedScoreRatio + // TODO(bananabrick): Create an EventListener for logCompaction. + if false { + logCompaction(pc) + } + return pc + } + } + + // Check for L6 files with tombstones that may be elided. These files may + // exist if a snapshot prevented the elision of a tombstone or because of + // a move compaction. These are low-priority compactions because they + // don't help us keep up with writes, just reclaim disk space. 
+ if pc := p.pickElisionOnlyCompaction(env); pc != nil { + return pc + } + + if pc := p.pickReadTriggeredCompaction(env); pc != nil { + return pc + } + + // NB: This should only be run if a read compaction wasn't + // scheduled. + // + // We won't be scheduling a read compaction right now, and in + // read heavy workloads, compactions won't be scheduled frequently + // because flushes aren't frequent. So we need to signal to the + // iterator to schedule a compaction when it adds compactions to + // the read compaction queue. + // + // We need the nil check here because without it, we have some + // tests which don't set that variable fail. Since there's a + // chance that one of those tests wouldn't want extra compactions + // to be scheduled, I added this check here, instead of + // setting rescheduleReadCompaction in those tests. + if env.readCompactionEnv.rescheduleReadCompaction != nil { + *env.readCompactionEnv.rescheduleReadCompaction = true + } + + // At the lowest possible compaction-picking priority, look for files marked + // for compaction. Pebble will mark files for compaction if they have atomic + // compaction units that span multiple files. While current Pebble code does + // not construct such sstables, RocksDB and earlier versions of Pebble may + // have created them. These split user keys form sets of files that must be + // compacted together for correctness (referred to as "atomic compaction + // units" within the code). Rewrite them in-place. + // + // It's also possible that a file may have been marked for compaction by + // even earlier versions of Pebble code, since FileMetadata's + // MarkedForCompaction field is persisted in the manifest. That's okay. We + // previously would've ignored the designation, whereas now we'll re-compact + // the file in place. 
+ if p.vers.Stats.MarkedForCompaction > 0 { + if pc := p.pickRewriteCompaction(env); pc != nil { + return pc + } + } + + return nil +} + +func (p *compactionPickerByScore) addScoresToPickedCompactionMetrics( + pc *pickedCompaction, candInfo [numLevels]candidateLevelInfo, +) { + + // candInfo is sorted by score, not by compaction level. + infoByLevel := [numLevels]candidateLevelInfo{} + for i := range candInfo { + level := candInfo[i].level + infoByLevel[level] = candInfo[i] + } + // Gather the compaction scores for the levels participating in the compaction. + pc.pickerMetrics.scores = make([]float64, len(pc.inputs)) + inputIdx := 0 + for i := range infoByLevel { + if pc.inputs[inputIdx].level == infoByLevel[i].level { + pc.pickerMetrics.scores[inputIdx] = infoByLevel[i].compensatedScoreRatio + inputIdx++ + } + if inputIdx == len(pc.inputs) { + break + } + } +} + +// elisionOnlyAnnotator implements the manifest.Annotator interface, +// annotating B-Tree nodes with the *fileMetadata of a file meeting the +// obsolete keys criteria for an elision-only compaction within the subtree. +// If multiple files meet the criteria, it chooses whichever file has the +// lowest LargestSeqNum. The lowest LargestSeqNum file will be the first +// eligible for an elision-only compaction once snapshots less than or equal +// to its LargestSeqNum are closed. +type elisionOnlyAnnotator struct{} + +var _ manifest.Annotator = elisionOnlyAnnotator{} + +func (a elisionOnlyAnnotator) Zero(interface{}) interface{} { + return nil +} + +func (a elisionOnlyAnnotator) Accumulate(f *fileMetadata, dst interface{}) (interface{}, bool) { + if f.IsCompacting() { + return dst, true + } + if !f.StatsValid() { + return dst, false + } + // Bottommost files are large and not worthwhile to compact just + // to remove a few tombstones. Consider a file ineligible if its + // own range deletions delete less than 10% of its data and its + // deletion tombstones make up less than 10% of its entries. 
+ // + // TODO(jackson): This does not account for duplicate user keys + // which may be collapsed. Ideally, we would have 'obsolete keys' + // statistics that would include tombstones, the keys that are + // dropped by tombstones and duplicated user keys. See #847. + // + // Note that tables that contain exclusively range keys (i.e. no point keys, + // `NumEntries` and `RangeDeletionsBytesEstimate` are both zero) are excluded + // from elision-only compactions. + // TODO(travers): Consider an alternative heuristic for elision of range-keys. + if f.Stats.RangeDeletionsBytesEstimate*10 < f.Size && + f.Stats.NumDeletions*10 <= f.Stats.NumEntries { + return dst, true + } + if dst == nil { + return f, true + } else if dstV := dst.(*fileMetadata); dstV.LargestSeqNum > f.LargestSeqNum { + return f, true + } + return dst, true +} + +func (a elisionOnlyAnnotator) Merge(v interface{}, accum interface{}) interface{} { + if v == nil { + return accum + } + // If we haven't accumulated an eligible file yet, or f's LargestSeqNum is + // less than the accumulated file's, use f. + if accum == nil { + return v + } + f := v.(*fileMetadata) + accumV := accum.(*fileMetadata) + if accumV == nil || accumV.LargestSeqNum > f.LargestSeqNum { + return f + } + return accumV +} + +// markedForCompactionAnnotator implements the manifest.Annotator interface, +// annotating B-Tree nodes with the *fileMetadata of a file that is marked for +// compaction within the subtree. If multiple files meet the criteria, it +// chooses whichever file has the lowest LargestSeqNum. +type markedForCompactionAnnotator struct{} + +var _ manifest.Annotator = markedForCompactionAnnotator{} + +func (a markedForCompactionAnnotator) Zero(interface{}) interface{} { + return nil +} + +func (a markedForCompactionAnnotator) Accumulate( + f *fileMetadata, dst interface{}, +) (interface{}, bool) { + if !f.MarkedForCompaction { + // Not marked for compaction; return dst. 
+ return dst, true + } + return markedMergeHelper(f, dst) +} + +func (a markedForCompactionAnnotator) Merge(v interface{}, accum interface{}) interface{} { + if v == nil { + return accum + } + accum, _ = markedMergeHelper(v.(*fileMetadata), accum) + return accum +} + +// REQUIRES: f is non-nil, and f.MarkedForCompaction=true. +func markedMergeHelper(f *fileMetadata, dst interface{}) (interface{}, bool) { + if dst == nil { + return f, true + } else if dstV := dst.(*fileMetadata); dstV.LargestSeqNum > f.LargestSeqNum { + return f, true + } + return dst, true +} + +// pickElisionOnlyCompaction looks for compactions of sstables in the +// bottommost level containing obsolete records that may now be dropped. +func (p *compactionPickerByScore) pickElisionOnlyCompaction( + env compactionEnv, +) (pc *pickedCompaction) { + if p.opts.private.disableElisionOnlyCompactions { + return nil + } + v := p.vers.Levels[numLevels-1].Annotation(elisionOnlyAnnotator{}) + if v == nil { + return nil + } + candidate := v.(*fileMetadata) + if candidate.IsCompacting() || candidate.LargestSeqNum >= env.earliestSnapshotSeqNum { + return nil + } + lf := p.vers.Levels[numLevels-1].Find(p.opts.Comparer.Compare, candidate) + if lf == nil { + panic(fmt.Sprintf("file %s not found in level %d as expected", candidate.FileNum, numLevels-1)) + } + + // Construct a picked compaction of the elision candidate's atomic + // compaction unit. + pc = newPickedCompaction(p.opts, p.vers, numLevels-1, numLevels-1, p.baseLevel) + pc.kind = compactionKindElisionOnly + var isCompacting bool + pc.startLevel.files, isCompacting = expandToAtomicUnit(p.opts.Comparer.Compare, lf.Slice(), false /* disableIsCompacting */) + if isCompacting { + return nil + } + pc.smallest, pc.largest = manifest.KeyRange(pc.cmp, pc.startLevel.files.Iter()) + // Fail-safe to protect against compacting the same sstable concurrently. 
+ if !inputRangeAlreadyCompacting(env, pc) { + return pc + } + return nil +} + +// pickRewriteCompaction attempts to construct a compaction that +// rewrites a file marked for compaction. pickRewriteCompaction will +// pull in adjacent files in the file's atomic compaction unit if +// necessary. A rewrite compaction outputs files to the same level as +// the input level. +func (p *compactionPickerByScore) pickRewriteCompaction(env compactionEnv) (pc *pickedCompaction) { + for l := numLevels - 1; l >= 0; l-- { + v := p.vers.Levels[l].Annotation(markedForCompactionAnnotator{}) + if v == nil { + // Try the next level. + continue + } + candidate := v.(*fileMetadata) + if candidate.IsCompacting() { + // Try the next level. + continue + } + lf := p.vers.Levels[l].Find(p.opts.Comparer.Compare, candidate) + if lf == nil { + panic(fmt.Sprintf("file %s not found in level %d as expected", candidate.FileNum, numLevels-1)) + } + + inputs := lf.Slice() + // L0 files generated by a flush have never been split such that + // adjacent files can contain the same user key. So we do not need to + // rewrite an atomic compaction unit for L0. Note that there is nothing + // preventing two different flushes from producing files that are + // non-overlapping from an InternalKey perspective, but span the same + // user key. However, such files cannot be in the same L0 sublevel, + // since each sublevel requires non-overlapping user keys (unlike other + // levels). + if l > 0 { + // Find this file's atomic compaction unit. This is only relevant + // for levels L1+. + var isCompacting bool + inputs, isCompacting = expandToAtomicUnit( + p.opts.Comparer.Compare, + inputs, + false, /* disableIsCompacting */ + ) + if isCompacting { + // Try the next level. 
+ continue + } + } + + pc = newPickedCompaction(p.opts, p.vers, l, l, p.baseLevel) + pc.outputLevel.level = l + pc.kind = compactionKindRewrite + pc.startLevel.files = inputs + pc.smallest, pc.largest = manifest.KeyRange(pc.cmp, pc.startLevel.files.Iter()) + + // Fail-safe to protect against compacting the same sstable concurrently. + if !inputRangeAlreadyCompacting(env, pc) { + if pc.startLevel.level == 0 { + pc.startLevel.l0SublevelInfo = generateSublevelInfo(pc.cmp, pc.startLevel.files) + } + return pc + } + } + return nil +} + +// pickAutoLPositive picks an automatic compaction for the candidate +// file in a positive-numbered level. This function must not be used for +// L0. +func pickAutoLPositive( + env compactionEnv, + opts *Options, + vers *version, + cInfo candidateLevelInfo, + baseLevel int, + levelMaxBytes [numLevels]int64, +) (pc *pickedCompaction) { + if cInfo.level == 0 { + panic("pebble: pickAutoLPositive called for L0") + } + + pc = newPickedCompaction(opts, vers, cInfo.level, defaultOutputLevel(cInfo.level, baseLevel), baseLevel) + if pc.outputLevel.level != cInfo.outputLevel { + panic("pebble: compaction picked unexpected output level") + } + pc.startLevel.files = cInfo.file.Slice() + // Files in level 0 may overlap each other, so pick up all overlapping ones. + if pc.startLevel.level == 0 { + cmp := opts.Comparer.Compare + smallest, largest := manifest.KeyRange(cmp, pc.startLevel.files.Iter()) + pc.startLevel.files = vers.Overlaps(0, cmp, smallest.UserKey, + largest.UserKey, largest.IsExclusiveSentinel()) + if pc.startLevel.files.Empty() { + panic("pebble: empty compaction") + } + } + + if !pc.setupInputs(opts, env.diskAvailBytes, pc.startLevel) { + return nil + } + return pc.maybeAddLevel(opts, env.diskAvailBytes) +} + +// maybeAddLevel maybe adds a level to the picked compaction. 
+func (pc *pickedCompaction) maybeAddLevel(opts *Options, diskAvailBytes uint64) *pickedCompaction { + pc.pickerMetrics.singleLevelOverlappingRatio = pc.overlappingRatio() + if pc.outputLevel.level == numLevels-1 { + // Don't add a level if the current output level is in L6 + return pc + } + if !opts.Experimental.MultiLevelCompactionHeuristic.allowL0() && pc.startLevel.level == 0 { + return pc + } + if pc.compactionSize() > expandedCompactionByteSizeLimit( + opts, adjustedOutputLevel(pc.outputLevel.level, pc.baseLevel), diskAvailBytes) { + // Don't add a level if the current compaction exceeds the compaction size limit + return pc + } + return opts.Experimental.MultiLevelCompactionHeuristic.pick(pc, opts, diskAvailBytes) +} + +// MultiLevelHeuristic evaluates whether to add files from the next level into the compaction. +type MultiLevelHeuristic interface { + // Evaluate returns the preferred compaction. + pick(pc *pickedCompaction, opts *Options, diskAvailBytes uint64) *pickedCompaction + + // Returns if the heuristic allows L0 to be involved in ML compaction + allowL0() bool +} + +// NoMultiLevel will never add an additional level to the compaction. 
+type NoMultiLevel struct{} + +var _ MultiLevelHeuristic = (*NoMultiLevel)(nil) + +func (nml NoMultiLevel) pick( + pc *pickedCompaction, opts *Options, diskAvailBytes uint64, +) *pickedCompaction { + return pc +} + +func (nml NoMultiLevel) allowL0() bool { + return false +} + +func (pc *pickedCompaction) predictedWriteAmp() float64 { + var bytesToCompact uint64 + var higherLevelBytes uint64 + for i := range pc.inputs { + levelSize := pc.inputs[i].files.SizeSum() + bytesToCompact += levelSize + if i != len(pc.inputs)-1 { + higherLevelBytes += levelSize + } + } + return float64(bytesToCompact) / float64(higherLevelBytes) +} + +func (pc *pickedCompaction) overlappingRatio() float64 { + var higherLevelBytes uint64 + var lowestLevelBytes uint64 + for i := range pc.inputs { + levelSize := pc.inputs[i].files.SizeSum() + if i == len(pc.inputs)-1 { + lowestLevelBytes += levelSize + continue + } + higherLevelBytes += levelSize + } + return float64(lowestLevelBytes) / float64(higherLevelBytes) +} + +// WriteAmpHeuristic defines a multi level compaction heuristic which will add +// an additional level to the picked compaction if it reduces predicted write +// amp of the compaction + the addPropensity constant. +type WriteAmpHeuristic struct { + // addPropensity is a constant that affects the propensity to conduct multilevel + // compactions. If positive, a multilevel compaction may get picked even if + // the single level compaction has lower write amp, and vice versa. + AddPropensity float64 + + // AllowL0 if true, allow l0 to be involved in a ML compaction. + AllowL0 bool +} + +var _ MultiLevelHeuristic = (*WriteAmpHeuristic)(nil) + +// TODO(msbutler): microbenchmark the extent to which multilevel compaction +// picking slows down the compaction picking process. This should be as fast as +// possible since Compaction-picking holds d.mu, which prevents WAL rotations, +// in-progress flushes and compactions from completing, etc. 
Consider ways to +// deduplicate work, given that setupInputs has already been called. +func (wa WriteAmpHeuristic) pick( + pcOrig *pickedCompaction, opts *Options, diskAvailBytes uint64, +) *pickedCompaction { + pcMulti := pcOrig.clone() + if !pcMulti.setupMultiLevelCandidate(opts, diskAvailBytes) { + return pcOrig + } + picked := pcOrig + if pcMulti.predictedWriteAmp() <= pcOrig.predictedWriteAmp()+wa.AddPropensity { + picked = pcMulti + } + // Regardless of what compaction was picked, log the multilevelOverlapping ratio. + picked.pickerMetrics.multiLevelOverlappingRatio = pcMulti.overlappingRatio() + return picked +} + +func (wa WriteAmpHeuristic) allowL0() bool { + return wa.AllowL0 +} + +// Helper method to pick compactions originating from L0. Uses information about +// sublevels to generate a compaction. +func pickL0(env compactionEnv, opts *Options, vers *version, baseLevel int) (pc *pickedCompaction) { + // It is important to pass information about Lbase files to L0Sublevels + // so it can pick a compaction that does not conflict with an Lbase => Lbase+1 + // compaction. Without this, we observed reduced concurrency of L0=>Lbase + // compactions, and increasing read amplification in L0. + // + // TODO(bilal) Remove the minCompactionDepth parameter once fixing it at 1 + // has been shown to not cause a performance regression. + lcf, err := vers.L0Sublevels.PickBaseCompaction(1, vers.Levels[baseLevel].Slice()) + if err != nil { + opts.Logger.Errorf("error when picking base compaction: %s", err) + return + } + if lcf != nil { + pc = newPickedCompactionFromL0(lcf, opts, vers, baseLevel, true) + pc.setupInputs(opts, env.diskAvailBytes, pc.startLevel) + if pc.startLevel.files.Empty() { + opts.Logger.Fatalf("empty compaction chosen") + } + return pc.maybeAddLevel(opts, env.diskAvailBytes) + } + + // Couldn't choose a base compaction. Try choosing an intra-L0 + // compaction. 
Note that we pass in L0CompactionThreshold here as opposed to + // 1, since choosing a single sublevel intra-L0 compaction is + // counterproductive. + lcf, err = vers.L0Sublevels.PickIntraL0Compaction(env.earliestUnflushedSeqNum, minIntraL0Count) + if err != nil { + opts.Logger.Errorf("error when picking intra-L0 compaction: %s", err) + return + } + if lcf != nil { + pc = newPickedCompactionFromL0(lcf, opts, vers, 0, false) + if !pc.setupInputs(opts, env.diskAvailBytes, pc.startLevel) { + return nil + } + if pc.startLevel.files.Empty() { + opts.Logger.Fatalf("empty compaction chosen") + } + { + iter := pc.startLevel.files.Iter() + if iter.First() == nil || iter.Next() == nil { + // A single-file intra-L0 compaction is unproductive. + return nil + } + } + + pc.smallest, pc.largest = manifest.KeyRange(pc.cmp, pc.startLevel.files.Iter()) + } + return pc +} + +func pickManualCompaction( + vers *version, opts *Options, env compactionEnv, baseLevel int, manual *manualCompaction, +) (pc *pickedCompaction, retryLater bool) { + outputLevel := manual.level + 1 + if manual.level == 0 { + outputLevel = baseLevel + } else if manual.level < baseLevel { + // The start level for a compaction must be >= Lbase. A manual + // compaction could have been created adhering to that condition, and + // then an automatic compaction came in and compacted all of the + // sstables in Lbase to Lbase+1 which caused Lbase to change. Simply + // ignore this manual compaction as there is nothing to do (manual.level + // points to an empty level). + return nil, false + } + // This conflictsWithInProgress call is necessary for the manual compaction to + // be retried when it conflicts with an ongoing automatic compaction. Without + // it, the compaction is dropped due to pc.setupInputs returning false since + // the input/output range is already being compacted, and the manual + // compaction ends with a non-compacted LSM. 
+ if conflictsWithInProgress(manual, outputLevel, env.inProgressCompactions, opts.Comparer.Compare) { + return nil, true + } + pc = newPickedCompaction(opts, vers, manual.level, defaultOutputLevel(manual.level, baseLevel), baseLevel) + manual.outputLevel = pc.outputLevel.level + pc.startLevel.files = vers.Overlaps(manual.level, opts.Comparer.Compare, manual.start, manual.end, false) + if pc.startLevel.files.Empty() { + // Nothing to do + return nil, false + } + if !pc.setupInputs(opts, env.diskAvailBytes, pc.startLevel) { + // setupInputs returned false indicating there's a conflicting + // concurrent compaction. + return nil, true + } + if pc = pc.maybeAddLevel(opts, env.diskAvailBytes); pc == nil { + return nil, false + } + if pc.outputLevel.level != outputLevel { + if len(pc.extraLevels) > 0 { + // multilevel compactions relax this invariant + } else { + panic("pebble: compaction picked unexpected output level") + } + } + // Fail-safe to protect against compacting the same sstable concurrently. + if inputRangeAlreadyCompacting(env, pc) { + return nil, true + } + return pc, false +} + +func (p *compactionPickerByScore) pickReadTriggeredCompaction( + env compactionEnv, +) (pc *pickedCompaction) { + // If a flush is in-progress or expected to happen soon, it means more writes are taking place. We would + // soon be scheduling more write focussed compactions. In this case, skip read compactions as they are + // lower priority. 
+ if env.readCompactionEnv.flushing || env.readCompactionEnv.readCompactions == nil { + return nil + } + for env.readCompactionEnv.readCompactions.size > 0 { + rc := env.readCompactionEnv.readCompactions.remove() + if pc = pickReadTriggeredCompactionHelper(p, rc, env); pc != nil { + break + } + } + return pc +} + +func pickReadTriggeredCompactionHelper( + p *compactionPickerByScore, rc *readCompaction, env compactionEnv, +) (pc *pickedCompaction) { + cmp := p.opts.Comparer.Compare + overlapSlice := p.vers.Overlaps(rc.level, cmp, rc.start, rc.end, false /* exclusiveEnd */) + if overlapSlice.Empty() { + // If there is no overlap, then the file with the key range + // must have been compacted away. So, we don't proceed to + // compact the same key range again. + return nil + } + + iter := overlapSlice.Iter() + var fileMatches bool + for f := iter.First(); f != nil; f = iter.Next() { + if f.FileNum == rc.fileNum { + fileMatches = true + break + } + } + if !fileMatches { + return nil + } + + pc = newPickedCompaction(p.opts, p.vers, rc.level, defaultOutputLevel(rc.level, p.baseLevel), p.baseLevel) + + pc.startLevel.files = overlapSlice + if !pc.setupInputs(p.opts, env.diskAvailBytes, pc.startLevel) { + return nil + } + if inputRangeAlreadyCompacting(env, pc) { + return nil + } + pc.kind = compactionKindRead + + // Prevent read compactions which are too wide. + outputOverlaps := pc.version.Overlaps( + pc.outputLevel.level, pc.cmp, pc.smallest.UserKey, + pc.largest.UserKey, pc.largest.IsExclusiveSentinel()) + if outputOverlaps.SizeSum() > pc.maxReadCompactionBytes { + return nil + } + + // Prevent compactions which start with a small seed file X, but overlap + // with over allowedCompactionWidth * X file sizes in the output layer. 
+ const allowedCompactionWidth = 35 + if outputOverlaps.SizeSum() > overlapSlice.SizeSum()*allowedCompactionWidth { + return nil + } + + return pc +} + +func (p *compactionPickerByScore) forceBaseLevel1() { + p.baseLevel = 1 +} + +func inputRangeAlreadyCompacting(env compactionEnv, pc *pickedCompaction) bool { + for _, cl := range pc.inputs { + iter := cl.files.Iter() + for f := iter.First(); f != nil; f = iter.Next() { + if f.IsCompacting() { + return true + } + } + } + + // Look for active compactions outputting to the same region of the key + // space in the same output level. Two potential compactions may conflict + // without sharing input files if there are no files in the output level + // that overlap with the intersection of the compactions' key spaces. + // + // Consider an active L0->Lbase compaction compacting two L0 files one + // [a-f] and the other [t-z] into Lbase. + // + // L0 + // ↦ 000100 ↤ ↦ 000101 ↤ + // L1 + // ↦ 000004 ↤ + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // + // If a new file 000102 [j-p] is flushed while the existing compaction is + // still ongoing, new file would not be in any compacting sublevel + // intervals and would not overlap with any Lbase files that are also + // compacting. However, this compaction cannot be picked because the + // compaction's output key space [j-p] would overlap the existing + // compaction's output key space [a-z]. 
+ // + // L0 + // ↦ 000100* ↤ ↦ 000102 ↤ ↦ 000101* ↤ + // L1 + // ↦ 000004* ↤ + // a b c d e f g h i j k l m n o p q r s t u v w x y z + // + // * - currently compacting + if pc.outputLevel != nil && pc.outputLevel.level != 0 { + for _, c := range env.inProgressCompactions { + if pc.outputLevel.level != c.outputLevel { + continue + } + if base.InternalCompare(pc.cmp, c.largest, pc.smallest) < 0 || + base.InternalCompare(pc.cmp, c.smallest, pc.largest) > 0 { + continue + } + + // The picked compaction and the in-progress compaction c are + // outputting to the same region of the key space of the same + // level. + return true + } + } + return false +} + +// conflictsWithInProgress checks if there are any in-progress compactions with overlapping keyspace. +func conflictsWithInProgress( + manual *manualCompaction, outputLevel int, inProgressCompactions []compactionInfo, cmp Compare, +) bool { + for _, c := range inProgressCompactions { + if (c.outputLevel == manual.level || c.outputLevel == outputLevel) && + isUserKeysOverlapping(manual.start, manual.end, c.smallest.UserKey, c.largest.UserKey, cmp) { + return true + } + for _, in := range c.inputs { + if in.files.Empty() { + continue + } + iter := in.files.Iter() + smallest := iter.First().Smallest.UserKey + largest := iter.Last().Largest.UserKey + if (in.level == manual.level || in.level == outputLevel) && + isUserKeysOverlapping(manual.start, manual.end, smallest, largest, cmp) { + return true + } + } + } + return false +} + +func isUserKeysOverlapping(x1, x2, y1, y2 []byte, cmp Compare) bool { + return cmp(x1, y2) <= 0 && cmp(y1, x2) <= 0 +} diff --git a/pebble/compaction_picker_test.go b/pebble/compaction_picker_test.go new file mode 100644 index 0000000..b0ace35 --- /dev/null +++ b/pebble/compaction_picker_test.go @@ -0,0 +1,1593 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package pebble + +import ( + "bytes" + "fmt" + "math" + "sort" + "strconv" + "strings" + "sync" + "testing" + "time" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/humanize" + "github.com/cockroachdb/pebble/internal/manifest" + "github.com/cockroachdb/pebble/internal/testkeys" + "github.com/cockroachdb/pebble/vfs" + "github.com/stretchr/testify/require" +) + +func loadVersion(t *testing.T, d *datadriven.TestData) (*version, *Options, string) { + var sizes [numLevels]int64 + opts := &Options{} + opts.testingRandomized(t) + opts.EnsureDefaults() + + if len(d.CmdArgs) != 1 { + return nil, nil, fmt.Sprintf("%s expects 1 argument", d.Cmd) + } + var err error + opts.LBaseMaxBytes, err = strconv.ParseInt(d.CmdArgs[0].Key, 10, 64) + if err != nil { + return nil, nil, err.Error() + } + + var files [numLevels][]*fileMetadata + if len(d.Input) > 0 { + // Parse each line as + // + // : [compensation] + // + // Creating sstables within the level whose file sizes total to `size` + // and whose compensated file sizes total to `size`+`compensation`. If + // size is sufficiently large, only one single file is created. See + // the TODO below. 
+ for _, data := range strings.Split(d.Input, "\n") { + parts := strings.Split(data, " ") + parts[0] = strings.TrimSuffix(strings.TrimSpace(parts[0]), ":") + if len(parts) < 2 { + return nil, nil, fmt.Sprintf("malformed test:\n%s", d.Input) + } + level, err := strconv.Atoi(parts[0]) + if err != nil { + return nil, nil, err.Error() + } + if files[level] != nil { + return nil, nil, fmt.Sprintf("level %d already filled", level) + } + size, err := strconv.ParseUint(strings.TrimSpace(parts[1]), 10, 64) + if err != nil { + return nil, nil, err.Error() + } + var compensation uint64 + if len(parts) == 3 { + compensation, err = strconv.ParseUint(strings.TrimSpace(parts[2]), 10, 64) + if err != nil { + return nil, nil, err.Error() + } + } + + var lastFile *fileMetadata + for i := uint64(1); sizes[level] < int64(size); i++ { + var key InternalKey + if level == 0 { + // For L0, make `size` overlapping files. + key = base.MakeInternalKey([]byte(fmt.Sprintf("%04d", 1)), i, InternalKeyKindSet) + } else { + key = base.MakeInternalKey([]byte(fmt.Sprintf("%04d", i)), i, InternalKeyKindSet) + } + m := (&fileMetadata{ + FileNum: base.FileNum(uint64(level)*100_000 + i), + SmallestSeqNum: key.SeqNum(), + LargestSeqNum: key.SeqNum(), + Size: 1, + Stats: manifest.TableStats{ + RangeDeletionsBytesEstimate: 0, + }, + }).ExtendPointKeyBounds(opts.Comparer.Compare, key, key) + m.InitPhysicalBacking() + m.StatsMarkValid() + lastFile = m + if size >= 100 { + // If the requested size of the level is very large only add a single + // file in order to avoid massive blow-up in the number of files in + // the Version. + // + // TODO(peter): There is tension between the testing in + // TestCompactionPickerLevelMaxBytes and + // TestCompactionPickerTargetLevel. Clean this up somehow. 
+ m.Size = size + if level != 0 { + endKey := base.MakeInternalKey([]byte(fmt.Sprintf("%04d", size)), i, InternalKeyKindSet) + m.ExtendPointKeyBounds(opts.Comparer.Compare, key, endKey) + } + } + files[level] = append(files[level], m) + sizes[level] += int64(m.Size) + } + // Let all the compensation be due to the last file. + if lastFile != nil && compensation > 0 { + lastFile.Stats.RangeDeletionsBytesEstimate = compensation + } + } + } + + vers := newVersion(opts, files) + return vers, opts, "" +} + +func TestCompactionPickerByScoreLevelMaxBytes(t *testing.T) { + datadriven.RunTest(t, "testdata/compaction_picker_level_max_bytes", + func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "init": + vers, opts, errMsg := loadVersion(t, d) + if errMsg != "" { + return errMsg + } + + p, ok := newCompactionPicker(vers, opts, nil).(*compactionPickerByScore) + require.True(t, ok) + var buf bytes.Buffer + for level := p.getBaseLevel(); level < numLevels; level++ { + fmt.Fprintf(&buf, "%d: %d\n", level, p.levelMaxBytes[level]) + } + return buf.String() + + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} + +func TestCompactionPickerTargetLevel(t *testing.T) { + var vers *version + var opts *Options + var pickerByScore *compactionPickerByScore + + parseInProgress := func(vals []string) ([]compactionInfo, error) { + var levels []int + for _, s := range vals { + l, err := strconv.ParseInt(s, 10, 8) + if err != nil { + return nil, err + } + levels = append(levels, int(l)) + } + if len(levels)%2 != 0 { + return nil, errors.New("odd number of levels with ongoing compactions") + } + var inProgress []compactionInfo + for i := 0; i < len(levels); i += 2 { + inProgress = append(inProgress, compactionInfo{ + inputs: []compactionLevel{ + {level: levels[i]}, + {level: levels[i+1]}, + }, + outputLevel: levels[i+1], + }) + } + return inProgress, nil + } + + resetCompacting := func() { + for _, files := range vers.Levels { + 
files.Slice().Each(func(f *fileMetadata) { + f.CompactionState = manifest.CompactionStateNotCompacting + }) + } + } + + datadriven.RunTest(t, "testdata/compaction_picker_target_level", + func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "init": + // loadVersion expects a single datadriven argument that it + // sets as Options.LBaseMaxBytes. It parses the input as + // newline-separated levels, specifying the level's file size + // and optionally additional compensation to be added during + // compensated file size calculations. Eg: + // + // init + // : [compensation] + // : [compensation] + var errMsg string + vers, opts, errMsg = loadVersion(t, d) + if errMsg != "" { + return errMsg + } + return runVersionFileSizes(vers) + case "init_cp": + resetCompacting() + + var inProgress []compactionInfo + if arg, ok := d.Arg("ongoing"); ok { + var err error + inProgress, err = parseInProgress(arg.Vals) + if err != nil { + return err.Error() + } + } + + p := newCompactionPicker(vers, opts, inProgress) + var ok bool + pickerByScore, ok = p.(*compactionPickerByScore) + require.True(t, ok) + return fmt.Sprintf("base: %d", pickerByScore.baseLevel) + case "queue": + var b strings.Builder + var inProgress []compactionInfo + for { + env := compactionEnv{ + diskAvailBytes: math.MaxUint64, + earliestUnflushedSeqNum: InternalKeySeqNumMax, + inProgressCompactions: inProgress, + } + pc := pickerByScore.pickAuto(env) + if pc == nil { + break + } + fmt.Fprintf(&b, "L%d->L%d: %.1f\n", pc.startLevel.level, pc.outputLevel.level, pc.score) + inProgress = append(inProgress, compactionInfo{ + inputs: pc.inputs, + outputLevel: pc.outputLevel.level, + smallest: pc.smallest, + largest: pc.largest, + }) + if pc.outputLevel.level == 0 { + // Once we pick one L0->L0 compaction, we'll keep on doing so + // because the test isn't marking files as Compacting. 
+ break + } + for _, cl := range pc.inputs { + cl.files.Each(func(f *fileMetadata) { + f.CompactionState = manifest.CompactionStateCompacting + fmt.Fprintf(&b, " %s marked as compacting\n", f) + }) + } + } + + resetCompacting() + return b.String() + case "pick": + resetCompacting() + + var inProgress []compactionInfo + if len(d.CmdArgs) == 1 { + arg := d.CmdArgs[0] + if arg.Key != "ongoing" { + return "unknown arg: " + arg.Key + } + var err error + inProgress, err = parseInProgress(arg.Vals) + if err != nil { + return err.Error() + } + } + + // Mark files as compacting for each in-progress compaction. + for i := range inProgress { + c := &inProgress[i] + for j, cl := range c.inputs { + iter := vers.Levels[cl.level].Iter() + for f := iter.First(); f != nil; f = iter.Next() { + if !f.IsCompacting() { + f.CompactionState = manifest.CompactionStateCompacting + c.inputs[j].files = iter.Take().Slice() + break + } + } + } + if c.inputs[0].level == 0 && c.outputLevel != 0 { + // L0->Lbase: mark all of Lbase as compacting. 
+ c.inputs[1].files = vers.Levels[c.outputLevel].Slice() + for _, in := range c.inputs { + in.files.Each(func(f *fileMetadata) { + f.CompactionState = manifest.CompactionStateCompacting + }) + } + } + } + + var b strings.Builder + fmt.Fprintf(&b, "Initial state before pick:\n%s", runVersionFileSizes(vers)) + pc := pickerByScore.pickAuto(compactionEnv{ + earliestUnflushedSeqNum: InternalKeySeqNumMax, + inProgressCompactions: inProgress, + }) + if pc != nil { + fmt.Fprintf(&b, "Picked: L%d->L%d: %0.1f\n", pc.startLevel.level, pc.outputLevel.level, pc.score) + } + if pc == nil { + fmt.Fprintln(&b, "Picked: no compaction") + } + return b.String() + case "pick_manual": + var startLevel int + var start, end string + d.MaybeScanArgs(t, "level", &startLevel) + d.MaybeScanArgs(t, "start", &start) + d.MaybeScanArgs(t, "end", &end) + + iStart := base.MakeInternalKey([]byte(start), InternalKeySeqNumMax, InternalKeyKindMax) + iEnd := base.MakeInternalKey([]byte(end), 0, 0) + manual := &manualCompaction{ + done: make(chan error, 1), + level: startLevel, + start: iStart.UserKey, + end: iEnd.UserKey, + } + + pc, retryLater := pickManualCompaction( + pickerByScore.vers, + opts, + compactionEnv{ + earliestUnflushedSeqNum: InternalKeySeqNumMax, + }, + pickerByScore.getBaseLevel(), + manual) + if pc == nil { + return fmt.Sprintf("nil, retryLater = %v", retryLater) + } + + return fmt.Sprintf("L%d->L%d, retryLater = %v", pc.startLevel.level, pc.outputLevel.level, retryLater) + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} + +func TestCompactionPickerEstimatedCompactionDebt(t *testing.T) { + datadriven.RunTest(t, "testdata/compaction_picker_estimated_debt", + func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "init": + vers, opts, errMsg := loadVersion(t, d) + if errMsg != "" { + return errMsg + } + opts.MemTableSize = 1000 + + p := newCompactionPicker(vers, opts, nil) + return fmt.Sprintf("%d\n", p.estimatedCompactionDebt(0)) + + 
default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} + +func TestCompactionPickerL0(t *testing.T) { + opts := (*Options)(nil).EnsureDefaults() + opts.Experimental.L0CompactionConcurrency = 1 + + parseMeta := func(s string) (*fileMetadata, error) { + parts := strings.Split(s, ":") + fileNum, err := strconv.Atoi(parts[0]) + if err != nil { + return nil, err + } + fields := strings.Fields(parts[1]) + parts = strings.Split(fields[0], "-") + if len(parts) != 2 { + return nil, errors.Errorf("malformed table spec: %s", s) + } + m := (&fileMetadata{ + FileNum: base.FileNum(fileNum), + }).ExtendPointKeyBounds( + opts.Comparer.Compare, + base.ParseInternalKey(strings.TrimSpace(parts[0])), + base.ParseInternalKey(strings.TrimSpace(parts[1])), + ) + m.SmallestSeqNum = m.Smallest.SeqNum() + m.LargestSeqNum = m.Largest.SeqNum() + m.InitPhysicalBacking() + return m, nil + } + + var picker *compactionPickerByScore + var inProgressCompactions []compactionInfo + var pc *pickedCompaction + + datadriven.RunTest(t, "testdata/compaction_picker_L0", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "define": + fileMetas := [manifest.NumLevels][]*fileMetadata{} + baseLevel := manifest.NumLevels - 1 + level := 0 + var err error + lines := strings.Split(td.Input, "\n") + var compactionLines []string + + for len(lines) > 0 { + data := strings.TrimSpace(lines[0]) + lines = lines[1:] + switch data { + case "L0", "L1", "L2", "L3", "L4", "L5", "L6": + level, err = strconv.Atoi(data[1:]) + if err != nil { + return err.Error() + } + case "compactions": + compactionLines, lines = lines, nil + default: + meta, err := parseMeta(data) + if err != nil { + return err.Error() + } + if level != 0 && level < baseLevel { + baseLevel = level + } + fileMetas[level] = append(fileMetas[level], meta) + } + } + + // Parse in-progress compactions in the form of: + // L0 000001 -> L2 000005 + inProgressCompactions = nil + for len(compactionLines) > 0 { + parts := 
strings.Fields(compactionLines[0]) + compactionLines = compactionLines[1:] + + var level int + var info compactionInfo + first := true + compactionFiles := map[int][]*fileMetadata{} + for _, p := range parts { + switch p { + case "L0", "L1", "L2", "L3", "L4", "L5", "L6": + var err error + level, err = strconv.Atoi(p[1:]) + if err != nil { + return err.Error() + } + if len(info.inputs) > 0 && info.inputs[len(info.inputs)-1].level == level { + // eg, L0 -> L0 compaction or L6 -> L6 compaction + continue + } + if info.outputLevel < level { + info.outputLevel = level + } + info.inputs = append(info.inputs, compactionLevel{level: level}) + case "->": + continue + default: + fileNum, err := strconv.Atoi(p) + if err != nil { + return err.Error() + } + var compactFile *fileMetadata + for _, m := range fileMetas[level] { + if m.FileNum == FileNum(fileNum) { + compactFile = m + } + } + if compactFile == nil { + return fmt.Sprintf("cannot find compaction file %s", FileNum(fileNum)) + } + compactFile.CompactionState = manifest.CompactionStateCompacting + if first || base.InternalCompare(DefaultComparer.Compare, info.largest, compactFile.Largest) < 0 { + info.largest = compactFile.Largest + } + if first || base.InternalCompare(DefaultComparer.Compare, info.smallest, compactFile.Smallest) > 0 { + info.smallest = compactFile.Smallest + } + first = false + compactionFiles[level] = append(compactionFiles[level], compactFile) + } + } + for i, cl := range info.inputs { + files := compactionFiles[cl.level] + info.inputs[i].files = manifest.NewLevelSliceSeqSorted(files) + // Mark as intra-L0 compacting if the compaction is + // L0 -> L0. 
+ if info.outputLevel == 0 { + for _, f := range files { + f.IsIntraL0Compacting = true + } + } + } + inProgressCompactions = append(inProgressCompactions, info) + } + + version := newVersion(opts, fileMetas) + version.L0Sublevels.InitCompactingFileInfo(inProgressL0Compactions(inProgressCompactions)) + vs := &versionSet{ + opts: opts, + cmp: DefaultComparer.Compare, + cmpName: DefaultComparer.Name, + } + vs.versions.Init(nil) + vs.append(version) + picker = &compactionPickerByScore{ + opts: opts, + vers: version, + baseLevel: baseLevel, + } + vs.picker = picker + picker.initLevelMaxBytes(inProgressCompactions) + + var buf bytes.Buffer + fmt.Fprint(&buf, version.String()) + if len(inProgressCompactions) > 0 { + fmt.Fprintln(&buf, "compactions") + for _, c := range inProgressCompactions { + fmt.Fprintf(&buf, " %s\n", c.String()) + } + } + return buf.String() + case "pick-auto": + td.MaybeScanArgs(t, "l0_compaction_threshold", &opts.L0CompactionThreshold) + td.MaybeScanArgs(t, "l0_compaction_file_threshold", &opts.L0CompactionFileThreshold) + + pc = picker.pickAuto(compactionEnv{ + diskAvailBytes: math.MaxUint64, + earliestUnflushedSeqNum: math.MaxUint64, + inProgressCompactions: inProgressCompactions, + }) + var result strings.Builder + if pc != nil { + checkClone(t, pc) + c := newCompaction(pc, opts, time.Now(), nil /* provider */) + fmt.Fprintf(&result, "L%d -> L%d\n", pc.startLevel.level, pc.outputLevel.level) + fmt.Fprintf(&result, "L%d: %s\n", pc.startLevel.level, fileNums(pc.startLevel.files)) + if !pc.outputLevel.files.Empty() { + fmt.Fprintf(&result, "L%d: %s\n", pc.outputLevel.level, fileNums(pc.outputLevel.files)) + } + if !c.grandparents.Empty() { + fmt.Fprintf(&result, "grandparents: %s\n", fileNums(c.grandparents)) + } + } else { + return "nil" + } + return result.String() + case "mark-for-compaction": + var fileNum uint64 + td.ScanArgs(t, "file", &fileNum) + for l, lm := range picker.vers.Levels { + iter := lm.Iter() + for f := iter.First(); f != nil; f 
= iter.Next() { + if f.FileNum != base.FileNum(fileNum) { + continue + } + f.MarkedForCompaction = true + picker.vers.Stats.MarkedForCompaction++ + picker.vers.Levels[l].InvalidateAnnotation(markedForCompactionAnnotator{}) + return fmt.Sprintf("marked L%d.%s", l, f.FileNum) + } + } + return "not-found" + case "max-output-file-size": + if pc == nil { + return "no compaction" + } + return fmt.Sprintf("%d", pc.maxOutputFileSize) + case "max-overlap-bytes": + if pc == nil { + return "no compaction" + } + return fmt.Sprintf("%d", pc.maxOverlapBytes) + } + return fmt.Sprintf("unrecognized command: %s", td.Cmd) + }) +} + +func TestCompactionPickerConcurrency(t *testing.T) { + opts := (*Options)(nil).EnsureDefaults() + opts.Experimental.L0CompactionConcurrency = 1 + + parseMeta := func(s string) (*fileMetadata, error) { + parts := strings.Split(s, ":") + fileNum, err := strconv.Atoi(parts[0]) + if err != nil { + return nil, err + } + fields := strings.Fields(parts[1]) + parts = strings.Split(fields[0], "-") + if len(parts) != 2 { + return nil, errors.Errorf("malformed table spec: %s", s) + } + m := (&fileMetadata{ + FileNum: base.FileNum(fileNum), + Size: 1028, + }).ExtendPointKeyBounds( + opts.Comparer.Compare, + base.ParseInternalKey(strings.TrimSpace(parts[0])), + base.ParseInternalKey(strings.TrimSpace(parts[1])), + ) + m.InitPhysicalBacking() + for _, p := range fields[1:] { + if strings.HasPrefix(p, "size=") { + v, err := strconv.Atoi(strings.TrimPrefix(p, "size=")) + if err != nil { + return nil, err + } + m.Size = uint64(v) + } + } + m.SmallestSeqNum = m.Smallest.SeqNum() + m.LargestSeqNum = m.Largest.SeqNum() + return m, nil + } + + var picker *compactionPickerByScore + var inProgressCompactions []compactionInfo + + datadriven.RunTest(t, "testdata/compaction_picker_concurrency", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "define": + fileMetas := [manifest.NumLevels][]*fileMetadata{} + level := 0 + var err error + lines := 
strings.Split(td.Input, "\n") + var compactionLines []string + + for len(lines) > 0 { + data := strings.TrimSpace(lines[0]) + lines = lines[1:] + switch data { + case "L0", "L1", "L2", "L3", "L4", "L5", "L6": + level, err = strconv.Atoi(data[1:]) + if err != nil { + return err.Error() + } + case "compactions": + compactionLines, lines = lines, nil + default: + meta, err := parseMeta(data) + if err != nil { + return err.Error() + } + fileMetas[level] = append(fileMetas[level], meta) + } + } + + // Parse in-progress compactions in the form of: + // L0 000001 -> L2 000005 + inProgressCompactions = nil + for len(compactionLines) > 0 { + parts := strings.Fields(compactionLines[0]) + compactionLines = compactionLines[1:] + + var level int + var info compactionInfo + first := true + compactionFiles := map[int][]*fileMetadata{} + for _, p := range parts { + switch p { + case "L0", "L1", "L2", "L3", "L4", "L5", "L6": + var err error + level, err = strconv.Atoi(p[1:]) + if err != nil { + return err.Error() + } + if len(info.inputs) > 0 && info.inputs[len(info.inputs)-1].level == level { + // eg, L0 -> L0 compaction or L6 -> L6 compaction + continue + } + if info.outputLevel < level { + info.outputLevel = level + } + info.inputs = append(info.inputs, compactionLevel{level: level}) + case "->": + continue + default: + fileNum, err := strconv.Atoi(p) + if err != nil { + return err.Error() + } + var compactFile *fileMetadata + for _, m := range fileMetas[level] { + if m.FileNum == FileNum(fileNum) { + compactFile = m + } + } + if compactFile == nil { + return fmt.Sprintf("cannot find compaction file %s", FileNum(fileNum)) + } + compactFile.CompactionState = manifest.CompactionStateCompacting + if first || base.InternalCompare(DefaultComparer.Compare, info.largest, compactFile.Largest) < 0 { + info.largest = compactFile.Largest + } + if first || base.InternalCompare(DefaultComparer.Compare, info.smallest, compactFile.Smallest) > 0 { + info.smallest = compactFile.Smallest + } + 
first = false + compactionFiles[level] = append(compactionFiles[level], compactFile) + } + } + for i, cl := range info.inputs { + files := compactionFiles[cl.level] + if cl.level == 0 { + info.inputs[i].files = manifest.NewLevelSliceSeqSorted(files) + } else { + info.inputs[i].files = manifest.NewLevelSliceKeySorted(DefaultComparer.Compare, files) + } + // Mark as intra-L0 compacting if the compaction is + // L0 -> L0. + if info.outputLevel == 0 { + for _, f := range files { + f.IsIntraL0Compacting = true + } + } + } + inProgressCompactions = append(inProgressCompactions, info) + } + + version := newVersion(opts, fileMetas) + version.L0Sublevels.InitCompactingFileInfo(inProgressL0Compactions(inProgressCompactions)) + vs := &versionSet{ + opts: opts, + cmp: DefaultComparer.Compare, + cmpName: DefaultComparer.Name, + } + vs.versions.Init(nil) + vs.append(version) + + picker = newCompactionPicker(version, opts, inProgressCompactions).(*compactionPickerByScore) + vs.picker = picker + + var buf bytes.Buffer + fmt.Fprint(&buf, version.String()) + if len(inProgressCompactions) > 0 { + fmt.Fprintln(&buf, "compactions") + for _, c := range inProgressCompactions { + fmt.Fprintf(&buf, " %s\n", c.String()) + } + } + return buf.String() + + case "pick-auto": + td.MaybeScanArgs(t, "l0_compaction_threshold", &opts.L0CompactionThreshold) + td.MaybeScanArgs(t, "l0_compaction_concurrency", &opts.Experimental.L0CompactionConcurrency) + td.MaybeScanArgs(t, "compaction_debt_concurrency", &opts.Experimental.CompactionDebtConcurrency) + + pc := picker.pickAuto(compactionEnv{ + earliestUnflushedSeqNum: math.MaxUint64, + inProgressCompactions: inProgressCompactions, + }) + var result strings.Builder + if pc != nil { + c := newCompaction(pc, opts, time.Now(), nil /* provider */) + fmt.Fprintf(&result, "L%d -> L%d\n", pc.startLevel.level, pc.outputLevel.level) + fmt.Fprintf(&result, "L%d: %s\n", pc.startLevel.level, fileNums(pc.startLevel.files)) + if !pc.outputLevel.files.Empty() { + 
fmt.Fprintf(&result, "L%d: %s\n", pc.outputLevel.level, fileNums(pc.outputLevel.files)) + } + if !c.grandparents.Empty() { + fmt.Fprintf(&result, "grandparents: %s\n", fileNums(c.grandparents)) + } + } else { + return "nil" + } + return result.String() + } + return fmt.Sprintf("unrecognized command: %s", td.Cmd) + }) +} + +func TestCompactionPickerPickReadTriggered(t *testing.T) { + opts := (*Options)(nil).EnsureDefaults() + var picker *compactionPickerByScore + var rcList readCompactionQueue + var vers *version + + parseMeta := func(s string) (*fileMetadata, error) { + parts := strings.Split(s, ":") + fileNum, err := strconv.Atoi(parts[0]) + if err != nil { + return nil, err + } + fields := strings.Fields(parts[1]) + parts = strings.Split(fields[0], "-") + if len(parts) != 2 { + return nil, errors.Errorf("malformed table spec: %s. usage: :start.SET.1-end.SET.2", s) + } + m := (&fileMetadata{ + FileNum: base.FileNum(fileNum), + Size: 1028, + }).ExtendPointKeyBounds( + opts.Comparer.Compare, + base.ParseInternalKey(strings.TrimSpace(parts[0])), + base.ParseInternalKey(strings.TrimSpace(parts[1])), + ) + m.InitPhysicalBacking() + for _, p := range fields[1:] { + if strings.HasPrefix(p, "size=") { + v, err := strconv.Atoi(strings.TrimPrefix(p, "size=")) + if err != nil { + return nil, err + } + m.Size = uint64(v) + } + } + m.SmallestSeqNum = m.Smallest.SeqNum() + m.LargestSeqNum = m.Largest.SeqNum() + return m, nil + } + + datadriven.RunTest(t, "testdata/compaction_picker_read_triggered", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "define": + rcList = readCompactionQueue{} + fileMetas := [manifest.NumLevels][]*fileMetadata{} + level := 0 + var err error + lines := strings.Split(td.Input, "\n") + + for len(lines) > 0 { + data := strings.TrimSpace(lines[0]) + lines = lines[1:] + switch data { + case "L0", "L1", "L2", "L3", "L4", "L5", "L6": + level, err = strconv.Atoi(data[1:]) + if err != nil { + return err.Error() + } + default: + 
meta, err := parseMeta(data) + if err != nil { + return err.Error() + } + fileMetas[level] = append(fileMetas[level], meta) + } + } + + vers = newVersion(opts, fileMetas) + vs := &versionSet{ + opts: opts, + cmp: DefaultComparer.Compare, + cmpName: DefaultComparer.Name, + } + vs.versions.Init(nil) + vs.append(vers) + var inProgressCompactions []compactionInfo + picker = newCompactionPicker(vers, opts, inProgressCompactions).(*compactionPickerByScore) + vs.picker = picker + + var buf bytes.Buffer + fmt.Fprint(&buf, vers.String()) + return buf.String() + + case "add-read-compaction": + for _, line := range strings.Split(td.Input, "\n") { + if line == "" { + continue + } + parts := strings.Split(line, " ") + if len(parts) != 3 { + return "error: malformed data for add-read-compaction. usage: : - " + } + if l, err := strconv.Atoi(parts[0][:1]); err == nil { + keys := strings.Split(parts[1], "-") + fileNum, _ := strconv.Atoi(parts[2]) + + rc := readCompaction{ + level: l, + start: []byte(keys[0]), + end: []byte(keys[1]), + fileNum: base.FileNum(fileNum), + } + rcList.add(&rc, DefaultComparer.Compare) + } else { + return err.Error() + } + } + return "" + + case "show-read-compactions": + var sb strings.Builder + if rcList.size == 0 { + sb.WriteString("(none)") + } + for i := 0; i < rcList.size; i++ { + rc := rcList.at(i) + sb.WriteString(fmt.Sprintf("(level: %d, start: %s, end: %s)\n", rc.level, string(rc.start), string(rc.end))) + } + return sb.String() + + case "pick-auto": + pc := picker.pickAuto(compactionEnv{ + earliestUnflushedSeqNum: math.MaxUint64, + readCompactionEnv: readCompactionEnv{ + readCompactions: &rcList, + flushing: false, + }, + }) + var result strings.Builder + if pc != nil { + fmt.Fprintf(&result, "L%d -> L%d\n", pc.startLevel.level, pc.outputLevel.level) + fmt.Fprintf(&result, "L%d: %s\n", pc.startLevel.level, fileNums(pc.startLevel.files)) + if !pc.outputLevel.files.Empty() { + fmt.Fprintf(&result, "L%d: %s\n", pc.outputLevel.level, 
fileNums(pc.outputLevel.files)) + } + } else { + return "nil" + } + return result.String() + } + return fmt.Sprintf("unrecognized command: %s", td.Cmd) + }) +} + +type alwaysMultiLevel struct{} + +func (d alwaysMultiLevel) pick( + pcOrig *pickedCompaction, opts *Options, diskAvailBytes uint64, +) *pickedCompaction { + pcMulti := pcOrig.clone() + if !pcMulti.setupMultiLevelCandidate(opts, diskAvailBytes) { + return pcOrig + } + return pcMulti +} + +func (d alwaysMultiLevel) allowL0() bool { + return false +} + +func TestPickedCompactionSetupInputs(t *testing.T) { + opts := &Options{} + opts.EnsureDefaults() + + parseMeta := func(s string) *fileMetadata { + parts := strings.Split(strings.TrimSpace(s), " ") + var fileSize uint64 + var compacting bool + for _, part := range parts { + switch { + case part == "compacting": + compacting = true + case strings.HasPrefix(part, "size="): + v, err := strconv.ParseUint(strings.TrimPrefix(part, "size="), 10, 64) + require.NoError(t, err) + fileSize = v + } + } + tableParts := strings.Split(parts[0], "-") + if len(tableParts) != 2 { + t.Fatalf("malformed table spec: %s", s) + } + state := manifest.CompactionStateNotCompacting + if compacting { + state = manifest.CompactionStateCompacting + } + m := (&fileMetadata{ + CompactionState: state, + Size: fileSize, + }).ExtendPointKeyBounds( + opts.Comparer.Compare, + base.ParseInternalKey(strings.TrimSpace(tableParts[0])), + base.ParseInternalKey(strings.TrimSpace(tableParts[1])), + ) + m.SmallestSeqNum = m.Smallest.SeqNum() + m.LargestSeqNum = m.Largest.SeqNum() + m.InitPhysicalBacking() + return m + } + + setupInputTest := func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "setup-inputs": + var availBytes uint64 = math.MaxUint64 + var maxLevelBytes [7]int64 + args := d.CmdArgs + + if len(args) > 0 && args[0].Key == "avail-bytes" { + require.Equal(t, 1, len(args[0].Vals)) + var err error + availBytes, err = strconv.ParseUint(args[0].Vals[0], 10, 64) + 
require.NoError(t, err) + args = args[1:] + } + + if len(args) != 2 { + return "setup-inputs [avail-bytes=XXX] " + } + + pc := &pickedCompaction{ + cmp: DefaultComparer.Compare, + inputs: []compactionLevel{{level: -1}, {level: -1}}, + } + pc.startLevel, pc.outputLevel = &pc.inputs[0], &pc.inputs[1] + var currentLevel int + var files [numLevels][]*fileMetadata + fileNum := FileNum(1) + + for _, data := range strings.Split(d.Input, "\n") { + switch data[:2] { + case "L0", "L1", "L2", "L3", "L4", "L5", "L6": + levelArgs := strings.Fields(data) + level, err := strconv.Atoi(levelArgs[0][1:]) + if err != nil { + return err.Error() + } + currentLevel = level + if len(levelArgs) > 1 { + maxSizeArg := strings.Replace(levelArgs[1], "max-size=", "", 1) + maxSize, err := strconv.ParseInt(maxSizeArg, 10, 64) + if err != nil { + return err.Error() + } + maxLevelBytes[level] = maxSize + } else { + maxLevelBytes[level] = math.MaxInt64 + } + if pc.startLevel.level == -1 { + pc.startLevel.level = level + + } else if pc.outputLevel.level == -1 { + if pc.startLevel.level >= level { + return fmt.Sprintf("startLevel=%d >= outputLevel=%d\n", pc.startLevel.level, level) + } + pc.outputLevel.level = level + } + default: + meta := parseMeta(data) + meta.FileNum = fileNum + fileNum++ + files[currentLevel] = append(files[currentLevel], meta) + } + } + + if pc.outputLevel.level == -1 { + pc.outputLevel.level = pc.startLevel.level + 1 + } + pc.version = newVersion(opts, files) + pc.startLevel.files = pc.version.Overlaps(pc.startLevel.level, pc.cmp, + []byte(args[0].String()), []byte(args[1].String()), false /* exclusiveEnd */) + + var isCompacting bool + if !pc.setupInputs(opts, availBytes, pc.startLevel) { + isCompacting = true + } + origPC := pc + pc = pc.maybeAddLevel(opts, availBytes) + // If pc points to a new pickedCompaction, a new multi level compaction + // was initialized. 
+ initMultiLevel := pc != origPC + checkClone(t, pc) + var buf bytes.Buffer + for _, cl := range pc.inputs { + if cl.files.Empty() { + continue + } + + fmt.Fprintf(&buf, "L%d\n", cl.level) + cl.files.Each(func(f *fileMetadata) { + fmt.Fprintf(&buf, " %s\n", f) + }) + } + if isCompacting { + fmt.Fprintf(&buf, "is-compacting\n") + } + + if initMultiLevel { + extraLevel := pc.extraLevels[0].level + fmt.Fprintf(&buf, "init-multi-level(%d,%d,%d)\n", pc.startLevel.level, extraLevel, + pc.outputLevel.level) + fmt.Fprintf(&buf, "Original WriteAmp %.2f; ML WriteAmp %.2f\n", origPC.predictedWriteAmp(), pc.predictedWriteAmp()) + fmt.Fprintf(&buf, "Original OverlappingRatio %.2f; ML OverlappingRatio %.2f\n", origPC.overlappingRatio(), pc.overlappingRatio()) + } + return buf.String() + + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + } + + t.Logf("Test basic setup inputs behavior without multi level compactions") + opts.Experimental.MultiLevelCompactionHeuristic = NoMultiLevel{} + datadriven.RunTest(t, "testdata/compaction_setup_inputs", + setupInputTest) + + t.Logf("Turning multi level compaction on") + opts.Experimental.MultiLevelCompactionHeuristic = alwaysMultiLevel{} + datadriven.RunTest(t, "testdata/compaction_setup_inputs_multilevel_dummy", + setupInputTest) + + t.Logf("Try Write-Amp Heuristic") + opts.Experimental.MultiLevelCompactionHeuristic = WriteAmpHeuristic{} + datadriven.RunTest(t, "testdata/compaction_setup_inputs_multilevel_write_amp", + setupInputTest) +} + +func TestPickedCompactionExpandInputs(t *testing.T) { + opts := &Options{} + opts.EnsureDefaults() + cmp := DefaultComparer.Compare + var files []*fileMetadata + + parseMeta := func(s string) *fileMetadata { + parts := strings.Split(s, "-") + if len(parts) != 2 { + t.Fatalf("malformed table spec: %s", s) + } + m := (&fileMetadata{}).ExtendPointKeyBounds( + opts.Comparer.Compare, + base.ParseInternalKey(parts[0]), + base.ParseInternalKey(parts[1]), + ) + m.InitPhysicalBacking() + return m 
+ } + + datadriven.RunTest(t, "testdata/compaction_expand_inputs", + func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "define": + files = nil + if len(d.Input) == 0 { + return "" + } + for _, data := range strings.Split(d.Input, "\n") { + meta := parseMeta(data) + meta.FileNum = FileNum(len(files)) + files = append(files, meta) + } + manifest.SortBySmallest(files, cmp) + return "" + + case "expand-inputs": + pc := &pickedCompaction{ + cmp: cmp, + inputs: []compactionLevel{{level: 1}}, + } + pc.startLevel = &pc.inputs[0] + + var filesLevelled [numLevels][]*fileMetadata + filesLevelled[pc.startLevel.level] = files + pc.version = newVersion(opts, filesLevelled) + + if len(d.CmdArgs) != 1 { + return fmt.Sprintf("%s expects 1 argument", d.Cmd) + } + index, err := strconv.ParseInt(d.CmdArgs[0].String(), 10, 64) + if err != nil { + return err.Error() + } + + // Advance the iterator to position `index`. + iter := pc.version.Levels[pc.startLevel.level].Iter() + _ = iter.First() + for i := int64(0); i < index; i++ { + _ = iter.Next() + } + + inputs, _ := expandToAtomicUnit(cmp, iter.Take().Slice(), true /* disableIsCompacting */) + + var buf bytes.Buffer + inputs.Each(func(f *fileMetadata) { + fmt.Fprintf(&buf, "%d: %s-%s\n", f.FileNum, f.Smallest, f.Largest) + }) + return buf.String() + + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} + +func TestCompactionOutputFileSize(t *testing.T) { + opts := (*Options)(nil).EnsureDefaults() + var picker *compactionPickerByScore + var vers *version + + parseMeta := func(s string) (*fileMetadata, error) { + parts := strings.Split(s, ":") + fileNum, err := strconv.Atoi(parts[0]) + if err != nil { + return nil, err + } + fields := strings.Fields(parts[1]) + parts = strings.Split(fields[0], "-") + if len(parts) != 2 { + return nil, errors.Errorf("malformed table spec: %s. 
usage: :start.SET.1-end.SET.2", s) + } + m := (&fileMetadata{ + FileNum: base.FileNum(fileNum), + Size: 1028, + }).ExtendPointKeyBounds( + opts.Comparer.Compare, + base.ParseInternalKey(strings.TrimSpace(parts[0])), + base.ParseInternalKey(strings.TrimSpace(parts[1])), + ) + m.InitPhysicalBacking() + for _, p := range fields[1:] { + if strings.HasPrefix(p, "size=") { + v, err := strconv.Atoi(strings.TrimPrefix(p, "size=")) + if err != nil { + return nil, err + } + m.Size = uint64(v) + } + if strings.HasPrefix(p, "range-deletions-bytes-estimate=") { + v, err := strconv.Atoi(strings.TrimPrefix(p, "range-deletions-bytes-estimate=")) + if err != nil { + return nil, err + } + m.Stats.RangeDeletionsBytesEstimate = uint64(v) + m.Stats.NumDeletions = 1 // At least one range del responsible for the deletion bytes. + m.StatsMarkValid() + } + } + m.SmallestSeqNum = m.Smallest.SeqNum() + m.LargestSeqNum = m.Largest.SeqNum() + return m, nil + } + + datadriven.RunTest(t, "testdata/compaction_output_file_size", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "define": + fileMetas := [manifest.NumLevels][]*fileMetadata{} + level := 0 + var err error + lines := strings.Split(td.Input, "\n") + + for len(lines) > 0 { + data := strings.TrimSpace(lines[0]) + lines = lines[1:] + switch data { + case "L0", "L1", "L2", "L3", "L4", "L5", "L6": + level, err = strconv.Atoi(data[1:]) + if err != nil { + return err.Error() + } + default: + meta, err := parseMeta(data) + if err != nil { + return err.Error() + } + fileMetas[level] = append(fileMetas[level], meta) + } + } + + vers = newVersion(opts, fileMetas) + vs := &versionSet{ + opts: opts, + cmp: DefaultComparer.Compare, + cmpName: DefaultComparer.Name, + } + vs.versions.Init(nil) + vs.append(vers) + var inProgressCompactions []compactionInfo + picker = newCompactionPicker(vers, opts, inProgressCompactions).(*compactionPickerByScore) + vs.picker = picker + + var buf bytes.Buffer + fmt.Fprint(&buf, vers.String()) 
+ return buf.String() + + case "pick-auto": + pc := picker.pickAuto(compactionEnv{ + earliestUnflushedSeqNum: math.MaxUint64, + earliestSnapshotSeqNum: math.MaxUint64, + }) + var buf bytes.Buffer + if pc != nil { + fmt.Fprintf(&buf, "L%d -> L%d\n", pc.startLevel.level, pc.outputLevel.level) + fmt.Fprintf(&buf, "L%d: %s\n", pc.startLevel.level, fileNums(pc.startLevel.files)) + fmt.Fprintf(&buf, "maxOutputFileSize: %d\n", pc.maxOutputFileSize) + } else { + return "nil" + } + return buf.String() + + default: + return fmt.Sprintf("unrecognized command: %s", td.Cmd) + } + }) +} + +func TestCompactionPickerCompensatedSize(t *testing.T) { + testCases := []struct { + size uint64 + pointDelEstimateBytes uint64 + rangeDelEstimateBytes uint64 + wantBytes uint64 + }{ + { + size: 100, + pointDelEstimateBytes: 0, + rangeDelEstimateBytes: 0, + wantBytes: 100, + }, + { + size: 100, + pointDelEstimateBytes: 10, + rangeDelEstimateBytes: 0, + wantBytes: 100 + 10, + }, + { + size: 100, + pointDelEstimateBytes: 10, + rangeDelEstimateBytes: 5, + wantBytes: 100 + 10 + 5, + }, + } + + for _, tc := range testCases { + t.Run("", func(t *testing.T) { + f := &fileMetadata{Size: tc.size} + f.InitPhysicalBacking() + f.Stats.PointDeletionsBytesEstimate = tc.pointDelEstimateBytes + f.Stats.RangeDeletionsBytesEstimate = tc.rangeDelEstimateBytes + gotBytes := compensatedSize(f) + require.Equal(t, tc.wantBytes, gotBytes) + }) + } +} + +func TestCompactionPickerPickFile(t *testing.T) { + fs := vfs.NewMem() + opts := &Options{ + Comparer: testkeys.Comparer, + FormatMajorVersion: FormatNewest, + FS: fs, + } + + d, err := Open("", opts) + require.NoError(t, err) + defer func() { + if d != nil { + require.NoError(t, d.Close()) + } + }() + + datadriven.RunTest(t, "testdata/compaction_picker_pick_file", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "define": + require.NoError(t, d.Close()) + + d, err = runDBDefineCmd(td, opts) + if err != nil { + return err.Error() + } + 
d.mu.Lock() + s := d.mu.versions.currentVersion().String() + d.mu.Unlock() + return s + + case "file-sizes": + return runTableFileSizesCmd(td, d) + + case "pick-file": + s := strings.TrimPrefix(td.CmdArgs[0].String(), "L") + level, err := strconv.Atoi(s) + if err != nil { + return fmt.Sprintf("unable to parse arg %q as level", td.CmdArgs[0].String()) + } + if level == 0 { + panic("L0 picking unimplemented") + } + d.mu.Lock() + defer d.mu.Unlock() + + // Use maybeScheduleCompactionPicker to take care of all of the + // initialization of the compaction-picking environment, but never + // pick a compaction; just call pickFile using the user-provided + // level. + var lf manifest.LevelFile + var ok bool + d.maybeScheduleCompactionPicker(func(untypedPicker compactionPicker, env compactionEnv) *pickedCompaction { + p := untypedPicker.(*compactionPickerByScore) + lf, ok = pickCompactionSeedFile(p.vers, opts, level, level+1, env.earliestSnapshotSeqNum) + return nil + }) + if !ok { + return "(none)" + } + return lf.FileMetadata.String() + + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +type pausableCleaner struct { + mu sync.Mutex + cond sync.Cond + paused bool + cleaner Cleaner +} + +func (c *pausableCleaner) Clean(fs vfs.FS, fileType base.FileType, path string) error { + c.mu.Lock() + defer c.mu.Unlock() + for c.paused { + c.cond.Wait() + } + return c.cleaner.Clean(fs, fileType, path) +} + +func (c *pausableCleaner) pause() { + c.mu.Lock() + defer c.mu.Unlock() + c.paused = true +} + +func (c *pausableCleaner) resume() { + c.mu.Lock() + defer c.mu.Unlock() + c.paused = false + c.cond.Broadcast() +} + +func TestCompactionPickerScores(t *testing.T) { + fs := vfs.NewMem() + cleaner := pausableCleaner{cleaner: DeleteCleaner{}} + cleaner.cond.L = &cleaner.mu + opts := &Options{ + Cleaner: &cleaner, + Comparer: testkeys.Comparer, + DisableAutomaticCompactions: true, + FormatMajorVersion: FormatNewest, + FS: fs, + } + + d, err := Open("", opts) + 
require.NoError(t, err) + defer func() { + if d != nil { + cleaner.resume() + require.NoError(t, closeAllSnapshots(d)) + require.NoError(t, d.Close()) + } + }() + + var buf bytes.Buffer + datadriven.RunTest(t, "testdata/compaction_picker_scores", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "define": + require.NoError(t, closeAllSnapshots(d)) + require.NoError(t, d.Close()) + + if td.HasArg("pause-cleaning") { + cleaner.pause() + } + + d, err = runDBDefineCmd(td, opts) + if err != nil { + return err.Error() + } + d.mu.Lock() + s := d.mu.versions.currentVersion().String() + d.mu.Unlock() + return s + + case "disable-table-stats": + d.mu.Lock() + d.opts.private.disableTableStats = true + d.mu.Unlock() + return "" + + case "enable-table-stats": + d.mu.Lock() + d.opts.private.disableTableStats = false + d.maybeCollectTableStatsLocked() + d.mu.Unlock() + return "" + + case "resume-cleaning": + cleaner.resume() + return "" + + case "ingest": + if err = runBuildCmd(td, d, d.opts.FS); err != nil { + return err.Error() + } + if err = runIngestCmd(td, d, d.opts.FS); err != nil { + return err.Error() + } + d.mu.Lock() + s := d.mu.versions.currentVersion().String() + d.mu.Unlock() + return s + + case "lsm": + return runLSMCmd(td, d) + + case "maybe-compact": + buf.Reset() + d.mu.Lock() + d.opts.DisableAutomaticCompactions = false + d.maybeScheduleCompaction() + fmt.Fprintf(&buf, "%d compactions in progress:", d.mu.compact.compactingCount) + for c := range d.mu.compact.inProgress { + fmt.Fprintf(&buf, "\n%s", c) + } + d.opts.DisableAutomaticCompactions = true + d.mu.Unlock() + return buf.String() + + case "scores": + waitFor := "completion" + td.MaybeScanArgs(t, "wait-for-compaction", &waitFor) + + // Wait for any running compactions to complete before calculating + // scores. Otherwise, the output of this command is + // nondeterministic. 
+ switch waitFor { + case "completion": + d.mu.Lock() + for d.mu.compact.compactingCount > 0 { + d.mu.compact.cond.Wait() + } + d.mu.Unlock() + case "version-edit": + func() { + for { + d.mu.Lock() + wait := len(d.mu.compact.inProgress) > 0 + for c := range d.mu.compact.inProgress { + wait = wait && !c.versionEditApplied + } + d.mu.Unlock() + if !wait { + return + } + // d.mu.compact.cond isn't notified until the compaction + // is removed from inProgress, so we need to just sleep + // and check again soon. + time.Sleep(10 * time.Millisecond) + } + }() + default: + panic(fmt.Sprintf("unrecognized `wait-for-compaction` value: %q", waitFor)) + } + + buf.Reset() + fmt.Fprintf(&buf, "L Size Score\n") + for l, lm := range d.Metrics().Levels { + if l < numLevels-1 { + fmt.Fprintf(&buf, "L%-3d\t%-7s%.1f\n", l, humanize.Bytes.Int64(lm.Size), lm.Score) + } else { + fmt.Fprintf(&buf, "L%-3d\t%-7s-\n", l, humanize.Bytes.Int64(lm.Size)) + } + } + return buf.String() + + case "wait-pending-table-stats": + return runTableStatsCmd(td, d) + + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +func fileNums(files manifest.LevelSlice) string { + var ss []string + files.Each(func(f *fileMetadata) { + ss = append(ss, f.FileNum.String()) + }) + sort.Strings(ss) + return strings.Join(ss, ",") +} + +func checkClone(t *testing.T, pc *pickedCompaction) { + pcClone := pc.clone() + require.Equal(t, pc.String(), pcClone.String()) + + // ensure all input files are in new address + for i := range pc.inputs { + // Len could be zero if setup inputs rejected a level + if pc.inputs[i].files.Len() > 0 { + require.NotEqual(t, &pc.inputs[i], &pcClone.inputs[i]) + } + } + for i := range pc.startLevel.l0SublevelInfo { + if pc.startLevel.l0SublevelInfo[i].Len() > 0 { + require.NotEqual(t, &pc.startLevel.l0SublevelInfo[i], &pcClone.startLevel.l0SublevelInfo[i]) + } + } +} diff --git a/pebble/compaction_test.go b/pebble/compaction_test.go new file mode 100644 index 0000000..ea1437a 
--- /dev/null +++ b/pebble/compaction_test.go @@ -0,0 +1,3912 @@ +// Copyright 2013 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import ( + "bytes" + "context" + crand "crypto/rand" + "fmt" + "math" + "math/rand" + "path/filepath" + "reflect" + "regexp" + "runtime" + "slices" + "sort" + "strconv" + "strings" + "sync/atomic" + "testing" + "time" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/errors" + "github.com/cockroachdb/errors/oserror" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/keyspan" + "github.com/cockroachdb/pebble/internal/manifest" + "github.com/cockroachdb/pebble/internal/testkeys" + "github.com/cockroachdb/pebble/objstorage" + "github.com/cockroachdb/pebble/objstorage/objstorageprovider" + "github.com/cockroachdb/pebble/objstorage/remote" + "github.com/cockroachdb/pebble/sstable" + "github.com/cockroachdb/pebble/vfs" + "github.com/cockroachdb/pebble/vfs/errorfs" + "github.com/stretchr/testify/require" +) + +func newVersion(opts *Options, files [numLevels][]*fileMetadata) *version { + return manifest.NewVersion( + opts.Comparer.Compare, + opts.Comparer.FormatKey, + opts.FlushSplitBytes, + files) +} + +type compactionPickerForTesting struct { + score float64 + level int + baseLevel int + opts *Options + vers *manifest.Version + maxLevelBytes [7]int64 +} + +var _ compactionPicker = &compactionPickerForTesting{} + +func (p *compactionPickerForTesting) getScores([]compactionInfo) [numLevels]float64 { + return [numLevels]float64{} +} + +func (p *compactionPickerForTesting) getBaseLevel() int { + return p.baseLevel +} + +func (p *compactionPickerForTesting) estimatedCompactionDebt(l0ExtraSize uint64) uint64 { + return 0 +} + +func (p *compactionPickerForTesting) forceBaseLevel1() {} + +func (p *compactionPickerForTesting) pickAuto(env compactionEnv) (pc 
*pickedCompaction) { + if p.score < 1 { + return nil + } + outputLevel := p.level + 1 + if p.level == 0 { + outputLevel = p.baseLevel + } + iter := p.vers.Levels[p.level].Iter() + iter.First() + cInfo := candidateLevelInfo{ + level: p.level, + outputLevel: outputLevel, + file: iter.Take(), + } + if cInfo.level == 0 { + return pickL0(env, p.opts, p.vers, p.baseLevel) + } + return pickAutoLPositive(env, p.opts, p.vers, cInfo, p.baseLevel, p.maxLevelBytes) +} + +func (p *compactionPickerForTesting) pickElisionOnlyCompaction( + env compactionEnv, +) (pc *pickedCompaction) { + return nil +} + +func (p *compactionPickerForTesting) pickRewriteCompaction( + env compactionEnv, +) (pc *pickedCompaction) { + return nil +} + +func (p *compactionPickerForTesting) pickReadTriggeredCompaction( + env compactionEnv, +) (pc *pickedCompaction) { + return nil +} + +func TestPickCompaction(t *testing.T) { + fileNums := func(files manifest.LevelSlice) string { + var ss []string + files.Each(func(meta *fileMetadata) { + ss = append(ss, strconv.Itoa(int(meta.FileNum))) + }) + sort.Strings(ss) + return strings.Join(ss, ",") + } + + opts := (*Options)(nil).EnsureDefaults() + newFileMeta := func(fileNum FileNum, size uint64, smallest, largest base.InternalKey) *fileMetadata { + m := (&fileMetadata{ + FileNum: fileNum, + Size: size, + }).ExtendPointKeyBounds(opts.Comparer.Compare, smallest, largest) + m.InitPhysicalBacking() + return m + } + + testCases := []struct { + desc string + version *version + picker compactionPickerForTesting + want string + wantMulti bool + }{ + { + desc: "no compaction", + version: newVersion(opts, [numLevels][]*fileMetadata{ + 0: { + newFileMeta( + 100, + 1, + base.ParseInternalKey("i.SET.101"), + base.ParseInternalKey("j.SET.102"), + ), + }, + }), + want: "", + }, + + { + desc: "1 L0 file", + version: newVersion(opts, [numLevels][]*fileMetadata{ + 0: { + newFileMeta( + 100, + 1, + base.ParseInternalKey("i.SET.101"), + base.ParseInternalKey("j.SET.102"), + ), + }, 
+ }), + picker: compactionPickerForTesting{ + score: 99, + level: 0, + baseLevel: 1, + }, + want: "100 ", + }, + + { + desc: "2 L0 files (0 overlaps)", + version: newVersion(opts, [numLevels][]*fileMetadata{ + 0: { + newFileMeta( + 100, + 1, + base.ParseInternalKey("i.SET.101"), + base.ParseInternalKey("j.SET.102"), + ), + newFileMeta( + 110, + 1, + base.ParseInternalKey("k.SET.111"), + base.ParseInternalKey("l.SET.112"), + ), + }, + }), + picker: compactionPickerForTesting{ + score: 99, + level: 0, + baseLevel: 1, + }, + want: "100,110 ", + }, + + { + desc: "2 L0 files, with ikey overlap", + version: newVersion(opts, [numLevels][]*fileMetadata{ + 0: { + newFileMeta( + 100, + 1, + base.ParseInternalKey("i.SET.101"), + base.ParseInternalKey("p.SET.102"), + ), + newFileMeta( + 110, + 1, + base.ParseInternalKey("j.SET.111"), + base.ParseInternalKey("q.SET.112"), + ), + }, + }), + picker: compactionPickerForTesting{ + score: 99, + level: 0, + baseLevel: 1, + }, + want: "100,110 ", + }, + + { + desc: "2 L0 files, with ukey overlap", + version: newVersion(opts, [numLevels][]*fileMetadata{ + 0: { + newFileMeta( + 100, + 1, + base.ParseInternalKey("i.SET.101"), + base.ParseInternalKey("i.SET.102"), + ), + newFileMeta( + 110, + 1, + base.ParseInternalKey("i.SET.111"), + base.ParseInternalKey("i.SET.112"), + ), + }, + }), + picker: compactionPickerForTesting{ + score: 99, + level: 0, + baseLevel: 1, + }, + want: "100,110 ", + }, + + { + desc: "1 L0 file, 2 L1 files (0 overlaps)", + version: newVersion(opts, [numLevels][]*fileMetadata{ + 0: { + newFileMeta( + 100, + 1, + base.ParseInternalKey("i.SET.101"), + base.ParseInternalKey("i.SET.102"), + ), + }, + 1: { + newFileMeta( + 200, + 1, + base.ParseInternalKey("a.SET.201"), + base.ParseInternalKey("b.SET.202"), + ), + newFileMeta( + 210, + 1, + base.ParseInternalKey("y.SET.211"), + base.ParseInternalKey("z.SET.212"), + ), + }, + }), + picker: compactionPickerForTesting{ + score: 99, + level: 0, + baseLevel: 1, + }, + want: 
"100 ", + }, + + { + desc: "1 L0 file, 2 L1 files (1 overlap), 4 L2 files (3 overlaps)", + version: newVersion(opts, [numLevels][]*fileMetadata{ + 0: { + newFileMeta( + 100, + 1, + base.ParseInternalKey("i.SET.101"), + base.ParseInternalKey("t.SET.102"), + ), + }, + 1: { + newFileMeta( + 200, + 1, + base.ParseInternalKey("a.SET.201"), + base.ParseInternalKey("e.SET.202"), + ), + newFileMeta( + 210, + 1, + base.ParseInternalKey("f.SET.211"), + base.ParseInternalKey("j.SET.212"), + ), + }, + 2: { + newFileMeta( + 300, + 1, + base.ParseInternalKey("a.SET.301"), + base.ParseInternalKey("b.SET.302"), + ), + newFileMeta( + 310, + 1, + base.ParseInternalKey("c.SET.311"), + base.ParseInternalKey("g.SET.312"), + ), + newFileMeta( + 320, + 1, + base.ParseInternalKey("h.SET.321"), + base.ParseInternalKey("m.SET.322"), + ), + newFileMeta( + 330, + 1, + base.ParseInternalKey("n.SET.331"), + base.ParseInternalKey("z.SET.332"), + ), + }, + }), + picker: compactionPickerForTesting{ + score: 99, + level: 0, + baseLevel: 1, + }, + want: "100 210 310,320,330", + }, + + { + desc: "4 L1 files, 2 L2 files, can grow", + version: newVersion(opts, [numLevels][]*fileMetadata{ + 1: { + newFileMeta( + 200, + 1, + base.ParseInternalKey("i1.SET.201"), + base.ParseInternalKey("i2.SET.202"), + ), + newFileMeta( + 210, + 1, + base.ParseInternalKey("j1.SET.211"), + base.ParseInternalKey("j2.SET.212"), + ), + newFileMeta( + 220, + 1, + base.ParseInternalKey("k1.SET.221"), + base.ParseInternalKey("k2.SET.222"), + ), + newFileMeta( + 230, + 1, + base.ParseInternalKey("l1.SET.231"), + base.ParseInternalKey("l2.SET.232"), + ), + }, + 2: { + newFileMeta( + 300, + 1, + base.ParseInternalKey("a0.SET.301"), + base.ParseInternalKey("l0.SET.302"), + ), + newFileMeta( + 310, + 1, + base.ParseInternalKey("l2.SET.311"), + base.ParseInternalKey("z2.SET.312"), + ), + }, + }), + picker: compactionPickerForTesting{ + score: 99, + level: 1, + baseLevel: 1, + }, + want: "200,210,220 300 ", + wantMulti: true, + }, + + 
{ + desc: "4 L1 files, 2 L2 files, can't grow (range)", + version: newVersion(opts, [numLevels][]*fileMetadata{ + 1: { + newFileMeta( + 200, + 1, + base.ParseInternalKey("i1.SET.201"), + base.ParseInternalKey("i2.SET.202"), + ), + newFileMeta( + 210, + 1, + base.ParseInternalKey("j1.SET.211"), + base.ParseInternalKey("j2.SET.212"), + ), + newFileMeta( + 220, + 1, + base.ParseInternalKey("k1.SET.221"), + base.ParseInternalKey("k2.SET.222"), + ), + newFileMeta( + 230, + 1, + base.ParseInternalKey("l1.SET.231"), + base.ParseInternalKey("l2.SET.232"), + ), + }, + 2: { + newFileMeta( + 300, + 1, + base.ParseInternalKey("a0.SET.301"), + base.ParseInternalKey("j0.SET.302"), + ), + newFileMeta( + 310, + 1, + base.ParseInternalKey("j2.SET.311"), + base.ParseInternalKey("z2.SET.312"), + ), + }, + }), + picker: compactionPickerForTesting{ + score: 99, + level: 1, + baseLevel: 1, + }, + want: "200 300 ", + wantMulti: true, + }, + + { + desc: "4 L1 files, 2 L2 files, can't grow (size)", + version: newVersion(opts, [numLevels][]*fileMetadata{ + 1: { + newFileMeta( + 200, + expandedCompactionByteSizeLimit(opts, 1, math.MaxUint64)-1, + base.ParseInternalKey("i1.SET.201"), + base.ParseInternalKey("i2.SET.202"), + ), + newFileMeta( + 210, + expandedCompactionByteSizeLimit(opts, 1, math.MaxUint64)-1, + base.ParseInternalKey("j1.SET.211"), + base.ParseInternalKey("j2.SET.212"), + ), + newFileMeta( + 220, + expandedCompactionByteSizeLimit(opts, 1, math.MaxUint64)-1, + base.ParseInternalKey("k1.SET.221"), + base.ParseInternalKey("k2.SET.222"), + ), + newFileMeta( + 230, + expandedCompactionByteSizeLimit(opts, 1, math.MaxUint64)-1, + base.ParseInternalKey("l1.SET.231"), + base.ParseInternalKey("l2.SET.232"), + ), + }, + 2: { + newFileMeta( + 300, + expandedCompactionByteSizeLimit(opts, 2, math.MaxUint64)-1, + base.ParseInternalKey("a0.SET.301"), + base.ParseInternalKey("l0.SET.302"), + ), + newFileMeta( + 310, + expandedCompactionByteSizeLimit(opts, 2, math.MaxUint64)-1, + 
base.ParseInternalKey("l2.SET.311"), + base.ParseInternalKey("z2.SET.312"), + ), + }, + }), + picker: compactionPickerForTesting{ + score: 99, + level: 1, + baseLevel: 1, + }, + want: "200 300 ", + }, + } + + for _, tc := range testCases { + vs := &versionSet{ + opts: opts, + cmp: DefaultComparer.Compare, + cmpName: DefaultComparer.Name, + } + vs.versions.Init(nil) + vs.append(tc.version) + tc.picker.opts = opts + tc.picker.vers = tc.version + vs.picker = &tc.picker + pc, got := vs.picker.pickAuto(compactionEnv{diskAvailBytes: math.MaxUint64}), "" + if pc != nil { + c := newCompaction(pc, opts, time.Now(), nil /* provider */) + + gotStart := fileNums(c.startLevel.files) + gotML := "" + observedMulti := len(c.extraLevels) > 0 + if observedMulti { + gotML = " " + fileNums(c.extraLevels[0].files) + } + gotOutput := " " + fileNums(c.outputLevel.files) + gotGrandparents := " " + fileNums(c.grandparents) + got = gotStart + gotML + gotOutput + gotGrandparents + if tc.wantMulti != observedMulti { + t.Fatalf("Expected Multi %t; Observed Multi %t, for %s", tc.wantMulti, observedMulti, got) + } + + } + if got != tc.want { + t.Fatalf("%s:\ngot %q\nwant %q", tc.desc, got, tc.want) + } + } +} + +func TestElideTombstone(t *testing.T) { + var d *DB + defer func() { + if d != nil { + require.NoError(t, d.Close()) + } + }() + var buf bytes.Buffer + datadriven.RunTest(t, "testdata/compaction_elide_tombstone", + func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "define": + if d != nil { + if err := d.Close(); err != nil { + return err.Error() + } + } + var err error + if d, err = runDBDefineCmd(td, (&Options{ + FS: vfs.NewMem(), + DebugCheck: DebugCheckLevels, + FormatMajorVersion: FormatNewest, + DisableAutomaticCompactions: true, + }).WithFSDefaults()); err != nil { + return err.Error() + } + if td.HasArg("verbose") { + return d.mu.versions.currentVersion().DebugString(base.DefaultFormatter) + } + return d.mu.versions.currentVersion().String() + case 
"elide": + buf.Reset() + var startLevel int + td.ScanArgs(t, "start-level", &startLevel) + c := compaction{ + cmp: testkeys.Comparer.Compare, + comparer: testkeys.Comparer, + version: d.mu.versions.currentVersion(), + inputs: []compactionLevel{{level: startLevel}, {level: startLevel + 1}}, + smallest: base.ParseInternalKey("a.SET.0"), + largest: base.ParseInternalKey("z.SET.0"), + } + c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1] + c.setupInuseKeyRanges() + for _, ukey := range strings.Split(td.Input, "\n") { + fmt.Fprintf(&buf, "elideTombstone(%q) = %t\n", ukey, c.elideTombstone([]byte(ukey))) + } + return buf.String() + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +func TestElideRangeTombstone(t *testing.T) { + opts := (*Options)(nil).EnsureDefaults() + + newFileMeta := func(smallest, largest base.InternalKey) *fileMetadata { + m := (&fileMetadata{}).ExtendPointKeyBounds( + opts.Comparer.Compare, smallest, largest, + ) + m.InitPhysicalBacking() + return m + } + + type want struct { + key string + endKey string + expected bool + } + + testCases := []struct { + desc string + level int + version *version + wants []want + flushing flushableList + }{ + { + desc: "empty", + level: 1, + version: newVersion(opts, [numLevels][]*fileMetadata{}), + wants: []want{ + {"x", "y", true}, + }, + }, + { + desc: "non-empty", + level: 1, + version: newVersion(opts, [numLevels][]*fileMetadata{ + 1: { + newFileMeta( + base.ParseInternalKey("c.SET.801"), + base.ParseInternalKey("g.SET.800"), + ), + newFileMeta( + base.ParseInternalKey("x.SET.701"), + base.ParseInternalKey("y.SET.700"), + ), + }, + 2: { + newFileMeta( + base.ParseInternalKey("d.SET.601"), + base.ParseInternalKey("h.SET.600"), + ), + newFileMeta( + base.ParseInternalKey("r.SET.501"), + base.ParseInternalKey("t.SET.500"), + ), + }, + 3: { + newFileMeta( + base.ParseInternalKey("f.SET.401"), + base.ParseInternalKey("g.SET.400"), + ), + newFileMeta( + 
base.ParseInternalKey("w.SET.301"), + base.ParseInternalKey("x.SET.300"), + ), + }, + 4: { + newFileMeta( + base.ParseInternalKey("f.SET.201"), + base.ParseInternalKey("m.SET.200"), + ), + newFileMeta( + base.ParseInternalKey("t.SET.101"), + base.ParseInternalKey("t.SET.100"), + ), + }, + }), + wants: []want{ + {"b", "c", true}, + {"c", "d", true}, + {"d", "e", true}, + {"e", "f", false}, + {"f", "g", false}, + {"g", "h", false}, + {"h", "i", false}, + {"l", "m", false}, + {"m", "n", false}, + {"n", "o", true}, + {"q", "r", true}, + {"r", "s", true}, + {"s", "t", false}, + {"t", "u", false}, + {"u", "v", true}, + {"v", "w", false}, + {"w", "x", false}, + {"x", "y", false}, + {"y", "z", true}, + }, + }, + { + desc: "flushing", + level: -1, + version: newVersion(opts, [numLevels][]*fileMetadata{ + 0: { + newFileMeta( + base.ParseInternalKey("h.SET.901"), + base.ParseInternalKey("j.SET.900"), + ), + }, + 1: { + newFileMeta( + base.ParseInternalKey("c.SET.801"), + base.ParseInternalKey("g.SET.800"), + ), + newFileMeta( + base.ParseInternalKey("x.SET.701"), + base.ParseInternalKey("y.SET.700"), + ), + }, + }), + wants: []want{ + {"m", "n", false}, + }, + // Pretend one memtable is being flushed + flushing: flushableList{nil}, + }, + } + + for _, tc := range testCases { + c := compaction{ + cmp: DefaultComparer.Compare, + comparer: DefaultComparer, + version: tc.version, + inputs: []compactionLevel{{level: tc.level}, {level: tc.level + 1}}, + smallest: base.ParseInternalKey("a.SET.0"), + largest: base.ParseInternalKey("z.SET.0"), + flushing: tc.flushing, + } + c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1] + c.setupInuseKeyRanges() + for _, w := range tc.wants { + if got := c.elideRangeTombstone([]byte(w.key), []byte(w.endKey)); got != w.expected { + t.Errorf("%s: keys=%q-%q: got %v, want %v", tc.desc, w.key, w.endKey, got, w.expected) + } + } + } +} + +func TestCompactionTransform(t *testing.T) { + datadriven.RunTest(t, "testdata/compaction_transform", func(t 
*testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "transform": + var snapshots []uint64 + var keyRanges []manifest.UserKeyRange + disableElision := td.HasArg("disable-elision") + td.MaybeScanArgs(t, "snapshots", &snapshots) + if arg, ok := td.Arg("in-use-key-ranges"); ok { + for _, keyRange := range arg.Vals { + parts := strings.SplitN(keyRange, "-", 2) + start := []byte(strings.TrimSpace(parts[0])) + end := []byte(strings.TrimSpace(parts[1])) + keyRanges = append(keyRanges, manifest.UserKeyRange{ + Start: start, + End: end, + }) + } + } + span := keyspan.ParseSpan(td.Input) + for i := range span.Keys { + if i > 0 { + if span.Keys[i-1].Trailer < span.Keys[i].Trailer { + return "span keys not sorted" + } + } + } + var outSpan keyspan.Span + c := compaction{ + cmp: base.DefaultComparer.Compare, + comparer: base.DefaultComparer, + disableSpanElision: disableElision, + inuseKeyRanges: keyRanges, + } + transformer := rangeKeyCompactionTransform(base.DefaultComparer.Equal, snapshots, c.elideRangeTombstone) + if err := transformer.Transform(base.DefaultComparer.Compare, span, &outSpan); err != nil { + return fmt.Sprintf("error: %s", err) + } + return outSpan.String() + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +type cpuPermissionGranter struct { + // requestCount is used to confirm that every GetPermission function call + // has a corresponding CPUWorkDone function call. + requestCount int + used bool + permit bool +} + +type cpuWorkHandle struct { + permit bool +} + +func (c cpuWorkHandle) Permitted() bool { + return c.permit +} + +func (t *cpuPermissionGranter) GetPermission(dur time.Duration) CPUWorkHandle { + t.requestCount++ + t.used = true + return cpuWorkHandle{t.permit} +} + +func (t *cpuPermissionGranter) CPUWorkDone(_ CPUWorkHandle) { + t.requestCount-- +} + +// Simple test to check if compactions are using the granter, and if exactly +// the acquired handles are returned. 
+func TestCompactionCPUGranter(t *testing.T) { + mem := vfs.NewMem() + opts := (&Options{FS: mem}).WithFSDefaults() + g := &cpuPermissionGranter{permit: true} + opts.Experimental.CPUWorkPermissionGranter = g + d, err := Open("", opts) + if err != nil { + t.Fatalf("Open: %v", err) + } + defer d.Close() + + d.Set([]byte{'a'}, []byte{'a'}, nil) + err = d.Compact([]byte{'a'}, []byte{'b'}, true) + if err != nil { + t.Fatalf("Compact: %v", err) + } + require.True(t, g.used) + require.Equal(t, g.requestCount, 0) +} + +// Tests that there's no errors or panics when the default CPU granter is used. +func TestCompactionCPUGranterDefault(t *testing.T) { + mem := vfs.NewMem() + opts := (&Options{FS: mem}).WithFSDefaults() + d, err := Open("", opts) + if err != nil { + t.Fatalf("Open: %v", err) + } + defer d.Close() + + d.Set([]byte{'a'}, []byte{'a'}, nil) + err = d.Compact([]byte{'a'}, []byte{'b'}, true) + if err != nil { + t.Fatalf("Compact: %v", err) + } +} + +func TestCompaction(t *testing.T) { + const memTableSize = 10000 + // Tuned so that 2 values can reside in the memtable before a flush, but a + // 3rd value will cause a flush. Needs to account for the max skiplist node + // size. 
+ const valueSize = 3500 + + mem := vfs.NewMem() + opts := &Options{ + FS: mem, + MemTableSize: memTableSize, + DebugCheck: DebugCheckLevels, + L0CompactionThreshold: 8, + } + opts.testingRandomized(t).WithFSDefaults() + d, err := Open("", opts) + if err != nil { + t.Fatalf("Open: %v", err) + } + + get1 := func(iter internalIterator) (ret string) { + b := &bytes.Buffer{} + for key, _ := iter.First(); key != nil; key, _ = iter.Next() { + b.Write(key.UserKey) + } + if err := iter.Close(); err != nil { + t.Fatalf("iterator Close: %v", err) + } + return b.String() + } + getAll := func() (gotMem, gotDisk string, err error) { + d.mu.Lock() + defer d.mu.Unlock() + + if d.mu.mem.mutable != nil { + gotMem = get1(d.mu.mem.mutable.newIter(nil)) + } + ss := []string(nil) + v := d.mu.versions.currentVersion() + provider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(mem, "" /* dirName */)) + if err != nil { + t.Fatalf("%v", err) + } + defer provider.Close() + for _, levelMetadata := range v.Levels { + iter := levelMetadata.Iter() + for meta := iter.First(); meta != nil; meta = iter.Next() { + if meta.Virtual { + continue + } + f, err := provider.OpenForReading(context.Background(), base.FileTypeTable, meta.FileBacking.DiskFileNum, objstorage.OpenOptions{}) + if err != nil { + return "", "", errors.WithStack(err) + } + r, err := sstable.NewReader(f, sstable.ReaderOptions{}) + if err != nil { + return "", "", errors.WithStack(err) + } + defer r.Close() + iter, err := r.NewIter(nil /* lower */, nil /* upper */) + if err != nil { + return "", "", errors.WithStack(err) + } + ss = append(ss, get1(iter)+".") + } + } + sort.Strings(ss) + return gotMem, strings.Join(ss, ""), nil + } + + value := bytes.Repeat([]byte("x"), valueSize) + testCases := []struct { + key, wantMem, wantDisk string + }{ + {"+A", "A", ""}, + {"+a", "Aa", ""}, + {"+B", "B", "Aa."}, + {"+b", "Bb", "Aa."}, + // The next level-0 table overwrites the B key. 
+ {"+C", "C", "Aa.Bb."}, + {"+B", "BC", "Aa.Bb."}, + // The next level-0 table deletes the a key. + {"+D", "D", "Aa.BC.Bb."}, + {"-a", "Da", "Aa.BC.Bb."}, + {"+d", "Dad", "Aa.BC.Bb."}, + {"+E", "E", "Aa.BC.Bb.Dad."}, + {"+e", "Ee", "Aa.BC.Bb.Dad."}, + // The next addition creates the fourth level-0 table, and l0CompactionTrigger == 8, + // but since the sublevel count is doubled when comparing with l0CompactionTrigger, + // the addition of the 4th sublevel triggers a non-trivial compaction into one level-1 table. + // Note that the keys in this one larger table are interleaved from the four smaller ones. + {"+F", "F", "ABCDEbde."}, + } + for _, tc := range testCases { + if key := tc.key[1:]; tc.key[0] == '+' { + if err := d.Set([]byte(key), value, nil); err != nil { + t.Errorf("%q: Set: %v", key, err) + break + } + } else { + if err := d.Delete([]byte(key), nil); err != nil { + t.Errorf("%q: Delete: %v", key, err) + break + } + } + + // try backs off to allow any writes to the memfs to complete. 
+ err := try(100*time.Microsecond, 20*time.Second, func() error { + gotMem, gotDisk, err := getAll() + if err != nil { + return err + } + if testing.Verbose() { + fmt.Printf("mem=%s (%s) disk=%s (%s)\n", gotMem, tc.wantMem, gotDisk, tc.wantDisk) + } + + if gotMem != tc.wantMem { + return errors.Errorf("mem: got %q, want %q", gotMem, tc.wantMem) + } + if gotDisk != tc.wantDisk { + return errors.Errorf("ldb: got %q, want %q", gotDisk, tc.wantDisk) + } + return nil + }) + if err != nil { + t.Errorf("%q: %v", tc.key, err) + } + } + if err := d.Close(); err != nil { + t.Fatalf("db Close: %v", err) + } +} + +func TestValidateVersionEdit(t *testing.T) { + const badKey = "malformed-key" + + errValidationFailed := errors.New("validation failed") + validateFn := func(key []byte) error { + if string(key) == badKey { + return errValidationFailed + } + return nil + } + + cmp := DefaultComparer.Compare + newFileMeta := func(smallest, largest base.InternalKey) *fileMetadata { + m := (&fileMetadata{}).ExtendPointKeyBounds(cmp, smallest, largest) + m.InitPhysicalBacking() + return m + } + + testCases := []struct { + desc string + ve *versionEdit + vFunc func([]byte) error + wantErr error + }{ + { + desc: "single new file; start key", + ve: &versionEdit{ + NewFiles: []manifest.NewFileEntry{ + { + Meta: newFileMeta( + manifest.InternalKey{UserKey: []byte(badKey)}, + manifest.InternalKey{UserKey: []byte("z")}, + ), + }, + }, + }, + vFunc: validateFn, + wantErr: errValidationFailed, + }, + { + desc: "single new file; end key", + ve: &versionEdit{ + NewFiles: []manifest.NewFileEntry{ + { + Meta: newFileMeta( + manifest.InternalKey{UserKey: []byte("a")}, + manifest.InternalKey{UserKey: []byte(badKey)}, + ), + }, + }, + }, + vFunc: validateFn, + wantErr: errValidationFailed, + }, + { + desc: "multiple new files", + ve: &versionEdit{ + NewFiles: []manifest.NewFileEntry{ + { + Meta: newFileMeta( + manifest.InternalKey{UserKey: []byte("a")}, + manifest.InternalKey{UserKey: []byte("c")}, + ), 
+ }, + { + Meta: newFileMeta( + manifest.InternalKey{UserKey: []byte(badKey)}, + manifest.InternalKey{UserKey: []byte("z")}, + ), + }, + }, + }, + vFunc: validateFn, + wantErr: errValidationFailed, + }, + { + desc: "single deleted file; start key", + ve: &versionEdit{ + DeletedFiles: map[manifest.DeletedFileEntry]*manifest.FileMetadata{ + deletedFileEntry{Level: 0, FileNum: 0}: newFileMeta( + manifest.InternalKey{UserKey: []byte(badKey)}, + manifest.InternalKey{UserKey: []byte("z")}, + ), + }, + }, + vFunc: validateFn, + wantErr: errValidationFailed, + }, + { + desc: "single deleted file; end key", + ve: &versionEdit{ + DeletedFiles: map[manifest.DeletedFileEntry]*manifest.FileMetadata{ + deletedFileEntry{Level: 0, FileNum: 0}: newFileMeta( + manifest.InternalKey{UserKey: []byte("a")}, + manifest.InternalKey{UserKey: []byte(badKey)}, + ), + }, + }, + vFunc: validateFn, + wantErr: errValidationFailed, + }, + { + desc: "multiple deleted files", + ve: &versionEdit{ + DeletedFiles: map[manifest.DeletedFileEntry]*manifest.FileMetadata{ + deletedFileEntry{Level: 0, FileNum: 0}: newFileMeta( + manifest.InternalKey{UserKey: []byte("a")}, + manifest.InternalKey{UserKey: []byte("c")}, + ), + deletedFileEntry{Level: 0, FileNum: 1}: newFileMeta( + manifest.InternalKey{UserKey: []byte(badKey)}, + manifest.InternalKey{UserKey: []byte("z")}, + ), + }, + }, + vFunc: validateFn, + wantErr: errValidationFailed, + }, + { + desc: "no errors", + ve: &versionEdit{ + NewFiles: []manifest.NewFileEntry{ + { + Level: 0, + Meta: newFileMeta( + manifest.InternalKey{UserKey: []byte("b")}, + manifest.InternalKey{UserKey: []byte("c")}, + ), + }, + { + Level: 0, + Meta: newFileMeta( + manifest.InternalKey{UserKey: []byte("d")}, + manifest.InternalKey{UserKey: []byte("g")}, + ), + }, + }, + DeletedFiles: map[manifest.DeletedFileEntry]*manifest.FileMetadata{ + deletedFileEntry{Level: 6, FileNum: 0}: newFileMeta( + manifest.InternalKey{UserKey: []byte("a")}, + manifest.InternalKey{UserKey: 
[]byte("d")}, + ), + deletedFileEntry{Level: 6, FileNum: 1}: newFileMeta( + manifest.InternalKey{UserKey: []byte("x")}, + manifest.InternalKey{UserKey: []byte("z")}, + ), + }, + }, + vFunc: validateFn, + }, + } + + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + err := validateVersionEdit(tc.ve, tc.vFunc, base.DefaultFormatter) + if tc.wantErr != nil { + if !errors.Is(err, tc.wantErr) { + t.Fatalf("got: %s; want: %s", err, tc.wantErr) + } + return + } + if err != nil { + t.Fatalf("got %s; wanted no error", err) + } + }) + } +} + +func TestManualCompaction(t *testing.T) { + var mem vfs.FS + var d *DB + defer func() { + if d != nil { + require.NoError(t, closeAllSnapshots(d)) + require.NoError(t, d.Close()) + } + }() + + seed := time.Now().UnixNano() + rng := rand.New(rand.NewSource(seed)) + t.Logf("seed: %d", seed) + + randVersion := func(min, max FormatMajorVersion) FormatMajorVersion { + return FormatMajorVersion(int(min) + rng.Intn(int(max)-int(min)+1)) + } + + var compactionLog bytes.Buffer + compactionLogEventListener := &EventListener{ + CompactionEnd: func(info CompactionInfo) { + // Ensure determinism. + info.JobID = 1 + info.Duration = time.Second + info.TotalDuration = time.Second + fmt.Fprintln(&compactionLog, info.String()) + }, + } + reset := func(minVersion, maxVersion FormatMajorVersion) { + compactionLog.Reset() + if d != nil { + require.NoError(t, closeAllSnapshots(d)) + require.NoError(t, d.Close()) + } + mem = vfs.NewMem() + require.NoError(t, mem.MkdirAll("ext", 0755)) + + opts := (&Options{ + FS: mem, + DebugCheck: DebugCheckLevels, + DisableAutomaticCompactions: true, + EventListener: compactionLogEventListener, + FormatMajorVersion: randVersion(minVersion, maxVersion), + }).WithFSDefaults() + + var err error + d, err = Open("", opts) + require.NoError(t, err) + } + + // d.mu must be held when calling. 
+ createOngoingCompaction := func(start, end []byte, startLevel, outputLevel int) (ongoingCompaction *compaction) { + ongoingCompaction = &compaction{ + inputs: []compactionLevel{{level: startLevel}, {level: outputLevel}}, + smallest: InternalKey{UserKey: start}, + largest: InternalKey{UserKey: end}, + } + ongoingCompaction.startLevel = &ongoingCompaction.inputs[0] + ongoingCompaction.outputLevel = &ongoingCompaction.inputs[1] + // Mark files as compacting. + curr := d.mu.versions.currentVersion() + ongoingCompaction.startLevel.files = curr.Overlaps(startLevel, d.cmp, start, end, false) + ongoingCompaction.outputLevel.files = curr.Overlaps(outputLevel, d.cmp, start, end, false) + for _, cl := range ongoingCompaction.inputs { + iter := cl.files.Iter() + for f := iter.First(); f != nil; f = iter.Next() { + f.CompactionState = manifest.CompactionStateCompacting + } + } + d.mu.compact.inProgress[ongoingCompaction] = struct{}{} + d.mu.compact.compactingCount++ + return + } + + // d.mu must be held when calling. 
+ deleteOngoingCompaction := func(ongoingCompaction *compaction) { + for _, cl := range ongoingCompaction.inputs { + iter := cl.files.Iter() + for f := iter.First(); f != nil; f = iter.Next() { + f.CompactionState = manifest.CompactionStateNotCompacting + } + } + delete(d.mu.compact.inProgress, ongoingCompaction) + d.mu.compact.compactingCount-- + } + + runTest := func(t *testing.T, testData string, minVersion, maxVersion FormatMajorVersion, verbose bool) { + reset(minVersion, maxVersion) + var ongoingCompaction *compaction + datadriven.RunTest(t, testData, func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "reset": + reset(minVersion, maxVersion) + return "" + + case "batch": + b := d.NewIndexedBatch() + if err := runBatchDefineCmd(td, b); err != nil { + return err.Error() + } + require.NoError(t, b.Commit(nil)) + return "" + + case "build": + if err := runBuildCmd(td, d, mem); err != nil { + return err.Error() + } + return "" + + case "compact": + if err := runCompactCmd(td, d); err != nil { + return err.Error() + } + d.mu.Lock() + s := d.mu.versions.currentVersion().String() + if verbose { + s = d.mu.versions.currentVersion().DebugString(base.DefaultFormatter) + } + d.mu.Unlock() + if td.HasArg("hide-file-num") { + re := regexp.MustCompile(`([0-9]*):\[`) + s = re.ReplaceAllString(s, "[") + } + return s + + case "define": + if d != nil { + if err := closeAllSnapshots(d); err != nil { + return err.Error() + } + if err := d.Close(); err != nil { + return err.Error() + } + } + + mem = vfs.NewMem() + opts := (&Options{ + FS: mem, + DebugCheck: DebugCheckLevels, + EventListener: compactionLogEventListener, + FormatMajorVersion: randVersion(minVersion, maxVersion), + DisableAutomaticCompactions: true, + }).WithFSDefaults() + + var err error + if d, err = runDBDefineCmd(td, opts); err != nil { + return err.Error() + } + + s := d.mu.versions.currentVersion().String() + if verbose { + s = 
d.mu.versions.currentVersion().DebugString(base.DefaultFormatter) + } + return s + + case "file-sizes": + return runTableFileSizesCmd(td, d) + + case "flush": + if err := d.Flush(); err != nil { + return err.Error() + } + d.mu.Lock() + s := d.mu.versions.currentVersion().String() + if verbose { + s = d.mu.versions.currentVersion().DebugString(base.DefaultFormatter) + } + d.mu.Unlock() + return s + + case "ingest": + if err := runIngestCmd(td, d, mem); err != nil { + return err.Error() + } + d.mu.Lock() + s := d.mu.versions.currentVersion().String() + if verbose { + s = d.mu.versions.currentVersion().DebugString(base.DefaultFormatter) + } + d.mu.Unlock() + return s + + case "iter": + // TODO(peter): runDBDefineCmd doesn't properly update the visible + // sequence number. So we have to use a snapshot with a very large + // sequence number, otherwise the DB appears empty. + snap := Snapshot{ + db: d, + seqNum: InternalKeySeqNumMax, + } + iter, _ := snap.NewIter(nil) + return runIterCmd(td, iter, true) + + case "lsm": + return runLSMCmd(td, d) + + case "populate": + b := d.NewBatch() + runPopulateCmd(t, td, b) + count := b.Count() + require.NoError(t, b.Commit(nil)) + return fmt.Sprintf("wrote %d keys\n", count) + + case "async-compact": + var s string + ch := make(chan error, 1) + go func() { + if err := runCompactCmd(td, d); err != nil { + ch <- err + close(ch) + return + } + d.mu.Lock() + s = d.mu.versions.currentVersion().String() + d.mu.Unlock() + close(ch) + }() + + manualDone := func() bool { + select { + case <-ch: + return true + default: + return false + } + } + + err := try(100*time.Microsecond, 20*time.Second, func() error { + if manualDone() { + return nil + } + + d.mu.Lock() + defer d.mu.Unlock() + if len(d.mu.compact.manual) == 0 { + return errors.New("no manual compaction queued") + } + manual := d.mu.compact.manual[0] + if manual.retries == 0 { + return errors.New("manual compaction has not been retried") + } + return nil + }) + if err != nil { + 
return err.Error() + } + + if manualDone() { + return "manual compaction did not block for ongoing\n" + s + } + + d.mu.Lock() + deleteOngoingCompaction(ongoingCompaction) + ongoingCompaction = nil + d.maybeScheduleCompaction() + d.mu.Unlock() + if err := <-ch; err != nil { + return err.Error() + } + return "manual compaction blocked until ongoing finished\n" + s + + case "add-ongoing-compaction": + var startLevel int + var outputLevel int + var start string + var end string + td.ScanArgs(t, "startLevel", &startLevel) + td.ScanArgs(t, "outputLevel", &outputLevel) + td.ScanArgs(t, "start", &start) + td.ScanArgs(t, "end", &end) + d.mu.Lock() + ongoingCompaction = createOngoingCompaction([]byte(start), []byte(end), startLevel, outputLevel) + d.mu.Unlock() + return "" + + case "remove-ongoing-compaction": + d.mu.Lock() + deleteOngoingCompaction(ongoingCompaction) + ongoingCompaction = nil + d.mu.Unlock() + return "" + + case "set-concurrent-compactions": + var concurrentCompactions int + td.ScanArgs(t, "num", &concurrentCompactions) + d.opts.MaxConcurrentCompactions = func() int { + return concurrentCompactions + } + return "" + + case "sstable-properties": + return runSSTablePropertiesCmd(t, td, d) + + case "wait-pending-table-stats": + return runTableStatsCmd(td, d) + + case "close-snapshots": + d.mu.Lock() + // Re-enable automatic compactions if they were disabled so that + // closing snapshots can trigger elision-only compactions if + // necessary. 
+ d.opts.DisableAutomaticCompactions = false + + var ss []*Snapshot + l := &d.mu.snapshots + for i := l.root.next; i != &l.root; i = i.next { + ss = append(ss, i) + } + d.mu.Unlock() + for i := range ss { + if err := ss[i].Close(); err != nil { + return err.Error() + } + } + return "" + + case "compaction-log": + defer compactionLog.Reset() + return compactionLog.String() + + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) + } + + testCases := []struct { + testData string + minVersion FormatMajorVersion + maxVersion FormatMajorVersion // inclusive + verbose bool + }{ + { + testData: "testdata/manual_compaction", + minVersion: FormatMostCompatible, + maxVersion: FormatSetWithDelete - 1, + }, + { + testData: "testdata/manual_compaction_set_with_del", + minVersion: FormatBlockPropertyCollector, + // This test exercises split user keys. + maxVersion: FormatSplitUserKeysMarkedCompacted - 1, + }, + { + testData: "testdata/singledel_manual_compaction", + minVersion: FormatMostCompatible, + maxVersion: FormatSetWithDelete - 1, + }, + { + testData: "testdata/singledel_manual_compaction_set_with_del", + minVersion: FormatSetWithDelete, + maxVersion: internalFormatNewest, + }, + { + testData: "testdata/manual_compaction_range_keys", + minVersion: FormatRangeKeys, + maxVersion: internalFormatNewest, + verbose: true, + }, + { + testData: "testdata/manual_compaction_file_boundaries", + minVersion: FormatBlockPropertyCollector, + // This test exercises split user keys. 
+ maxVersion: FormatSplitUserKeysMarkedCompacted - 1, + }, + { + testData: "testdata/manual_compaction_file_boundaries_delsized", + minVersion: FormatDeleteSizedAndObsolete, + maxVersion: internalFormatNewest, + }, + { + testData: "testdata/manual_compaction_set_with_del_sstable_Pebblev4", + minVersion: FormatDeleteSizedAndObsolete, + maxVersion: internalFormatNewest, + }, + { + testData: "testdata/manual_compaction_multilevel", + minVersion: FormatMostCompatible, + maxVersion: internalFormatNewest, + }, + } + + for _, tc := range testCases { + t.Run(tc.testData, func(t *testing.T) { + runTest(t, tc.testData, tc.minVersion, tc.maxVersion, tc.verbose) + }) + } +} + +func TestCompactionFindGrandparentLimit(t *testing.T) { + cmp := DefaultComparer.Compare + var grandparents []*fileMetadata + + var fileNum base.FileNum + parseMeta := func(s string) *fileMetadata { + parts := strings.Split(s, "-") + if len(parts) != 2 { + t.Fatalf("malformed table spec: %s", s) + } + fileNum++ + m := (&fileMetadata{ + FileNum: fileNum, + }).ExtendPointKeyBounds( + cmp, + InternalKey{UserKey: []byte(parts[0])}, + InternalKey{UserKey: []byte(parts[1])}, + ) + m.InitPhysicalBacking() + return m + } + + datadriven.RunTest(t, "testdata/compaction_find_grandparent_limit", + func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "define": + grandparents = nil + if len(d.Input) == 0 { + return "" + } + for _, data := range strings.Split(d.Input, "\n") { + parts := strings.Fields(data) + if len(parts) != 2 { + return fmt.Sprintf("malformed test:\n%s", d.Input) + } + + meta := parseMeta(parts[0]) + var err error + meta.Size, err = strconv.ParseUint(parts[1], 10, 64) + if err != nil { + return err.Error() + } + grandparents = append(grandparents, meta) + } + return "" + + case "compact": + c := &compaction{ + cmp: cmp, + equal: DefaultComparer.Equal, + comparer: DefaultComparer, + grandparents: manifest.NewLevelSliceKeySorted(cmp, grandparents), + } + if len(d.CmdArgs) != 1 { 
+ return fmt.Sprintf("%s expects 1 argument", d.Cmd) + } + if len(d.CmdArgs[0].Vals) != 1 { + return fmt.Sprintf("%s expects 1 value", d.CmdArgs[0].Key) + } + var err error + c.maxOverlapBytes, err = strconv.ParseUint(d.CmdArgs[0].Vals[0], 10, 64) + if err != nil { + return err.Error() + } + + var buf bytes.Buffer + var smallest, largest string + var grandparentLimit []byte + for i, key := range strings.Fields(d.Input) { + if i == 0 { + smallest = key + grandparentLimit = c.findGrandparentLimit([]byte(key)) + } + if grandparentLimit != nil && c.cmp(grandparentLimit, []byte(key)) < 0 { + fmt.Fprintf(&buf, "%s-%s\n", smallest, largest) + smallest = key + grandparentLimit = c.findGrandparentLimit([]byte(key)) + } + largest = key + } + fmt.Fprintf(&buf, "%s-%s\n", smallest, largest) + return buf.String() + + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} + +func TestCompactionFindL0Limit(t *testing.T) { + cmp := DefaultComparer.Compare + + fileNumCounter := 1 + parseMeta := func(s string) (*fileMetadata, error) { + fields := strings.Fields(s) + parts := strings.Split(fields[0], "-") + if len(parts) != 2 { + return nil, errors.Errorf("malformed table spec: %s", s) + } + m := (&fileMetadata{ + FileNum: base.FileNum(fileNumCounter), + }).ExtendPointKeyBounds( + cmp, + base.ParseInternalKey(strings.TrimSpace(parts[0])), + base.ParseInternalKey(strings.TrimSpace(parts[1])), + ) + fileNumCounter++ + m.SmallestSeqNum = m.Smallest.SeqNum() + m.LargestSeqNum = m.Largest.SeqNum() + + for _, field := range fields[1:] { + parts := strings.Split(field, "=") + switch parts[0] { + case "size": + size, err := strconv.ParseUint(parts[1], 10, 64) + if err != nil { + t.Fatal(err) + } + m.Size = size + } + } + m.InitPhysicalBacking() + return m, nil + } + + var vers *version + flushSplitBytes := int64(0) + + datadriven.RunTest(t, "testdata/compaction_find_l0_limit", + func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "define": + fileMetas 
:= [manifest.NumLevels][]*fileMetadata{} + baseLevel := manifest.NumLevels - 1 + level := 0 + d.MaybeScanArgs(t, "flush_split_bytes", &flushSplitBytes) + + var err error + for _, data := range strings.Split(d.Input, "\n") { + data = strings.TrimSpace(data) + switch data { + case "L0", "L1", "L2", "L3", "L4", "L5", "L6": + level, err = strconv.Atoi(data[1:]) + if err != nil { + return err.Error() + } + default: + meta, err := parseMeta(data) + if err != nil { + return err.Error() + } + if level != 0 && level < baseLevel { + baseLevel = level + } + fileMetas[level] = append(fileMetas[level], meta) + } + } + + vers = manifest.NewVersion(DefaultComparer.Compare, base.DefaultFormatter, flushSplitBytes, fileMetas) + flushSplitKeys := vers.L0Sublevels.FlushSplitKeys() + + var buf strings.Builder + buf.WriteString(vers.String()) + buf.WriteString("flush split keys:\n") + for _, key := range flushSplitKeys { + fmt.Fprintf(&buf, "\t%s\n", base.DefaultFormatter(key)) + } + + return buf.String() + + case "flush": + c := &compaction{ + cmp: cmp, + equal: DefaultComparer.Equal, + comparer: DefaultComparer, + version: vers, + l0Limits: vers.L0Sublevels.FlushSplitKeys(), + inputs: []compactionLevel{{level: -1}, {level: 0}}, + } + c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1] + + var buf bytes.Buffer + var smallest, largest string + var l0Limit []byte + for i, key := range strings.Fields(d.Input) { + if i == 0 { + smallest = key + l0Limit = c.findL0Limit([]byte(key)) + } + if l0Limit != nil && c.cmp(l0Limit, []byte(key)) < 0 { + fmt.Fprintf(&buf, "%s-%s\n", smallest, largest) + smallest = key + l0Limit = c.findL0Limit([]byte(key)) + } + largest = key + } + fmt.Fprintf(&buf, "%s-%s\n", smallest, largest) + return buf.String() + + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} + +func TestCompactionOutputLevel(t *testing.T) { + opts := (*Options)(nil).EnsureDefaults() + version := &version{} + + datadriven.RunTest(t, 
"testdata/compaction_output_level", + func(t *testing.T, d *datadriven.TestData) (res string) { + defer func() { + if r := recover(); r != nil { + res = fmt.Sprintln(r) + } + }() + + switch d.Cmd { + case "compact": + var start, base int + d.ScanArgs(t, "start", &start) + d.ScanArgs(t, "base", &base) + pc := newPickedCompaction(opts, version, start, defaultOutputLevel(start, base), base) + c := newCompaction(pc, opts, time.Now(), nil /* provider */) + return fmt.Sprintf("output=%d\nmax-output-file-size=%d\n", + c.outputLevel.level, c.maxOutputFileSize) + + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} + +func TestCompactionAtomicUnitBounds(t *testing.T) { + cmp := DefaultComparer.Compare + var files manifest.LevelSlice + + parseMeta := func(s string) *fileMetadata { + parts := strings.Split(s, "-") + if len(parts) != 2 { + t.Fatalf("malformed table spec: %s", s) + } + m := (&fileMetadata{}).ExtendPointKeyBounds( + cmp, + base.ParseInternalKey(parts[0]), + base.ParseInternalKey(parts[1]), + ) + m.InitPhysicalBacking() + return m + } + + datadriven.RunTest(t, "testdata/compaction_atomic_unit_bounds", + func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "define": + files = manifest.LevelSlice{} + if len(d.Input) == 0 { + return "" + } + var ff []*fileMetadata + for _, data := range strings.Split(d.Input, "\n") { + meta := parseMeta(data) + meta.FileNum = FileNum(len(ff)) + ff = append(ff, meta) + } + files = manifest.NewLevelSliceKeySorted(cmp, ff) + return "" + + case "atomic-unit-bounds": + c := &compaction{ + cmp: cmp, + equal: DefaultComparer.Equal, + comparer: DefaultComparer, + inputs: []compactionLevel{{files: files}, {}}, + } + c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1] + if len(d.CmdArgs) != 1 { + return fmt.Sprintf("%s expects 1 argument", d.Cmd) + } + index, err := strconv.ParseInt(d.CmdArgs[0].String(), 10, 64) + if err != nil { + return err.Error() + } + iter := files.Iter() + // Advance 
iter to `index`. + _ = iter.First() + for i := int64(0); i < index; i++ { + _ = iter.Next() + } + atomicUnit, _ := expandToAtomicUnit(c.cmp, iter.Take().Slice(), true /* disableIsCompacting */) + lower, upper := manifest.KeyRange(c.cmp, atomicUnit.Iter()) + return fmt.Sprintf("%s-%s\n", lower.UserKey, upper.UserKey) + + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} + +func TestCompactionDeleteOnlyHints(t *testing.T) { + parseUint64 := func(s string) uint64 { + v, err := strconv.ParseUint(s, 10, 64) + require.NoError(t, err) + return v + } + var d *DB + defer func() { + if d != nil { + require.NoError(t, closeAllSnapshots(d)) + require.NoError(t, d.Close()) + } + }() + + var compactInfo *CompactionInfo // protected by d.mu + reset := func() (*Options, error) { + if d != nil { + compactInfo = nil + if err := closeAllSnapshots(d); err != nil { + return nil, err + } + if err := d.Close(); err != nil { + return nil, err + } + } + opts := (&Options{ + FS: vfs.NewMem(), + DebugCheck: DebugCheckLevels, + EventListener: &EventListener{ + CompactionEnd: func(info CompactionInfo) { + if compactInfo != nil { + return + } + compactInfo = &info + }, + }, + FormatMajorVersion: internalFormatNewest, + }).WithFSDefaults() + + // Collection of table stats can trigger compactions. As we want full + // control over when compactions are run, disable stats by default. + opts.private.disableTableStats = true + + return opts, nil + } + + compactionString := func() string { + for d.mu.compact.compactingCount > 0 { + d.mu.compact.cond.Wait() + } + + s := "(none)" + if compactInfo != nil { + // Fix the job ID and durations for determinism. 
+ compactInfo.JobID = 100 + compactInfo.Duration = time.Second + compactInfo.TotalDuration = 2 * time.Second + s = compactInfo.String() + compactInfo = nil + } + return s + } + + var err error + var opts *Options + datadriven.RunTest(t, "testdata/compaction_delete_only_hints", + func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "define": + opts, err = reset() + if err != nil { + return err.Error() + } + d, err = runDBDefineCmd(td, opts) + if err != nil { + return err.Error() + } + d.mu.Lock() + s := d.mu.versions.currentVersion().String() + d.mu.Unlock() + return s + + case "force-set-hints": + d.mu.Lock() + defer d.mu.Unlock() + d.mu.compact.deletionHints = d.mu.compact.deletionHints[:0] + var buf bytes.Buffer + for _, data := range strings.Split(td.Input, "\n") { + parts := strings.FieldsFunc(strings.TrimSpace(data), + func(r rune) bool { return r == '-' || r == ' ' || r == '.' }) + + start, end := []byte(parts[2]), []byte(parts[3]) + + var tombstoneFile *fileMetadata + tombstoneLevel := int(parseUint64(parts[0][1:])) + + // Set file number to the value provided in the input. 
+ tombstoneFile = &fileMetadata{ + FileNum: base.FileNum(parseUint64(parts[1])), + } + + var hintType deleteCompactionHintType + switch typ := parts[7]; typ { + case "point_key_only": + hintType = deleteCompactionHintTypePointKeyOnly + case "range_key_only": + hintType = deleteCompactionHintTypeRangeKeyOnly + case "point_and_range_key": + hintType = deleteCompactionHintTypePointAndRangeKey + default: + return fmt.Sprintf("unknown hint type: %s", typ) + } + + h := deleteCompactionHint{ + hintType: hintType, + start: start, + end: end, + fileSmallestSeqNum: parseUint64(parts[4]), + tombstoneLevel: tombstoneLevel, + tombstoneFile: tombstoneFile, + tombstoneSmallestSeqNum: parseUint64(parts[5]), + tombstoneLargestSeqNum: parseUint64(parts[6]), + } + d.mu.compact.deletionHints = append(d.mu.compact.deletionHints, h) + fmt.Fprintln(&buf, h.String()) + } + return buf.String() + + case "get-hints": + d.mu.Lock() + defer d.mu.Unlock() + + // Force collection of table stats. This requires re-enabling the + // collection flag. We also do not want compactions to run as part of + // the stats collection job, so we disable it temporarily. + d.opts.private.disableTableStats = false + d.opts.DisableAutomaticCompactions = true + defer func() { + d.opts.private.disableTableStats = true + d.opts.DisableAutomaticCompactions = false + }() + + // NB: collectTableStats attempts to acquire the lock. Temporarily + // unlock here to avoid a deadlock. + d.mu.Unlock() + didRun := d.collectTableStats() + d.mu.Lock() + + if !didRun { + // If a job was already running, wait for the results. 
+ d.waitTableStats() + } + + hints := d.mu.compact.deletionHints + if len(hints) == 0 { + return "(none)" + } + var buf bytes.Buffer + for _, h := range hints { + buf.WriteString(h.String() + "\n") + } + return buf.String() + + case "maybe-compact": + d.mu.Lock() + d.maybeScheduleCompaction() + + var buf bytes.Buffer + fmt.Fprintf(&buf, "Deletion hints:\n") + for _, h := range d.mu.compact.deletionHints { + fmt.Fprintf(&buf, " %s\n", h.String()) + } + if len(d.mu.compact.deletionHints) == 0 { + fmt.Fprintf(&buf, " (none)\n") + } + fmt.Fprintf(&buf, "Compactions:\n") + fmt.Fprintf(&buf, " %s", compactionString()) + d.mu.Unlock() + return buf.String() + + case "compact": + if err := runCompactCmd(td, d); err != nil { + return err.Error() + } + d.mu.Lock() + compactInfo = nil + s := d.mu.versions.currentVersion().String() + d.mu.Unlock() + return s + + case "close-snapshot": + seqNum, err := strconv.ParseUint(strings.TrimSpace(td.Input), 0, 64) + if err != nil { + return err.Error() + } + d.mu.Lock() + var s *Snapshot + l := &d.mu.snapshots + for i := l.root.next; i != &l.root; i = i.next { + if i.seqNum == seqNum { + s = i + } + } + d.mu.Unlock() + if s == nil { + return "(not found)" + } else if err := s.Close(); err != nil { + return err.Error() + } + + d.mu.Lock() + // Closing the snapshot may have triggered a compaction. 
+ str := compactionString() + d.mu.Unlock() + return str + + case "iter": + snap := Snapshot{ + db: d, + seqNum: InternalKeySeqNumMax, + } + iter, _ := snap.NewIter(nil) + return runIterCmd(td, iter, true) + + case "reset": + opts, err = reset() + if err != nil { + return err.Error() + } + d, err = Open("", opts) + if err != nil { + return err.Error() + } + return "" + + case "ingest": + if err = runBuildCmd(td, d, d.opts.FS); err != nil { + return err.Error() + } + if err = runIngestCmd(td, d, d.opts.FS); err != nil { + return err.Error() + } + return "OK" + + case "describe-lsm": + d.mu.Lock() + s := d.mu.versions.currentVersion().String() + d.mu.Unlock() + return s + + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +func TestCompactionTombstones(t *testing.T) { + var d *DB + defer func() { + if d != nil { + require.NoError(t, closeAllSnapshots(d)) + require.NoError(t, d.Close()) + } + }() + + var compactInfo *CompactionInfo // protected by d.mu + + compactionString := func() string { + for d.mu.compact.compactingCount > 0 { + d.mu.compact.cond.Wait() + } + + s := "(none)" + if compactInfo != nil { + // Fix the job ID and durations for determinism. 
+ compactInfo.JobID = 100 + compactInfo.Duration = time.Second + compactInfo.TotalDuration = 2 * time.Second + s = compactInfo.String() + compactInfo = nil + } + return s + } + + datadriven.RunTest(t, "testdata/compaction_tombstones", + func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "define": + if d != nil { + compactInfo = nil + require.NoError(t, closeAllSnapshots(d)) + if err := d.Close(); err != nil { + return err.Error() + } + } + opts := (&Options{ + FS: vfs.NewMem(), + DebugCheck: DebugCheckLevels, + EventListener: &EventListener{ + CompactionEnd: func(info CompactionInfo) { + compactInfo = &info + }, + }, + FormatMajorVersion: internalFormatNewest, + }).WithFSDefaults() + var err error + d, err = runDBDefineCmd(td, opts) + if err != nil { + return err.Error() + } + d.mu.Lock() + s := d.mu.versions.currentVersion().String() + d.mu.Unlock() + return s + + case "maybe-compact": + d.mu.Lock() + d.opts.DisableAutomaticCompactions = false + d.maybeScheduleCompaction() + s := compactionString() + d.mu.Unlock() + return s + + case "wait-pending-table-stats": + return runTableStatsCmd(td, d) + + case "close-snapshot": + seqNum, err := strconv.ParseUint(strings.TrimSpace(td.Input), 0, 64) + if err != nil { + return err.Error() + } + d.mu.Lock() + var s *Snapshot + l := &d.mu.snapshots + for i := l.root.next; i != &l.root; i = i.next { + if i.seqNum == seqNum { + s = i + } + } + d.mu.Unlock() + if s == nil { + return "(not found)" + } else if err := s.Close(); err != nil { + return err.Error() + } + + d.mu.Lock() + // Closing the snapshot may have triggered a compaction. 
+ str := compactionString() + d.mu.Unlock() + return str + + case "close": + if err := d.Close(); err != nil { + return err.Error() + } + d = nil + return "" + + case "version": + d.mu.Lock() + s := d.mu.versions.currentVersion().String() + d.mu.Unlock() + return s + + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +func closeAllSnapshots(d *DB) error { + d.mu.Lock() + var ss []*Snapshot + l := &d.mu.snapshots + for i := l.root.next; i != &l.root; i = i.next { + ss = append(ss, i) + } + d.mu.Unlock() + for i := range ss { + if err := ss[i].Close(); err != nil { + return err + } + } + return nil +} + +func TestCompactionReadTriggeredQueue(t *testing.T) { + + // Convert a read compaction to a string which this test + // understands. + showRC := func(rc *readCompaction) string { + return fmt.Sprintf( + "L%d: %s-%s %d\n", rc.level, string(rc.start), string(rc.end), rc.fileNum, + ) + } + + var queue *readCompactionQueue + + datadriven.RunTest(t, "testdata/read_compaction_queue", + func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "create": + queue = &readCompactionQueue{} + return "(success)" + case "add-compaction": + for _, line := range strings.Split(td.Input, "\n") { + if line == "" { + continue + } + parts := strings.Split(line, " ") + + if len(parts) != 3 { + return "error: malformed data for add-compaction. usage: : - " + } + if l, err := strconv.Atoi(parts[0][1:2]); err == nil { + keys := strings.Split(parts[1], "-") + fileNum, _ := strconv.Atoi(parts[2]) + rc := readCompaction{ + level: l, + start: []byte(keys[0]), + end: []byte(keys[1]), + fileNum: base.FileNum(fileNum), + } + queue.add(&rc, DefaultComparer.Compare) + } else { + return err.Error() + } + } + return "" + case "remove-compaction": + rc := queue.remove() + if rc == nil { + return "(nil)" + } + return showRC(rc) + case "print-size": + // Print the size of the queue. 
+ return fmt.Sprintf("%d", queue.size) + case "print-queue": + // Print each element of the queue on a separate line. + var sb strings.Builder + if queue.size == 0 { + sb.WriteString("(empty)") + } + + for i := 0; i < queue.size; i++ { + rc := queue.at(i) + sb.WriteString(showRC(rc)) + } + return sb.String() + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }, + ) +} + +func (qu *readCompactionQueue) at(i int) *readCompaction { + if i >= qu.size { + return nil + } + + return qu.queue[i] +} + +func TestCompactionReadTriggered(t *testing.T) { + var d *DB + defer func() { + if d != nil { + require.NoError(t, d.Close()) + } + }() + + var compactInfo *CompactionInfo // protected by d.mu + + compactionString := func() string { + for d.mu.compact.compactingCount > 0 { + d.mu.compact.cond.Wait() + } + + s := "(none)" + if compactInfo != nil { + // Fix the job ID and durations for determinism. + compactInfo.JobID = 100 + compactInfo.Duration = time.Second + compactInfo.TotalDuration = 2 * time.Second + s = compactInfo.String() + compactInfo = nil + } + return s + } + + datadriven.RunTest(t, "testdata/compaction_read_triggered", + func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "define": + if d != nil { + compactInfo = nil + if err := d.Close(); err != nil { + return err.Error() + } + } + opts := (&Options{ + FS: vfs.NewMem(), + DebugCheck: DebugCheckLevels, + EventListener: &EventListener{ + CompactionEnd: func(info CompactionInfo) { + compactInfo = &info + }, + }, + }).WithFSDefaults() + var err error + d, err = runDBDefineCmd(td, opts) + if err != nil { + return err.Error() + } + d.mu.Lock() + s := d.mu.versions.currentVersion().String() + d.mu.Unlock() + return s + + case "add-read-compaction": + d.mu.Lock() + td.MaybeScanArgs(t, "flushing", &d.mu.compact.flushing) + for _, line := range strings.Split(td.Input, "\n") { + if line == "" { + continue + } + parts := strings.Split(line, " ") + if len(parts) != 3 { + return 
"error: malformed data for add-read-compaction. usage: : - " + } + if l, err := strconv.Atoi(parts[0][:1]); err == nil { + keys := strings.Split(parts[1], "-") + fileNum, _ := strconv.Atoi(parts[2]) + rc := readCompaction{ + level: l, + start: []byte(keys[0]), + end: []byte(keys[1]), + fileNum: base.FileNum(fileNum), + } + d.mu.compact.readCompactions.add(&rc, DefaultComparer.Compare) + } else { + return err.Error() + } + } + d.mu.Unlock() + return "" + + case "show-read-compactions": + d.mu.Lock() + var sb strings.Builder + if d.mu.compact.readCompactions.size == 0 { + sb.WriteString("(none)") + } + for i := 0; i < d.mu.compact.readCompactions.size; i++ { + rc := d.mu.compact.readCompactions.at(i) + sb.WriteString(fmt.Sprintf("(level: %d, start: %s, end: %s)\n", rc.level, string(rc.start), string(rc.end))) + } + d.mu.Unlock() + return sb.String() + + case "maybe-compact": + d.mu.Lock() + d.opts.DisableAutomaticCompactions = false + d.maybeScheduleCompaction() + s := compactionString() + d.mu.Unlock() + return s + + case "version": + d.mu.Lock() + s := d.mu.versions.currentVersion().String() + d.mu.Unlock() + return s + + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +func TestCompactionInuseKeyRanges(t *testing.T) { + cmp := DefaultComparer.Compare + parseMeta := func(s string) *fileMetadata { + parts := strings.Split(s, "-") + if len(parts) != 2 { + t.Fatalf("malformed table spec: %s", s) + } + m := (&fileMetadata{}).ExtendRangeKeyBounds( + cmp, + base.ParseInternalKey(strings.TrimSpace(parts[0])), + base.ParseInternalKey(strings.TrimSpace(parts[1])), + ) + m.SmallestSeqNum = m.Smallest.SeqNum() + m.LargestSeqNum = m.Largest.SeqNum() + m.InitPhysicalBacking() + return m + } + + opts := (*Options)(nil).EnsureDefaults() + + var c *compaction + datadriven.RunTest(t, "testdata/compaction_inuse_key_ranges", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "define": + c = &compaction{ + cmp: 
DefaultComparer.Compare, + equal: DefaultComparer.Equal, + comparer: DefaultComparer, + formatKey: DefaultComparer.FormatKey, + inputs: []compactionLevel{{}, {}}, + } + c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1] + var files [numLevels][]*fileMetadata + var currentLevel int + fileNum := FileNum(1) + + for _, data := range strings.Split(td.Input, "\n") { + switch data { + case "L0", "L1", "L2", "L3", "L4", "L5", "L6": + level, err := strconv.Atoi(data[1:]) + if err != nil { + return err.Error() + } + currentLevel = level + + default: + meta := parseMeta(data) + meta.FileNum = fileNum + fileNum++ + files[currentLevel] = append(files[currentLevel], meta) + } + } + c.version = newVersion(opts, files) + return c.version.String() + + case "inuse-key-ranges": + var buf bytes.Buffer + for _, line := range strings.Split(td.Input, "\n") { + parts := strings.Fields(line) + if len(parts) != 3 { + fmt.Fprintf(&buf, "expected : %q\n", line) + continue + } + level, err := strconv.Atoi(parts[0]) + if err != nil { + fmt.Fprintf(&buf, "expected : %q: %v\n", line, err) + continue + } + c.outputLevel.level = level + c.smallest.UserKey = []byte(parts[1]) + c.largest.UserKey = []byte(parts[2]) + + c.inuseKeyRanges = nil + c.setupInuseKeyRanges() + if len(c.inuseKeyRanges) == 0 { + fmt.Fprintf(&buf, ".\n") + } else { + for i, r := range c.inuseKeyRanges { + if i > 0 { + fmt.Fprintf(&buf, " ") + } + fmt.Fprintf(&buf, "%s-%s", r.Start, r.End) + } + fmt.Fprintf(&buf, "\n") + } + } + return buf.String() + + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +func TestCompactionInuseKeyRangesRandomized(t *testing.T) { + var ( + fileNum = FileNum(0) + opts = (*Options)(nil).EnsureDefaults() + seed = int64(time.Now().UnixNano()) + rng = rand.New(rand.NewSource(seed)) + endKeyspace = 26 * 26 + ) + t.Logf("Using rng seed %d.", seed) + + for iter := 0; iter < 100; iter++ { + makeUserKey := func(i int) []byte { + if i >= endKeyspace { + i = endKeyspace - 1 + } + 
return []byte{byte(i/26 + 'a'), byte(i%26 + 'a')} + } + makeIK := func(level, i int) InternalKey { + return base.MakeInternalKey( + makeUserKey(i), + uint64(numLevels-level), + base.InternalKeyKindSet, + ) + } + makeFile := func(level, start, end int) *fileMetadata { + fileNum++ + m := (&fileMetadata{ + FileNum: fileNum, + }).ExtendPointKeyBounds( + opts.Comparer.Compare, + makeIK(level, start), + makeIK(level, end), + ) + m.SmallestSeqNum = m.Smallest.SeqNum() + m.LargestSeqNum = m.Largest.SeqNum() + m.InitPhysicalBacking() + return m + } + overlaps := func(startA, endA, startB, endB []byte) bool { + disjoint := opts.Comparer.Compare(endB, startA) < 0 || opts.Comparer.Compare(endA, startB) < 0 + return !disjoint + } + var files [numLevels][]*fileMetadata + for l := 0; l < numLevels; l++ { + for i := 0; i < rand.Intn(10); i++ { + s := rng.Intn(endKeyspace) + maxWidth := rng.Intn(endKeyspace-s) + 1 + e := rng.Intn(maxWidth) + s + sKey, eKey := makeUserKey(s), makeUserKey(e) + // Discard the key range if it overlaps any existing files + // within this level. + var o bool + for _, f := range files[l] { + o = o || overlaps(sKey, eKey, f.Smallest.UserKey, f.Largest.UserKey) + } + if o { + continue + } + files[l] = append(files[l], makeFile(l, s, e)) + } + slices.SortFunc(files[l], func(a, b *fileMetadata) int { + return opts.Comparer.Compare(a.Smallest.UserKey, b.Smallest.UserKey) + }) + } + v := newVersion(opts, files) + t.Log(v.DebugString(opts.Comparer.FormatKey)) + for i := 0; i < 1000; i++ { + l := rng.Intn(numLevels) + s := rng.Intn(endKeyspace) + maxWidth := rng.Intn(endKeyspace-s) + 1 + e := rng.Intn(maxWidth) + s + sKey, eKey := makeUserKey(s), makeUserKey(e) + keyRanges := calculateInuseKeyRanges(v, opts.Comparer.Compare, l, numLevels-1, sKey, eKey) + + for level := l; level < numLevels; level++ { + for _, f := range files[level] { + if !overlaps(sKey, eKey, f.Smallest.UserKey, f.Largest.UserKey) { + // This file doesn't overlap the queried range. Skip it. 
+ continue + } + // This file does overlap the queried range. The key range + // [MAX(f.Smallest, sKey), MIN(f.Largest, eKey)] must be fully + // contained by a key range in keyRanges. + checkStart, checkEnd := f.Smallest.UserKey, f.Largest.UserKey + if opts.Comparer.Compare(checkStart, sKey) < 0 { + checkStart = sKey + } + if opts.Comparer.Compare(checkEnd, eKey) > 0 { + checkEnd = eKey + } + var contained bool + for _, kr := range keyRanges { + contained = contained || + (opts.Comparer.Compare(checkStart, kr.Start) >= 0 && + opts.Comparer.Compare(checkEnd, kr.End) <= 0) + } + if !contained { + t.Errorf("Seed %d, iter %d: File %s overlaps %q-%q, but is not fully contained in any of the key ranges.", + seed, iter, f, sKey, eKey) + } + } + } + } + } +} + +func TestCompactionAllowZeroSeqNum(t *testing.T) { + var d *DB + defer func() { + if d != nil { + require.NoError(t, closeAllSnapshots(d)) + require.NoError(t, d.Close()) + } + }() + + metaRE := regexp.MustCompile(`^L([0-9]+):([^-]+)-(.+)$`) + var fileNum base.FileNum + parseMeta := func(s string) (level int, meta *fileMetadata) { + match := metaRE.FindStringSubmatch(s) + if match == nil { + t.Fatalf("malformed table spec: %s", s) + } + level, err := strconv.Atoi(match[1]) + if err != nil { + t.Fatalf("malformed table spec: %s: %s", s, err) + } + fileNum++ + meta = (&fileMetadata{ + FileNum: fileNum, + }).ExtendPointKeyBounds( + d.cmp, + InternalKey{UserKey: []byte(match[2])}, + InternalKey{UserKey: []byte(match[3])}, + ) + meta.InitPhysicalBacking() + return level, meta + } + + datadriven.RunTest(t, "testdata/compaction_allow_zero_seqnum", + func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "define": + if d != nil { + require.NoError(t, closeAllSnapshots(d)) + if err := d.Close(); err != nil { + return err.Error() + } + } + + var err error + if d, err = runDBDefineCmd(td, nil /* options */); err != nil { + return err.Error() + } + + d.mu.Lock() + s := 
d.mu.versions.currentVersion().String() + d.mu.Unlock() + return s + + case "allow-zero-seqnum": + d.mu.Lock() + c := &compaction{ + cmp: d.cmp, + comparer: d.opts.Comparer, + version: d.mu.versions.currentVersion(), + inputs: []compactionLevel{{}, {}}, + } + c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1] + d.mu.Unlock() + + var buf bytes.Buffer + for _, line := range strings.Split(td.Input, "\n") { + parts := strings.Fields(line) + if len(parts) == 0 { + continue + } + c.flushing = nil + c.startLevel.level = -1 + + var startFiles, outputFiles []*fileMetadata + + switch { + case len(parts) == 1 && parts[0] == "flush": + c.outputLevel.level = 0 + d.mu.Lock() + c.flushing = d.mu.mem.queue + d.mu.Unlock() + + default: + for _, p := range parts { + level, meta := parseMeta(p) + if c.startLevel.level == -1 { + c.startLevel.level = level + } + + switch level { + case c.startLevel.level: + startFiles = append(startFiles, meta) + case c.startLevel.level + 1: + outputFiles = append(outputFiles, meta) + default: + return fmt.Sprintf("invalid level %d: expected %d or %d", + level, c.startLevel.level, c.startLevel.level+1) + } + } + c.outputLevel.level = c.startLevel.level + 1 + c.startLevel.files = manifest.NewLevelSliceSpecificOrder(startFiles) + c.outputLevel.files = manifest.NewLevelSliceKeySorted(c.cmp, outputFiles) + } + + c.smallest, c.largest = manifest.KeyRange(c.cmp, + c.startLevel.files.Iter(), + c.outputLevel.files.Iter()) + + c.inuseKeyRanges = nil + c.setupInuseKeyRanges() + fmt.Fprintf(&buf, "%t\n", c.allowZeroSeqNum()) + } + return buf.String() + + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +func TestCompactionErrorOnUserKeyOverlap(t *testing.T) { + cmp := DefaultComparer.Compare + parseMeta := func(s string) *fileMetadata { + parts := strings.Split(s, "-") + if len(parts) != 2 { + t.Fatalf("malformed table spec: %s", s) + } + m := (&fileMetadata{}).ExtendPointKeyBounds( + cmp, + 
base.ParseInternalKey(strings.TrimSpace(parts[0])), + base.ParseInternalKey(strings.TrimSpace(parts[1])), + ) + m.SmallestSeqNum = m.Smallest.SeqNum() + m.LargestSeqNum = m.Largest.SeqNum() + m.InitPhysicalBacking() + return m + } + + datadriven.RunTest(t, "testdata/compaction_error_on_user_key_overlap", + func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "error-on-user-key-overlap": + c := &compaction{ + cmp: DefaultComparer.Compare, + comparer: DefaultComparer, + formatKey: DefaultComparer.FormatKey, + } + var files []manifest.NewFileEntry + fileNum := FileNum(1) + + for _, data := range strings.Split(d.Input, "\n") { + meta := parseMeta(data) + meta.FileNum = fileNum + fileNum++ + files = append(files, manifest.NewFileEntry{Level: 1, Meta: meta}) + } + + result := "OK" + ve := &versionEdit{ + NewFiles: files, + } + if err := c.errorOnUserKeyOverlap(ve); err != nil { + result = fmt.Sprint(err) + } + return result + + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} + +// TestCompactionErrorCleanup tests an error encountered during a compaction +// after some output tables have been created. It ensures that the pending +// output tables are removed from the filesystem. +func TestCompactionErrorCleanup(t *testing.T) { + // protected by d.mu + var ( + initialSetupDone bool + tablesCreated []FileNum + ) + + mem := vfs.NewMem() + ii := errorfs.OnIndex(math.MaxInt32) // start disabled + opts := (&Options{ + FS: errorfs.Wrap(mem, errorfs.ErrInjected.If(ii)), + Levels: make([]LevelOptions, numLevels), + EventListener: &EventListener{ + TableCreated: func(info TableCreateInfo) { + t.Log(info) + + // If the initial setup is over, record tables created and + // inject an error immediately after the second table is + // created. 
+ if initialSetupDone { + tablesCreated = append(tablesCreated, info.FileNum) + if len(tablesCreated) >= 2 { + ii.Store(0) + } + } + }, + }, + }).WithFSDefaults() + for i := range opts.Levels { + opts.Levels[i].TargetFileSize = 1 + } + opts.testingRandomized(t) + d, err := Open("", opts) + require.NoError(t, err) + + ingest := func(keys ...string) { + t.Helper() + f, err := mem.Create("ext") + require.NoError(t, err) + + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ + TableFormat: d.FormatMajorVersion().MaxTableFormat(), + }) + for _, k := range keys { + require.NoError(t, w.Set([]byte(k), nil)) + } + require.NoError(t, w.Close()) + require.NoError(t, d.Ingest([]string{"ext"})) + } + ingest("a", "c") + ingest("b") + + // Trigger a manual compaction, which will encounter an injected error + // after the second table is created. + d.mu.Lock() + initialSetupDone = true + d.mu.Unlock() + err = d.Compact([]byte("a"), []byte("d"), false) + require.Error(t, err, "injected error") + + d.mu.Lock() + if len(tablesCreated) < 2 { + t.Fatalf("expected 2 output tables created by compaction: found %d", len(tablesCreated)) + } + d.mu.Unlock() + + require.NoError(t, d.Close()) + for _, fileNum := range tablesCreated { + filename := fmt.Sprintf("%s.sst", fileNum) + if _, err = mem.Stat(filename); err == nil || !oserror.IsNotExist(err) { + t.Errorf("expected %q to not exist: %s", filename, err) + } + } +} + +func TestCompactionCheckOrdering(t *testing.T) { + cmp := DefaultComparer.Compare + parseMeta := func(s string) *fileMetadata { + parts := strings.Split(s, "-") + if len(parts) != 2 { + t.Fatalf("malformed table spec: %s", s) + } + m := (&fileMetadata{}).ExtendPointKeyBounds( + cmp, + base.ParseInternalKey(strings.TrimSpace(parts[0])), + base.ParseInternalKey(strings.TrimSpace(parts[1])), + ) + m.SmallestSeqNum = m.Smallest.SeqNum() + m.LargestSeqNum = m.Largest.SeqNum() + m.InitPhysicalBacking() + return m + } + + datadriven.RunTest(t, 
"testdata/compaction_check_ordering", + func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "check-ordering": + c := &compaction{ + cmp: DefaultComparer.Compare, + comparer: DefaultComparer, + formatKey: DefaultComparer.FormatKey, + logger: panicLogger{}, + inputs: []compactionLevel{{level: -1}, {level: -1}}, + } + c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1] + var startFiles, outputFiles []*fileMetadata + var sublevels []manifest.LevelSlice + var files *[]*fileMetadata + var sublevel []*fileMetadata + var sublevelNum int + var parsingSublevel bool + fileNum := FileNum(1) + + switchSublevel := func() { + if sublevel != nil { + sublevels = append( + sublevels, manifest.NewLevelSliceSpecificOrder(sublevel), + ) + sublevel = nil + } + parsingSublevel = false + } + + for _, data := range strings.Split(d.Input, "\n") { + if data[0] == 'L' && len(data) == 4 { + // Format L0.{sublevel}. + switchSublevel() + level, err := strconv.Atoi(data[1:2]) + if err != nil { + return err.Error() + } + sublevelNum, err = strconv.Atoi(data[3:]) + if err != nil { + return err.Error() + } + if c.startLevel.level == -1 { + c.startLevel.level = level + files = &startFiles + } + parsingSublevel = true + } else if data[0] == 'L' { + switchSublevel() + level, err := strconv.Atoi(data[1:]) + if err != nil { + return err.Error() + } + if c.startLevel.level == -1 { + c.startLevel.level = level + files = &startFiles + } else if c.outputLevel.level == -1 { + if c.startLevel.level >= level { + return fmt.Sprintf("startLevel=%d >= outputLevel=%d\n", c.startLevel.level, level) + } + c.outputLevel.level = level + files = &outputFiles + } else { + return "outputLevel already set\n" + } + } else { + meta := parseMeta(data) + meta.FileNum = fileNum + fileNum++ + *files = append(*files, meta) + if parsingSublevel { + meta.SubLevel = sublevelNum + sublevel = append(sublevel, meta) + } + } + } + + switchSublevel() + c.startLevel.files = 
manifest.NewLevelSliceSpecificOrder(startFiles) + c.outputLevel.files = manifest.NewLevelSliceSpecificOrder(outputFiles) + if c.outputLevel.level == -1 { + c.outputLevel.level = 0 + } + if c.startLevel.level == 0 { + // We don't change the input files for the compaction beyond this point. + c.startLevel.l0SublevelInfo = generateSublevelInfo(c.cmp, c.startLevel.files) + } + + newIters := func( + _ context.Context, _ *manifest.FileMetadata, _ *IterOptions, _ internalIterOpts, + ) (internalIterator, keyspan.FragmentIterator, error) { + return &errorIter{}, nil, nil + } + result := "OK" + _, err := c.newInputIter(newIters, nil, nil) + if err != nil { + result = fmt.Sprint(err) + } + return result + + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} + +type mockSplitter struct { + shouldSplitVal maybeSplit +} + +func (m *mockSplitter) shouldSplitBefore(key *InternalKey, tw *sstable.Writer) maybeSplit { + return m.shouldSplitVal +} + +func (m *mockSplitter) onNewOutput(key []byte) []byte { + return nil +} + +func TestCompactionOutputSplitters(t *testing.T) { + var main, child0, child1 compactionOutputSplitter + var prevUserKey []byte + pickSplitter := func(input string) *compactionOutputSplitter { + switch input { + case "main": + return &main + case "child0": + return &child0 + case "child1": + return &child1 + default: + t.Fatalf("invalid splitter slot: %s", input) + return nil + } + } + + datadriven.RunTest(t, "testdata/compaction_output_splitters", + func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "reset": + main = nil + child0 = nil + child1 = nil + case "init": + if len(d.CmdArgs) < 2 { + return "expected at least 2 args" + } + splitterToInit := pickSplitter(d.CmdArgs[0].Key) + switch d.CmdArgs[1].Key { + case "array": + *splitterToInit = &splitterGroup{ + cmp: base.DefaultComparer.Compare, + splitters: []compactionOutputSplitter{child0, child1}, + } + case "mock": + *splitterToInit = &mockSplitter{} + case 
"userkey": + *splitterToInit = &userKeyChangeSplitter{ + cmp: base.DefaultComparer.Compare, + unsafePrevUserKey: func() []byte { + return prevUserKey + }, + splitter: child0, + } + } + (*splitterToInit).onNewOutput(nil) + case "set-should-split": + if len(d.CmdArgs) < 2 { + return "expected at least 2 args" + } + splitterToSet := (*pickSplitter(d.CmdArgs[0].Key)).(*mockSplitter) + var val maybeSplit + switch d.CmdArgs[1].Key { + case "split-now": + val = splitNow + case "no-split": + val = noSplit + default: + t.Fatalf("unexpected value for should-split: %s", d.CmdArgs[1].Key) + } + splitterToSet.shouldSplitVal = val + case "should-split-before": + if len(d.CmdArgs) < 1 { + return "expected at least 1 arg" + } + key := base.ParseInternalKey(d.CmdArgs[0].Key) + shouldSplit := main.shouldSplitBefore(&key, nil) + if shouldSplit == splitNow { + main.onNewOutput(key.UserKey) + prevUserKey = nil + } else { + prevUserKey = key.UserKey + } + return shouldSplit.String() + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + return "ok" + }) +} + +func TestCompactFlushQueuedMemTableAndFlushMetrics(t *testing.T) { + t.Run("", func(t *testing.T) { + // Verify that manual compaction forces a flush of a queued memtable. + + mem := vfs.NewMem() + d, err := Open("", testingRandomized(t, &Options{ + FS: mem, + }).WithFSDefaults()) + require.NoError(t, err) + + // Add the key "a" to the memtable, then fill up the memtable with the key + // prefix "b". The compaction will only overlap with the queued memtable, + // not the mutable memtable. + // NB: The initial memtable size is 256KB, which is filled up with random + // values which typically don't compress well. The test also appends the + // random value to the "b" key to limit overwriting of the same key, which + // would get collapsed at flush time since there are no open snapshots. 
+ value := make([]byte, 50) + _, err = crand.Read(value) + require.NoError(t, err) + require.NoError(t, d.Set([]byte("a"), value, nil)) + for { + _, err = crand.Read(value) + require.NoError(t, err) + require.NoError(t, d.Set(append([]byte("b"), value...), value, nil)) + d.mu.Lock() + done := len(d.mu.mem.queue) == 2 + d.mu.Unlock() + if done { + break + } + } + + require.NoError(t, d.Compact([]byte("a"), []byte("a\x00"), false)) + d.mu.Lock() + require.Equal(t, 1, len(d.mu.mem.queue)) + d.mu.Unlock() + // Flush metrics are updated after and non-atomically with the memtable + // being removed from the queue. + for begin := time.Now(); ; { + metrics := d.Metrics() + require.NotNil(t, metrics) + if metrics.Flush.WriteThroughput.Bytes >= 50*1024 { + // The writes (during which the flush is idle) and the flush work + // should not be so fast as to be unrealistic. If these turn out to be + // flaky we could instead inject a clock. + // + // Windows timer precision is bad (on the order of 1 millisecond) and + // can cause the duration to be 0. + if runtime.GOOS != "windows" { + tinyInterval := 50 * time.Microsecond + require.Less(t, tinyInterval, metrics.Flush.WriteThroughput.WorkDuration) + require.Less(t, tinyInterval, metrics.Flush.WriteThroughput.IdleDuration) + } + break + } + if time.Since(begin) > 2*time.Second { + t.Fatal("flush did not happen") + } + time.Sleep(time.Millisecond) + } + require.NoError(t, d.Close()) + }) +} + +func TestCompactFlushQueuedLargeBatch(t *testing.T) { + // Verify that compaction forces a flush of a queued large batch. + + mem := vfs.NewMem() + d, err := Open("", testingRandomized(t, &Options{ + FS: mem, + }).WithFSDefaults()) + require.NoError(t, err) + + // The default large batch threshold is slightly less than 1/2 of the + // memtable size which makes triggering a problem with flushing queued large + // batches irritating. 
Manually adjust the threshold to 1/8 of the memtable + // size in order to more easily create a situation where a large batch is + // queued but not automatically flushed. + d.mu.Lock() + d.largeBatchThreshold = d.opts.MemTableSize / 8 + require.Equal(t, 1, len(d.mu.mem.queue)) + d.mu.Unlock() + + // Set a record with a large value. This will be transformed into a large + // batch and placed in the flushable queue. + require.NoError(t, d.Set([]byte("a"), bytes.Repeat([]byte("v"), int(d.largeBatchThreshold)), nil)) + d.mu.Lock() + require.Greater(t, len(d.mu.mem.queue), 1) + d.mu.Unlock() + + require.NoError(t, d.Compact([]byte("a"), []byte("a\x00"), false)) + d.mu.Lock() + require.Equal(t, 1, len(d.mu.mem.queue)) + d.mu.Unlock() + + require.NoError(t, d.Close()) +} + +func TestFlushError(t *testing.T) { + // Error the first five times we try to write a sstable. + var errorOps atomic.Int32 + errorOps.Store(3) + fs := errorfs.Wrap(vfs.NewMem(), errorfs.InjectorFunc(func(op errorfs.Op) error { + if op.Kind == errorfs.OpCreate && filepath.Ext(op.Path) == ".sst" && errorOps.Add(-1) >= 0 { + return errorfs.ErrInjected + } + return nil + })) + d, err := Open("", testingRandomized(t, &Options{ + FS: fs, + EventListener: &EventListener{ + BackgroundError: func(err error) { + t.Log(err) + }, + }, + }).WithFSDefaults()) + require.NoError(t, err) + require.NoError(t, d.Set([]byte("a"), []byte("foo"), NoSync)) + require.NoError(t, d.Flush()) + require.NoError(t, d.Close()) +} + +func TestAdjustGrandparentOverlapBytesForFlush(t *testing.T) { + // 500MB in Lbase + var lbaseFiles []*manifest.FileMetadata + const lbaseSize = 5 << 20 + for i := 0; i < 100; i++ { + m := &manifest.FileMetadata{Size: lbaseSize, FileNum: FileNum(i)} + m.InitPhysicalBacking() + lbaseFiles = + append(lbaseFiles, m) + } + const maxOutputFileSize = 2 << 20 + // 20MB max overlap, so flush split into 25 files. 
+ const maxOverlapBytes = 20 << 20 + ls := manifest.NewLevelSliceSpecificOrder(lbaseFiles) + testCases := []struct { + flushingBytes uint64 + adjustedOverlapBytes uint64 + }{ + // Flushes large enough that 25 files is acceptable. + {flushingBytes: 128 << 20, adjustedOverlapBytes: 20971520}, + {flushingBytes: 64 << 20, adjustedOverlapBytes: 20971520}, + // Small increase in adjustedOverlapBytes. + {flushingBytes: 32 << 20, adjustedOverlapBytes: 32768000}, + // Large increase in adjusterOverlapBytes, to limit to 4 files. + {flushingBytes: 1 << 20, adjustedOverlapBytes: 131072000}, + } + for _, tc := range testCases { + t.Run("", func(t *testing.T) { + c := compaction{ + grandparents: ls, + maxOverlapBytes: maxOverlapBytes, + maxOutputFileSize: maxOutputFileSize, + } + adjustGrandparentOverlapBytesForFlush(&c, tc.flushingBytes) + require.Equal(t, tc.adjustedOverlapBytes, c.maxOverlapBytes) + }) + } +} + +func TestCompactionInvalidBounds(t *testing.T) { + db, err := Open("", testingRandomized(t, &Options{ + FS: vfs.NewMem(), + }).WithFSDefaults()) + require.NoError(t, err) + defer db.Close() + require.NoError(t, db.Compact([]byte("a"), []byte("b"), false)) + require.Error(t, db.Compact([]byte("a"), []byte("a"), false)) + require.Error(t, db.Compact([]byte("b"), []byte("a"), false)) +} + +func Test_calculateInuseKeyRanges(t *testing.T) { + opts := (*Options)(nil).EnsureDefaults() + cmp := base.DefaultComparer.Compare + newFileMeta := func(fileNum FileNum, size uint64, smallest, largest base.InternalKey) *fileMetadata { + m := (&fileMetadata{ + FileNum: fileNum, + Size: size, + }).ExtendPointKeyBounds(opts.Comparer.Compare, smallest, largest) + m.InitPhysicalBacking() + return m + } + tests := []struct { + name string + v *version + level int + depth int + smallest []byte + largest []byte + want []manifest.UserKeyRange + }{ + { + name: "No files in next level", + v: newVersion(opts, [numLevels][]*fileMetadata{ + 1: { + newFileMeta( + 1, + 1, + 
base.ParseInternalKey("a.SET.2"), + base.ParseInternalKey("c.SET.2"), + ), + newFileMeta( + 2, + 1, + base.ParseInternalKey("d.SET.2"), + base.ParseInternalKey("e.SET.2"), + ), + }, + }), + level: 1, + depth: 2, + smallest: []byte("a"), + largest: []byte("e"), + want: []manifest.UserKeyRange{ + { + Start: []byte("a"), + End: []byte("c"), + }, + { + Start: []byte("d"), + End: []byte("e"), + }, + }, + }, + { + name: "No overlapping key ranges", + v: newVersion(opts, [numLevels][]*fileMetadata{ + 1: { + newFileMeta( + 1, + 1, + base.ParseInternalKey("a.SET.1"), + base.ParseInternalKey("c.SET.1"), + ), + newFileMeta( + 2, + 1, + base.ParseInternalKey("l.SET.1"), + base.ParseInternalKey("p.SET.1"), + ), + }, + 2: { + newFileMeta( + 3, + 1, + base.ParseInternalKey("d.SET.1"), + base.ParseInternalKey("i.SET.1"), + ), + newFileMeta( + 4, + 1, + base.ParseInternalKey("s.SET.1"), + base.ParseInternalKey("w.SET.1"), + ), + }, + }), + level: 1, + depth: 2, + smallest: []byte("a"), + largest: []byte("z"), + want: []manifest.UserKeyRange{ + { + Start: []byte("a"), + End: []byte("c"), + }, + { + Start: []byte("d"), + End: []byte("i"), + }, + { + Start: []byte("l"), + End: []byte("p"), + }, + { + Start: []byte("s"), + End: []byte("w"), + }, + }, + }, + { + name: "First few non-overlapping, followed by overlapping", + v: newVersion(opts, [numLevels][]*fileMetadata{ + 1: { + newFileMeta( + 1, + 1, + base.ParseInternalKey("a.SET.1"), + base.ParseInternalKey("c.SET.1"), + ), + newFileMeta( + 2, + 1, + base.ParseInternalKey("d.SET.1"), + base.ParseInternalKey("e.SET.1"), + ), + newFileMeta( + 3, + 1, + base.ParseInternalKey("n.SET.1"), + base.ParseInternalKey("o.SET.1"), + ), + newFileMeta( + 4, + 1, + base.ParseInternalKey("p.SET.1"), + base.ParseInternalKey("q.SET.1"), + ), + }, + 2: { + newFileMeta( + 5, + 1, + base.ParseInternalKey("m.SET.1"), + base.ParseInternalKey("q.SET.1"), + ), + newFileMeta( + 6, + 1, + base.ParseInternalKey("s.SET.1"), + base.ParseInternalKey("w.SET.1"), + 
), + }, + }), + level: 1, + depth: 2, + smallest: []byte("a"), + largest: []byte("z"), + want: []manifest.UserKeyRange{ + { + Start: []byte("a"), + End: []byte("c"), + }, + { + Start: []byte("d"), + End: []byte("e"), + }, + { + Start: []byte("m"), + End: []byte("q"), + }, + { + Start: []byte("s"), + End: []byte("w"), + }, + }, + }, + { + name: "All overlapping", + v: newVersion(opts, [numLevels][]*fileMetadata{ + 1: { + newFileMeta( + 1, + 1, + base.ParseInternalKey("d.SET.1"), + base.ParseInternalKey("e.SET.1"), + ), + newFileMeta( + 2, + 1, + base.ParseInternalKey("n.SET.1"), + base.ParseInternalKey("o.SET.1"), + ), + newFileMeta( + 3, + 1, + base.ParseInternalKey("p.SET.1"), + base.ParseInternalKey("q.SET.1"), + ), + }, + 2: { + newFileMeta( + 4, + 1, + base.ParseInternalKey("a.SET.1"), + base.ParseInternalKey("c.SET.1"), + ), + newFileMeta( + 5, + 1, + base.ParseInternalKey("d.SET.1"), + base.ParseInternalKey("w.SET.1"), + ), + }, + }), + level: 1, + depth: 2, + smallest: []byte("a"), + largest: []byte("z"), + want: []manifest.UserKeyRange{ + { + Start: []byte("a"), + End: []byte("c"), + }, + { + Start: []byte("d"), + End: []byte("w"), + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := calculateInuseKeyRanges(tt.v, cmp, tt.level, tt.depth, tt.smallest, tt.largest); !reflect.DeepEqual(got, tt.want) { + t.Errorf("calculateInuseKeyRanges() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestMarkedForCompaction(t *testing.T) { + var mem vfs.FS = vfs.NewMem() + var d *DB + defer func() { + if d != nil { + require.NoError(t, d.Close()) + } + }() + + var buf bytes.Buffer + opts := (&Options{ + FS: mem, + DebugCheck: DebugCheckLevels, + DisableAutomaticCompactions: true, + FormatMajorVersion: internalFormatNewest, + EventListener: &EventListener{ + CompactionEnd: func(info CompactionInfo) { + // Fix the job ID and durations for determinism. 
+ info.JobID = 100 + info.Duration = time.Second + info.TotalDuration = 2 * time.Second + fmt.Fprintln(&buf, info) + }, + }, + }).WithFSDefaults() + + reset := func() { + if d != nil { + require.NoError(t, d.Close()) + } + mem = vfs.NewMem() + require.NoError(t, mem.MkdirAll("ext", 0755)) + + var err error + d, err = Open("", opts) + require.NoError(t, err) + } + datadriven.RunTest(t, "testdata/marked_for_compaction", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "reset": + reset() + return "" + + case "define": + if d != nil { + if err := d.Close(); err != nil { + return err.Error() + } + } + var err error + if d, err = runDBDefineCmd(td, opts); err != nil { + return err.Error() + } + d.mu.Lock() + defer d.mu.Unlock() + t := time.Now() + d.timeNow = func() time.Time { + t = t.Add(time.Second) + return t + } + s := d.mu.versions.currentVersion().DebugString(base.DefaultFormatter) + return s + + case "mark-for-compaction": + d.mu.Lock() + defer d.mu.Unlock() + vers := d.mu.versions.currentVersion() + var fileNum uint64 + td.ScanArgs(t, "file", &fileNum) + for l, lm := range vers.Levels { + iter := lm.Iter() + for f := iter.First(); f != nil; f = iter.Next() { + if f.FileNum != base.FileNum(fileNum) { + continue + } + f.MarkedForCompaction = true + vers.Stats.MarkedForCompaction++ + vers.Levels[l].InvalidateAnnotation(markedForCompactionAnnotator{}) + return fmt.Sprintf("marked L%d.%s", l, f.FileNum) + } + } + return "not-found" + + case "maybe-compact": + d.mu.Lock() + defer d.mu.Unlock() + d.opts.DisableAutomaticCompactions = false + d.maybeScheduleCompaction() + for d.mu.compact.compactingCount > 0 { + d.mu.compact.cond.Wait() + } + + fmt.Fprintln(&buf, d.mu.versions.currentVersion().DebugString(base.DefaultFormatter)) + s := strings.TrimSpace(buf.String()) + buf.Reset() + opts.DisableAutomaticCompactions = true + return s + + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +// createManifestErrorInjector 
injects errors (when enabled) into vfs.FS calls +// to create MANIFEST files. +type createManifestErrorInjector struct { + enabled atomic.Bool +} + +// TODO(jackson): Replace the createManifestErrorInjector with the composition +// of primitives defined in errorfs. This may require additional primitives. + +func (i *createManifestErrorInjector) String() string { return "MANIFEST-Creates" } + +// enable enables error injection for the vfs.FS. +func (i *createManifestErrorInjector) enable() { + i.enabled.Store(true) +} + +// MaybeError implements errorfs.Injector. +func (i *createManifestErrorInjector) MaybeError(op errorfs.Op) error { + if !i.enabled.Load() { + return nil + } + // This necessitates having a MaxManifestSize of 1, to reliably induce + // logAndApply errors. + if strings.Contains(op.Path, "MANIFEST") && op.Kind == errorfs.OpCreate { + return errorfs.ErrInjected + } + return nil +} + +var _ errorfs.Injector = &createManifestErrorInjector{} + +// TestCompaction_LogAndApplyFails exercises a flush or ingest encountering an +// unrecoverable error during logAndApply. +// +// Regression test for #1669. +func TestCompaction_LogAndApplyFails(t *testing.T) { + // flushKeys writes the given keys to the DB, flushing the resulting memtable. + var key = []byte("foo") + flushErrC := make(chan error) + flushKeys := func(db *DB) error { + b := db.NewBatch() + err := b.Set(key, nil, nil) + require.NoError(t, err) + err = b.Commit(nil) + require.NoError(t, err) + // An error from a failing flush is returned asynchronously. + go func() { _ = db.Flush() }() + return <-flushErrC + } + + // ingestKeys adds the given keys to the DB via an ingestion. + ingestKeys := func(db *DB) error { + // Create an SST for ingestion. 
+ const fName = "ext" + f, err := db.opts.FS.Create(fName) + require.NoError(t, err) + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) + require.NoError(t, w.Set(key, nil)) + require.NoError(t, w.Close()) + // Ingest the SST. + return db.Ingest([]string{fName}) + } + + testCases := []struct { + name string + addFn func(db *DB) error + backgroundErrorFn func(*DB, error) + }{ + { + name: "flush", + addFn: flushKeys, + backgroundErrorFn: func(db *DB, err error) { + require.True(t, errors.Is(err, errorfs.ErrInjected)) + flushErrC <- err + // A flush will attempt to retry in the background. For the purposes of + // testing this particular scenario, where we would have crashed anyway, + // drop the memtable on the floor to short circuit the retry loop. + // NB: we hold db.mu here. + var cur *flushableEntry + cur, db.mu.mem.queue = db.mu.mem.queue[0], db.mu.mem.queue[1:] + cur.readerUnrefLocked(true) + }, + }, + { + name: "ingest", + addFn: ingestKeys, + }, + } + + runTest := func(t *testing.T, addFn func(db *DB) error, bgFn func(*DB, error)) { + var db *DB + inj := &createManifestErrorInjector{} + logger := &fatalCapturingLogger{t: t} + opts := (&Options{ + FS: errorfs.Wrap(vfs.NewMem(), inj), + // Rotate the manifest after each write. This is required to trigger a + // file creation, into which errors can be injected. + MaxManifestFileSize: 1, + Logger: logger, + EventListener: &EventListener{ + BackgroundError: func(err error) { + if bgFn != nil { + bgFn(db, err) + } + }, + }, + DisableAutomaticCompactions: true, + }).WithFSDefaults() + + db, err := Open("", opts) + require.NoError(t, err) + defer func() { _ = db.Close() }() + + inj.enable() + err = addFn(db) + require.True(t, errors.Is(err, errorfs.ErrInjected)) + + // Under normal circumstances, such an error in logAndApply would panic and + // cause the DB to terminate here. Assert that we captured the fatal error. 
+ require.True(t, errors.Is(logger.err, errorfs.ErrInjected)) + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + runTest(t, tc.addFn, tc.backgroundErrorFn) + }) + } +} + +// TestSharedObjectDeletePacing tests that we don't throttle shared object +// deletes (see the TargetBytesDeletionRate option). +func TestSharedObjectDeletePacing(t *testing.T) { + var opts Options + opts.FS = vfs.NewMem() + opts.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ + "": remote.NewInMem(), + }) + opts.Experimental.CreateOnShared = remote.CreateOnSharedAll + opts.TargetByteDeletionRate = 1 + + d, err := Open("", &opts) + require.NoError(t, err) + require.NoError(t, d.SetCreatorID(1)) + + randVal := func() []byte { + res := make([]byte, 1024) + _, err := crand.Read(res) + require.NoError(t, err) + return res + } + + // We must set up things so that we will have more live bytes than obsolete + // bytes, otherwise delete pacing will be disabled anyway. + key := func(i int) string { + return fmt.Sprintf("k%02d", i) + } + const numKeys = 20 + for i := 1; i <= numKeys; i++ { + require.NoError(t, d.Set([]byte(key(i)), randVal(), nil)) + require.NoError(t, d.Compact([]byte(key(i)), []byte(key(i)+"1"), false)) + } + + done := make(chan struct{}) + go func() { + err = d.DeleteRange([]byte(key(5)), []byte(key(9)), nil) + if err == nil { + err = d.Compact([]byte(key(5)), []byte(key(9)), false) + } + // Wait for objects to be deleted. + for { + time.Sleep(10 * time.Millisecond) + if len(d.objProvider.List()) < numKeys-2 { + break + } + } + close(done) + }() + + select { + case <-time.After(60 * time.Second): + // Don't close the DB in this case (the goroutine above might panic). 
+ t.Fatalf("compaction timed out, possibly due to incorrect deletion pacing") + case <-done: + } + require.NoError(t, err) + d.Close() +} + +type WriteErrorInjector struct { + enabled atomic.Bool +} + +// TODO(jackson): Replace WriteErrorInjector with use of primitives in errorfs, +// adding new primitives as necessary. + +func (i *WriteErrorInjector) String() string { return "FileWrites(ErrInjected)" } + +// enable enables error injection for the vfs.FS. +func (i *WriteErrorInjector) enable() { + i.enabled.Store(true) +} + +// disable disabled error injection for the vfs.FS. +func (i *WriteErrorInjector) disable() { + i.enabled.Store(false) +} + +// MaybeError implements errorfs.Injector. +func (i *WriteErrorInjector) MaybeError(op errorfs.Op) error { + if !i.enabled.Load() { + return nil + } + // Fail any future write. + if op.Kind == errorfs.OpFileWrite { + return errorfs.ErrInjected + } + return nil +} + +var _ errorfs.Injector = &WriteErrorInjector{} + +// Cumulative compaction stats shouldn't be updated on compaction error. +func TestCompactionErrorStats(t *testing.T) { + // protected by d.mu + var ( + useInjector bool + tablesCreated []FileNum + ) + + mem := vfs.NewMem() + injector := &WriteErrorInjector{} + opts := (&Options{ + FS: errorfs.Wrap(mem, injector), + Levels: make([]LevelOptions, numLevels), + EventListener: &EventListener{ + TableCreated: func(info TableCreateInfo) { + t.Log(info) + + if useInjector { + // We'll write 3 tables during compaction, and we only need + // the writes to error on the third file write, so only enable + // the injector after the first two files have been written to. 
+ tablesCreated = append(tablesCreated, info.FileNum) + if len(tablesCreated) >= 2 { + injector.enable() + } + } + }, + }, + }).WithFSDefaults() + for i := range opts.Levels { + opts.Levels[i].TargetFileSize = 1 + } + opts.testingRandomized(t) + d, err := Open("", opts) + require.NoError(t, err) + + ingest := func(keys ...string) { + t.Helper() + f, err := mem.Create("ext") + require.NoError(t, err) + + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ + TableFormat: d.FormatMajorVersion().MaxTableFormat(), + }) + for _, k := range keys { + require.NoError(t, w.Set([]byte(k), nil)) + } + require.NoError(t, w.Close()) + require.NoError(t, d.Ingest([]string{"ext"})) + } + ingest("a", "c") + // Snapshot will preserve the older "a" key during compaction. + snap := d.NewSnapshot() + ingest("a", "b") + + // Trigger a manual compaction, which will encounter an injected error + // after the second table is created. + d.mu.Lock() + useInjector = true + d.mu.Unlock() + + err = d.Compact([]byte("a"), []byte("d"), false) + require.Error(t, err, "injected error") + + // Due to the error, stats shouldn't have been updated. + d.mu.Lock() + require.Equal(t, 0, int(d.mu.snapshots.cumulativePinnedCount)) + require.Equal(t, 0, int(d.mu.snapshots.cumulativePinnedSize)) + useInjector = false + d.mu.Unlock() + + injector.disable() + + // The following compaction won't error, but snapshot is open, so snapshot + // pinned stats should update. + require.NoError(t, d.Compact([]byte("a"), []byte("d"), false)) + require.NoError(t, snap.Close()) + + d.mu.Lock() + require.Equal(t, 1, int(d.mu.snapshots.cumulativePinnedCount)) + require.Equal(t, 9, int(d.mu.snapshots.cumulativePinnedSize)) + d.mu.Unlock() + require.NoError(t, d.Close()) +} diff --git a/pebble/comparer.go b/pebble/comparer.go new file mode 100644 index 0000000..c92cd79 --- /dev/null +++ b/pebble/comparer.go @@ -0,0 +1,31 @@ +// Copyright 2011 The LevelDB-Go and Pebble Authors. 
All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import "github.com/cockroachdb/pebble/internal/base" + +// Compare exports the base.Compare type. +type Compare = base.Compare + +// Equal exports the base.Equal type. +type Equal = base.Equal + +// AbbreviatedKey exports the base.AbbreviatedKey type. +type AbbreviatedKey = base.AbbreviatedKey + +// Separator exports the base.Separator type. +type Separator = base.Separator + +// Successor exports the base.Successor type. +type Successor = base.Successor + +// Split exports the base.Split type. +type Split = base.Split + +// Comparer exports the base.Comparer type. +type Comparer = base.Comparer + +// DefaultComparer exports the base.DefaultComparer variable. +var DefaultComparer = base.DefaultComparer diff --git a/pebble/data_test.go b/pebble/data_test.go new file mode 100644 index 0000000..9f6260f --- /dev/null +++ b/pebble/data_test.go @@ -0,0 +1,1426 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package pebble + +import ( + "bytes" + crand "crypto/rand" + "fmt" + "io" + "math" + "math/rand" + "strconv" + "strings" + "testing" + "time" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/bloom" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/humanize" + "github.com/cockroachdb/pebble/internal/keyspan" + "github.com/cockroachdb/pebble/internal/private" + "github.com/cockroachdb/pebble/internal/rangedel" + "github.com/cockroachdb/pebble/internal/rangekey" + "github.com/cockroachdb/pebble/internal/testkeys" + "github.com/cockroachdb/pebble/objstorage/objstorageprovider" + "github.com/cockroachdb/pebble/objstorage/remote" + "github.com/cockroachdb/pebble/sstable" + "github.com/cockroachdb/pebble/vfs" + "github.com/cockroachdb/pebble/vfs/errorfs" + "github.com/stretchr/testify/require" +) + +func runGetCmd(t testing.TB, td *datadriven.TestData, d *DB) string { + snap := Snapshot{ + db: d, + seqNum: InternalKeySeqNumMax, + } + td.MaybeScanArgs(t, "seq", &snap.seqNum) + + var buf bytes.Buffer + for _, data := range strings.Split(td.Input, "\n") { + v, closer, err := snap.Get([]byte(data)) + if err != nil { + fmt.Fprintf(&buf, "%s: %s\n", data, err) + } else { + fmt.Fprintf(&buf, "%s:%s\n", data, v) + closer.Close() + } + } + return buf.String() +} + +func runIterCmd(d *datadriven.TestData, iter *Iterator, closeIter bool) string { + if closeIter { + defer func() { + if iter != nil { + iter.Close() + } + }() + } + var b bytes.Buffer + for _, line := range strings.Split(d.Input, "\n") { + parts := strings.Fields(line) + if len(parts) == 0 { + continue + } + printValidityState := false + var valid bool + var validityState IterValidityState + switch parts[0] { + case "seek-ge": + if len(parts) != 2 { + return "seek-ge \n" + } + valid = iter.SeekGE([]byte(parts[1])) + case "seek-prefix-ge": + if len(parts) != 2 { + return "seek-prefix-ge \n" + } + valid = 
iter.SeekPrefixGE([]byte(parts[1])) + case "seek-lt": + if len(parts) != 2 { + return "seek-lt \n" + } + valid = iter.SeekLT([]byte(parts[1])) + case "seek-ge-limit": + if len(parts) != 3 { + return "seek-ge-limit \n" + } + validityState = iter.SeekGEWithLimit( + []byte(parts[1]), []byte(parts[2])) + printValidityState = true + case "seek-lt-limit": + if len(parts) != 3 { + return "seek-lt-limit \n" + } + validityState = iter.SeekLTWithLimit( + []byte(parts[1]), []byte(parts[2])) + printValidityState = true + case "inspect": + if len(parts) != 2 { + return "inspect \n" + } + field := parts[1] + switch field { + case "lastPositioningOp": + op := "?" + switch iter.lastPositioningOp { + case unknownLastPositionOp: + op = "unknown" + case seekPrefixGELastPositioningOp: + op = "seekprefixge" + case seekGELastPositioningOp: + op = "seekge" + case seekLTLastPositioningOp: + op = "seeklt" + case invalidatedLastPositionOp: + op = "invalidate" + } + fmt.Fprintf(&b, "%s=%q\n", field, op) + default: + return fmt.Sprintf("unrecognized inspect field %q\n", field) + } + continue + case "next-limit": + if len(parts) != 2 { + return "next-limit \n" + } + validityState = iter.NextWithLimit([]byte(parts[1])) + printValidityState = true + case "internal-next": + validity, keyKind := iter.internalNext() + switch validity { + case internalNextError: + fmt.Fprintf(&b, "err: %s\n", iter.Error()) + case internalNextExhausted: + fmt.Fprint(&b, ".\n") + case internalNextValid: + fmt.Fprintf(&b, "%s\n", keyKind) + default: + panic("unreachable") + } + continue + case "can-deterministically-single-delete": + ok, err := CanDeterministicallySingleDelete(iter) + if err != nil { + fmt.Fprintf(&b, "err: %s\n", err) + } else { + fmt.Fprintf(&b, "%t\n", ok) + } + continue + case "prev-limit": + if len(parts) != 2 { + return "prev-limit \n" + } + validityState = iter.PrevWithLimit([]byte(parts[1])) + printValidityState = true + case "first": + valid = iter.First() + case "last": + valid = iter.Last() 
+ case "next": + valid = iter.Next() + case "next-prefix": + valid = iter.NextPrefix() + case "prev": + valid = iter.Prev() + case "set-bounds": + if len(parts) <= 1 || len(parts) > 3 { + return "set-bounds lower= upper=\n" + } + var lower []byte + var upper []byte + for _, part := range parts[1:] { + arg := strings.Split(part, "=") + switch arg[0] { + case "lower": + lower = []byte(arg[1]) + case "upper": + upper = []byte(arg[1]) + default: + return fmt.Sprintf("set-bounds: unknown arg: %s", arg) + } + } + iter.SetBounds(lower, upper) + valid = iter.Valid() + case "set-options": + opts := iter.opts + if _, err := parseIterOptions(&opts, &iter.opts, parts[1:]); err != nil { + return fmt.Sprintf("set-options: %s", err.Error()) + } + iter.SetOptions(&opts) + valid = iter.Valid() + case "stats": + stats := iter.Stats() + // The timing is non-deterministic, so set to 0. + stats.InternalStats.BlockReadDuration = 0 + fmt.Fprintf(&b, "stats: %s\n", stats.String()) + continue + case "clone": + var opts CloneOptions + if len(parts) > 1 { + var iterOpts IterOptions + if foundAny, err := parseIterOptions(&iterOpts, &iter.opts, parts[1:]); err != nil { + return fmt.Sprintf("clone: %s", err.Error()) + } else if foundAny { + opts.IterOptions = &iterOpts + } + for _, part := range parts[1:] { + if arg := strings.Split(part, "="); len(arg) == 2 && arg[0] == "refresh-batch" { + var err error + opts.RefreshBatchView, err = strconv.ParseBool(arg[1]) + if err != nil { + return fmt.Sprintf("clone: refresh-batch: %s", err.Error()) + } + } + } + } + clonedIter, err := iter.Clone(opts) + if err != nil { + fmt.Fprintf(&b, "error in clone, skipping rest of input: err=%v\n", err) + return b.String() + } + if err = iter.Close(); err != nil { + fmt.Fprintf(&b, "err=%v\n", err) + } + iter = clonedIter + case "is-using-combined": + if iter.opts.KeyTypes != IterKeyTypePointsAndRanges { + fmt.Fprintln(&b, "not configured for combined iteration") + } else if 
iter.lazyCombinedIter.combinedIterState.initialized { + fmt.Fprintln(&b, "using combined (non-lazy) iterator") + } else { + fmt.Fprintln(&b, "using lazy iterator") + } + continue + default: + return fmt.Sprintf("unknown op: %s", parts[0]) + } + + valid = valid || validityState == IterValid + if valid != iter.Valid() { + fmt.Fprintf(&b, "mismatched valid states: %t vs %t\n", valid, iter.Valid()) + } + hasPoint, hasRange := iter.HasPointAndRange() + hasEither := hasPoint || hasRange + if hasEither != valid { + fmt.Fprintf(&b, "mismatched valid/HasPointAndRange states: valid=%t HasPointAndRange=(%t,%t)\n", valid, hasPoint, hasRange) + } + + if valid { + validityState = IterValid + } + printIterState(&b, iter, validityState, printValidityState) + } + return b.String() +} + +func parseIterOptions( + opts *IterOptions, ref *IterOptions, parts []string, +) (foundAny bool, err error) { + const usageString = "[lower=] [upper=] [key-types=point|range|both] [mask-suffix=] [mask-filter=] [only-durable=] [table-filter=reuse|none] [point-filters=reuse|none]\n" + for _, part := range parts { + arg := strings.SplitN(part, "=", 2) + if len(arg) != 2 { + return false, errors.Newf(usageString) + } + switch arg[0] { + case "point-filters": + switch arg[1] { + case "reuse": + opts.PointKeyFilters = ref.PointKeyFilters + case "none": + opts.PointKeyFilters = nil + default: + return false, errors.Newf("unknown arg point-filter=%q:\n%s", arg[1], usageString) + } + case "lower": + opts.LowerBound = []byte(arg[1]) + case "upper": + opts.UpperBound = []byte(arg[1]) + case "key-types": + switch arg[1] { + case "point": + opts.KeyTypes = IterKeyTypePointsOnly + case "range": + opts.KeyTypes = IterKeyTypeRangesOnly + case "both": + opts.KeyTypes = IterKeyTypePointsAndRanges + default: + return false, errors.Newf("unknown key-type %q:\n%s", arg[1], usageString) + } + case "mask-suffix": + opts.RangeKeyMasking.Suffix = []byte(arg[1]) + case "mask-filter": + opts.RangeKeyMasking.Filter = func() 
BlockPropertyFilterMask { + return sstable.NewTestKeysMaskingFilter() + } + case "table-filter": + switch arg[1] { + case "reuse": + opts.TableFilter = ref.TableFilter + case "none": + opts.TableFilter = nil + default: + return false, errors.Newf("unknown arg table-filter=%q:\n%s", arg[1], usageString) + } + case "only-durable": + var err error + opts.OnlyReadGuaranteedDurable, err = strconv.ParseBool(arg[1]) + if err != nil { + return false, errors.Newf("cannot parse only-durable=%q: %s", arg[1], err) + } + default: + continue + } + foundAny = true + } + return foundAny, nil +} + +func printIterState( + b io.Writer, iter *Iterator, validity IterValidityState, printValidityState bool, +) { + var validityStateStr string + if printValidityState { + switch validity { + case IterExhausted: + validityStateStr = " exhausted" + case IterValid: + validityStateStr = " valid" + case IterAtLimit: + validityStateStr = " at-limit" + } + } + if err := iter.Error(); err != nil { + fmt.Fprintf(b, "err=%v\n", err) + } else if validity == IterValid { + switch { + case iter.opts.pointKeys(): + hasPoint, hasRange := iter.HasPointAndRange() + fmt.Fprintf(b, "%s:%s (", iter.Key(), validityStateStr) + if hasPoint { + fmt.Fprintf(b, "%s, ", iter.Value()) + } else { + fmt.Fprint(b, "., ") + } + if hasRange { + start, end := iter.RangeBounds() + fmt.Fprintf(b, "[%s-%s)", formatASCIIKey(start), formatASCIIKey(end)) + writeRangeKeys(b, iter) + } else { + fmt.Fprint(b, ".") + } + if iter.RangeKeyChanged() { + fmt.Fprint(b, " UPDATED") + } + fmt.Fprint(b, ")") + default: + if iter.Valid() { + hasPoint, hasRange := iter.HasPointAndRange() + if hasPoint || !hasRange { + panic(fmt.Sprintf("pebble: unexpected HasPointAndRange (%t, %t)", hasPoint, hasRange)) + } + start, end := iter.RangeBounds() + fmt.Fprintf(b, "%s [%s-%s)", iter.Key(), formatASCIIKey(start), formatASCIIKey(end)) + writeRangeKeys(b, iter) + } else { + fmt.Fprint(b, ".") + } + if iter.RangeKeyChanged() { + fmt.Fprint(b, " UPDATED") 
+ } + } + fmt.Fprintln(b) + } else { + fmt.Fprintf(b, ".%s\n", validityStateStr) + } +} + +func formatASCIIKey(b []byte) string { + if bytes.IndexFunc(b, func(r rune) bool { return r < 'A' || r > 'z' }) != -1 { + // This key is not just ASCII letters. Quote it. + return fmt.Sprintf("%q", b) + } + return string(b) +} + +func writeRangeKeys(b io.Writer, iter *Iterator) { + rangeKeys := iter.RangeKeys() + for j := 0; j < len(rangeKeys); j++ { + if j > 0 { + fmt.Fprint(b, ",") + } + fmt.Fprintf(b, " %s=%s", rangeKeys[j].Suffix, rangeKeys[j].Value) + } +} + +func runBatchDefineCmd(d *datadriven.TestData, b *Batch) error { + for _, line := range strings.Split(d.Input, "\n") { + parts := strings.Fields(line) + if len(parts) == 0 { + continue + } + if parts[1] == `` { + parts[1] = "" + } + var err error + switch parts[0] { + case "set": + if len(parts) != 3 { + return errors.Errorf("%s expects 2 arguments", parts[0]) + } + err = b.Set([]byte(parts[1]), []byte(parts[2]), nil) + case "del": + if len(parts) != 2 { + return errors.Errorf("%s expects 1 argument", parts[0]) + } + err = b.Delete([]byte(parts[1]), nil) + case "del-sized": + if len(parts) != 3 { + return errors.Errorf("%s expects 2 arguments", parts[0]) + } + var valSize uint64 + valSize, err = strconv.ParseUint(parts[2], 10, 32) + if err != nil { + return err + } + err = b.DeleteSized([]byte(parts[1]), uint32(valSize), nil) + case "singledel": + if len(parts) != 2 { + return errors.Errorf("%s expects 1 argument", parts[0]) + } + err = b.SingleDelete([]byte(parts[1]), nil) + case "del-range": + if len(parts) != 3 { + return errors.Errorf("%s expects 2 arguments", parts[0]) + } + err = b.DeleteRange([]byte(parts[1]), []byte(parts[2]), nil) + case "merge": + if len(parts) != 3 { + return errors.Errorf("%s expects 2 arguments", parts[0]) + } + err = b.Merge([]byte(parts[1]), []byte(parts[2]), nil) + case "range-key-set": + if len(parts) < 4 || len(parts) > 5 { + return errors.Errorf("%s expects 3 or 4 arguments", 
parts[0]) + } + var val []byte + if len(parts) == 5 { + val = []byte(parts[4]) + } + err = b.RangeKeySet( + []byte(parts[1]), + []byte(parts[2]), + []byte(parts[3]), + val, + nil) + case "range-key-unset": + if len(parts) != 4 { + return errors.Errorf("%s expects 3 arguments", parts[0]) + } + err = b.RangeKeyUnset( + []byte(parts[1]), + []byte(parts[2]), + []byte(parts[3]), + nil) + case "range-key-del": + if len(parts) != 3 { + return errors.Errorf("%s expects 2 arguments", parts[0]) + } + err = b.RangeKeyDelete( + []byte(parts[1]), + []byte(parts[2]), + nil) + default: + return errors.Errorf("unknown op: %s", parts[0]) + } + if err != nil { + return err + } + } + return nil +} + +func runBuildRemoteCmd(td *datadriven.TestData, d *DB, storage remote.Storage) error { + b := d.NewIndexedBatch() + if err := runBatchDefineCmd(td, b); err != nil { + return err + } + + if len(td.CmdArgs) < 1 { + return errors.New("build : argument missing") + } + path := td.CmdArgs[0].String() + + // Override table format, if provided. 
+ tableFormat := d.opts.FormatMajorVersion.MaxTableFormat() + for _, cmdArg := range td.CmdArgs[1:] { + switch cmdArg.Key { + case "format": + switch cmdArg.Vals[0] { + case "leveldb": + tableFormat = sstable.TableFormatLevelDB + case "rocksdbv2": + tableFormat = sstable.TableFormatRocksDBv2 + case "pebblev1": + tableFormat = sstable.TableFormatPebblev1 + case "pebblev2": + tableFormat = sstable.TableFormatPebblev2 + case "pebblev3": + tableFormat = sstable.TableFormatPebblev3 + case "pebblev4": + tableFormat = sstable.TableFormatPebblev4 + default: + return errors.Errorf("unknown format string %s", cmdArg.Vals[0]) + } + } + } + + writeOpts := d.opts.MakeWriterOptions(0 /* level */, tableFormat) + + f, err := storage.CreateObject(path) + if err != nil { + return err + } + w := sstable.NewWriter(objstorageprovider.NewRemoteWritable(f), writeOpts) + iter := b.newInternalIter(nil) + for key, val := iter.First(); key != nil; key, val = iter.Next() { + tmp := *key + tmp.SetSeqNum(0) + if err := w.Add(tmp, val.InPlaceValue()); err != nil { + return err + } + } + if err := iter.Close(); err != nil { + return err + } + + if rdi := b.newRangeDelIter(nil, math.MaxUint64); rdi != nil { + for s := rdi.First(); s != nil; s = rdi.Next() { + err := rangedel.Encode(s, func(k base.InternalKey, v []byte) error { + k.SetSeqNum(0) + return w.Add(k, v) + }) + if err != nil { + return err + } + } + } + + if rki := b.newRangeKeyIter(nil, math.MaxUint64); rki != nil { + for s := rki.First(); s != nil; s = rki.Next() { + for _, k := range s.Keys { + var err error + switch k.Kind() { + case base.InternalKeyKindRangeKeySet: + err = w.RangeKeySet(s.Start, s.End, k.Suffix, k.Value) + case base.InternalKeyKindRangeKeyUnset: + err = w.RangeKeyUnset(s.Start, s.End, k.Suffix) + case base.InternalKeyKindRangeKeyDelete: + err = w.RangeKeyDelete(s.Start, s.End) + default: + panic("not a range key") + } + if err != nil { + return err + } + } + } + } + + return w.Close() +} + +func runBuildCmd(td 
*datadriven.TestData, d *DB, fs vfs.FS) error { + b := d.NewIndexedBatch() + if err := runBatchDefineCmd(td, b); err != nil { + return err + } + + if len(td.CmdArgs) < 1 { + return errors.New("build : argument missing") + } + path := td.CmdArgs[0].String() + + // Override table format, if provided. + tableFormat := d.opts.FormatMajorVersion.MaxTableFormat() + for _, cmdArg := range td.CmdArgs[1:] { + switch cmdArg.Key { + case "format": + switch cmdArg.Vals[0] { + case "leveldb": + tableFormat = sstable.TableFormatLevelDB + case "rocksdbv2": + tableFormat = sstable.TableFormatRocksDBv2 + case "pebblev1": + tableFormat = sstable.TableFormatPebblev1 + case "pebblev2": + tableFormat = sstable.TableFormatPebblev2 + case "pebblev3": + tableFormat = sstable.TableFormatPebblev3 + case "pebblev4": + tableFormat = sstable.TableFormatPebblev4 + default: + return errors.Errorf("unknown format string %s", cmdArg.Vals[0]) + } + } + } + + writeOpts := d.opts.MakeWriterOptions(0 /* level */, tableFormat) + + f, err := fs.Create(path) + if err != nil { + return err + } + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), writeOpts) + iter := b.newInternalIter(nil) + for key, val := iter.First(); key != nil; key, val = iter.Next() { + tmp := *key + tmp.SetSeqNum(0) + if err := w.Add(tmp, val.InPlaceValue()); err != nil { + return err + } + } + if err := iter.Close(); err != nil { + return err + } + + if rdi := b.newRangeDelIter(nil, math.MaxUint64); rdi != nil { + for s := rdi.First(); s != nil; s = rdi.Next() { + err := rangedel.Encode(s, func(k base.InternalKey, v []byte) error { + k.SetSeqNum(0) + return w.Add(k, v) + }) + if err != nil { + return err + } + } + } + + if rki := b.newRangeKeyIter(nil, math.MaxUint64); rki != nil { + for s := rki.First(); s != nil; s = rki.Next() { + for _, k := range s.Keys { + var err error + switch k.Kind() { + case base.InternalKeyKindRangeKeySet: + err = w.RangeKeySet(s.Start, s.End, k.Suffix, k.Value) + case 
base.InternalKeyKindRangeKeyUnset: + err = w.RangeKeyUnset(s.Start, s.End, k.Suffix) + case base.InternalKeyKindRangeKeyDelete: + err = w.RangeKeyDelete(s.Start, s.End) + default: + panic("not a range key") + } + if err != nil { + return err + } + } + } + } + + return w.Close() +} + +func runCompactCmd(td *datadriven.TestData, d *DB) error { + if len(td.CmdArgs) > 4 { + return errors.Errorf("%s expects at most four arguments", td.Cmd) + } + parts := strings.Split(td.CmdArgs[0].Key, "-") + if len(parts) != 2 { + return errors.Errorf("expected -: %s", td.Input) + } + parallelize := td.HasArg("parallel") + if len(td.CmdArgs) >= 2 && strings.HasPrefix(td.CmdArgs[1].Key, "L") { + levelString := td.CmdArgs[1].String() + iStart := base.MakeInternalKey([]byte(parts[0]), InternalKeySeqNumMax, InternalKeyKindMax) + iEnd := base.MakeInternalKey([]byte(parts[1]), 0, 0) + if levelString[0] != 'L' { + return errors.Errorf("expected L: %s", levelString) + } + level, err := strconv.Atoi(levelString[1:]) + if err != nil { + return err + } + return d.manualCompact(iStart.UserKey, iEnd.UserKey, level, parallelize) + } + return d.Compact([]byte(parts[0]), []byte(parts[1]), parallelize) +} + +// runDBDefineCmd prepares a database state, returning the opened +// database with the initialized state. +// +// The command accepts input describing memtables and sstables to +// construct. Each new table is indicated by a line containing the +// level of the next table to build (eg, "L6"), or "mem" to build +// a memtable. Each subsequent line contains a new key-value pair. +// +// Point keys and range deletions should be encoded as the +// InternalKey's string representation, as understood by +// ParseInternalKey, followed a colon and the corresponding value. +// +// b.SET.50:foo +// c.DEL.20 +// +// Range keys may be encoded by prefixing the line with `rangekey:`, +// followed by the keyspan.Span string representation, as understood +// by keyspan.ParseSpan. 
+// +// rangekey:b-d:{(#5,RANGEKEYSET,@2,foo)} +// +// # Mechanics +// +// runDBDefineCmd works by simulating a flush for every file written. +// Keys are written to a memtable. When a file is complete, the table +// is flushed to physical files through manually invoking runCompaction. +// The resulting version edit is then manipulated to write the files +// to the indicated level. +// +// Because of it's low-level manipulation, runDBDefineCmd does allow the +// creation of invalid database states. If opts.DebugCheck is set, the +// level checker should detect the invalid state. +func runDBDefineCmd(td *datadriven.TestData, opts *Options) (*DB, error) { + opts = opts.EnsureDefaults() + opts.FS = vfs.NewMem() + + var snapshots []uint64 + var levelMaxBytes map[int]int64 + for _, arg := range td.CmdArgs { + switch arg.Key { + case "target-file-sizes": + opts.Levels = make([]LevelOptions, len(arg.Vals)) + for i := range arg.Vals { + size, err := strconv.ParseInt(arg.Vals[i], 10, 64) + if err != nil { + return nil, err + } + opts.Levels[i].TargetFileSize = size + } + case "snapshots": + snapshots = make([]uint64, len(arg.Vals)) + for i := range arg.Vals { + seqNum, err := strconv.ParseUint(arg.Vals[i], 10, 64) + if err != nil { + return nil, err + } + snapshots[i] = seqNum + if i > 0 && snapshots[i] < snapshots[i-1] { + return nil, errors.New("Snapshots must be in ascending order") + } + } + case "lbase-max-bytes": + lbaseMaxBytes, err := strconv.ParseInt(arg.Vals[0], 10, 64) + if err != nil { + return nil, err + } + opts.LBaseMaxBytes = lbaseMaxBytes + case "level-max-bytes": + levelMaxBytes = map[int]int64{} + for i := range arg.Vals { + j := strings.Index(arg.Vals[i], ":") + levelStr := strings.TrimSpace(arg.Vals[i][:j]) + level, err := strconv.Atoi(levelStr[1:]) + if err != nil { + return nil, err + } + size, err := strconv.ParseInt(strings.TrimSpace(arg.Vals[i][j+1:]), 10, 64) + if err != nil { + return nil, err + } + levelMaxBytes[level] = size + } + case 
"auto-compactions": + switch arg.Vals[0] { + case "off": + opts.DisableAutomaticCompactions = true + case "on": + opts.DisableAutomaticCompactions = false + default: + return nil, errors.Errorf("Unrecognized %q %q arg value: %q", td.Cmd, arg.Key, arg.Vals[0]) + } + case "enable-table-stats": + enable, err := strconv.ParseBool(arg.Vals[0]) + if err != nil { + return nil, errors.Errorf("%s: could not parse %q as bool: %s", td.Cmd, arg.Vals[0], err) + } + opts.private.disableTableStats = !enable + case "block-size": + size, err := strconv.Atoi(arg.Vals[0]) + if err != nil { + return nil, err + } + for _, levelOpts := range opts.Levels { + levelOpts.BlockSize = size + } + case "format-major-version": + fmv, err := strconv.Atoi(arg.Vals[0]) + if err != nil { + return nil, err + } + opts.FormatMajorVersion = FormatMajorVersion(fmv) + case "disable-multi-level": + opts.Experimental.MultiLevelCompactionHeuristic = NoMultiLevel{} + } + } + + // This is placed after the argument parsing above, because the arguments + // to define should be parsed even if td.Input is empty. + if td.Input == "" { + // Empty LSM. 
+ d, err := Open("", opts) + if err != nil { + return nil, err + } + d.mu.Lock() + for i := range snapshots { + s := &Snapshot{db: d} + s.seqNum = snapshots[i] + d.mu.snapshots.pushBack(s) + } + for l, maxBytes := range levelMaxBytes { + d.mu.versions.picker.(*compactionPickerByScore).levelMaxBytes[l] = maxBytes + } + d.mu.Unlock() + return d, nil + } + + d, err := Open("", opts) + if err != nil { + return nil, err + } + d.mu.Lock() + d.mu.versions.dynamicBaseLevel = false + for i := range snapshots { + s := &Snapshot{db: d} + s.seqNum = snapshots[i] + d.mu.snapshots.pushBack(s) + } + defer d.mu.Unlock() + + var mem *memTable + var start, end *base.InternalKey + ve := &versionEdit{} + level := -1 + + maybeFlush := func() error { + if level < 0 { + return nil + } + + toFlush := flushableList{{ + flushable: mem, + flushed: make(chan struct{}), + }} + c := newFlush(d.opts, d.mu.versions.currentVersion(), + d.mu.versions.picker.getBaseLevel(), toFlush, time.Now()) + c.disableSpanElision = true + // NB: define allows the test to exactly specify which keys go + // into which sstables. If the test has a small target file + // size to test grandparent limits, etc, the maxOutputFileSize + // can cause splitting /within/ the bounds specified to the + // test. Ignore the target size here, and split only according + // to the user-defined boundaries. 
+ c.maxOutputFileSize = math.MaxUint64 + + newVE, _, _, err := d.runCompaction(0, c) + if err != nil { + return err + } + largestSeqNum := d.mu.versions.logSeqNum.Load() + for _, f := range newVE.NewFiles { + if start != nil { + f.Meta.SmallestPointKey = *start + f.Meta.Smallest = *start + } + if end != nil { + f.Meta.LargestPointKey = *end + f.Meta.Largest = *end + } + if largestSeqNum <= f.Meta.LargestSeqNum { + largestSeqNum = f.Meta.LargestSeqNum + 1 + } + ve.NewFiles = append(ve.NewFiles, newFileEntry{ + Level: level, + Meta: f.Meta, + }) + } + // The committed keys were never written to the WAL, so neither + // the logSeqNum nor the commit pipeline's visibleSeqNum have + // been ratcheted. Manually ratchet them to the largest sequence + // number committed to ensure iterators opened from the database + // correctly observe the committed keys. + if d.mu.versions.logSeqNum.Load() < largestSeqNum { + d.mu.versions.logSeqNum.Store(largestSeqNum) + } + if d.mu.versions.visibleSeqNum.Load() < largestSeqNum { + d.mu.versions.visibleSeqNum.Store(largestSeqNum) + } + level = -1 + return nil + } + + // Example, a-c. + parseMeta := func(s string) (*fileMetadata, error) { + parts := strings.Split(s, "-") + if len(parts) != 2 { + return nil, errors.Errorf("malformed table spec: %s", s) + } + m := (&fileMetadata{}).ExtendPointKeyBounds( + opts.Comparer.Compare, + InternalKey{UserKey: []byte(parts[0])}, + InternalKey{UserKey: []byte(parts[1])}, + ) + m.InitPhysicalBacking() + return m, nil + } + + // Example, compact: a-c. 
+ parseCompaction := func(outputLevel int, s string) (*compaction, error) { + m, err := parseMeta(s[len("compact:"):]) + if err != nil { + return nil, err + } + c := &compaction{ + inputs: []compactionLevel{{}, {level: outputLevel}}, + smallest: m.Smallest, + largest: m.Largest, + } + c.startLevel, c.outputLevel = &c.inputs[0], &c.inputs[1] + return c, nil + } + + for _, line := range strings.Split(td.Input, "\n") { + fields := strings.Fields(line) + if len(fields) > 0 { + switch fields[0] { + case "mem": + if err := maybeFlush(); err != nil { + return nil, err + } + // Add a memtable layer. + if !d.mu.mem.mutable.empty() { + d.mu.mem.mutable = newMemTable(memTableOptions{Options: d.opts}) + entry := d.newFlushableEntry(d.mu.mem.mutable, 0, 0) + entry.readerRefs.Add(1) + d.mu.mem.queue = append(d.mu.mem.queue, entry) + d.updateReadStateLocked(nil) + } + mem = d.mu.mem.mutable + start, end = nil, nil + fields = fields[1:] + case "L0", "L1", "L2", "L3", "L4", "L5", "L6": + if err := maybeFlush(); err != nil { + return nil, err + } + var err error + if level, err = strconv.Atoi(fields[0][1:]); err != nil { + return nil, err + } + fields = fields[1:] + start, end = nil, nil + boundFields := 0 + for _, field := range fields { + toBreak := false + switch { + case strings.HasPrefix(field, "start="): + ikey := base.ParseInternalKey(strings.TrimPrefix(field, "start=")) + start = &ikey + boundFields++ + case strings.HasPrefix(field, "end="): + ikey := base.ParseInternalKey(strings.TrimPrefix(field, "end=")) + end = &ikey + boundFields++ + default: + toBreak = true + } + if toBreak { + break + } + } + fields = fields[boundFields:] + mem = newMemTable(memTableOptions{Options: d.opts}) + } + } + + for _, data := range fields { + i := strings.Index(data, ":") + // Define in-progress compactions. 
+ if data[:i] == "compact" { + c, err := parseCompaction(level, data) + if err != nil { + return nil, err + } + d.mu.compact.inProgress[c] = struct{}{} + continue + } + if data[:i] == "rangekey" { + span := keyspan.ParseSpan(data[i:]) + err := rangekey.Encode(&span, func(k base.InternalKey, v []byte) error { + return mem.set(k, v) + }) + if err != nil { + return nil, err + } + continue + } + key := base.ParseInternalKey(data[:i]) + valueStr := data[i+1:] + value := []byte(valueStr) + var randBytes int + if n, err := fmt.Sscanf(valueStr, "", &randBytes); err == nil && n == 1 { + value = make([]byte, randBytes) + rnd := rand.New(rand.NewSource(int64(key.SeqNum()))) + if _, err := rnd.Read(value[:]); err != nil { + return nil, err + } + } + if err := mem.set(key, value); err != nil { + return nil, err + } + } + } + + if err := maybeFlush(); err != nil { + return nil, err + } + + if len(ve.NewFiles) > 0 { + jobID := d.mu.nextJobID + d.mu.nextJobID++ + d.mu.versions.logLock() + if err := d.mu.versions.logAndApply(jobID, ve, newFileMetrics(ve.NewFiles), false, func() []compactionInfo { + return nil + }); err != nil { + return nil, err + } + d.updateReadStateLocked(nil) + d.updateTableStatsLocked(ve.NewFiles) + } + + for l, maxBytes := range levelMaxBytes { + d.mu.versions.picker.(*compactionPickerByScore).levelMaxBytes[l] = maxBytes + } + + return d, nil +} + +func runTableStatsCmd(td *datadriven.TestData, d *DB) string { + u, err := strconv.ParseUint(strings.TrimSpace(td.Input), 10, 64) + if err != nil { + return err.Error() + } + fileNum := base.FileNum(u) + + d.mu.Lock() + defer d.mu.Unlock() + v := d.mu.versions.currentVersion() + for _, levelMetadata := range v.Levels { + iter := levelMetadata.Iter() + for f := iter.First(); f != nil; f = iter.Next() { + if f.FileNum != fileNum { + continue + } + + if !f.StatsValid() { + d.waitTableStats() + } + + var b bytes.Buffer + fmt.Fprintf(&b, "num-entries: %d\n", f.Stats.NumEntries) + fmt.Fprintf(&b, "num-deletions: %d\n", 
f.Stats.NumDeletions) + fmt.Fprintf(&b, "num-range-key-sets: %d\n", f.Stats.NumRangeKeySets) + fmt.Fprintf(&b, "point-deletions-bytes-estimate: %d\n", f.Stats.PointDeletionsBytesEstimate) + fmt.Fprintf(&b, "range-deletions-bytes-estimate: %d\n", f.Stats.RangeDeletionsBytesEstimate) + return b.String() + } + } + return "(not found)" +} + +func runTableFileSizesCmd(td *datadriven.TestData, d *DB) string { + d.mu.Lock() + defer d.mu.Unlock() + return runVersionFileSizes(d.mu.versions.currentVersion()) +} + +func runVersionFileSizes(v *version) string { + var buf bytes.Buffer + for l, levelMetadata := range v.Levels { + if levelMetadata.Empty() { + continue + } + fmt.Fprintf(&buf, "L%d:\n", l) + iter := levelMetadata.Iter() + for f := iter.First(); f != nil; f = iter.Next() { + fmt.Fprintf(&buf, " %s: %d bytes (%s)", f, f.Size, humanize.Bytes.Uint64(f.Size)) + if f.IsCompacting() { + fmt.Fprintf(&buf, " (IsCompacting)") + } + fmt.Fprintln(&buf) + } + } + return buf.String() +} + +// Prints some metadata about some sstable which is currently in the latest +// version. +func runMetadataCommand(t *testing.T, td *datadriven.TestData, d *DB) string { + var file int + td.ScanArgs(t, "file", &file) + var m *fileMetadata + d.mu.Lock() + currVersion := d.mu.versions.currentVersion() + for _, level := range currVersion.Levels { + lIter := level.Iter() + for f := lIter.First(); f != nil; f = lIter.Next() { + if f.FileNum == base.FileNum(uint64(file)) { + m = f + break + } + } + } + d.mu.Unlock() + var buf bytes.Buffer + // Add more metadata as needed. + fmt.Fprintf(&buf, "size: %d\n", m.Size) + return buf.String() +} + +func runSSTablePropertiesCmd(t *testing.T, td *datadriven.TestData, d *DB) string { + var file int + td.ScanArgs(t, "file", &file) + + // See if we can grab the FileMetadata associated with the file. This is needed + // to easily construct virtual sstable properties. 
+ var m *fileMetadata + d.mu.Lock() + currVersion := d.mu.versions.currentVersion() + for _, level := range currVersion.Levels { + lIter := level.Iter() + for f := lIter.First(); f != nil; f = lIter.Next() { + if f.FileNum == base.FileNum(uint64(file)) { + m = f + break + } + } + } + d.mu.Unlock() + + // Note that m can be nil here if the sstable exists in the file system, but + // not in the lsm. If m is nil just assume that file is not virtual. + + backingFileNum := base.FileNum(uint64(file)).DiskFileNum() + if m != nil { + backingFileNum = m.FileBacking.DiskFileNum + } + fileName := base.MakeFilename(fileTypeTable, backingFileNum) + f, err := d.opts.FS.Open(fileName) + if err != nil { + return err.Error() + } + readable, err := sstable.NewSimpleReadable(f) + if err != nil { + return err.Error() + } + // TODO(bananabrick): cacheOpts is used to set the file number on a Reader, + // and virtual sstables expect this file number to be set. Split out the + // opts into fileNum opts, and cache opts. 
+ cacheOpts := private.SSTableCacheOpts(0, backingFileNum).(sstable.ReaderOption) + r, err := sstable.NewReader(readable, d.opts.MakeReaderOptions(), cacheOpts) + if err != nil { + return err.Error() + } + defer r.Close() + + var v sstable.VirtualReader + props := r.Properties.String() + if m != nil && m.Virtual { + v = sstable.MakeVirtualReader(r, m.VirtualMeta(), false /* isForeign */) + props = v.Properties.String() + } + if len(td.Input) == 0 { + return props + } + var buf bytes.Buffer + propsSlice := strings.Split(props, "\n") + for _, requestedProp := range strings.Split(td.Input, "\n") { + fmt.Fprintf(&buf, "%s:\n", requestedProp) + for _, prop := range propsSlice { + if strings.Contains(prop, requestedProp) { + fmt.Fprintf(&buf, " %s\n", prop) + } + } + } + return buf.String() +} + +func runPopulateCmd(t *testing.T, td *datadriven.TestData, b *Batch) { + var maxKeyLength, valLength int + var timestamps []int + td.ScanArgs(t, "keylen", &maxKeyLength) + td.MaybeScanArgs(t, "timestamps", ×tamps) + td.MaybeScanArgs(t, "vallen", &valLength) + // Default to writing timestamps @1. + if len(timestamps) == 0 { + timestamps = append(timestamps, 1) + } + + ks := testkeys.Alpha(maxKeyLength) + buf := make([]byte, ks.MaxLen()+testkeys.MaxSuffixLen) + vbuf := make([]byte, valLength) + for i := int64(0); i < ks.Count(); i++ { + for _, ts := range timestamps { + n := testkeys.WriteKeyAt(buf, ks, i, int64(ts)) + + // Default to using the key as the value, but if the user provided + // the vallen argument, generate a random value of the specified + // length. + value := buf[:n] + if valLength > 0 { + _, err := crand.Read(vbuf) + require.NoError(t, err) + value = vbuf + } + require.NoError(t, b.Set(buf[:n], value, nil)) + } + } +} + +// waitTableStats waits until all new files' statistics have been loaded. It's +// used in tests. The d.mu mutex must be locked while calling this method. 
+func (d *DB) waitTableStats() { + for d.mu.tableStats.loading || len(d.mu.tableStats.pending) > 0 { + d.mu.tableStats.cond.Wait() + } +} + +func runIngestAndExciseCmd(td *datadriven.TestData, d *DB, fs vfs.FS) error { + var exciseSpan KeyRange + paths := make([]string, 0, len(td.CmdArgs)) + for i, arg := range td.CmdArgs { + switch td.CmdArgs[i].Key { + case "excise": + if len(td.CmdArgs[i].Vals) != 1 { + return errors.New("expected 2 values for excise separated by -, eg. ingest-and-excise foo1 excise=\"start-end\"") + } + fields := strings.Split(td.CmdArgs[i].Vals[0], "-") + if len(fields) != 2 { + return errors.New("expected 2 values for excise separated by -, eg. ingest-and-excise foo1 excise=\"start-end\"") + } + exciseSpan.Start = []byte(fields[0]) + exciseSpan.End = []byte(fields[1]) + default: + paths = append(paths, arg.String()) + } + } + + if _, err := d.IngestAndExcise(paths, nil /* shared */, exciseSpan); err != nil { + return err + } + return nil +} + +func runIngestCmd(td *datadriven.TestData, d *DB, fs vfs.FS) error { + paths := make([]string, 0, len(td.CmdArgs)) + for _, arg := range td.CmdArgs { + paths = append(paths, arg.String()) + } + + if err := d.Ingest(paths); err != nil { + return err + } + return nil +} + +func runIngestExternalCmd(td *datadriven.TestData, d *DB, locator string) error { + external := make([]ExternalFile, 0) + for _, arg := range strings.Split(td.Input, "\n") { + fields := strings.Split(arg, ",") + if len(fields) != 4 { + return errors.New("usage: path,size,smallest,largest") + } + ef := ExternalFile{} + ef.Locator = remote.Locator(locator) + ef.ObjName = fields[0] + sizeInt, err := strconv.Atoi(fields[1]) + if err != nil { + return err + } + ef.Size = uint64(sizeInt) + ef.SmallestUserKey = []byte(fields[2]) + ef.LargestUserKey = []byte(fields[3]) + ef.HasPointKey = true + external = append(external, ef) + } + + if _, err := d.IngestExternalFiles(external); err != nil { + return err + } + return nil +} + +func 
runForceIngestCmd(td *datadriven.TestData, d *DB) error { + var paths []string + var level int + for _, arg := range td.CmdArgs { + switch arg.Key { + case "paths": + paths = append(paths, arg.Vals...) + case "level": + var err error + level, err = strconv.Atoi(arg.Vals[0]) + if err != nil { + return err + } + } + } + _, err := d.ingest(paths, func( + tableNewIters, + keyspan.TableNewSpanIter, + IterOptions, + *Comparer, + *version, + int, + map[*compaction]struct{}, + *fileMetadata, + bool, + ) (int, *fileMetadata, error) { + return level, nil, nil + }, nil /* shared */, KeyRange{}, nil /* external */) + return err +} + +func runLSMCmd(td *datadriven.TestData, d *DB) string { + d.mu.Lock() + defer d.mu.Unlock() + if td.HasArg("verbose") { + return d.mu.versions.currentVersion().DebugString(d.opts.Comparer.FormatKey) + } + return d.mu.versions.currentVersion().String() +} + +func parseDBOptionsArgs(opts *Options, args []datadriven.CmdArg) error { + for _, cmdArg := range args { + switch cmdArg.Key { + case "auto-compactions": + switch cmdArg.Vals[0] { + case "off": + opts.DisableAutomaticCompactions = true + case "on": + opts.DisableAutomaticCompactions = false + default: + return errors.Errorf("Unrecognized %q arg value: %q", cmdArg.Key, cmdArg.Vals[0]) + } + case "inject-errors": + injs := make([]errorfs.Injector, len(cmdArg.Vals)) + for i := 0; i < len(cmdArg.Vals); i++ { + inj, err := errorfs.ParseDSL(cmdArg.Vals[i]) + if err != nil { + return err + } + injs[i] = inj + } + opts.FS = errorfs.Wrap(opts.FS, errorfs.Any(injs...)) + case "enable-table-stats": + enable, err := strconv.ParseBool(cmdArg.Vals[0]) + if err != nil { + return errors.Errorf("%s: could not parse %q as bool: %s", cmdArg.Key, cmdArg.Vals[0], err) + } + opts.private.disableTableStats = !enable + case "format-major-version": + v, err := strconv.Atoi(cmdArg.Vals[0]) + if err != nil { + return err + } + // Override the DB version. 
+ opts.FormatMajorVersion = FormatMajorVersion(v) + case "block-size": + v, err := strconv.Atoi(cmdArg.Vals[0]) + if err != nil { + return err + } + for i := range opts.Levels { + opts.Levels[i].BlockSize = v + } + case "index-block-size": + v, err := strconv.Atoi(cmdArg.Vals[0]) + if err != nil { + return err + } + for i := range opts.Levels { + opts.Levels[i].IndexBlockSize = v + } + case "target-file-size": + v, err := strconv.Atoi(cmdArg.Vals[0]) + if err != nil { + return err + } + for i := range opts.Levels { + opts.Levels[i].TargetFileSize = int64(v) + } + case "bloom-bits-per-key": + v, err := strconv.Atoi(cmdArg.Vals[0]) + if err != nil { + return err + } + fp := bloom.FilterPolicy(v) + opts.Filters = map[string]FilterPolicy{fp.Name(): fp} + for i := range opts.Levels { + opts.Levels[i].FilterPolicy = fp + } + case "merger": + switch cmdArg.Vals[0] { + case "appender": + opts.Merger = base.DefaultMerger + default: + return errors.Newf("unrecognized Merger %q\n", cmdArg.Vals[0]) + } + } + } + return nil +} diff --git a/pebble/db.go b/pebble/db.go new file mode 100644 index 0000000..ea23aaa --- /dev/null +++ b/pebble/db.go @@ -0,0 +1,3050 @@ +// Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +// Package pebble provides an ordered key/value store. 
+package pebble // import "github.com/cockroachdb/pebble" + +import ( + "context" + "fmt" + "io" + "os" + "strconv" + "sync" + "sync/atomic" + "time" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/arenaskl" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/invalidating" + "github.com/cockroachdb/pebble/internal/invariants" + "github.com/cockroachdb/pebble/internal/keyspan" + "github.com/cockroachdb/pebble/internal/manifest" + "github.com/cockroachdb/pebble/internal/manual" + "github.com/cockroachdb/pebble/objstorage" + "github.com/cockroachdb/pebble/objstorage/remote" + "github.com/cockroachdb/pebble/rangekey" + "github.com/cockroachdb/pebble/record" + "github.com/cockroachdb/pebble/sstable" + "github.com/cockroachdb/pebble/vfs" + "github.com/cockroachdb/pebble/vfs/atomicfs" + "github.com/cockroachdb/tokenbucket" + "github.com/prometheus/client_golang/prometheus" +) + +const ( + // minTableCacheSize is the minimum size of the table cache, for a single db. + minTableCacheSize = 64 + + // numNonTableCacheFiles is an approximation for the number of files + // that we don't use for table caches, for a given db. + numNonTableCacheFiles = 10 +) + +var ( + // ErrNotFound is returned when a get operation does not find the requested + // key. + ErrNotFound = base.ErrNotFound + // ErrClosed is panicked when an operation is performed on a closed snapshot or + // DB. Use errors.Is(err, ErrClosed) to check for this error. + ErrClosed = errors.New("pebble: closed") + // ErrReadOnly is returned when a write operation is performed on a read-only + // database. + ErrReadOnly = errors.New("pebble: read-only") + // errNoSplit indicates that the user is trying to perform a range key + // operation but the configured Comparer does not provide a Split + // implementation. + errNoSplit = errors.New("pebble: Comparer.Split required for range key operations") +) + +// Reader is a readable key/value store. 
+// +// It is safe to call Get and NewIter from concurrent goroutines. +type Reader interface { + // Get gets the value for the given key. It returns ErrNotFound if the DB + // does not contain the key. + // + // The caller should not modify the contents of the returned slice, but it is + // safe to modify the contents of the argument after Get returns. The + // returned slice will remain valid until the returned Closer is closed. On + // success, the caller MUST call closer.Close() or a memory leak will occur. + Get(key []byte) (value []byte, closer io.Closer, err error) + + // NewIter returns an iterator that is unpositioned (Iterator.Valid() will + // return false). The iterator can be positioned via a call to SeekGE, + // SeekLT, First or Last. + NewIter(o *IterOptions) (*Iterator, error) + + // NewIterWithContext is like NewIter, and additionally accepts a context + // for tracing. + NewIterWithContext(ctx context.Context, o *IterOptions) (*Iterator, error) + + // Close closes the Reader. It may or may not close any underlying io.Reader + // or io.Writer, depending on how the DB was created. + // + // It is not safe to close a DB until all outstanding iterators are closed. + // It is valid to call Close multiple times. Other methods should not be + // called after the DB has been closed. + Close() error +} + +// Writer is a writable key/value store. +// +// Goroutine safety is dependent on the specific implementation. +type Writer interface { + // Apply the operations contained in the batch to the DB. + // + // It is safe to modify the contents of the arguments after Apply returns. + Apply(batch *Batch, o *WriteOptions) error + + // Delete deletes the value for the given key. Deletes are blind all will + // succeed even if the given key does not exist. + // + // It is safe to modify the contents of the arguments after Delete returns. 
+ Delete(key []byte, o *WriteOptions) error + + // DeleteSized behaves identically to Delete, but takes an additional + // argument indicating the size of the value being deleted. DeleteSized + // should be preferred when the caller has the expectation that there exists + // a single internal KV pair for the key (eg, the key has not been + // overwritten recently), and the caller knows the size of its value. + // + // DeleteSized will record the value size within the tombstone and use it to + // inform compaction-picking heuristics which strive to reduce space + // amplification in the LSM. This "calling your shot" mechanic allows the + // storage engine to more accurately estimate and reduce space + // amplification. + // + // It is safe to modify the contents of the arguments after DeleteSized + // returns. + DeleteSized(key []byte, valueSize uint32, _ *WriteOptions) error + + // SingleDelete is similar to Delete in that it deletes the value for the given key. Like Delete, + // it is a blind operation that will succeed even if the given key does not exist. + // + // WARNING: Undefined (non-deterministic) behavior will result if a key is overwritten and + // then deleted using SingleDelete. The record may appear deleted immediately, but be + // resurrected at a later time after compactions have been performed. Or the record may + // be deleted permanently. A Delete operation lays down a "tombstone" which shadows all + // previous versions of a key. The SingleDelete operation is akin to "anti-matter" and will + // only delete the most recently written version for a key. These different semantics allow + // the DB to avoid propagating a SingleDelete operation during a compaction as soon as the + // corresponding Set operation is encountered. These semantics require extreme care to handle + // properly. Only use if you have a workload where the performance gain is critical and you + // can guarantee that a record is written once and then deleted once. 
+ // + // SingleDelete is internally transformed into a Delete if the most recent record for a key is either + // a Merge or Delete record. + // + // It is safe to modify the contents of the arguments after SingleDelete returns. + SingleDelete(key []byte, o *WriteOptions) error + + // DeleteRange deletes all of the point keys (and values) in the range + // [start,end) (inclusive on start, exclusive on end). DeleteRange does NOT + // delete overlapping range keys (eg, keys set via RangeKeySet). + // + // It is safe to modify the contents of the arguments after DeleteRange + // returns. + DeleteRange(start, end []byte, o *WriteOptions) error + + // LogData adds the specified to the batch. The data will be written to the + // WAL, but not added to memtables or sstables. Log data is never indexed, + // which makes it useful for testing WAL performance. + // + // It is safe to modify the contents of the argument after LogData returns. + LogData(data []byte, opts *WriteOptions) error + + // Merge merges the value for the given key. The details of the merge are + // dependent upon the configured merge operation. + // + // It is safe to modify the contents of the arguments after Merge returns. + Merge(key, value []byte, o *WriteOptions) error + + // Set sets the value for the given key. It overwrites any previous value + // for that key; a DB is not a multi-map. + // + // It is safe to modify the contents of the arguments after Set returns. + Set(key, value []byte, o *WriteOptions) error + + // RangeKeySet sets a range key mapping the key range [start, end) at the MVCC + // timestamp suffix to value. The suffix is optional. If any portion of the key + // range [start, end) is already set by a range key with the same suffix value, + // RangeKeySet overrides it. + // + // It is safe to modify the contents of the arguments after RangeKeySet returns. 
+ RangeKeySet(start, end, suffix, value []byte, opts *WriteOptions) error + + // RangeKeyUnset removes a range key mapping the key range [start, end) at the + // MVCC timestamp suffix. The suffix may be omitted to remove an unsuffixed + // range key. RangeKeyUnset only removes portions of range keys that fall within + // the [start, end) key span, and only range keys with suffixes that exactly + // match the unset suffix. + // + // It is safe to modify the contents of the arguments after RangeKeyUnset + // returns. + RangeKeyUnset(start, end, suffix []byte, opts *WriteOptions) error + + // RangeKeyDelete deletes all of the range keys in the range [start,end) + // (inclusive on start, exclusive on end). It does not delete point keys (for + // that use DeleteRange). RangeKeyDelete removes all range keys within the + // bounds, including those with or without suffixes. + // + // It is safe to modify the contents of the arguments after RangeKeyDelete + // returns. + RangeKeyDelete(start, end []byte, opts *WriteOptions) error +} + +// CPUWorkHandle represents a handle used by the CPUWorkPermissionGranter API. +type CPUWorkHandle interface { + // Permitted indicates whether Pebble can use additional CPU resources. + Permitted() bool +} + +// CPUWorkPermissionGranter is used to request permission to opportunistically +// use additional CPUs to speed up internal background work. +type CPUWorkPermissionGranter interface { + // GetPermission returns a handle regardless of whether permission is granted + // or not. In the latter case, the handle is only useful for recording + // the CPU time actually spent on this calling goroutine. + GetPermission(time.Duration) CPUWorkHandle + // CPUWorkDone must be called regardless of whether CPUWorkHandle.Permitted + // returns true or false. + CPUWorkDone(CPUWorkHandle) +} + +// Use a default implementation for the CPU work granter to avoid excessive nil +// checks in the code. 
+type defaultCPUWorkHandle struct{} + +func (d defaultCPUWorkHandle) Permitted() bool { + return false +} + +type defaultCPUWorkGranter struct{} + +func (d defaultCPUWorkGranter) GetPermission(_ time.Duration) CPUWorkHandle { + return defaultCPUWorkHandle{} +} + +func (d defaultCPUWorkGranter) CPUWorkDone(_ CPUWorkHandle) {} + +// DB provides a concurrent, persistent ordered key/value store. +// +// A DB's basic operations (Get, Set, Delete) should be self-explanatory. Get +// and Delete will return ErrNotFound if the requested key is not in the store. +// Callers are free to ignore this error. +// +// A DB also allows for iterating over the key/value pairs in key order. If d +// is a DB, the code below prints all key/value pairs whose keys are 'greater +// than or equal to' k: +// +// iter := d.NewIter(readOptions) +// for iter.SeekGE(k); iter.Valid(); iter.Next() { +// fmt.Printf("key=%q value=%q\n", iter.Key(), iter.Value()) +// } +// return iter.Close() +// +// The Options struct holds the optional parameters for the DB, including a +// Comparer to define a 'less than' relationship over keys. It is always valid +// to pass a nil *Options, which means to use the default parameter values. Any +// zero field of a non-nil *Options also means to use the default value for +// that parameter. Thus, the code below uses a custom Comparer, but the default +// values for every other parameter: +// +// db := pebble.Open(&Options{ +// Comparer: myComparer, +// }) +type DB struct { + // The count and size of referenced memtables. This includes memtables + // present in DB.mu.mem.queue, as well as memtables that have been flushed + // but are still referenced by an inuse readState, as well as up to one + // memTable waiting to be reused and stored in d.memTableRecycle. + memTableCount atomic.Int64 + memTableReserved atomic.Int64 // number of bytes reserved in the cache for memtables + // memTableRecycle holds a pointer to an obsolete memtable. 
The next + // memtable allocation will reuse this memtable if it has not already been + // recycled. + memTableRecycle atomic.Pointer[memTable] + + // The size of the current log file (i.e. db.mu.log.queue[len(queue)-1]. + logSize atomic.Uint64 + + // The number of bytes available on disk. + diskAvailBytes atomic.Uint64 + + cacheID uint64 + dirname string + walDirname string + opts *Options + cmp Compare + equal Equal + merge Merge + split Split + abbreviatedKey AbbreviatedKey + // The threshold for determining when a batch is "large" and will skip being + // inserted into a memtable. + largeBatchThreshold uint64 + // The current OPTIONS file number. + optionsFileNum base.DiskFileNum + // The on-disk size of the current OPTIONS file. + optionsFileSize uint64 + + // objProvider is used to access and manage SSTs. + objProvider objstorage.Provider + + fileLock *Lock + dataDir vfs.File + walDir vfs.File + + tableCache *tableCacheContainer + newIters tableNewIters + tableNewRangeKeyIter keyspan.TableNewSpanIter + + commit *commitPipeline + + // readState provides access to the state needed for reading without needing + // to acquire DB.mu. + readState struct { + sync.RWMutex + val *readState + } + // logRecycler holds a set of log file numbers that are available for + // reuse. Writing to a recycled log file is faster than to a new log file on + // some common filesystems (xfs, and ext3/4) due to avoiding metadata + // updates. + logRecycler logRecycler + + closed *atomic.Value + closedCh chan struct{} + + cleanupManager *cleanupManager + + // During an iterator close, we may asynchronously schedule read compactions. + // We want to wait for those goroutines to finish, before closing the DB. + // compactionShedulers.Wait() should not be called while the DB.mu is held. + compactionSchedulers sync.WaitGroup + + // The main mutex protecting internal DB state. This mutex encompasses many + // fields because those fields need to be accessed and updated atomically. 
In + // particular, the current version, log.*, mem.*, and snapshot list need to + // be accessed and updated atomically during compaction. + // + // Care is taken to avoid holding DB.mu during IO operations. Accomplishing + // this sometimes requires releasing DB.mu in a method that was called with + // it held. See versionSet.logAndApply() and DB.makeRoomForWrite() for + // examples. This is a common pattern, so be careful about expectations that + // DB.mu will be held continuously across a set of calls. + mu struct { + sync.Mutex + + formatVers struct { + // vers is the database's current format major version. + // Backwards-incompatible features are gated behind new + // format major versions and not enabled until a database's + // version is ratcheted upwards. + // + // Although this is under the `mu` prefix, readers may read vers + // atomically without holding d.mu. Writers must only write to this + // value through finalizeFormatVersUpgrade which requires d.mu is + // held. + vers atomic.Uint64 + // marker is the atomic marker for the format major version. + // When a database's version is ratcheted upwards, the + // marker is moved in order to atomically record the new + // version. + marker *atomicfs.Marker + // ratcheting when set to true indicates that the database is + // currently in the process of ratcheting the format major version + // to vers + 1. As a part of ratcheting the format major version, + // migrations may drop and re-acquire the mutex. + ratcheting bool + } + + // The ID of the next job. Job IDs are passed to event listener + // notifications and act as a mechanism for tying together the events and + // log messages for a single job such as a flush, compaction, or file + // ingestion. Job IDs are not serialized to disk or used for correctness. + nextJobID int + + // The collection of immutable versions and state about the log and visible + // sequence numbers. 
Use the pointer here to ensure the atomic fields in + // version set are aligned properly. + versions *versionSet + + log struct { + // The queue of logs, containing both flushed and unflushed logs. The + // flushed logs will be a prefix, the unflushed logs a suffix. The + // delimeter between flushed and unflushed logs is + // versionSet.minUnflushedLogNum. + queue []fileInfo + // The number of input bytes to the log. This is the raw size of the + // batches written to the WAL, without the overhead of the record + // envelopes. + bytesIn uint64 + // The LogWriter is protected by commitPipeline.mu. This allows log + // writes to be performed without holding DB.mu, but requires both + // commitPipeline.mu and DB.mu to be held when rotating the WAL/memtable + // (i.e. makeRoomForWrite). + *record.LogWriter + // Can be nil. + metrics struct { + fsyncLatency prometheus.Histogram + record.LogWriterMetrics + } + registerLogWriterForTesting func(w *record.LogWriter) + } + + mem struct { + // The current mutable memTable. + mutable *memTable + // Queue of flushables (the mutable memtable is at end). Elements are + // added to the end of the slice and removed from the beginning. Once an + // index is set it is never modified making a fixed slice immutable and + // safe for concurrent reads. + queue flushableList + // nextSize is the size of the next memtable. The memtable size starts at + // min(256KB,Options.MemTableSize) and doubles each time a new memtable + // is allocated up to Options.MemTableSize. This reduces the memory + // footprint of memtables when lots of DB instances are used concurrently + // in test environments. + nextSize uint64 + } + + compact struct { + // Condition variable used to signal when a flush or compaction has + // completed. Used by the write-stall mechanism to wait for the stall + // condition to clear. See DB.makeRoomForWrite(). + cond sync.Cond + // True when a flush is in progress. + flushing bool + // The number of ongoing compactions. 
+ compactingCount int + // The list of deletion hints, suggesting ranges for delete-only + // compactions. + deletionHints []deleteCompactionHint + // The list of manual compactions. The next manual compaction to perform + // is at the start of the list. New entries are added to the end. + manual []*manualCompaction + // inProgress is the set of in-progress flushes and compactions. + // It's used in the calculation of some metrics and to initialize L0 + // sublevels' state. Some of the compactions contained within this + // map may have already committed an edit to the version but are + // lingering performing cleanup, like deleting obsolete files. + inProgress map[*compaction]struct{} + + // rescheduleReadCompaction indicates to an iterator that a read compaction + // should be scheduled. + rescheduleReadCompaction bool + + // readCompactions is a readCompactionQueue which keeps track of the + // compactions which we might have to perform. + readCompactions readCompactionQueue + + // The cumulative duration of all completed compactions since Open. + // Does not include flushes. + duration time.Duration + // Flush throughput metric. + flushWriteThroughput ThroughputMetric + // The idle start time for the flush "loop", i.e., when the flushing + // bool above transitions to false. + noOngoingFlushStartTime time.Time + } + + // Non-zero when file cleaning is disabled. The disabled count acts as a + // reference count to prohibit file cleaning. See + // DB.{disable,Enable}FileDeletions(). + disableFileDeletions int + + snapshots struct { + // The list of active snapshots. + snapshotList + + // The cumulative count and size of snapshot-pinned keys written to + // sstables. + cumulativePinnedCount uint64 + cumulativePinnedSize uint64 + } + + tableStats struct { + // Condition variable used to signal the completion of a + // job to collect table stats. + cond sync.Cond + // True when a stat collection operation is in progress. 
+ loading bool + // True if stat collection has loaded statistics for all tables + // other than those listed explicitly in pending. This flag starts + // as false when a database is opened and flips to true once stat + // collection has caught up. + loadedInitial bool + // A slice of files for which stats have not been computed. + // Compactions, ingests, flushes append files to be processed. An + // active stat collection goroutine clears the list and processes + // them. + pending []manifest.NewFileEntry + } + + tableValidation struct { + // cond is a condition variable used to signal the completion of a + // job to validate one or more sstables. + cond sync.Cond + // pending is a slice of metadata for sstables waiting to be + // validated. Only physical sstables should be added to the pending + // queue. + pending []newFileEntry + // validating is set to true when validation is running. + validating bool + } + } + + // Normally equal to time.Now() but may be overridden in tests. + timeNow func() time.Time + // the time at database Open; may be used to compute metrics like effective + // compaction concurrency + openedAt time.Time +} + +var _ Reader = (*DB)(nil) +var _ Writer = (*DB)(nil) + +// TestOnlyWaitForCleaning MUST only be used in tests. +func (d *DB) TestOnlyWaitForCleaning() { + d.cleanupManager.Wait() +} + +// Get gets the value for the given key. It returns ErrNotFound if the DB does +// not contain the key. +// +// The caller should not modify the contents of the returned slice, but it is +// safe to modify the contents of the argument after Get returns. The returned +// slice will remain valid until the returned Closer is closed. On success, the +// caller MUST call closer.Close() or a memory leak will occur. 
+func (d *DB) Get(key []byte) ([]byte, io.Closer, error) { + return d.getInternal(key, nil /* batch */, nil /* snapshot */) +} + +type getIterAlloc struct { + dbi Iterator + keyBuf []byte + get getIter +} + +var getIterAllocPool = sync.Pool{ + New: func() interface{} { + return &getIterAlloc{} + }, +} + +func (d *DB) getInternal(key []byte, b *Batch, s *Snapshot) ([]byte, io.Closer, error) { + if err := d.closed.Load(); err != nil { + panic(err) + } + + // Grab and reference the current readState. This prevents the underlying + // files in the associated version from being deleted if there is a current + // compaction. The readState is unref'd by Iterator.Close(). + readState := d.loadReadState() + + // Determine the seqnum to read at after grabbing the read state (current and + // memtables) above. + var seqNum uint64 + if s != nil { + seqNum = s.seqNum + } else { + seqNum = d.mu.versions.visibleSeqNum.Load() + } + + buf := getIterAllocPool.Get().(*getIterAlloc) + + get := &buf.get + *get = getIter{ + logger: d.opts.Logger, + comparer: d.opts.Comparer, + newIters: d.newIters, + snapshot: seqNum, + key: key, + batch: b, + mem: readState.memtables, + l0: readState.current.L0SublevelFiles, + version: readState.current, + } + + // Strip off memtables which cannot possibly contain the seqNum being read + // at. + for len(get.mem) > 0 { + n := len(get.mem) + if logSeqNum := get.mem[n-1].logSeqNum; logSeqNum < seqNum { + break + } + get.mem = get.mem[:n-1] + } + + i := &buf.dbi + pointIter := get + *i = Iterator{ + ctx: context.Background(), + getIterAlloc: buf, + iter: pointIter, + pointIter: pointIter, + merge: d.merge, + comparer: *d.opts.Comparer, + readState: readState, + keyBuf: buf.keyBuf, + } + + if !i.First() { + err := i.Close() + if err != nil { + return nil, nil, err + } + return nil, nil, ErrNotFound + } + return i.Value(), i, nil +} + +// Set sets the value for the given key. It overwrites any previous value +// for that key; a DB is not a multi-map. 
+// +// It is safe to modify the contents of the arguments after Set returns. +func (d *DB) Set(key, value []byte, opts *WriteOptions) error { + b := newBatch(d) + _ = b.Set(key, value, opts) + if err := d.Apply(b, opts); err != nil { + return err + } + // Only release the batch on success. + b.release() + return nil +} + +// Delete deletes the value for the given key. Deletes are blind all will +// succeed even if the given key does not exist. +// +// It is safe to modify the contents of the arguments after Delete returns. +func (d *DB) Delete(key []byte, opts *WriteOptions) error { + b := newBatch(d) + _ = b.Delete(key, opts) + if err := d.Apply(b, opts); err != nil { + return err + } + // Only release the batch on success. + b.release() + return nil +} + +// DeleteSized behaves identically to Delete, but takes an additional +// argument indicating the size of the value being deleted. DeleteSized +// should be preferred when the caller has the expectation that there exists +// a single internal KV pair for the key (eg, the key has not been +// overwritten recently), and the caller knows the size of its value. +// +// DeleteSized will record the value size within the tombstone and use it to +// inform compaction-picking heuristics which strive to reduce space +// amplification in the LSM. This "calling your shot" mechanic allows the +// storage engine to more accurately estimate and reduce space amplification. +// +// It is safe to modify the contents of the arguments after DeleteSized +// returns. +func (d *DB) DeleteSized(key []byte, valueSize uint32, opts *WriteOptions) error { + b := newBatch(d) + _ = b.DeleteSized(key, valueSize, opts) + if err := d.Apply(b, opts); err != nil { + return err + } + // Only release the batch on success. + b.release() + return nil +} + +// SingleDelete adds an action to the batch that single deletes the entry for key. +// See Writer.SingleDelete for more details on the semantics of SingleDelete. 
+// +// It is safe to modify the contents of the arguments after SingleDelete returns. +func (d *DB) SingleDelete(key []byte, opts *WriteOptions) error { + b := newBatch(d) + _ = b.SingleDelete(key, opts) + if err := d.Apply(b, opts); err != nil { + return err + } + // Only release the batch on success. + b.release() + return nil +} + +// DeleteRange deletes all of the keys (and values) in the range [start,end) +// (inclusive on start, exclusive on end). +// +// It is safe to modify the contents of the arguments after DeleteRange +// returns. +func (d *DB) DeleteRange(start, end []byte, opts *WriteOptions) error { + b := newBatch(d) + _ = b.DeleteRange(start, end, opts) + if err := d.Apply(b, opts); err != nil { + return err + } + // Only release the batch on success. + b.release() + return nil +} + +// Merge adds an action to the DB that merges the value at key with the new +// value. The details of the merge are dependent upon the configured merge +// operator. +// +// It is safe to modify the contents of the arguments after Merge returns. +func (d *DB) Merge(key, value []byte, opts *WriteOptions) error { + b := newBatch(d) + _ = b.Merge(key, value, opts) + if err := d.Apply(b, opts); err != nil { + return err + } + // Only release the batch on success. + b.release() + return nil +} + +// LogData adds the specified to the batch. The data will be written to the +// WAL, but not added to memtables or sstables. Log data is never indexed, +// which makes it useful for testing WAL performance. +// +// It is safe to modify the contents of the argument after LogData returns. +func (d *DB) LogData(data []byte, opts *WriteOptions) error { + b := newBatch(d) + _ = b.LogData(data, opts) + if err := d.Apply(b, opts); err != nil { + return err + } + // Only release the batch on success. + b.release() + return nil +} + +// RangeKeySet sets a range key mapping the key range [start, end) at the MVCC +// timestamp suffix to value. The suffix is optional. 
If any portion of the key +// range [start, end) is already set by a range key with the same suffix value, +// RangeKeySet overrides it. +// +// It is safe to modify the contents of the arguments after RangeKeySet returns. +func (d *DB) RangeKeySet(start, end, suffix, value []byte, opts *WriteOptions) error { + b := newBatch(d) + _ = b.RangeKeySet(start, end, suffix, value, opts) + if err := d.Apply(b, opts); err != nil { + return err + } + // Only release the batch on success. + b.release() + return nil +} + +// RangeKeyUnset removes a range key mapping the key range [start, end) at the +// MVCC timestamp suffix. The suffix may be omitted to remove an unsuffixed +// range key. RangeKeyUnset only removes portions of range keys that fall within +// the [start, end) key span, and only range keys with suffixes that exactly +// match the unset suffix. +// +// It is safe to modify the contents of the arguments after RangeKeyUnset +// returns. +func (d *DB) RangeKeyUnset(start, end, suffix []byte, opts *WriteOptions) error { + b := newBatch(d) + _ = b.RangeKeyUnset(start, end, suffix, opts) + if err := d.Apply(b, opts); err != nil { + return err + } + // Only release the batch on success. + b.release() + return nil +} + +// RangeKeyDelete deletes all of the range keys in the range [start,end) +// (inclusive on start, exclusive on end). It does not delete point keys (for +// that use DeleteRange). RangeKeyDelete removes all range keys within the +// bounds, including those with or without suffixes. +// +// It is safe to modify the contents of the arguments after RangeKeyDelete +// returns. +func (d *DB) RangeKeyDelete(start, end []byte, opts *WriteOptions) error { + b := newBatch(d) + _ = b.RangeKeyDelete(start, end, opts) + if err := d.Apply(b, opts); err != nil { + return err + } + // Only release the batch on success. + b.release() + return nil +} + +// Apply the operations contained in the batch to the DB. 
If the batch is large +// the contents of the batch may be retained by the database. If that occurs +// the batch contents will be cleared preventing the caller from attempting to +// reuse them. +// +// It is safe to modify the contents of the arguments after Apply returns. +func (d *DB) Apply(batch *Batch, opts *WriteOptions) error { + return d.applyInternal(batch, opts, false) +} + +// ApplyNoSyncWait must only be used when opts.Sync is true and the caller +// does not want to wait for the WAL fsync to happen. The method will return +// once the mutation is applied to the memtable and is visible (note that a +// mutation is visible before the WAL sync even in the wait case, so we have +// not weakened the durability semantics). The caller must call Batch.SyncWait +// to wait for the WAL fsync. The caller must not Close the batch without +// first calling Batch.SyncWait. +// +// RECOMMENDATION: Prefer using Apply unless you really understand why you +// need ApplyNoSyncWait. +// EXPERIMENTAL: API/feature subject to change. Do not yet use outside +// CockroachDB. 
+func (d *DB) ApplyNoSyncWait(batch *Batch, opts *WriteOptions) error { + if !opts.Sync { + return errors.Errorf("cannot request asynchonous apply when WriteOptions.Sync is false") + } + return d.applyInternal(batch, opts, true) +} + +// REQUIRES: noSyncWait => opts.Sync +func (d *DB) applyInternal(batch *Batch, opts *WriteOptions, noSyncWait bool) error { + if err := d.closed.Load(); err != nil { + panic(err) + } + if batch.committing { + panic("pebble: batch already committing") + } + if batch.applied.Load() { + panic("pebble: batch already applied") + } + if d.opts.ReadOnly { + return ErrReadOnly + } + if batch.db != nil && batch.db != d { + panic(fmt.Sprintf("pebble: batch db mismatch: %p != %p", batch.db, d)) + } + + sync := opts.GetSync() + if sync && d.opts.DisableWAL { + return errors.New("pebble: WAL disabled") + } + + if batch.minimumFormatMajorVersion != FormatMostCompatible { + if fmv := d.FormatMajorVersion(); fmv < batch.minimumFormatMajorVersion { + panic(fmt.Sprintf( + "pebble: batch requires at least format major version %d (current: %d)", + batch.minimumFormatMajorVersion, fmv, + )) + } + } + + if batch.countRangeKeys > 0 { + if d.split == nil { + return errNoSplit + } + // TODO(jackson): Assert that all range key operands are suffixless. + } + batch.committing = true + + if batch.db == nil { + if err := batch.refreshMemTableSize(); err != nil { + return err + } + } + if batch.memTableSize >= d.largeBatchThreshold { + var err error + batch.flushable, err = newFlushableBatch(batch, d.opts.Comparer) + if err != nil { + return err + } + } + if err := d.commit.Commit(batch, sync, noSyncWait); err != nil { + // There isn't much we can do on an error here. The commit pipeline will be + // horked at this point. + d.opts.Logger.Fatalf("pebble: fatal commit error: %v", err) + } + // If this is a large batch, we need to clear the batch contents as the + // flushable batch may still be present in the flushables queue. 
+ // + // TODO(peter): Currently large batches are written to the WAL. We could + // skip the WAL write and instead wait for the large batch to be flushed to + // an sstable. For a 100 MB batch, this might actually be faster. For a 1 + // GB batch this is almost certainly faster. + if batch.flushable != nil { + batch.data = nil + } + return nil +} + +func (d *DB) commitApply(b *Batch, mem *memTable) error { + if b.flushable != nil { + // This is a large batch which was already added to the immutable queue. + return nil + } + err := mem.apply(b, b.SeqNum()) + if err != nil { + return err + } + + // If the batch contains range tombstones and the database is configured + // to flush range deletions, schedule a delayed flush so that disk space + // may be reclaimed without additional writes or an explicit flush. + if b.countRangeDels > 0 && d.opts.FlushDelayDeleteRange > 0 { + d.mu.Lock() + d.maybeScheduleDelayedFlush(mem, d.opts.FlushDelayDeleteRange) + d.mu.Unlock() + } + + // If the batch contains range keys and the database is configured to flush + // range keys, schedule a delayed flush so that the range keys are cleared + // from the memtable. + if b.countRangeKeys > 0 && d.opts.FlushDelayRangeKey > 0 { + d.mu.Lock() + d.maybeScheduleDelayedFlush(mem, d.opts.FlushDelayRangeKey) + d.mu.Unlock() + } + + if mem.writerUnref() { + d.mu.Lock() + d.maybeScheduleFlush() + d.mu.Unlock() + } + return nil +} + +func (d *DB) commitWrite(b *Batch, syncWG *sync.WaitGroup, syncErr *error) (*memTable, error) { + var size int64 + repr := b.Repr() + + if b.flushable != nil { + // We have a large batch. Such batches are special in that they don't get + // added to the memtable, and are instead inserted into the queue of + // memtables. The call to makeRoomForWrite with this batch will force the + // current memtable to be flushed. We want the large batch to be part of + // the same log, so we add it to the WAL here, rather than after the call + // to makeRoomForWrite(). 
+ // + // Set the sequence number since it was not set to the correct value earlier + // (see comment in newFlushableBatch()). + b.flushable.setSeqNum(b.SeqNum()) + if !d.opts.DisableWAL { + var err error + size, err = d.mu.log.SyncRecord(repr, syncWG, syncErr) + if err != nil { + panic(err) + } + } + } + + d.mu.Lock() + + var err error + if !b.ingestedSSTBatch { + // Batches which contain keys of kind InternalKeyKindIngestSST will + // never be applied to the memtable, so we don't need to make room for + // write. For the other cases, switch out the memtable if there was not + // enough room to store the batch. + err = d.makeRoomForWrite(b) + } + + if err == nil && !d.opts.DisableWAL { + d.mu.log.bytesIn += uint64(len(repr)) + } + + // Grab a reference to the memtable while holding DB.mu. Note that for + // non-flushable batches (b.flushable == nil) makeRoomForWrite() added a + // reference to the memtable which will prevent it from being flushed until + // we unreference it. This reference is dropped in DB.commitApply(). + mem := d.mu.mem.mutable + + d.mu.Unlock() + if err != nil { + return nil, err + } + + if d.opts.DisableWAL { + return mem, nil + } + + if b.flushable == nil { + size, err = d.mu.log.SyncRecord(repr, syncWG, syncErr) + if err != nil { + panic(err) + } + } + + d.logSize.Store(uint64(size)) + return mem, err +} + +type iterAlloc struct { + dbi Iterator + keyBuf []byte + boundsBuf [2][]byte + prefixOrFullSeekKey []byte + merging mergingIter + mlevels [3 + numLevels]mergingIterLevel + levels [3 + numLevels]levelIter + levelsPositioned [3 + numLevels]bool +} + +var iterAllocPool = sync.Pool{ + New: func() interface{} { + return &iterAlloc{} + }, +} + +// snapshotIterOpts denotes snapshot-related iterator options when calling +// newIter. These are the possible cases for a snapshotIterOpts: +// - No snapshot: All fields are zero values. +// - Classic snapshot: Only `seqNum` is set. 
The latest readState will be used +// and the specified seqNum will be used as the snapshot seqNum. +// - EventuallyFileOnlySnapshot (EFOS) behaving as a classic snapshot. Only +// the `seqNum` is set. The latest readState will be used +// and the specified seqNum will be used as the snapshot seqNum. +// - EFOS in file-only state: Only `seqNum` and `vers` are set. All the +// relevant SSTs are referenced by the *version. +type snapshotIterOpts struct { + seqNum uint64 + vers *version +} + +type batchIterOpts struct { + batchOnly bool +} +type newIterOpts struct { + snapshot snapshotIterOpts + batch batchIterOpts +} + +// newIter constructs a new iterator, merging in batch iterators as an extra +// level. +func (d *DB) newIter( + ctx context.Context, batch *Batch, internalOpts newIterOpts, o *IterOptions, +) *Iterator { + if internalOpts.batch.batchOnly { + if batch == nil { + panic("batchOnly is true, but batch is nil") + } + if internalOpts.snapshot.vers != nil { + panic("batchOnly is true, but snapshotIterOpts is initialized") + } + } + if err := d.closed.Load(); err != nil { + panic(err) + } + seqNum := internalOpts.snapshot.seqNum + if o.rangeKeys() { + if d.FormatMajorVersion() < FormatRangeKeys { + panic(fmt.Sprintf( + "pebble: range keys require at least format major version %d (current: %d)", + FormatRangeKeys, d.FormatMajorVersion(), + )) + } + } + if o != nil && o.RangeKeyMasking.Suffix != nil && o.KeyTypes != IterKeyTypePointsAndRanges { + panic("pebble: range key masking requires IterKeyTypePointsAndRanges") + } + if (batch != nil || seqNum != 0) && (o != nil && o.OnlyReadGuaranteedDurable) { + // We could add support for OnlyReadGuaranteedDurable on snapshots if + // there was a need: this would require checking that the sequence number + // of the snapshot has been flushed, by comparing with + // DB.mem.queue[0].logSeqNum. 
+ panic("OnlyReadGuaranteedDurable is not supported for batches or snapshots") + } + var readState *readState + var newIters tableNewIters + var newIterRangeKey keyspan.TableNewSpanIter + if !internalOpts.batch.batchOnly { + // Grab and reference the current readState. This prevents the underlying + // files in the associated version from being deleted if there is a current + // compaction. The readState is unref'd by Iterator.Close(). + if internalOpts.snapshot.vers == nil { + // NB: loadReadState() calls readState.ref(). + readState = d.loadReadState() + } else { + // vers != nil + internalOpts.snapshot.vers.Ref() + } + + // Determine the seqnum to read at after grabbing the read state (current and + // memtables) above. + if seqNum == 0 { + seqNum = d.mu.versions.visibleSeqNum.Load() + } + newIters = d.newIters + newIterRangeKey = d.tableNewRangeKeyIter + } + + // Bundle various structures under a single umbrella in order to allocate + // them together. + buf := iterAllocPool.Get().(*iterAlloc) + dbi := &buf.dbi + *dbi = Iterator{ + ctx: ctx, + alloc: buf, + merge: d.merge, + comparer: *d.opts.Comparer, + readState: readState, + version: internalOpts.snapshot.vers, + keyBuf: buf.keyBuf, + prefixOrFullSeekKey: buf.prefixOrFullSeekKey, + boundsBuf: buf.boundsBuf, + batch: batch, + newIters: newIters, + newIterRangeKey: newIterRangeKey, + seqNum: seqNum, + batchOnlyIter: internalOpts.batch.batchOnly, + } + if o != nil { + dbi.opts = *o + dbi.processBounds(o.LowerBound, o.UpperBound) + } + dbi.opts.logger = d.opts.Logger + if d.opts.private.disableLazyCombinedIteration { + dbi.opts.disableLazyCombinedIteration = true + } + if batch != nil { + dbi.batchSeqNum = dbi.batch.nextSeqNum() + } + return finishInitializingIter(ctx, buf) +} + +// finishInitializingIter is a helper for doing the non-trivial initialization +// of an Iterator. 
It's invoked to perform the initial initialization of an +// Iterator during NewIter or Clone, and to perform reinitialization due to a +// change in IterOptions by a call to Iterator.SetOptions. +func finishInitializingIter(ctx context.Context, buf *iterAlloc) *Iterator { + // Short-hand. + dbi := &buf.dbi + var memtables flushableList + if dbi.readState != nil { + memtables = dbi.readState.memtables + } + if dbi.opts.OnlyReadGuaranteedDurable { + memtables = nil + } else { + // We only need to read from memtables which contain sequence numbers older + // than seqNum. Trim off newer memtables. + for i := len(memtables) - 1; i >= 0; i-- { + if logSeqNum := memtables[i].logSeqNum; logSeqNum < dbi.seqNum { + break + } + memtables = memtables[:i] + } + } + + if dbi.opts.pointKeys() { + // Construct the point iterator, initializing dbi.pointIter to point to + // dbi.merging. If this is called during a SetOptions call and this + // Iterator has already initialized dbi.merging, constructPointIter is a + // noop and an initialized pointIter already exists in dbi.pointIter. + dbi.constructPointIter(ctx, memtables, buf) + dbi.iter = dbi.pointIter + } else { + dbi.iter = emptyIter + } + + if dbi.opts.rangeKeys() { + dbi.rangeKeyMasking.init(dbi, dbi.comparer.Compare, dbi.comparer.Split) + + // When iterating over both point and range keys, don't create the + // range-key iterator stack immediately if we can avoid it. This + // optimization takes advantage of the expected sparseness of range + // keys, and configures the point-key iterator to dynamically switch to + // combined iteration when it observes a file containing range keys. + // + // Lazy combined iteration is not possible if a batch or a memtable + // contains any range keys. 
+ useLazyCombinedIteration := dbi.rangeKey == nil && + dbi.opts.KeyTypes == IterKeyTypePointsAndRanges && + (dbi.batch == nil || dbi.batch.countRangeKeys == 0) && + !dbi.opts.disableLazyCombinedIteration + if useLazyCombinedIteration { + // The user requested combined iteration, and there's no indexed + // batch currently containing range keys that would prevent lazy + // combined iteration. Check the memtables to see if they contain + // any range keys. + for i := range memtables { + if memtables[i].containsRangeKeys() { + useLazyCombinedIteration = false + break + } + } + } + + if useLazyCombinedIteration { + dbi.lazyCombinedIter = lazyCombinedIter{ + parent: dbi, + pointIter: dbi.pointIter, + combinedIterState: combinedIterState{ + initialized: false, + }, + } + dbi.iter = &dbi.lazyCombinedIter + dbi.iter = invalidating.MaybeWrapIfInvariants(dbi.iter) + } else { + dbi.lazyCombinedIter.combinedIterState = combinedIterState{ + initialized: true, + } + if dbi.rangeKey == nil { + dbi.rangeKey = iterRangeKeyStateAllocPool.Get().(*iteratorRangeKeyState) + dbi.rangeKey.init(dbi.comparer.Compare, dbi.comparer.Split, &dbi.opts) + dbi.constructRangeKeyIter() + } else { + dbi.rangeKey.iterConfig.SetBounds(dbi.opts.LowerBound, dbi.opts.UpperBound) + } + + // Wrap the point iterator (currently dbi.iter) with an interleaving + // iterator that interleaves range keys pulled from + // dbi.rangeKey.rangeKeyIter. + // + // NB: The interleaving iterator is always reinitialized, even if + // dbi already had an initialized range key iterator, in case the point + // iterator changed or the range key masking suffix changed. + dbi.rangeKey.iiter.Init(&dbi.comparer, dbi.iter, dbi.rangeKey.rangeKeyIter, + keyspan.InterleavingIterOpts{ + Mask: &dbi.rangeKeyMasking, + LowerBound: dbi.opts.LowerBound, + UpperBound: dbi.opts.UpperBound, + }) + dbi.iter = &dbi.rangeKey.iiter + } + } else { + // !dbi.opts.rangeKeys() + // + // Reset the combined iterator state. 
The initialized=true ensures the + // iterator doesn't unnecessarily try to switch to combined iteration. + dbi.lazyCombinedIter.combinedIterState = combinedIterState{initialized: true} + } + return dbi +} + +// ScanInternal scans all internal keys within the specified bounds, truncating +// any rangedels and rangekeys to those bounds if they span past them. For use +// when an external user needs to be aware of all internal keys that make up a +// key range. +// +// Keys deleted by range deletions must not be returned or exposed by this +// method, while the range deletion deleting that key must be exposed using +// visitRangeDel. Keys that would be masked by range key masking (if an +// appropriate prefix were set) should be exposed, alongside the range key +// that would have masked it. This method also collapses all point keys into +// one InternalKey; so only one internal key at most per user key is returned +// to visitPointKey. +// +// If visitSharedFile is not nil, ScanInternal iterates in skip-shared iteration +// mode. In this iteration mode, sstables in levels L5 and L6 are skipped, and +// their metadatas truncated to [lower, upper) and passed into visitSharedFile. +// ErrInvalidSkipSharedIteration is returned if visitSharedFile is not nil and an +// sstable in L5 or L6 is found that is not in shared storage according to +// provider.IsShared, or an sstable in those levels contains a newer key than the +// snapshot sequence number (only applicable for snapshot.ScanInternal). Examples +// of when this could happen could be if Pebble started writing sstables before a +// creator ID was set (as creator IDs are necessary to enable shared storage) +// resulting in some lower level SSTs being on non-shared storage. Skip-shared +// iteration is invalid in those cases. 
+func (d *DB) ScanInternal( + ctx context.Context, + categoryAndQoS sstable.CategoryAndQoS, + lower, upper []byte, + visitPointKey func(key *InternalKey, value LazyValue, iterInfo IteratorLevel) error, + visitRangeDel func(start, end []byte, seqNum uint64) error, + visitRangeKey func(start, end []byte, keys []rangekey.Key) error, + visitSharedFile func(sst *SharedSSTMeta) error, +) error { + scanInternalOpts := &scanInternalOptions{ + CategoryAndQoS: categoryAndQoS, + visitPointKey: visitPointKey, + visitRangeDel: visitRangeDel, + visitRangeKey: visitRangeKey, + visitSharedFile: visitSharedFile, + skipSharedLevels: visitSharedFile != nil, + IterOptions: IterOptions{ + KeyTypes: IterKeyTypePointsAndRanges, + LowerBound: lower, + UpperBound: upper, + }, + } + iter, err := d.newInternalIter(ctx, snapshotIterOpts{} /* snapshot */, scanInternalOpts) + if err != nil { + return err + } + defer iter.close() + return scanInternalImpl(ctx, lower, upper, iter, scanInternalOpts) +} + +// newInternalIter constructs and returns a new scanInternalIterator on this db. +// If o.skipSharedLevels is true, levels below sharedLevelsStart are *not* added +// to the internal iterator. +// +// TODO(bilal): This method has a lot of similarities with db.newIter as well as +// finishInitializingIter. Both pairs of methods should be refactored to reduce +// this duplication. +func (d *DB) newInternalIter( + ctx context.Context, sOpts snapshotIterOpts, o *scanInternalOptions, +) (*scanInternalIterator, error) { + if err := d.closed.Load(); err != nil { + panic(err) + } + // Grab and reference the current readState. This prevents the underlying + // files in the associated version from being deleted if there is a current + // compaction. The readState is unref'd by Iterator.Close(). 
+ var readState *readState + if sOpts.vers == nil { + readState = d.loadReadState() + } + if sOpts.vers != nil { + sOpts.vers.Ref() + } + + // Determine the seqnum to read at after grabbing the read state (current and + // memtables) above. + seqNum := sOpts.seqNum + if seqNum == 0 { + seqNum = d.mu.versions.visibleSeqNum.Load() + } + + // Bundle various structures under a single umbrella in order to allocate + // them together. + buf := iterAllocPool.Get().(*iterAlloc) + dbi := &scanInternalIterator{ + ctx: ctx, + db: d, + comparer: d.opts.Comparer, + merge: d.opts.Merger.Merge, + readState: readState, + version: sOpts.vers, + alloc: buf, + newIters: d.newIters, + newIterRangeKey: d.tableNewRangeKeyIter, + seqNum: seqNum, + mergingIter: &buf.merging, + } + dbi.opts = *o + dbi.opts.logger = d.opts.Logger + if d.opts.private.disableLazyCombinedIteration { + dbi.opts.disableLazyCombinedIteration = true + } + return finishInitializingInternalIter(buf, dbi) +} + +func finishInitializingInternalIter( + buf *iterAlloc, i *scanInternalIterator, +) (*scanInternalIterator, error) { + // Short-hand. + var memtables flushableList + if i.readState != nil { + memtables = i.readState.memtables + } + // We only need to read from memtables which contain sequence numbers older + // than seqNum. Trim off newer memtables. + for j := len(memtables) - 1; j >= 0; j-- { + if logSeqNum := memtables[j].logSeqNum; logSeqNum < i.seqNum { + break + } + memtables = memtables[:j] + } + i.initializeBoundBufs(i.opts.LowerBound, i.opts.UpperBound) + + i.constructPointIter(i.opts.CategoryAndQoS, memtables, buf) + + // For internal iterators, we skip the lazy combined iteration optimization + // entirely, and create the range key iterator stack directly. 
+ i.rangeKey = iterRangeKeyStateAllocPool.Get().(*iteratorRangeKeyState) + i.rangeKey.init(i.comparer.Compare, i.comparer.Split, &i.opts.IterOptions) + if err := i.constructRangeKeyIter(); err != nil { + return nil, err + } + + // Wrap the point iterator (currently i.iter) with an interleaving + // iterator that interleaves range keys pulled from + // i.rangeKey.rangeKeyIter. + i.rangeKey.iiter.Init(i.comparer, i.iter, i.rangeKey.rangeKeyIter, + keyspan.InterleavingIterOpts{ + LowerBound: i.opts.LowerBound, + UpperBound: i.opts.UpperBound, + }) + i.iter = &i.rangeKey.iiter + + return i, nil +} + +func (i *Iterator) constructPointIter( + ctx context.Context, memtables flushableList, buf *iterAlloc, +) { + if i.pointIter != nil { + // Already have one. + return + } + internalOpts := internalIterOpts{stats: &i.stats.InternalStats} + if i.opts.RangeKeyMasking.Filter != nil { + internalOpts.boundLimitedFilter = &i.rangeKeyMasking + } + + // Merging levels and levels from iterAlloc. + mlevels := buf.mlevels[:0] + levels := buf.levels[:0] + + // We compute the number of levels needed ahead of time and reallocate a slice if + // the array from the iterAlloc isn't large enough. Doing this allocation once + // should improve the performance. + numMergingLevels := 0 + numLevelIters := 0 + if i.batch != nil { + numMergingLevels++ + } + + var current *version + if !i.batchOnlyIter { + numMergingLevels += len(memtables) + + current = i.version + if current == nil { + current = i.readState.current + } + numMergingLevels += len(current.L0SublevelFiles) + numLevelIters += len(current.L0SublevelFiles) + for level := 1; level < len(current.Levels); level++ { + if current.Levels[level].Empty() { + continue + } + numMergingLevels++ + numLevelIters++ + } + } + + if numMergingLevels > cap(mlevels) { + mlevels = make([]mergingIterLevel, 0, numMergingLevels) + } + if numLevelIters > cap(levels) { + levels = make([]levelIter, 0, numLevelIters) + } + + // Top-level is the batch, if any. 
+ if i.batch != nil { + if i.batch.index == nil { + // This isn't an indexed batch. We shouldn't have gotten this far. + panic(errors.AssertionFailedf("creating an iterator over an unindexed batch")) + } else { + i.batch.initInternalIter(&i.opts, &i.batchPointIter) + i.batch.initRangeDelIter(&i.opts, &i.batchRangeDelIter, i.batchSeqNum) + // Only include the batch's rangedel iterator if it's non-empty. + // This requires some subtle logic in the case a rangedel is later + // written to the batch and the view of the batch is refreshed + // during a call to SetOptions—in this case, we need to reconstruct + // the point iterator to add the batch rangedel iterator. + var rangeDelIter keyspan.FragmentIterator + if i.batchRangeDelIter.Count() > 0 { + rangeDelIter = &i.batchRangeDelIter + } + mlevels = append(mlevels, mergingIterLevel{ + iter: &i.batchPointIter, + rangeDelIter: rangeDelIter, + }) + } + } + + if !i.batchOnlyIter { + // Next are the memtables. + for j := len(memtables) - 1; j >= 0; j-- { + mem := memtables[j] + mlevels = append(mlevels, mergingIterLevel{ + iter: mem.newIter(&i.opts), + rangeDelIter: mem.newRangeDelIter(&i.opts), + }) + } + + // Next are the file levels: L0 sub-levels followed by lower levels. 
+ mlevelsIndex := len(mlevels) + levelsIndex := len(levels) + mlevels = mlevels[:numMergingLevels] + levels = levels[:numLevelIters] + i.opts.snapshotForHideObsoletePoints = buf.dbi.seqNum + addLevelIterForFiles := func(files manifest.LevelIterator, level manifest.Level) { + li := &levels[levelsIndex] + + li.init(ctx, i.opts, &i.comparer, i.newIters, files, level, internalOpts) + li.initRangeDel(&mlevels[mlevelsIndex].rangeDelIter) + li.initBoundaryContext(&mlevels[mlevelsIndex].levelIterBoundaryContext) + li.initCombinedIterState(&i.lazyCombinedIter.combinedIterState) + mlevels[mlevelsIndex].levelIter = li + mlevels[mlevelsIndex].iter = invalidating.MaybeWrapIfInvariants(li) + + levelsIndex++ + mlevelsIndex++ + } + + // Add level iterators for the L0 sublevels, iterating from newest to + // oldest. + for i := len(current.L0SublevelFiles) - 1; i >= 0; i-- { + addLevelIterForFiles(current.L0SublevelFiles[i].Iter(), manifest.L0Sublevel(i)) + } + + // Add level iterators for the non-empty non-L0 levels. + for level := 1; level < len(current.Levels); level++ { + if current.Levels[level].Empty() { + continue + } + addLevelIterForFiles(current.Levels[level].Iter(), manifest.Level(level)) + } + } + buf.merging.init(&i.opts, &i.stats.InternalStats, i.comparer.Compare, i.comparer.Split, mlevels...) + if len(mlevels) <= cap(buf.levelsPositioned) { + buf.merging.levelsPositioned = buf.levelsPositioned[:len(mlevels)] + } + buf.merging.snapshot = i.seqNum + buf.merging.batchSnapshot = i.batchSeqNum + buf.merging.combinedIterState = &i.lazyCombinedIter.combinedIterState + i.pointIter = invalidating.MaybeWrapIfInvariants(&buf.merging) + i.merging = &buf.merging +} + +// NewBatch returns a new empty write-only batch. Any reads on the batch will +// return an error. If the batch is committed it will be applied to the DB. 
+func (d *DB) NewBatch() *Batch {
+ return newBatch(d)
+}
+
+// NewBatchWithSize is mostly identical to NewBatch, but it will allocate
+// the specified memory space for the internal slice in advance.
+func (d *DB) NewBatchWithSize(size int) *Batch {
+ return newBatchWithSize(d, size)
+}
+
+// NewIndexedBatch returns a new empty read-write batch. Any reads on the batch
+// will read from both the batch and the DB. If the batch is committed it will
+// be applied to the DB. An indexed batch is slower than a non-indexed batch
+// for insert operations. If you do not need to perform reads on the batch, use
+// NewBatch instead.
+func (d *DB) NewIndexedBatch() *Batch {
+ return newIndexedBatch(d, d.opts.Comparer)
+}
+
+// NewIndexedBatchWithSize is mostly identical to NewIndexedBatch, but it will
+// allocate the specified memory space for the internal slice in advance.
+func (d *DB) NewIndexedBatchWithSize(size int) *Batch {
+ return newIndexedBatchWithSize(d, d.opts.Comparer, size)
+}
+
+// NewIter returns an iterator that is unpositioned (Iterator.Valid() will
+// return false). The iterator can be positioned via a call to SeekGE, SeekLT,
+// First or Last. The iterator provides a point-in-time view of the current DB
+// state. This view is maintained by preventing file deletions and preventing
+// memtables referenced by the iterator from being deleted. Using an iterator
+// to maintain a long-lived point-in-time view of the DB state can lead to an
+// apparent memory and disk usage leak. Use snapshots (see NewSnapshot) for
+// point-in-time snapshots which avoids these problems.
+func (d *DB) NewIter(o *IterOptions) (*Iterator, error) {
+ return d.NewIterWithContext(context.Background(), o)
+}
+
+// NewIterWithContext is like NewIter, and additionally accepts a context for
+// tracing.
+func (d *DB) NewIterWithContext(ctx context.Context, o *IterOptions) (*Iterator, error) { + return d.newIter(ctx, nil /* batch */, newIterOpts{}, o), nil +} + +// NewSnapshot returns a point-in-time view of the current DB state. Iterators +// created with this handle will all observe a stable snapshot of the current +// DB state. The caller must call Snapshot.Close() when the snapshot is no +// longer needed. Snapshots are not persisted across DB restarts (close -> +// open). Unlike the implicit snapshot maintained by an iterator, a snapshot +// will not prevent memtables from being released or sstables from being +// deleted. Instead, a snapshot prevents deletion of sequence numbers +// referenced by the snapshot. +func (d *DB) NewSnapshot() *Snapshot { + if err := d.closed.Load(); err != nil { + panic(err) + } + + d.mu.Lock() + s := &Snapshot{ + db: d, + seqNum: d.mu.versions.visibleSeqNum.Load(), + } + d.mu.snapshots.pushBack(s) + d.mu.Unlock() + return s +} + +// NewEventuallyFileOnlySnapshot returns a point-in-time view of the current DB +// state, similar to NewSnapshot, but with consistency constrained to the +// provided set of key ranges. See the comment at EventuallyFileOnlySnapshot for +// its semantics. +func (d *DB) NewEventuallyFileOnlySnapshot(keyRanges []KeyRange) *EventuallyFileOnlySnapshot { + if err := d.closed.Load(); err != nil { + panic(err) + } + + internalKeyRanges := make([]internalKeyRange, len(keyRanges)) + for i := range keyRanges { + if i > 0 && d.cmp(keyRanges[i-1].End, keyRanges[i].Start) > 0 { + panic("pebble: key ranges for eventually-file-only-snapshot not in order") + } + internalKeyRanges[i] = internalKeyRange{ + smallest: base.MakeInternalKey(keyRanges[i].Start, InternalKeySeqNumMax, InternalKeyKindMax), + largest: base.MakeExclusiveSentinelKey(InternalKeyKindRangeDelete, keyRanges[i].End), + } + } + + return d.makeEventuallyFileOnlySnapshot(keyRanges, internalKeyRanges) +} + +// Close closes the DB. 
+//
+// It is not safe to close a DB until all outstanding iterators are closed
+// or to call Close concurrently with any other DB method. It is not valid
+// to call any of a DB's methods after the DB has been closed.
+func (d *DB) Close() error {
+ // Lock the commit pipeline for the duration of Close. This prevents a race
+ // with makeRoomForWrite. Rotating the WAL in makeRoomForWrite requires
+ // dropping d.mu several times for I/O. If Close only holds d.mu, an
+ // in-progress WAL rotation may re-acquire d.mu only once the database is
+ // closed.
+ //
+ // Additionally, locking the commit pipeline makes it more likely that
+ // (illegal) concurrent writes will observe d.closed.Load() != nil, creating
+ // more understandable panics if the database is improperly used concurrently
+ // during Close.
+ d.commit.mu.Lock()
+ defer d.commit.mu.Unlock()
+ d.mu.Lock()
+ defer d.mu.Unlock()
+ if err := d.closed.Load(); err != nil {
+ panic(err)
+ }
+
+ // Clear the finalizer that is used to check that an unreferenced DB has been
+ // closed. We're closing the DB here, so the check performed by that
+ // finalizer isn't necessary.
+ //
+ // Note: this is a no-op if invariants are disabled or race is enabled.
+ invariants.SetFinalizer(d.closed, nil) + + d.closed.Store(errors.WithStack(ErrClosed)) + close(d.closedCh) + + defer d.opts.Cache.Unref() + + for d.mu.compact.compactingCount > 0 || d.mu.compact.flushing { + d.mu.compact.cond.Wait() + } + for d.mu.tableStats.loading { + d.mu.tableStats.cond.Wait() + } + for d.mu.tableValidation.validating { + d.mu.tableValidation.cond.Wait() + } + + var err error + if n := len(d.mu.compact.inProgress); n > 0 { + err = errors.Errorf("pebble: %d unexpected in-progress compactions", errors.Safe(n)) + } + err = firstError(err, d.mu.formatVers.marker.Close()) + err = firstError(err, d.tableCache.close()) + if !d.opts.ReadOnly { + err = firstError(err, d.mu.log.Close()) + } else if d.mu.log.LogWriter != nil { + panic("pebble: log-writer should be nil in read-only mode") + } + err = firstError(err, d.fileLock.Close()) + + // Note that versionSet.close() only closes the MANIFEST. The versions list + // is still valid for the checks below. + err = firstError(err, d.mu.versions.close()) + + err = firstError(err, d.dataDir.Close()) + if d.dataDir != d.walDir { + err = firstError(err, d.walDir.Close()) + } + + d.readState.val.unrefLocked() + + current := d.mu.versions.currentVersion() + for v := d.mu.versions.versions.Front(); true; v = v.Next() { + refs := v.Refs() + if v == current { + if refs != 1 { + err = firstError(err, errors.Errorf("leaked iterators: current\n%s", v)) + } + break + } + if refs != 0 { + err = firstError(err, errors.Errorf("leaked iterators:\n%s", v)) + } + } + + for _, mem := range d.mu.mem.queue { + // Usually, we'd want to delete the files returned by readerUnref. But + // in this case, even if we're unreferencing the flushables, the + // flushables aren't obsolete. They will be reconstructed during WAL + // replay. + mem.readerUnrefLocked(false) + } + // If there's an unused, recycled memtable, we need to release its memory. 
+ if obsoleteMemTable := d.memTableRecycle.Swap(nil); obsoleteMemTable != nil { + d.freeMemTable(obsoleteMemTable) + } + if reserved := d.memTableReserved.Load(); reserved != 0 { + err = firstError(err, errors.Errorf("leaked memtable reservation: %d", errors.Safe(reserved))) + } + + // Since we called d.readState.val.unrefLocked() above, we are expected to + // manually schedule deletion of obsolete files. + if len(d.mu.versions.obsoleteTables) > 0 { + d.deleteObsoleteFiles(d.mu.nextJobID) + } + + d.mu.Unlock() + d.compactionSchedulers.Wait() + + // Wait for all cleaning jobs to finish. + d.cleanupManager.Close() + + // Sanity check metrics. + if invariants.Enabled { + m := d.Metrics() + if m.Compact.NumInProgress > 0 || m.Compact.InProgressBytes > 0 { + d.mu.Lock() + panic(fmt.Sprintf("invalid metrics on close:\n%s", m)) + } + } + + d.mu.Lock() + + // As a sanity check, ensure that there are no zombie tables. A non-zero count + // hints at a reference count leak. + if ztbls := len(d.mu.versions.zombieTables); ztbls > 0 { + err = firstError(err, errors.Errorf("non-zero zombie file count: %d", ztbls)) + } + + err = firstError(err, d.objProvider.Close()) + + // If the options include a closer to 'close' the filesystem, close it. + if d.opts.private.fsCloser != nil { + d.opts.private.fsCloser.Close() + } + + // Return an error if the user failed to close all open snapshots. + if v := d.mu.snapshots.count(); v > 0 { + err = firstError(err, errors.Errorf("leaked snapshots: %d open snapshots on DB %p", v, d)) + } + + return err +} + +// Compact the specified range of keys in the database. 
+func (d *DB) Compact(start, end []byte, parallelize bool) error { + if err := d.closed.Load(); err != nil { + panic(err) + } + if d.opts.ReadOnly { + return ErrReadOnly + } + if d.cmp(start, end) >= 0 { + return errors.Errorf("Compact start %s is not less than end %s", + d.opts.Comparer.FormatKey(start), d.opts.Comparer.FormatKey(end)) + } + iStart := base.MakeInternalKey(start, InternalKeySeqNumMax, InternalKeyKindMax) + iEnd := base.MakeInternalKey(end, 0, 0) + m := (&fileMetadata{}).ExtendPointKeyBounds(d.cmp, iStart, iEnd) + meta := []*fileMetadata{m} + + d.mu.Lock() + maxLevelWithFiles := 1 + cur := d.mu.versions.currentVersion() + for level := 0; level < numLevels; level++ { + overlaps := cur.Overlaps(level, d.cmp, start, end, iEnd.IsExclusiveSentinel()) + if !overlaps.Empty() { + maxLevelWithFiles = level + 1 + } + } + + keyRanges := make([]internalKeyRange, len(meta)) + for i := range meta { + keyRanges[i] = internalKeyRange{smallest: m.Smallest, largest: m.Largest} + } + // Determine if any memtable overlaps with the compaction range. We wait for + // any such overlap to flush (initiating a flush if necessary). + mem, err := func() (*flushableEntry, error) { + // Check to see if any files overlap with any of the memtables. The queue + // is ordered from oldest to newest with the mutable memtable being the + // last element in the slice. We want to wait for the newest table that + // overlaps. + for i := len(d.mu.mem.queue) - 1; i >= 0; i-- { + mem := d.mu.mem.queue[i] + if ingestMemtableOverlaps(d.cmp, mem, keyRanges) { + var err error + if mem.flushable == d.mu.mem.mutable { + // We have to hold both commitPipeline.mu and DB.mu when calling + // makeRoomForWrite(). Lock order requirements elsewhere force us to + // unlock DB.mu in order to grab commitPipeline.mu first. + d.mu.Unlock() + d.commit.mu.Lock() + d.mu.Lock() + defer d.commit.mu.Unlock() + if mem.flushable == d.mu.mem.mutable { + // Only flush if the active memtable is unchanged. 
+ err = d.makeRoomForWrite(nil) + } + } + mem.flushForced = true + d.maybeScheduleFlush() + return mem, err + } + } + return nil, nil + }() + + d.mu.Unlock() + + if err != nil { + return err + } + if mem != nil { + <-mem.flushed + } + + for level := 0; level < maxLevelWithFiles; { + for { + if err := d.manualCompact( + iStart.UserKey, iEnd.UserKey, level, parallelize); err != nil { + if errors.Is(err, ErrCancelledCompaction) { + continue + } + return err + } + break + } + level++ + if level == numLevels-1 { + // A manual compaction of the bottommost level occurred. + // There is no next level to try and compact. + break + } + } + return nil +} + +func (d *DB) manualCompact(start, end []byte, level int, parallelize bool) error { + d.mu.Lock() + curr := d.mu.versions.currentVersion() + files := curr.Overlaps(level, d.cmp, start, end, false) + if files.Empty() { + d.mu.Unlock() + return nil + } + + var compactions []*manualCompaction + if parallelize { + compactions = append(compactions, d.splitManualCompaction(start, end, level)...) + } else { + compactions = append(compactions, &manualCompaction{ + level: level, + done: make(chan error, 1), + start: start, + end: end, + }) + } + d.mu.compact.manual = append(d.mu.compact.manual, compactions...) + d.maybeScheduleCompaction() + d.mu.Unlock() + + // Each of the channels is guaranteed to be eventually sent to once. After a + // compaction is possibly picked in d.maybeScheduleCompaction(), either the + // compaction is dropped, executed after being scheduled, or retried later. + // Assuming eventual progress when a compaction is retried, all outcomes send + // a value to the done channel. Since the channels are buffered, it is not + // necessary to read from each channel, and so we can exit early in the event + // of an error. 
+ for _, compaction := range compactions {
+ if err := <-compaction.done; err != nil {
+ return err
+ }
+ }
+ return nil
+}
+
+// splitManualCompaction splits a manual compaction over [start,end] on level
+// such that the resulting compactions have no key overlap.
+func (d *DB) splitManualCompaction(
+ start, end []byte, level int,
+) (splitCompactions []*manualCompaction) {
+ curr := d.mu.versions.currentVersion()
+ endLevel := level + 1
+ baseLevel := d.mu.versions.picker.getBaseLevel()
+ if level == 0 {
+ endLevel = baseLevel
+ }
+ keyRanges := calculateInuseKeyRanges(curr, d.cmp, level, endLevel, start, end)
+ for _, keyRange := range keyRanges {
+ splitCompactions = append(splitCompactions, &manualCompaction{
+ level: level,
+ done: make(chan error, 1),
+ start: keyRange.Start,
+ end: keyRange.End,
+ split: true,
+ })
+ }
+ return splitCompactions
+}
+
+// DownloadSpan is a key range passed to the Download method.
+type DownloadSpan struct {
+ StartKey []byte
+ // EndKey is exclusive.
+ EndKey []byte
+}
+
+// Download ensures that the LSM does not use any external sstables for the
+// given key ranges. It does so by performing appropriate compactions so that
+// all external data becomes available locally.
+//
+// Note that calling this method does not imply that all other compactions stop;
+// it simply informs Pebble of a list of spans for which external data should be
+// downloaded with high priority.
+//
+// The method returns once no external sstables overlap the given spans, the
+// context is canceled, or an error is hit.
+//
+// TODO(radu): consider passing a priority/impact knob to express how important
+// the download is (versus live traffic performance, LSM health).
+func (d *DB) Download(ctx context.Context, spans []DownloadSpan) error {
+ return errors.Errorf("not implemented")
+}
+
+// Flush the memtable to stable storage.
+func (d *DB) Flush() error { + flushDone, err := d.AsyncFlush() + if err != nil { + return err + } + <-flushDone + return nil +} + +// AsyncFlush asynchronously flushes the memtable to stable storage. +// +// If no error is returned, the caller can receive from the returned channel in +// order to wait for the flush to complete. +func (d *DB) AsyncFlush() (<-chan struct{}, error) { + if err := d.closed.Load(); err != nil { + panic(err) + } + if d.opts.ReadOnly { + return nil, ErrReadOnly + } + + d.commit.mu.Lock() + defer d.commit.mu.Unlock() + d.mu.Lock() + defer d.mu.Unlock() + flushed := d.mu.mem.queue[len(d.mu.mem.queue)-1].flushed + err := d.makeRoomForWrite(nil) + if err != nil { + return nil, err + } + return flushed, nil +} + +// Metrics returns metrics about the database. +func (d *DB) Metrics() *Metrics { + metrics := &Metrics{} + recycledLogsCount, recycledLogSize := d.logRecycler.stats() + + d.mu.Lock() + vers := d.mu.versions.currentVersion() + *metrics = d.mu.versions.metrics + metrics.Compact.EstimatedDebt = d.mu.versions.picker.estimatedCompactionDebt(0) + metrics.Compact.InProgressBytes = d.mu.versions.atomicInProgressBytes.Load() + metrics.Compact.NumInProgress = int64(d.mu.compact.compactingCount) + metrics.Compact.MarkedFiles = vers.Stats.MarkedForCompaction + metrics.Compact.Duration = d.mu.compact.duration + for c := range d.mu.compact.inProgress { + if c.kind != compactionKindFlush { + metrics.Compact.Duration += d.timeNow().Sub(c.beganAt) + } + } + + for _, m := range d.mu.mem.queue { + metrics.MemTable.Size += m.totalBytes() + } + metrics.Snapshots.Count = d.mu.snapshots.count() + if metrics.Snapshots.Count > 0 { + metrics.Snapshots.EarliestSeqNum = d.mu.snapshots.earliest() + } + metrics.Snapshots.PinnedKeys = d.mu.snapshots.cumulativePinnedCount + metrics.Snapshots.PinnedSize = d.mu.snapshots.cumulativePinnedSize + metrics.MemTable.Count = int64(len(d.mu.mem.queue)) + metrics.MemTable.ZombieCount = d.memTableCount.Load() - 
metrics.MemTable.Count + metrics.MemTable.ZombieSize = uint64(d.memTableReserved.Load()) - metrics.MemTable.Size + metrics.WAL.ObsoleteFiles = int64(recycledLogsCount) + metrics.WAL.ObsoletePhysicalSize = recycledLogSize + metrics.WAL.Size = d.logSize.Load() + // The current WAL size (d.atomic.logSize) is the current logical size, + // which may be less than the WAL's physical size if it was recycled. + // The file sizes in d.mu.log.queue are updated to the physical size + // during WAL rotation. Use the larger of the two for the current WAL. All + // the previous WALs's fileSizes in d.mu.log.queue are already updated. + metrics.WAL.PhysicalSize = metrics.WAL.Size + if len(d.mu.log.queue) > 0 && metrics.WAL.PhysicalSize < d.mu.log.queue[len(d.mu.log.queue)-1].fileSize { + metrics.WAL.PhysicalSize = d.mu.log.queue[len(d.mu.log.queue)-1].fileSize + } + for i, n := 0, len(d.mu.log.queue)-1; i < n; i++ { + metrics.WAL.PhysicalSize += d.mu.log.queue[i].fileSize + } + + metrics.WAL.BytesIn = d.mu.log.bytesIn // protected by d.mu + for i, n := 0, len(d.mu.mem.queue)-1; i < n; i++ { + metrics.WAL.Size += d.mu.mem.queue[i].logSize + } + metrics.WAL.BytesWritten = metrics.Levels[0].BytesIn + metrics.WAL.Size + if p := d.mu.versions.picker; p != nil { + compactions := d.getInProgressCompactionInfoLocked(nil) + for level, score := range p.getScores(compactions) { + metrics.Levels[level].Score = score + } + } + metrics.Table.ZombieCount = int64(len(d.mu.versions.zombieTables)) + for _, size := range d.mu.versions.zombieTables { + metrics.Table.ZombieSize += size + } + metrics.private.optionsFileSize = d.optionsFileSize + + // TODO(jackson): Consider making these metrics optional. 
+ metrics.Keys.RangeKeySetsCount = countRangeKeySetFragments(vers) + metrics.Keys.TombstoneCount = countTombstones(vers) + + d.mu.versions.logLock() + metrics.private.manifestFileSize = uint64(d.mu.versions.manifest.Size()) + metrics.Table.BackingTableCount = uint64(len(d.mu.versions.backingState.fileBackingMap)) + metrics.Table.BackingTableSize = d.mu.versions.backingState.fileBackingSize + if invariants.Enabled { + var totalSize uint64 + for _, backing := range d.mu.versions.backingState.fileBackingMap { + totalSize += backing.Size + } + if totalSize != metrics.Table.BackingTableSize { + panic("pebble: invalid backing table size accounting") + } + } + d.mu.versions.logUnlock() + + metrics.LogWriter.FsyncLatency = d.mu.log.metrics.fsyncLatency + if err := metrics.LogWriter.Merge(&d.mu.log.metrics.LogWriterMetrics); err != nil { + d.opts.Logger.Errorf("metrics error: %s", err) + } + metrics.Flush.WriteThroughput = d.mu.compact.flushWriteThroughput + if d.mu.compact.flushing { + metrics.Flush.NumInProgress = 1 + } + for i := 0; i < numLevels; i++ { + metrics.Levels[i].Additional.ValueBlocksSize = valueBlocksSizeForLevel(vers, i) + } + + d.mu.Unlock() + + metrics.BlockCache = d.opts.Cache.Metrics() + metrics.TableCache, metrics.Filter = d.tableCache.metrics() + metrics.TableIters = int64(d.tableCache.iterCount()) + metrics.CategoryStats = d.tableCache.dbOpts.sstStatsCollector.GetStats() + + metrics.SecondaryCacheMetrics = d.objProvider.Metrics() + + metrics.Uptime = d.timeNow().Sub(d.openedAt) + + return metrics +} + +// sstablesOptions hold the optional parameters to retrieve TableInfo for all sstables. +type sstablesOptions struct { + // set to true will return the sstable properties in TableInfo + withProperties bool + + // if set, return sstables that overlap the key range (end-exclusive) + start []byte + end []byte + + withApproximateSpanBytes bool +} + +// SSTablesOption set optional parameter used by `DB.SSTables`. 
+type SSTablesOption func(*sstablesOptions) + +// WithProperties enable return sstable properties in each TableInfo. +// +// NOTE: if most of the sstable properties need to be read from disk, +// this options may make method `SSTables` quite slow. +func WithProperties() SSTablesOption { + return func(opt *sstablesOptions) { + opt.withProperties = true + } +} + +// WithKeyRangeFilter ensures returned sstables overlap start and end (end-exclusive) +// if start and end are both nil these properties have no effect. +func WithKeyRangeFilter(start, end []byte) SSTablesOption { + return func(opt *sstablesOptions) { + opt.end = end + opt.start = start + } +} + +// WithApproximateSpanBytes enables capturing the approximate number of bytes that +// overlap the provided key span for each sstable. +// NOTE: this option can only be used with WithKeyRangeFilter and WithProperties +// provided. +func WithApproximateSpanBytes() SSTablesOption { + return func(opt *sstablesOptions) { + opt.withApproximateSpanBytes = true + } +} + +// BackingType denotes the type of storage backing a given sstable. +type BackingType int + +const ( + // BackingTypeLocal denotes an sstable stored on local disk according to the + // objprovider. This file is completely owned by us. + BackingTypeLocal BackingType = iota + // BackingTypeShared denotes an sstable stored on shared storage, created + // by this Pebble instance and possibly shared by other Pebble instances. + // These types of files have lifecycle managed by Pebble. + BackingTypeShared + // BackingTypeSharedForeign denotes an sstable stored on shared storage, + // created by a Pebble instance other than this one. These types of files have + // lifecycle managed by Pebble. + BackingTypeSharedForeign + // BackingTypeExternal denotes an sstable stored on external storage, + // not owned by any Pebble instance and with no refcounting/cleanup methods + // or lifecycle management. 
An example of an external file is a file restored + // from a backup. + BackingTypeExternal +) + +// SSTableInfo export manifest.TableInfo with sstable.Properties alongside +// other file backing info. +type SSTableInfo struct { + manifest.TableInfo + // Virtual indicates whether the sstable is virtual. + Virtual bool + // BackingSSTNum is the file number associated with backing sstable which + // backs the sstable associated with this SSTableInfo. If Virtual is false, + // then BackingSSTNum == FileNum. + BackingSSTNum base.FileNum + // BackingType is the type of storage backing this sstable. + BackingType BackingType + // Locator is the remote.Locator backing this sstable, if the backing type is + // not BackingTypeLocal. + Locator remote.Locator + + // Properties is the sstable properties of this table. If Virtual is true, + // then the Properties are associated with the backing sst. + Properties *sstable.Properties +} + +// SSTables retrieves the current sstables. The returned slice is indexed by +// level and each level is indexed by the position of the sstable within the +// level. Note that this information may be out of date due to concurrent +// flushes and compactions. +func (d *DB) SSTables(opts ...SSTablesOption) ([][]SSTableInfo, error) { + opt := &sstablesOptions{} + for _, fn := range opts { + fn(opt) + } + + if opt.withApproximateSpanBytes && !opt.withProperties { + return nil, errors.Errorf("Cannot use WithApproximateSpanBytes without WithProperties option.") + } + if opt.withApproximateSpanBytes && (opt.start == nil || opt.end == nil) { + return nil, errors.Errorf("Cannot use WithApproximateSpanBytes without WithKeyRangeFilter option.") + } + + // Grab and reference the current readState. + readState := d.loadReadState() + defer readState.unref() + + // TODO(peter): This is somewhat expensive, especially on a large + // database. It might be worthwhile to unify TableInfo and FileMetadata and + // then we could simply return current.Files. 
Note that RocksDB is doing + // something similar to the current code, so perhaps it isn't too bad. + srcLevels := readState.current.Levels + var totalTables int + for i := range srcLevels { + totalTables += srcLevels[i].Len() + } + + destTables := make([]SSTableInfo, totalTables) + destLevels := make([][]SSTableInfo, len(srcLevels)) + for i := range destLevels { + iter := srcLevels[i].Iter() + j := 0 + for m := iter.First(); m != nil; m = iter.Next() { + if opt.start != nil && opt.end != nil && !m.Overlaps(d.opts.Comparer.Compare, opt.start, opt.end, true /* exclusive end */) { + continue + } + destTables[j] = SSTableInfo{TableInfo: m.TableInfo()} + if opt.withProperties { + p, err := d.tableCache.getTableProperties( + m, + ) + if err != nil { + return nil, err + } + destTables[j].Properties = p + } + destTables[j].Virtual = m.Virtual + destTables[j].BackingSSTNum = m.FileBacking.DiskFileNum.FileNum() + objMeta, err := d.objProvider.Lookup(fileTypeTable, m.FileBacking.DiskFileNum) + if err != nil { + return nil, err + } + if objMeta.IsRemote() { + if objMeta.IsShared() { + if d.objProvider.IsSharedForeign(objMeta) { + destTables[j].BackingType = BackingTypeSharedForeign + } else { + destTables[j].BackingType = BackingTypeShared + } + } else { + destTables[j].BackingType = BackingTypeExternal + } + destTables[j].Locator = objMeta.Remote.Locator + } else { + destTables[j].BackingType = BackingTypeLocal + } + + if opt.withApproximateSpanBytes { + var spanBytes uint64 + if m.ContainedWithinSpan(d.opts.Comparer.Compare, opt.start, opt.end) { + spanBytes = m.Size + } else { + size, err := d.tableCache.estimateSize(m, opt.start, opt.end) + if err != nil { + return nil, err + } + spanBytes = size + } + propertiesCopy := *destTables[j].Properties + + // Deep copy user properties so approximate span bytes can be added. 
+ propertiesCopy.UserProperties = make(map[string]string, len(destTables[j].Properties.UserProperties)+1)
+ for k, v := range destTables[j].Properties.UserProperties {
+ propertiesCopy.UserProperties[k] = v
+ }
+ propertiesCopy.UserProperties["approximate-span-bytes"] = strconv.FormatUint(spanBytes, 10)
+ destTables[j].Properties = &propertiesCopy
+ }
+ j++
+ }
+ destLevels[i] = destTables[:j]
+ destTables = destTables[j:]
+ }
+
+ return destLevels, nil
+}
+
+// EstimateDiskUsage returns the estimated filesystem space used in bytes for
+// storing the range `[start, end]`. The estimation is computed as follows:
+//
+// - For sstables fully contained in the range the whole file size is included.
+// - For sstables partially contained in the range the overlapping data block sizes
+// are included. Even if a data block partially overlaps, or we cannot determine
+// overlap due to abbreviated index keys, the full data block size is included in
+// the estimation. Note that unlike fully contained sstables, none of the
+// meta-block space is counted for partially overlapped files.
+// - For virtual sstables, we use the overlap between start, end and the virtual
+// sstable bounds to determine disk usage.
+// - There may also exist WAL entries for unflushed keys in this range. This
+// estimation currently excludes space used for the range in the WAL.
+func (d *DB) EstimateDiskUsage(start, end []byte) (uint64, error) {
+ bytes, _, _, err := d.EstimateDiskUsageByBackingType(start, end)
+ return bytes, err
+}
+
+// EstimateDiskUsageByBackingType is like EstimateDiskUsage but additionally
+// returns the subsets of that size in remote and external files.
+func (d *DB) EstimateDiskUsageByBackingType( + start, end []byte, +) (totalSize, remoteSize, externalSize uint64, _ error) { + if err := d.closed.Load(); err != nil { + panic(err) + } + if d.opts.Comparer.Compare(start, end) > 0 { + return 0, 0, 0, errors.New("invalid key-range specified (start > end)") + } + + // Grab and reference the current readState. This prevents the underlying + // files in the associated version from being deleted if there is a concurrent + // compaction. + readState := d.loadReadState() + defer readState.unref() + + for level, files := range readState.current.Levels { + iter := files.Iter() + if level > 0 { + // We can only use `Overlaps` to restrict `files` at L1+ since at L0 it + // expands the range iteratively until it has found a set of files that + // do not overlap any other L0 files outside that set. + overlaps := readState.current.Overlaps(level, d.opts.Comparer.Compare, start, end, false /* exclusiveEnd */) + iter = overlaps.Iter() + } + for file := iter.First(); file != nil; file = iter.Next() { + if d.opts.Comparer.Compare(start, file.Smallest.UserKey) <= 0 && + d.opts.Comparer.Compare(file.Largest.UserKey, end) <= 0 { + // The range fully contains the file, so skip looking it up in + // table cache/looking at its indexes, and add the full file size. 
+ meta, err := d.objProvider.Lookup(fileTypeTable, file.FileBacking.DiskFileNum) + if err != nil { + return 0, 0, 0, err + } + if meta.IsRemote() { + remoteSize += file.Size + if meta.Remote.CleanupMethod == objstorage.SharedNoCleanup { + externalSize += file.Size + } + } + totalSize += file.Size + } else if d.opts.Comparer.Compare(file.Smallest.UserKey, end) <= 0 && + d.opts.Comparer.Compare(start, file.Largest.UserKey) <= 0 { + var size uint64 + var err error + if file.Virtual { + err = d.tableCache.withVirtualReader( + file.VirtualMeta(), + func(r sstable.VirtualReader) (err error) { + size, err = r.EstimateDiskUsage(start, end) + return err + }, + ) + } else { + err = d.tableCache.withReader( + file.PhysicalMeta(), + func(r *sstable.Reader) (err error) { + size, err = r.EstimateDiskUsage(start, end) + return err + }, + ) + } + if err != nil { + return 0, 0, 0, err + } + meta, err := d.objProvider.Lookup(fileTypeTable, file.FileBacking.DiskFileNum) + if err != nil { + return 0, 0, 0, err + } + if meta.IsRemote() { + remoteSize += size + if meta.Remote.CleanupMethod == objstorage.SharedNoCleanup { + externalSize += size + } + } + totalSize += size + } + } + } + return totalSize, remoteSize, externalSize, nil +} + +func (d *DB) walPreallocateSize() int { + // Set the WAL preallocate size to 110% of the memtable size. Note that there + // is a bit of apples and oranges in units here as the memtabls size + // corresponds to the memory usage of the memtable while the WAL size is the + // size of the batches (plus overhead) stored in the WAL. + // + // TODO(peter): 110% of the memtable size is quite hefty for a block + // size. This logic is taken from GetWalPreallocateBlockSize in + // RocksDB. Could a smaller preallocation block size be used? 
+ size := d.opts.MemTableSize + size = (size / 10) + size + return int(size) +} + +func (d *DB) newMemTable(logNum base.DiskFileNum, logSeqNum uint64) (*memTable, *flushableEntry) { + size := d.mu.mem.nextSize + if d.mu.mem.nextSize < d.opts.MemTableSize { + d.mu.mem.nextSize *= 2 + if d.mu.mem.nextSize > d.opts.MemTableSize { + d.mu.mem.nextSize = d.opts.MemTableSize + } + } + + memtblOpts := memTableOptions{ + Options: d.opts, + logSeqNum: logSeqNum, + } + + // Before attempting to allocate a new memtable, check if there's one + // available for recycling in memTableRecycle. Large contiguous allocations + // can be costly as fragmentation makes it more difficult to find a large + // contiguous free space. We've observed 64MB allocations taking 10ms+. + // + // To reduce these costly allocations, up to 1 obsolete memtable is stashed + // in `d.memTableRecycle` to allow a future memtable rotation to reuse + // existing memory. + var mem *memTable + mem = d.memTableRecycle.Swap(nil) + if mem != nil && uint64(len(mem.arenaBuf)) != size { + d.freeMemTable(mem) + mem = nil + } + if mem != nil { + // Carry through the existing buffer and memory reservation. + memtblOpts.arenaBuf = mem.arenaBuf + memtblOpts.releaseAccountingReservation = mem.releaseAccountingReservation + } else { + mem = new(memTable) + memtblOpts.arenaBuf = manual.New(int(size)) + memtblOpts.releaseAccountingReservation = d.opts.Cache.Reserve(int(size)) + d.memTableCount.Add(1) + d.memTableReserved.Add(int64(size)) + + // Note: this is a no-op if invariants are disabled or race is enabled. + invariants.SetFinalizer(mem, checkMemTable) + } + mem.init(memtblOpts) + + entry := d.newFlushableEntry(mem, logNum, logSeqNum) + entry.releaseMemAccounting = func() { + // If the user leaks iterators, we may be releasing the memtable after + // the DB is already closed. In this case, we want to just release the + // memory because DB.Close won't come along to free it for us. 
+ if err := d.closed.Load(); err != nil { + d.freeMemTable(mem) + return + } + + // The next memtable allocation might be able to reuse this memtable. + // Stash it on d.memTableRecycle. + if unusedMem := d.memTableRecycle.Swap(mem); unusedMem != nil { + // There was already a memtable waiting to be recycled. We're now + // responsible for freeing it. + d.freeMemTable(unusedMem) + } + } + return mem, entry +} + +func (d *DB) freeMemTable(m *memTable) { + d.memTableCount.Add(-1) + d.memTableReserved.Add(-int64(len(m.arenaBuf))) + m.free() +} + +func (d *DB) newFlushableEntry( + f flushable, logNum base.DiskFileNum, logSeqNum uint64, +) *flushableEntry { + fe := &flushableEntry{ + flushable: f, + flushed: make(chan struct{}), + logNum: logNum, + logSeqNum: logSeqNum, + deleteFn: d.mu.versions.addObsolete, + deleteFnLocked: d.mu.versions.addObsoleteLocked, + } + fe.readerRefs.Store(1) + return fe +} + +// makeRoomForWrite ensures that the memtable has room to hold the contents of +// Batch. It reserves the space in the memtable and adds a reference to the +// memtable. The caller must later ensure that the memtable is unreferenced. If +// the memtable is full, or a nil Batch is provided, the current memtable is +// rotated (marked as immutable) and a new mutable memtable is allocated. This +// memtable rotation also causes a log rotation. +// +// Both DB.mu and commitPipeline.mu must be held by the caller. Note that DB.mu +// may be released and reacquired. 
+func (d *DB) makeRoomForWrite(b *Batch) error { + if b != nil && b.ingestedSSTBatch { + panic("pebble: invalid function call") + } + + force := b == nil || b.flushable != nil + stalled := false + for { + if b != nil && b.flushable == nil { + err := d.mu.mem.mutable.prepare(b) + if err != arenaskl.ErrArenaFull { + if stalled { + d.opts.EventListener.WriteStallEnd() + } + return err + } + } else if !force { + if stalled { + d.opts.EventListener.WriteStallEnd() + } + return nil + } + // force || err == ErrArenaFull, so we need to rotate the current memtable. + { + var size uint64 + for i := range d.mu.mem.queue { + size += d.mu.mem.queue[i].totalBytes() + } + if size >= uint64(d.opts.MemTableStopWritesThreshold)*d.opts.MemTableSize { + // We have filled up the current memtable, but already queued memtables + // are still flushing, so we wait. + if !stalled { + stalled = true + d.opts.EventListener.WriteStallBegin(WriteStallBeginInfo{ + Reason: "memtable count limit reached", + }) + } + now := time.Now() + d.mu.compact.cond.Wait() + if b != nil { + b.commitStats.MemTableWriteStallDuration += time.Since(now) + } + continue + } + } + l0ReadAmp := d.mu.versions.currentVersion().L0Sublevels.ReadAmplification() + if l0ReadAmp >= d.opts.L0StopWritesThreshold { + // There are too many level-0 files, so we wait. 
+ if !stalled { + stalled = true + d.opts.EventListener.WriteStallBegin(WriteStallBeginInfo{ + Reason: "L0 file count limit exceeded", + }) + } + now := time.Now() + d.mu.compact.cond.Wait() + if b != nil { + b.commitStats.L0ReadAmpWriteStallDuration += time.Since(now) + } + continue + } + + var newLogNum base.DiskFileNum + var prevLogSize uint64 + if !d.opts.DisableWAL { + now := time.Now() + newLogNum, prevLogSize = d.recycleWAL() + if b != nil { + b.commitStats.WALRotationDuration += time.Since(now) + } + } + + immMem := d.mu.mem.mutable + imm := d.mu.mem.queue[len(d.mu.mem.queue)-1] + imm.logSize = prevLogSize + imm.flushForced = imm.flushForced || (b == nil) + + // If we are manually flushing and we used less than half of the bytes in + // the memtable, don't increase the size for the next memtable. This + // reduces memtable memory pressure when an application is frequently + // manually flushing. + if (b == nil) && uint64(immMem.availBytes()) > immMem.totalBytes()/2 { + d.mu.mem.nextSize = immMem.totalBytes() + } + + if b != nil && b.flushable != nil { + // The batch is too large to fit in the memtable so add it directly to + // the immutable queue. The flushable batch is associated with the same + // log as the immutable memtable, but logically occurs after it in + // seqnum space. We ensure while flushing that the flushable batch + // is flushed along with the previous memtable in the flushable + // queue. See the top level comment in DB.flush1 to learn how this + // is ensured. + // + // See DB.commitWrite for the special handling of log writes for large + // batches. In particular, the large batch has already written to + // imm.logNum. + entry := d.newFlushableEntry(b.flushable, imm.logNum, b.SeqNum()) + // The large batch is by definition large. Reserve space from the cache + // for it until it is flushed. 
+ entry.releaseMemAccounting = d.opts.Cache.Reserve(int(b.flushable.totalBytes())) + d.mu.mem.queue = append(d.mu.mem.queue, entry) + } + + var logSeqNum uint64 + if b != nil { + logSeqNum = b.SeqNum() + if b.flushable != nil { + logSeqNum += uint64(b.Count()) + } + } else { + logSeqNum = d.mu.versions.logSeqNum.Load() + } + d.rotateMemtable(newLogNum, logSeqNum, immMem) + force = false + } +} + +// Both DB.mu and commitPipeline.mu must be held by the caller. +func (d *DB) rotateMemtable(newLogNum base.DiskFileNum, logSeqNum uint64, prev *memTable) { + // Create a new memtable, scheduling the previous one for flushing. We do + // this even if the previous memtable was empty because the DB.Flush + // mechanism is dependent on being able to wait for the empty memtable to + // flush. We can't just mark the empty memtable as flushed here because we + // also have to wait for all previous immutable tables to + // flush. Additionally, the memtable is tied to particular WAL file and we + // want to go through the flush path in order to recycle that WAL file. + // + // NB: newLogNum corresponds to the WAL that contains mutations that are + // present in the new memtable. When immutable memtables are flushed to + // disk, a VersionEdit will be created telling the manifest the minimum + // unflushed log number (which will be the next one in d.mu.mem.mutable + // that was not flushed). + // + // NB: prev should be the current mutable memtable. + var entry *flushableEntry + d.mu.mem.mutable, entry = d.newMemTable(newLogNum, logSeqNum) + d.mu.mem.queue = append(d.mu.mem.queue, entry) + d.updateReadStateLocked(nil) + if prev.writerUnref() { + d.maybeScheduleFlush() + } +} + +// Both DB.mu and commitPipeline.mu must be held by the caller. Note that DB.mu +// may be released and reacquired. 
+func (d *DB) recycleWAL() (newLogNum base.DiskFileNum, prevLogSize uint64) { + if d.opts.DisableWAL { + panic("pebble: invalid function call") + } + + jobID := d.mu.nextJobID + d.mu.nextJobID++ + newLogNum = d.mu.versions.getNextDiskFileNum() + + prevLogSize = uint64(d.mu.log.Size()) + + // The previous log may have grown past its original physical + // size. Update its file size in the queue so we have a proper + // accounting of its file size. + if d.mu.log.queue[len(d.mu.log.queue)-1].fileSize < prevLogSize { + d.mu.log.queue[len(d.mu.log.queue)-1].fileSize = prevLogSize + } + d.mu.Unlock() + + var err error + // Close the previous log first. This writes an EOF trailer + // signifying the end of the file and syncs it to disk. We must + // close the previous log before linking the new log file, + // otherwise a crash could leave both logs with unclean tails, and + // Open will treat the previous log as corrupt. + err = d.mu.log.LogWriter.Close() + metrics := d.mu.log.LogWriter.Metrics() + d.mu.Lock() + if err := d.mu.log.metrics.Merge(metrics); err != nil { + d.opts.Logger.Errorf("metrics error: %s", err) + } + d.mu.Unlock() + + newLogName := base.MakeFilepath(d.opts.FS, d.walDirname, fileTypeLog, newLogNum) + + // Try to use a recycled log file. Recycling log files is an important + // performance optimization as it is faster to sync a file that has + // already been written, than one which is being written for the first + // time. This is due to the need to sync file metadata when a file is + // being written for the first time. Note this is true even if file + // preallocation is performed (e.g. fallocate). 
+ var recycleLog fileInfo + var recycleOK bool + var newLogFile vfs.File + if err == nil { + recycleLog, recycleOK = d.logRecycler.peek() + if recycleOK { + recycleLogName := base.MakeFilepath(d.opts.FS, d.walDirname, fileTypeLog, recycleLog.fileNum) + newLogFile, err = d.opts.FS.ReuseForWrite(recycleLogName, newLogName) + base.MustExist(d.opts.FS, newLogName, d.opts.Logger, err) + } else { + newLogFile, err = d.opts.FS.Create(newLogName) + base.MustExist(d.opts.FS, newLogName, d.opts.Logger, err) + } + } + + var newLogSize uint64 + if err == nil && recycleOK { + // Figure out the recycled WAL size. This Stat is necessary + // because ReuseForWrite's contract allows for removing the + // old file and creating a new one. We don't know whether the + // WAL was actually recycled. + // TODO(jackson): Adding a boolean to the ReuseForWrite return + // value indicating whether or not the file was actually + // reused would allow us to skip the stat and use + // recycleLog.fileSize. + var finfo os.FileInfo + finfo, err = newLogFile.Stat() + if err == nil { + newLogSize = uint64(finfo.Size()) + } + } + + if err == nil { + // TODO(peter): RocksDB delays sync of the parent directory until the + // first time the log is synced. Is that worthwhile? 
+ err = d.walDir.Sync() + } + + if err != nil && newLogFile != nil { + newLogFile.Close() + } else if err == nil { + newLogFile = vfs.NewSyncingFile(newLogFile, vfs.SyncingFileOptions{ + NoSyncOnClose: d.opts.NoSyncOnClose, + BytesPerSync: d.opts.WALBytesPerSync, + PreallocateSize: d.walPreallocateSize(), + }) + } + + if recycleOK { + err = firstError(err, d.logRecycler.pop(recycleLog.fileNum.FileNum())) + } + + d.opts.EventListener.WALCreated(WALCreateInfo{ + JobID: jobID, + Path: newLogName, + FileNum: newLogNum, + RecycledFileNum: recycleLog.fileNum.FileNum(), + Err: err, + }) + + d.mu.Lock() + + d.mu.versions.metrics.WAL.Files++ + + if err != nil { + // TODO(peter): avoid chewing through file numbers in a tight loop if there + // is an error here. + // + // What to do here? Stumbling on doesn't seem worthwhile. If we failed to + // close the previous log it is possible we lost a write. + panic(err) + } + + d.mu.log.queue = append(d.mu.log.queue, fileInfo{fileNum: newLogNum, fileSize: newLogSize}) + d.mu.log.LogWriter = record.NewLogWriter(newLogFile, newLogNum, record.LogWriterConfig{ + WALFsyncLatency: d.mu.log.metrics.fsyncLatency, + WALMinSyncInterval: d.opts.WALMinSyncInterval, + QueueSemChan: d.commit.logSyncQSem, + }) + if d.mu.log.registerLogWriterForTesting != nil { + d.mu.log.registerLogWriterForTesting(d.mu.log.LogWriter) + } + + return +} + +func (d *DB) getEarliestUnflushedSeqNumLocked() uint64 { + seqNum := InternalKeySeqNumMax + for i := range d.mu.mem.queue { + logSeqNum := d.mu.mem.queue[i].logSeqNum + if seqNum > logSeqNum { + seqNum = logSeqNum + } + } + return seqNum +} + +func (d *DB) getInProgressCompactionInfoLocked(finishing *compaction) (rv []compactionInfo) { + for c := range d.mu.compact.inProgress { + if len(c.flushing) == 0 && (finishing == nil || c != finishing) { + info := compactionInfo{ + versionEditApplied: c.versionEditApplied, + inputs: c.inputs, + smallest: c.smallest, + largest: c.largest, + outputLevel: -1, + } + if 
c.outputLevel != nil { + info.outputLevel = c.outputLevel.level + } + rv = append(rv, info) + } + } + return +} + +func inProgressL0Compactions(inProgress []compactionInfo) []manifest.L0Compaction { + var compactions []manifest.L0Compaction + for _, info := range inProgress { + // Skip in-progress compactions that have already committed; the L0 + // sublevels initialization code requires the set of in-progress + // compactions to be consistent with the current version. Compactions + // with versionEditApplied=true are already applied to the current + // version and but are performing cleanup without the database mutex. + if info.versionEditApplied { + continue + } + l0 := false + for _, cl := range info.inputs { + l0 = l0 || cl.level == 0 + } + if !l0 { + continue + } + compactions = append(compactions, manifest.L0Compaction{ + Smallest: info.smallest, + Largest: info.largest, + IsIntraL0: info.outputLevel == 0, + }) + } + return compactions +} + +// firstError returns the first non-nil error of err0 and err1, or nil if both +// are nil. +func firstError(err0, err1 error) error { + if err0 != nil { + return err0 + } + return err1 +} + +// SetCreatorID sets the CreatorID which is needed in order to use shared objects. +// Remote object usage is disabled until this method is called the first time. +// Once set, the Creator ID is persisted and cannot change. +// +// Does nothing if SharedStorage was not set in the options when the DB was +// opened or if the DB is in read-only mode. +func (d *DB) SetCreatorID(creatorID uint64) error { + if d.opts.Experimental.RemoteStorage == nil || d.opts.ReadOnly { + return nil + } + return d.objProvider.SetCreatorID(objstorage.CreatorID(creatorID)) +} + +// KeyStatistics keeps track of the number of keys that have been pinned by a +// snapshot as well as counts of the different key kinds in the lsm. 
+// +// One way of using the accumulated stats, when we only have sets and dels, +// and say the counts are represented as del_count, set_count, +// del_latest_count, set_latest_count, snapshot_pinned_count. +// +// - del_latest_count + set_latest_count is the set of unique user keys +// (unique). +// +// - set_latest_count is the set of live unique user keys (live_unique). +// +// - Garbage is del_count + set_count - live_unique. +// +// - If everything were in the LSM, del_count+set_count-snapshot_pinned_count +// would also be the set of unique user keys (note that +// snapshot_pinned_count is counting something different -- see comment below). +// But snapshot_pinned_count only counts keys in the LSM so the excess here +// must be keys in memtables. +type KeyStatistics struct { + // TODO(sumeer): the SnapshotPinned* are incorrect in that these older + // versions can be in a different level. Either fix the accounting or + // rename these fields. + + // SnapshotPinnedKeys represents obsolete keys that cannot be elided during + // a compaction, because they are required by an open snapshot. + SnapshotPinnedKeys int + // SnapshotPinnedKeysBytes is the total number of bytes of all snapshot + // pinned keys. + SnapshotPinnedKeysBytes uint64 + // KindsCount is the count for each kind of key. It includes point keys, + // range deletes and range keys. + KindsCount [InternalKeyKindMax + 1]int + // LatestKindsCount is the count for each kind of key when it is the latest + // kind for a user key. It is only populated for point keys. + LatestKindsCount [InternalKeyKindMax + 1]int +} + +// LSMKeyStatistics is used by DB.ScanStatistics. +type LSMKeyStatistics struct { + Accumulated KeyStatistics + // Levels contains statistics only for point keys. Range deletions and range keys will + // appear in Accumulated but not Levels. 
+ Levels [numLevels]KeyStatistics + // BytesRead represents the logical, pre-compression size of keys and values read + BytesRead uint64 +} + +// ScanStatisticsOptions is used by DB.ScanStatistics. +type ScanStatisticsOptions struct { + // LimitBytesPerSecond indicates the number of bytes that are able to be read + // per second using ScanInternal. + // A value of 0 indicates that there is no limit set. + LimitBytesPerSecond int64 +} + +// ScanStatistics returns the count of different key kinds within the lsm for a +// key span [lower, upper) as well as the number of snapshot keys. +func (d *DB) ScanStatistics( + ctx context.Context, lower, upper []byte, opts ScanStatisticsOptions, +) (LSMKeyStatistics, error) { + stats := LSMKeyStatistics{} + var prevKey InternalKey + var rateLimitFunc func(key *InternalKey, val LazyValue) error + tb := tokenbucket.TokenBucket{} + + if opts.LimitBytesPerSecond != 0 { + // Each "token" roughly corresponds to a byte that was read. + tb.Init(tokenbucket.TokensPerSecond(opts.LimitBytesPerSecond), tokenbucket.Tokens(1024)) + rateLimitFunc = func(key *InternalKey, val LazyValue) error { + return tb.WaitCtx(ctx, tokenbucket.Tokens(key.Size()+val.Len())) + } + } + + scanInternalOpts := &scanInternalOptions{ + visitPointKey: func(key *InternalKey, value LazyValue, iterInfo IteratorLevel) error { + // If the previous key is equal to the current point key, the current key was + // pinned by a snapshot. 
+ size := uint64(key.Size()) + kind := key.Kind() + sameKey := d.equal(prevKey.UserKey, key.UserKey) + if iterInfo.Kind == IteratorLevelLSM && sameKey { + stats.Levels[iterInfo.Level].SnapshotPinnedKeys++ + stats.Levels[iterInfo.Level].SnapshotPinnedKeysBytes += size + stats.Accumulated.SnapshotPinnedKeys++ + stats.Accumulated.SnapshotPinnedKeysBytes += size + } + if iterInfo.Kind == IteratorLevelLSM { + stats.Levels[iterInfo.Level].KindsCount[kind]++ + } + if !sameKey { + if iterInfo.Kind == IteratorLevelLSM { + stats.Levels[iterInfo.Level].LatestKindsCount[kind]++ + } + stats.Accumulated.LatestKindsCount[kind]++ + } + + stats.Accumulated.KindsCount[kind]++ + prevKey.CopyFrom(*key) + stats.BytesRead += uint64(key.Size() + value.Len()) + return nil + }, + visitRangeDel: func(start, end []byte, seqNum uint64) error { + stats.Accumulated.KindsCount[InternalKeyKindRangeDelete]++ + stats.BytesRead += uint64(len(start) + len(end)) + return nil + }, + visitRangeKey: func(start, end []byte, keys []rangekey.Key) error { + stats.BytesRead += uint64(len(start) + len(end)) + for _, key := range keys { + stats.Accumulated.KindsCount[key.Kind()]++ + stats.BytesRead += uint64(len(key.Value) + len(key.Suffix)) + } + return nil + }, + includeObsoleteKeys: true, + IterOptions: IterOptions{ + KeyTypes: IterKeyTypePointsAndRanges, + LowerBound: lower, + UpperBound: upper, + }, + rateLimitFunc: rateLimitFunc, + } + iter, err := d.newInternalIter(ctx, snapshotIterOpts{}, scanInternalOpts) + if err != nil { + return LSMKeyStatistics{}, err + } + defer iter.close() + + err = scanInternalImpl(ctx, lower, upper, iter, scanInternalOpts) + + if err != nil { + return LSMKeyStatistics{}, err + } + + return stats, nil +} + +// ObjProvider returns the objstorage.Provider for this database. Meant to be +// used for internal purposes only. 
+func (d *DB) ObjProvider() objstorage.Provider { + return d.objProvider +} + +func (d *DB) checkVirtualBounds(m *fileMetadata) { + if !invariants.Enabled { + return + } + + objMeta, err := d.objProvider.Lookup(fileTypeTable, m.FileBacking.DiskFileNum) + if err != nil { + panic(err) + } + if objMeta.IsExternal() { + // Nothing to do; bounds are expected to be loose. + return + } + + if m.HasPointKeys { + pointIter, rangeDelIter, err := d.newIters(context.TODO(), m, nil, internalIterOpts{}) + if err != nil { + panic(errors.Wrap(err, "pebble: error creating point iterator")) + } + + defer pointIter.Close() + if rangeDelIter != nil { + defer rangeDelIter.Close() + } + + pointKey, _ := pointIter.First() + var rangeDel *keyspan.Span + if rangeDelIter != nil { + rangeDel = rangeDelIter.First() + } + + // Check that the lower bound is tight. + if (rangeDel == nil || d.cmp(rangeDel.SmallestKey().UserKey, m.SmallestPointKey.UserKey) != 0) && + (pointKey == nil || d.cmp(pointKey.UserKey, m.SmallestPointKey.UserKey) != 0) { + panic(errors.Newf("pebble: virtual sstable %s lower point key bound is not tight", m.FileNum)) + } + + pointKey, _ = pointIter.Last() + rangeDel = nil + if rangeDelIter != nil { + rangeDel = rangeDelIter.Last() + } + + // Check that the upper bound is tight. + if (rangeDel == nil || d.cmp(rangeDel.LargestKey().UserKey, m.LargestPointKey.UserKey) != 0) && + (pointKey == nil || d.cmp(pointKey.UserKey, m.LargestPointKey.UserKey) != 0) { + panic(errors.Newf("pebble: virtual sstable %s upper point key bound is not tight", m.FileNum)) + } + + // Check that iterator keys are within bounds. 
+ for key, _ := pointIter.First(); key != nil; key, _ = pointIter.Next() { + if d.cmp(key.UserKey, m.SmallestPointKey.UserKey) < 0 || d.cmp(key.UserKey, m.LargestPointKey.UserKey) > 0 { + panic(errors.Newf("pebble: virtual sstable %s point key %s is not within bounds", m.FileNum, key.UserKey)) + } + } + + if rangeDelIter != nil { + for key := rangeDelIter.First(); key != nil; key = rangeDelIter.Next() { + if d.cmp(key.SmallestKey().UserKey, m.SmallestPointKey.UserKey) < 0 { + panic(errors.Newf("pebble: virtual sstable %s point key %s is not within bounds", m.FileNum, key.SmallestKey().UserKey)) + } + + if d.cmp(key.LargestKey().UserKey, m.LargestPointKey.UserKey) > 0 { + panic(errors.Newf("pebble: virtual sstable %s point key %s is not within bounds", m.FileNum, key.LargestKey().UserKey)) + } + } + } + } + + if !m.HasRangeKeys { + return + } + + rangeKeyIter, err := d.tableNewRangeKeyIter(m, keyspan.SpanIterOptions{}) + defer rangeKeyIter.Close() + + if err != nil { + panic(errors.Wrap(err, "pebble: error creating range key iterator")) + } + + // Check that the lower bound is tight. + if d.cmp(rangeKeyIter.First().SmallestKey().UserKey, m.SmallestRangeKey.UserKey) != 0 { + panic(errors.Newf("pebble: virtual sstable %s lower range key bound is not tight", m.FileNum)) + } + + // Check that upper bound is tight. 
+ if d.cmp(rangeKeyIter.Last().LargestKey().UserKey, m.LargestRangeKey.UserKey) != 0 { + panic(errors.Newf("pebble: virtual sstable %s upper range key bound is not tight", m.FileNum)) + } + + for key := rangeKeyIter.First(); key != nil; key = rangeKeyIter.Next() { + if d.cmp(key.SmallestKey().UserKey, m.SmallestRangeKey.UserKey) < 0 { + panic(errors.Newf("pebble: virtual sstable %s point key %s is not within bounds", m.FileNum, key.SmallestKey().UserKey)) + } + if d.cmp(key.LargestKey().UserKey, m.LargestRangeKey.UserKey) > 0 { + panic(errors.Newf("pebble: virtual sstable %s point key %s is not within bounds", m.FileNum, key.LargestKey().UserKey)) + } + } +} diff --git a/pebble/db_test.go b/pebble/db_test.go new file mode 100644 index 0000000..631753d --- /dev/null +++ b/pebble/db_test.go @@ -0,0 +1,1969 @@ +// Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import ( + "bytes" + "context" + "fmt" + "io" + "path/filepath" + "slices" + "strconv" + "strings" + "sync" + "testing" + "time" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/cache" + "github.com/cockroachdb/pebble/internal/invariants" + "github.com/cockroachdb/pebble/objstorage/objstorageprovider" + "github.com/cockroachdb/pebble/sstable" + "github.com/cockroachdb/pebble/vfs" + "github.com/stretchr/testify/require" + "golang.org/x/exp/rand" +) + +// try repeatedly calls f, sleeping between calls with exponential back-off, +// until f returns a nil error or the total sleep time is greater than or equal +// to maxTotalSleep. It always calls f at least once. 
+func try(initialSleep, maxTotalSleep time.Duration, f func() error) error { + totalSleep := time.Duration(0) + for d := initialSleep; ; d *= 2 { + time.Sleep(d) + totalSleep += d + if err := f(); err == nil || totalSleep >= maxTotalSleep { + return err + } + } +} + +func TestTry(t *testing.T) { + c := make(chan struct{}) + go func() { + time.Sleep(1 * time.Millisecond) + close(c) + }() + + attemptsMu := sync.Mutex{} + attempts := 0 + + err := try(100*time.Microsecond, 20*time.Second, func() error { + attemptsMu.Lock() + attempts++ + attemptsMu.Unlock() + + select { + default: + return errors.New("timed out") + case <-c: + return nil + } + }) + require.NoError(t, err) + + attemptsMu.Lock() + a := attempts + attemptsMu.Unlock() + + if a == 0 { + t.Fatalf("attempts: got 0, want > 0") + } +} + +func TestBasicReads(t *testing.T) { + testCases := []struct { + dirname string + wantMap map[string]string + }{ + { + "db-stage-1", + map[string]string{ + "aaa": "", + "bar": "", + "baz": "", + "foo": "", + "quux": "", + "zzz": "", + }, + }, + { + "db-stage-2", + map[string]string{ + "aaa": "", + "bar": "", + "baz": "three", + "foo": "four", + "quux": "", + "zzz": "", + }, + }, + { + "db-stage-3", + map[string]string{ + "aaa": "", + "bar": "", + "baz": "three", + "foo": "four", + "quux": "", + "zzz": "", + }, + }, + { + "db-stage-4", + map[string]string{ + "aaa": "", + "bar": "", + "baz": "", + "foo": "five", + "quux": "six", + "zzz": "", + }, + }, + } + for _, tc := range testCases { + t.Run(tc.dirname, func(t *testing.T) { + fs := vfs.NewMem() + _, err := vfs.Clone(vfs.Default, fs, filepath.Join("testdata", tc.dirname), tc.dirname) + if err != nil { + t.Fatalf("%s: cloneFileSystem failed: %v", tc.dirname, err) + } + d, err := Open(tc.dirname, testingRandomized(t, &Options{ + FS: fs, + })) + if err != nil { + t.Fatalf("%s: Open failed: %v", tc.dirname, err) + } + for key, want := range tc.wantMap { + got, closer, err := d.Get([]byte(key)) + if err != nil && err != ErrNotFound 
{ + t.Fatalf("%s: Get(%q) failed: %v", tc.dirname, key, err) + } + if string(got) != string(want) { + t.Fatalf("%s: Get(%q): got %q, want %q", tc.dirname, key, got, want) + } + if closer != nil { + closer.Close() + } + } + err = d.Close() + if err != nil { + t.Fatalf("%s: Close failed: %v", tc.dirname, err) + } + }) + } +} + +func TestBasicWrites(t *testing.T) { + d, err := Open("", testingRandomized(t, &Options{ + FS: vfs.NewMem(), + })) + require.NoError(t, err) + + names := []string{ + "Alatar", + "Gandalf", + "Pallando", + "Radagast", + "Saruman", + "Joe", + } + wantMap := map[string]string{} + + inBatch, batch, pending := false, &Batch{}, [][]string(nil) + set0 := func(k, v string) error { + return d.Set([]byte(k), []byte(v), nil) + } + del0 := func(k string) error { + return d.Delete([]byte(k), nil) + } + set1 := func(k, v string) error { + batch.Set([]byte(k), []byte(v), nil) + return nil + } + del1 := func(k string) error { + batch.Delete([]byte(k), nil) + return nil + } + set, del := set0, del0 + + testCases := []string{ + "set Gandalf Grey", + "set Saruman White", + "set Radagast Brown", + "delete Saruman", + "set Gandalf White", + "batch", + " set Alatar AliceBlue", + "apply", + "delete Pallando", + "set Alatar AntiqueWhite", + "set Pallando PapayaWhip", + "batch", + "apply", + "set Pallando PaleVioletRed", + "batch", + " delete Alatar", + " set Gandalf GhostWhite", + " set Saruman Seashell", + " delete Saruman", + " set Saruman SeaGreen", + " set Radagast RosyBrown", + " delete Pallando", + "apply", + "delete Radagast", + "delete Radagast", + "delete Radagast", + "set Gandalf Goldenrod", + "set Pallando PeachPuff", + "batch", + " delete Joe", + " delete Saruman", + " delete Radagast", + " delete Pallando", + " delete Gandalf", + " delete Alatar", + "apply", + "set Joe Plumber", + } + for i, tc := range testCases { + s := strings.Split(strings.TrimSpace(tc), " ") + switch s[0] { + case "set": + if err := set(s[1], s[2]); err != nil { + t.Fatalf("#%d %s: 
%v", i, tc, err) + } + if inBatch { + pending = append(pending, s) + } else { + wantMap[s[1]] = s[2] + } + case "delete": + if err := del(s[1]); err != nil { + t.Fatalf("#%d %s: %v", i, tc, err) + } + if inBatch { + pending = append(pending, s) + } else { + delete(wantMap, s[1]) + } + case "batch": + inBatch, batch, set, del = true, &Batch{}, set1, del1 + case "apply": + if err := d.Apply(batch, nil); err != nil { + t.Fatalf("#%d %s: %v", i, tc, err) + } + for _, p := range pending { + switch p[0] { + case "set": + wantMap[p[1]] = p[2] + case "delete": + delete(wantMap, p[1]) + } + } + inBatch, pending, set, del = false, nil, set0, del0 + default: + t.Fatalf("#%d %s: bad test case: %q", i, tc, s) + } + + fail := false + for _, name := range names { + g, closer, err := d.Get([]byte(name)) + if err != nil && err != ErrNotFound { + t.Errorf("#%d %s: Get(%q): %v", i, tc, name, err) + fail = true + } + got, gOK := string(g), err == nil + want, wOK := wantMap[name] + if got != want || gOK != wOK { + t.Errorf("#%d %s: Get(%q): got %q, %t, want %q, %t", + i, tc, name, got, gOK, want, wOK) + fail = true + } + if closer != nil { + closer.Close() + } + } + if fail { + return + } + } + + require.NoError(t, d.Close()) +} + +func TestRandomWrites(t *testing.T) { + d, err := Open("", testingRandomized(t, &Options{ + FS: vfs.NewMem(), + MemTableSize: 8 * 1024, + })) + require.NoError(t, err) + + keys := [64][]byte{} + wants := [64]int{} + for k := range keys { + keys[k] = []byte(strconv.Itoa(k)) + wants[k] = -1 + } + xxx := bytes.Repeat([]byte("x"), 512) + + rng := rand.New(rand.NewSource(123)) + const N = 1000 + for i := 0; i < N; i++ { + k := rng.Intn(len(keys)) + if rng.Intn(20) != 0 { + wants[k] = rng.Intn(len(xxx) + 1) + if err := d.Set(keys[k], xxx[:wants[k]], nil); err != nil { + t.Fatalf("i=%d: Set: %v", i, err) + } + } else { + wants[k] = -1 + if err := d.Delete(keys[k], nil); err != nil { + t.Fatalf("i=%d: Delete: %v", i, err) + } + } + + if i != N-1 || rng.Intn(50) != 0 
{ + continue + } + for k := range keys { + got := -1 + if v, closer, err := d.Get(keys[k]); err != nil { + if err != ErrNotFound { + t.Fatalf("Get: %v", err) + } + } else { + got = len(v) + closer.Close() + } + if got != wants[k] { + t.Errorf("i=%d, k=%d: got %d, want %d", i, k, got, wants[k]) + } + } + } + + require.NoError(t, d.Close()) +} + +func TestLargeBatch(t *testing.T) { + d, err := Open("", testingRandomized(t, &Options{ + FS: vfs.NewMem(), + MemTableSize: 1400, + MemTableStopWritesThreshold: 100, + })) + require.NoError(t, err) + + verifyLSM := func(expected string) func() error { + return func() error { + d.mu.Lock() + s := d.mu.versions.currentVersion().String() + d.mu.Unlock() + if expected != s { + if testing.Verbose() { + fmt.Println(strings.TrimSpace(s)) + } + return errors.Errorf("expected %s, but found %s", expected, s) + } + return nil + } + } + + logNum := func() base.DiskFileNum { + d.mu.Lock() + defer d.mu.Unlock() + return d.mu.log.queue[len(d.mu.log.queue)-1].fileNum + } + fileSize := func(fileNum base.DiskFileNum) int64 { + info, err := d.opts.FS.Stat(base.MakeFilepath(d.opts.FS, "", fileTypeLog, fileNum)) + require.NoError(t, err) + return info.Size() + } + memTableCreationSeqNum := func() uint64 { + d.mu.Lock() + defer d.mu.Unlock() + return d.mu.mem.mutable.logSeqNum + } + + startLogNum := logNum() + startLogStartSize := fileSize(startLogNum) + startSeqNum := d.mu.versions.logSeqNum.Load() + + // Write a key with a value larger than the memtable size. + require.NoError(t, d.Set([]byte("a"), bytes.Repeat([]byte("a"), 512), nil)) + + // Verify that the large batch was written to the WAL that existed before it + // was committed. We verify that WAL rotation occurred, where the large batch + // was written to, and that the new WAL is empty. 
+ endLogNum := logNum() + if startLogNum == endLogNum { + t.Fatal("expected WAL rotation") + } + startLogEndSize := fileSize(startLogNum) + if startLogEndSize == startLogStartSize { + t.Fatalf("expected large batch to be written to %s.log, but file size unchanged at %d", + startLogNum, startLogEndSize) + } + endLogSize := fileSize(endLogNum) + if endLogSize != 0 { + t.Fatalf("expected %s.log to be empty, but found %d", endLogNum, endLogSize) + } + if creationSeqNum := memTableCreationSeqNum(); creationSeqNum <= startSeqNum { + t.Fatalf("expected memTable.logSeqNum=%d > largeBatch.seqNum=%d", creationSeqNum, startSeqNum) + } + + // Verify this results in one L0 table being created. + require.NoError(t, try(100*time.Microsecond, 20*time.Second, + verifyLSM("0.0:\n 000005:[a#10,SET-a#10,SET]\n"))) + + require.NoError(t, d.Set([]byte("b"), bytes.Repeat([]byte("b"), 512), nil)) + + // Verify this results in a second L0 table being created. + require.NoError(t, try(100*time.Microsecond, 20*time.Second, + verifyLSM("0.0:\n 000005:[a#10,SET-a#10,SET]\n 000007:[b#11,SET-b#11,SET]\n"))) + + // Allocate a bunch of batches to exhaust the batchPool. None of these + // batches should have a non-zero count. 
+ for i := 0; i < 10; i++ { + b := d.NewBatch() + require.EqualValues(t, 0, b.Count()) + } + + require.NoError(t, d.Close()) +} + +func TestGetNoCache(t *testing.T) { + cache := NewCache(0) + defer cache.Unref() + + d, err := Open("", testingRandomized(t, &Options{ + Cache: cache, + FS: vfs.NewMem(), + })) + require.NoError(t, err) + + require.NoError(t, d.Set([]byte("a"), []byte("aa"), nil)) + require.NoError(t, d.Flush()) + verifyGet(t, d, []byte("a"), []byte("aa")) + + require.NoError(t, d.Close()) +} + +func TestGetMerge(t *testing.T) { + d, err := Open("", testingRandomized(t, &Options{ + FS: vfs.NewMem(), + })) + require.NoError(t, err) + + key := []byte("a") + verify := func(expected string) { + val, closer, err := d.Get(key) + require.NoError(t, err) + + if expected != string(val) { + t.Fatalf("expected %s, but got %s", expected, val) + } + closer.Close() + } + + const val = "1" + for i := 1; i <= 3; i++ { + require.NoError(t, d.Merge(key, []byte(val), nil)) + + expected := strings.Repeat(val, i) + verify(expected) + + require.NoError(t, d.Flush()) + verify(expected) + } + + require.NoError(t, d.Close()) +} + +func TestMergeOrderSameAfterFlush(t *testing.T) { + // Ensure compaction iterator (used by flush) and user iterator process merge + // operands in the same order + d, err := Open("", testingRandomized(t, &Options{ + FS: vfs.NewMem(), + })) + require.NoError(t, err) + + key := []byte("a") + verify := func(expected string) { + iter, _ := d.NewIter(nil) + if !iter.SeekGE([]byte("a")) { + t.Fatal("expected one value, but got empty iterator") + } + if expected != string(iter.Value()) { + t.Fatalf("expected %s, but got %s", expected, string(iter.Value())) + } + if !iter.SeekLT([]byte("b")) { + t.Fatal("expected one value, but got empty iterator") + } + if expected != string(iter.Value()) { + t.Fatalf("expected %s, but got %s", expected, string(iter.Value())) + } + require.NoError(t, iter.Close()) + } + + require.NoError(t, d.Merge(key, []byte("0"), nil)) + 
require.NoError(t, d.Merge(key, []byte("1"), nil)) + + verify("01") + require.NoError(t, d.Flush()) + verify("01") + + require.NoError(t, d.Close()) +} + +type closableMerger struct { + lastBuf []byte + closed bool +} + +func (m *closableMerger) MergeNewer(value []byte) error { + m.lastBuf = append(m.lastBuf[:0], value...) + return nil +} + +func (m *closableMerger) MergeOlder(value []byte) error { + m.lastBuf = append(m.lastBuf[:0], value...) + return nil +} + +func (m *closableMerger) Finish(includesBase bool) ([]byte, io.Closer, error) { + return m.lastBuf, m, nil +} + +func (m *closableMerger) Close() error { + m.closed = true + return nil +} + +func TestMergerClosing(t *testing.T) { + m := &closableMerger{} + + d, err := Open("", testingRandomized(t, &Options{ + FS: vfs.NewMem(), + Merger: &Merger{ + Merge: func(key, value []byte) (base.ValueMerger, error) { + return m, m.MergeNewer(value) + }, + }, + })) + require.NoError(t, err) + + defer func() { + require.NoError(t, d.Close()) + }() + + err = d.Merge([]byte("a"), []byte("b"), nil) + require.NoError(t, err) + require.False(t, m.closed) + + val, closer, err := d.Get([]byte("a")) + require.NoError(t, err) + require.Equal(t, []byte("b"), val) + require.NotNil(t, closer) + require.False(t, m.closed) + _ = closer.Close() + require.True(t, m.closed) +} + +func TestLogData(t *testing.T) { + d, err := Open("", testingRandomized(t, &Options{ + FS: vfs.NewMem(), + })) + require.NoError(t, err) + + defer func() { + require.NoError(t, d.Close()) + }() + + require.NoError(t, d.LogData([]byte("foo"), Sync)) + require.NoError(t, d.LogData([]byte("bar"), Sync)) + // TODO(itsbilal): Confirm that we wrote some bytes to the WAL. + // For now, LogData proceeding ahead without a panic is good enough. 
+} + +func TestSingleDeleteGet(t *testing.T) { + d, err := Open("", testingRandomized(t, &Options{ + FS: vfs.NewMem(), + })) + require.NoError(t, err) + defer func() { + require.NoError(t, d.Close()) + }() + + key := []byte("key") + val := []byte("val") + + require.NoError(t, d.Set(key, val, nil)) + verifyGet(t, d, key, val) + + key2 := []byte("key2") + val2 := []byte("val2") + + require.NoError(t, d.Set(key2, val2, nil)) + verifyGet(t, d, key2, val2) + + require.NoError(t, d.SingleDelete(key2, nil)) + verifyGetNotFound(t, d, key2) +} + +func TestSingleDeleteFlush(t *testing.T) { + d, err := Open("", testingRandomized(t, &Options{ + FS: vfs.NewMem(), + })) + require.NoError(t, err) + defer func() { + require.NoError(t, d.Close()) + }() + + key := []byte("key") + valFirst := []byte("first") + valSecond := []byte("second") + key2 := []byte("key2") + val2 := []byte("val2") + + require.NoError(t, d.Set(key, valFirst, nil)) + require.NoError(t, d.Set(key2, val2, nil)) + require.NoError(t, d.Flush()) + + require.NoError(t, d.SingleDelete(key, nil)) + require.NoError(t, d.Set(key, valSecond, nil)) + require.NoError(t, d.Delete(key2, nil)) + require.NoError(t, d.Set(key2, val2, nil)) + require.NoError(t, d.Flush()) + + require.NoError(t, d.SingleDelete(key, nil)) + require.NoError(t, d.Delete(key2, nil)) + require.NoError(t, d.Flush()) + + verifyGetNotFound(t, d, key) + verifyGetNotFound(t, d, key2) +} + +func TestUnremovableSingleDelete(t *testing.T) { + d, err := Open("", testingRandomized(t, &Options{ + FS: vfs.NewMem(), + L0CompactionThreshold: 8, + })) + require.NoError(t, err) + defer func() { + require.NoError(t, d.Close()) + }() + + key := []byte("key") + valFirst := []byte("valFirst") + valSecond := []byte("valSecond") + + require.NoError(t, d.Set(key, valFirst, nil)) + ss := d.NewSnapshot() + defer ss.Close() + require.NoError(t, d.SingleDelete(key, nil)) + require.NoError(t, d.Set(key, valSecond, nil)) + require.NoError(t, d.Flush()) + + verifyGet(t, ss, key, 
valFirst) + verifyGet(t, d, key, valSecond) + + require.NoError(t, d.SingleDelete(key, nil)) + + verifyGet(t, ss, key, valFirst) + verifyGetNotFound(t, d, key) + + require.NoError(t, d.Flush()) + + verifyGet(t, ss, key, valFirst) + verifyGetNotFound(t, d, key) +} + +func TestIterLeak(t *testing.T) { + for _, leak := range []bool{true, false} { + t.Run(fmt.Sprintf("leak=%t", leak), func(t *testing.T) { + for _, flush := range []bool{true, false} { + t.Run(fmt.Sprintf("flush=%t", flush), func(t *testing.T) { + d, err := Open("", testingRandomized(t, &Options{ + FS: vfs.NewMem(), + })) + require.NoError(t, err) + + require.NoError(t, d.Set([]byte("a"), []byte("a"), nil)) + if flush { + require.NoError(t, d.Flush()) + } + iter, _ := d.NewIter(nil) + iter.First() + if !leak { + require.NoError(t, iter.Close()) + require.NoError(t, d.Close()) + } else { + defer iter.Close() + if err := d.Close(); err == nil { + t.Fatalf("expected failure, but found success") + } else if !strings.HasPrefix(err.Error(), "leaked iterators:") { + t.Fatalf("expected leaked iterators, but found %+v", err) + } else { + t.Log(err.Error()) + } + } + }) + } + }) + } +} + +// Make sure that we detect an iter leak when only one DB closes +// while the second db still holds a reference to the TableCache. 
+func TestIterLeakSharedCache(t *testing.T) { + for _, leak := range []bool{true, false} { + t.Run(fmt.Sprintf("leak=%t", leak), func(t *testing.T) { + for _, flush := range []bool{true, false} { + t.Run(fmt.Sprintf("flush=%t", flush), func(t *testing.T) { + d1, err := Open("", &Options{ + FS: vfs.NewMem(), + }) + require.NoError(t, err) + + d2, err := Open("", &Options{ + FS: vfs.NewMem(), + }) + require.NoError(t, err) + + require.NoError(t, d1.Set([]byte("a"), []byte("a"), nil)) + if flush { + require.NoError(t, d1.Flush()) + } + + require.NoError(t, d2.Set([]byte("a"), []byte("a"), nil)) + if flush { + require.NoError(t, d2.Flush()) + } + + // Check if leak detection works with only one db closing. + { + iter1, _ := d1.NewIter(nil) + iter1.First() + if !leak { + require.NoError(t, iter1.Close()) + require.NoError(t, d1.Close()) + } else { + defer iter1.Close() + if err := d1.Close(); err == nil { + t.Fatalf("expected failure, but found success") + } else if !strings.HasPrefix(err.Error(), "leaked iterators:") { + t.Fatalf("expected leaked iterators, but found %+v", err) + } else { + t.Log(err.Error()) + } + } + } + + { + iter2, _ := d2.NewIter(nil) + iter2.First() + if !leak { + require.NoError(t, iter2.Close()) + require.NoError(t, d2.Close()) + } else { + defer iter2.Close() + if err := d2.Close(); err == nil { + t.Fatalf("expected failure, but found success") + } else if !strings.HasPrefix(err.Error(), "leaked iterators:") { + t.Fatalf("expected leaked iterators, but found %+v", err) + } else { + t.Log(err.Error()) + } + } + } + + }) + } + }) + } +} + +func TestMemTableReservation(t *testing.T) { + opts := &Options{ + Cache: NewCache(128 << 10 /* 128 KB */), + MemTableSize: initialMemTableSize, + FS: vfs.NewMem(), + } + defer opts.Cache.Unref() + opts.testingRandomized(t) + opts.EnsureDefaults() + // We're going to be looking at and asserting the global memtable reservation + // amount below so we don't want to race with any triggered stats collections. 
+ opts.private.disableTableStats = true + + // Add a block to the cache. Note that the memtable size is larger than the + // cache size, so opening the DB should cause this block to be evicted. + tmpID := opts.Cache.NewID() + helloWorld := []byte("hello world") + value := cache.Alloc(len(helloWorld)) + copy(value.Buf(), helloWorld) + opts.Cache.Set(tmpID, base.FileNum(0).DiskFileNum(), 0, value).Release() + + d, err := Open("", opts) + require.NoError(t, err) + + checkReserved := func(expected int64) { + t.Helper() + if reserved := d.memTableReserved.Load(); expected != reserved { + t.Fatalf("expected %d reserved, but found %d", expected, reserved) + } + } + + checkReserved(int64(opts.MemTableSize)) + if refs := d.mu.mem.queue[len(d.mu.mem.queue)-1].readerRefs.Load(); refs != 2 { + t.Fatalf("expected 2 refs, but found %d", refs) + } + // Verify the memtable reservation has caused our test block to be evicted. + if h := opts.Cache.Get(tmpID, base.FileNum(0).DiskFileNum(), 0); h.Get() != nil { + t.Fatalf("expected failure, but found success: %s", h.Get()) + } + + // Flush the memtable. The memtable reservation should double because old + // memtable will be recycled, saved for the next memtable allocation. + require.NoError(t, d.Flush()) + checkReserved(int64(2 * opts.MemTableSize)) + // Flush again. The memtable reservation should be unchanged because at most + // 1 memtable may be preserved for recycling. + + // Flush in the presence of an active iterator. The iterator will hold a + // reference to a readState which will in turn hold a reader reference to the + // memtable. + iter, _ := d.NewIter(nil) + require.NoError(t, d.Flush()) + // The flush moved the recycled memtable into position as an active mutable + // memtable. There are now two allocated memtables: 1 mutable and 1 pinned + // by the iterator's read state. 
+ checkReserved(2 * int64(opts.MemTableSize)) + + // Flushing again should increase the reservation total to 3x: 1 active + // mutable, 1 for recycling, 1 pinned by iterator's read state. + require.NoError(t, d.Flush()) + checkReserved(3 * int64(opts.MemTableSize)) + + // Closing the iterator will release the iterator's read state, and the old + // memtable will be moved into position as the next memtable to recycle. + // There was already a memtable ready to be recycled, so that memtable will + // be freed and the overall reservation total is reduced to 2x. + require.NoError(t, iter.Close()) + checkReserved(2 * int64(opts.MemTableSize)) + + require.NoError(t, d.Close()) +} + +func TestMemTableReservationLeak(t *testing.T) { + d, err := Open("", &Options{FS: vfs.NewMem()}) + require.NoError(t, err) + + d.mu.Lock() + last := d.mu.mem.queue[len(d.mu.mem.queue)-1] + last.readerRef() + defer func() { + last.readerUnref(true) + }() + d.mu.Unlock() + if err := d.Close(); err == nil { + t.Fatalf("expected failure, but found success") + } else if !strings.HasPrefix(err.Error(), "leaked memtable reservation:") { + t.Fatalf("expected leaked memtable reservation, but found %+v", err) + } else { + t.Log(err.Error()) + } +} + +func TestCacheEvict(t *testing.T) { + cache := NewCache(10 << 20) + defer cache.Unref() + + d, err := Open("", &Options{ + Cache: cache, + FS: vfs.NewMem(), + }) + require.NoError(t, err) + + for i := 0; i < 1000; i++ { + key := []byte(fmt.Sprintf("%04d", i)) + require.NoError(t, d.Set(key, key, nil)) + } + + require.NoError(t, d.Flush()) + iter, _ := d.NewIter(nil) + for iter.First(); iter.Valid(); iter.Next() { + } + require.NoError(t, iter.Close()) + + if size := cache.Size(); size == 0 { + t.Fatalf("expected non-zero cache size") + } + + for i := 0; i < 1000; i++ { + key := []byte(fmt.Sprintf("%04d", i)) + require.NoError(t, d.Delete(key, nil)) + } + + require.NoError(t, d.Compact([]byte("0"), []byte("1"), false)) + + require.NoError(t, d.Close()) + + 
if size := cache.Size(); size != 0 { + t.Fatalf("expected empty cache, but found %d", size) + } +} + +func TestFlushEmpty(t *testing.T) { + d, err := Open("", testingRandomized(t, &Options{ + FS: vfs.NewMem(), + })) + require.NoError(t, err) + + // Flushing an empty memtable should not fail. + require.NoError(t, d.Flush()) + require.NoError(t, d.Close()) +} + +func TestRollManifest(t *testing.T) { + toPreserve := rand.Int31n(5) + 1 + opts := &Options{ + MaxManifestFileSize: 1, + L0CompactionThreshold: 10, + L0StopWritesThreshold: 1000, + FS: vfs.NewMem(), + NumPrevManifest: int(toPreserve), + } + opts.DisableAutomaticCompactions = true + opts.testingRandomized(t) + d, err := Open("", opts) + require.NoError(t, err) + + manifestFileNumber := func() base.DiskFileNum { + d.mu.Lock() + defer d.mu.Unlock() + return d.mu.versions.manifestFileNum + } + sizeRolloverState := func() (int64, int64) { + d.mu.Lock() + defer d.mu.Unlock() + return d.mu.versions.rotationHelper.DebugInfo() + } + + current := func() string { + desc, err := Peek(d.dirname, d.opts.FS) + require.NoError(t, err) + return desc.ManifestFilename + } + + lastManifestNum := manifestFileNumber() + manifestNums := []base.DiskFileNum{lastManifestNum} + for i := 0; i < 5; i++ { + // MaxManifestFileSize is 1, but the rollover logic also counts edits + // since the last snapshot to decide on rollover, so do as many flushes as + // it demands. + lastSnapshotCount, editsSinceSnapshotCount := sizeRolloverState() + var expectedLastSnapshotCount, expectedEditsSinceSnapshotCount int64 + switch i { + case 0: + // DB is empty. + expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 0, 0 + case 1: + // First edit that caused rollover is not in the snapshot. + expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 0, 1 + case 2: + // One flush is in the snapshot. One flush in the edit. + expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 1, 1 + case 3: + // Two flushes in the snapshot. 
One flush in the edit. Will need to do + // two more flushes, the first of which will be in the next snapshot. + expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 2, 1 + case 4: + // Four flushes in the snapshot. One flush in the edit. Will need to do + // four more flushes, three of which will be in the snapshot. + expectedLastSnapshotCount, expectedEditsSinceSnapshotCount = 4, 1 + } + require.Equal(t, expectedLastSnapshotCount, lastSnapshotCount) + require.Equal(t, expectedEditsSinceSnapshotCount, editsSinceSnapshotCount) + // Number of flushes to do to trigger the rollover. + steps := int(lastSnapshotCount - editsSinceSnapshotCount + 1) + // Steps can be <= 0, but we need to do at least one edit to trigger the + // rollover logic. + if steps <= 0 { + steps = 1 + } + for j := 0; j < steps; j++ { + require.NoError(t, d.Set([]byte("a"), nil, nil)) + require.NoError(t, d.Flush()) + } + d.TestOnlyWaitForCleaning() + num := manifestFileNumber() + if lastManifestNum == num { + t.Fatalf("manifest failed to roll %d: %d == %d", i, lastManifestNum, num) + } + + manifestNums = append(manifestNums, num) + lastManifestNum = num + + expectedCurrent := fmt.Sprintf("MANIFEST-%s", lastManifestNum) + if v := current(); expectedCurrent != v { + t.Fatalf("expected %s, but found %s", expectedCurrent, v) + } + } + lastSnapshotCount, editsSinceSnapshotCount := sizeRolloverState() + require.EqualValues(t, 8, lastSnapshotCount) + require.EqualValues(t, 1, editsSinceSnapshotCount) + + files, err := d.opts.FS.List("") + require.NoError(t, err) + + var manifests []string + for _, filename := range files { + fileType, _, ok := base.ParseFilename(d.opts.FS, filename) + if !ok { + continue + } + if fileType == fileTypeManifest { + manifests = append(manifests, filename) + } + } + slices.Sort(manifests) + + var expected []string + for i := len(manifestNums) - int(toPreserve) - 1; i < len(manifestNums); i++ { + expected = append( + expected, + fmt.Sprintf("MANIFEST-%s", 
manifestNums[i]), + ) + } + require.EqualValues(t, expected, manifests) + + // Test the logic that uses the future snapshot size to rollover. + // Reminder: we have a snapshot with 8 files and the manifest has 1 edit + // (flush) with 1 file. + // Add 8 more files with a different key. + lastManifestNum = manifestFileNumber() + for j := 0; j < 8; j++ { + require.NoError(t, d.Set([]byte("c"), nil, nil)) + require.NoError(t, d.Flush()) + } + lastSnapshotCount, editsSinceSnapshotCount = sizeRolloverState() + // Need 16 more files in edits to trigger a rollover. + require.EqualValues(t, 16, lastSnapshotCount) + require.EqualValues(t, 1, editsSinceSnapshotCount) + require.NotEqual(t, manifestFileNumber(), lastManifestNum) + lastManifestNum = manifestFileNumber() + // Do a compaction that moves 8 of the files from L0 to 1 file in L6. This + // adds 9 files in edits. We still need 6 more files in edits based on the + // last snapshot. But the current version has only 9 L0 files and 1 L6 file, + // for a total of 10 files. So 1 flush should push us over that threshold. 
+ d.Compact([]byte("c"), []byte("d"), false) + lastSnapshotCount, editsSinceSnapshotCount = sizeRolloverState() + require.EqualValues(t, 16, lastSnapshotCount) + require.EqualValues(t, 10, editsSinceSnapshotCount) + require.Equal(t, manifestFileNumber(), lastManifestNum) + require.NoError(t, d.Set([]byte("c"), nil, nil)) + require.NoError(t, d.Flush()) + lastSnapshotCount, editsSinceSnapshotCount = sizeRolloverState() + require.EqualValues(t, 10, lastSnapshotCount) + require.EqualValues(t, 1, editsSinceSnapshotCount) + require.NotEqual(t, manifestFileNumber(), lastManifestNum) + + require.NoError(t, d.Close()) +} + +func TestDBClosed(t *testing.T) { + d, err := Open("", &Options{ + FS: vfs.NewMem(), + }) + require.NoError(t, err) + require.NoError(t, d.Close()) + + catch := func(f func()) (err error) { + defer func() { + if r := recover(); r != nil { + err = r.(error) + } + }() + f() + return nil + } + + require.True(t, errors.Is(catch(func() { _ = d.Close() }), ErrClosed)) + + require.True(t, errors.Is(catch(func() { _ = d.Compact(nil, nil, false) }), ErrClosed)) + require.True(t, errors.Is(catch(func() { _ = d.Flush() }), ErrClosed)) + require.True(t, errors.Is(catch(func() { _, _ = d.AsyncFlush() }), ErrClosed)) + + require.True(t, errors.Is(catch(func() { _, _, _ = d.Get(nil) }), ErrClosed)) + require.True(t, errors.Is(catch(func() { _ = d.Delete(nil, nil) }), ErrClosed)) + require.True(t, errors.Is(catch(func() { _ = d.DeleteRange(nil, nil, nil) }), ErrClosed)) + require.True(t, errors.Is(catch(func() { _ = d.Ingest(nil) }), ErrClosed)) + require.True(t, errors.Is(catch(func() { _ = d.LogData(nil, nil) }), ErrClosed)) + require.True(t, errors.Is(catch(func() { _ = d.Merge(nil, nil, nil) }), ErrClosed)) + require.True(t, errors.Is(catch(func() { _ = d.RatchetFormatMajorVersion(internalFormatNewest) }), ErrClosed)) + require.True(t, errors.Is(catch(func() { _ = d.Set(nil, nil, nil) }), ErrClosed)) + + require.True(t, errors.Is(catch(func() { _ = d.NewSnapshot() 
}), ErrClosed)) + + b := d.NewIndexedBatch() + require.True(t, errors.Is(catch(func() { _ = b.Commit(nil) }), ErrClosed)) + require.True(t, errors.Is(catch(func() { _ = d.Apply(b, nil) }), ErrClosed)) + require.True(t, errors.Is(catch(func() { _, _ = b.NewIter(nil) }), ErrClosed)) +} + +func TestDBConcurrentCommitCompactFlush(t *testing.T) { + d, err := Open("", testingRandomized(t, &Options{ + FS: vfs.NewMem(), + })) + require.NoError(t, err) + + // Concurrently commit, compact, and flush in order to stress the locking around + // those operations. + const n = 1000 + var wg sync.WaitGroup + wg.Add(n) + for i := 0; i < n; i++ { + go func(i int) { + defer wg.Done() + _ = d.Set([]byte(fmt.Sprint(i)), nil, nil) + var err error + switch i % 3 { + case 0: + err = d.Compact(nil, []byte("\xff"), false) + case 1: + err = d.Flush() + case 2: + _, err = d.AsyncFlush() + } + require.NoError(t, err) + }(i) + } + wg.Wait() + + require.NoError(t, d.Close()) +} + +func TestDBConcurrentCompactClose(t *testing.T) { + // Test closing while a compaction is ongoing. This ensures compaction code + // detects the close and finishes cleanly. + mem := vfs.NewMem() + for i := 0; i < 100; i++ { + opts := &Options{ + FS: mem, + MaxConcurrentCompactions: func() int { + return 2 + }, + } + d, err := Open("", testingRandomized(t, opts)) + require.NoError(t, err) + + // Ingest a series of files containing a single key each. As the outer + // loop progresses, these ingestions will build up compaction debt + // causing compactions to be running concurrently with the close below. 
+ for j := 0; j < 10; j++ { + path := fmt.Sprintf("ext%d", j) + f, err := mem.Create(path) + require.NoError(t, err) + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ + TableFormat: d.FormatMajorVersion().MaxTableFormat(), + }) + require.NoError(t, w.Set([]byte(fmt.Sprint(j)), nil)) + require.NoError(t, w.Close()) + require.NoError(t, d.Ingest([]string{path})) + } + + require.NoError(t, d.Close()) + } +} + +func TestDBApplyBatchNilDB(t *testing.T) { + d, err := Open("", &Options{FS: vfs.NewMem()}) + require.NoError(t, err) + + b1 := &Batch{} + b1.Set([]byte("test"), nil, nil) + + b2 := &Batch{} + b2.Apply(b1, nil) + if b2.memTableSize != 0 { + t.Fatalf("expected memTableSize to not be set") + } + require.NoError(t, d.Apply(b2, nil)) + if b1.memTableSize != b2.memTableSize { + t.Fatalf("expected memTableSize %d, but found %d", b1.memTableSize, b2.memTableSize) + } + + require.NoError(t, d.Close()) +} + +func TestDBApplyBatchMismatch(t *testing.T) { + srcDB, err := Open("", &Options{FS: vfs.NewMem()}) + require.NoError(t, err) + + applyDB, err := Open("", &Options{FS: vfs.NewMem()}) + require.NoError(t, err) + + err = func() (err error) { + defer func() { + if v := recover(); v != nil { + err = errors.Errorf("%v", v) + } + }() + + b := srcDB.NewBatch() + b.Set([]byte("test"), nil, nil) + return applyDB.Apply(b, nil) + }() + if err == nil || !strings.Contains(err.Error(), "pebble: batch db mismatch:") { + t.Fatalf("expected error, but found %v", err) + } + + require.NoError(t, srcDB.Close()) + require.NoError(t, applyDB.Close()) +} + +func TestCloseCleanerRace(t *testing.T) { + mem := vfs.NewMem() + for i := 0; i < 20; i++ { + db, err := Open("", testingRandomized(t, &Options{FS: mem})) + require.NoError(t, err) + require.NoError(t, db.Set([]byte("a"), []byte("something"), Sync)) + require.NoError(t, db.Flush()) + // Ref the sstables so cannot be deleted. 
+ it, _ := db.NewIter(nil) + require.NotNil(t, it) + require.NoError(t, db.DeleteRange([]byte("a"), []byte("b"), Sync)) + require.NoError(t, db.Compact([]byte("a"), []byte("b"), false)) + // Only the iterator is keeping the sstables alive. + files, err := mem.List("/") + require.NoError(t, err) + var found bool + for _, f := range files { + if strings.HasSuffix(f, ".sst") { + found = true + break + } + } + require.True(t, found) + // Close the iterator and the db in succession so file cleaning races with DB.Close() -- + // latter should wait for file cleaning to finish. + require.NoError(t, it.Close()) + require.NoError(t, db.Close()) + files, err = mem.List("/") + require.NoError(t, err) + for _, f := range files { + if strings.HasSuffix(f, ".sst") { + t.Fatalf("found sst: %s", f) + } + } + } +} + +func TestSSTablesWithApproximateSpanBytes(t *testing.T) { + d, err := Open("", &Options{ + FS: vfs.NewMem(), + }) + require.NoError(t, err) + defer func() { + if d != nil { + require.NoError(t, d.Close()) + } + }() + + // Create two sstables. + // sstable is contained within keyspan (fileNum = 5). + require.NoError(t, d.Set([]byte("c"), nil, nil)) + require.NoError(t, d.Set([]byte("d"), nil, nil)) + require.NoError(t, d.Flush()) + + // sstable partially overlaps keyspan (fileNum = 7). + require.NoError(t, d.Set([]byte("d"), nil, nil)) + require.NoError(t, d.Set([]byte("g"), nil, nil)) + require.NoError(t, d.Flush()) + + // cannot use WithApproximateSpanBytes without WithProperties. + _, err = d.SSTables(WithKeyRangeFilter([]byte("a"), []byte("e")), WithApproximateSpanBytes()) + require.Error(t, err) + + // cannot use WithApproximateSpanBytes without WithKeyRangeFilter. 
+ _, err = d.SSTables(WithProperties(), WithApproximateSpanBytes()) + require.Error(t, err) + + tableInfos, err := d.SSTables(WithProperties(), WithKeyRangeFilter([]byte("a"), []byte("e")), WithApproximateSpanBytes()) + require.NoError(t, err) + + for _, levelTables := range tableInfos { + for _, table := range levelTables { + approximateSpanBytes, err := strconv.ParseInt(table.Properties.UserProperties["approximate-span-bytes"], 10, 64) + require.NoError(t, err) + if table.FileNum == 5 { + require.Equal(t, uint64(approximateSpanBytes), table.Size) + } + if table.FileNum == 7 { + require.Less(t, uint64(approximateSpanBytes), table.Size) + } + } + } +} + +func TestFilterSSTablesWithOption(t *testing.T) { + d, err := Open("", &Options{ + FS: vfs.NewMem(), + }) + require.NoError(t, err) + defer func() { + if d != nil { + require.NoError(t, d.Close()) + } + }() + + // Create two sstables. + require.NoError(t, d.Set([]byte("/Table/5"), nil, nil)) + require.NoError(t, d.Flush()) + require.NoError(t, d.Set([]byte("/Table/10"), nil, nil)) + require.NoError(t, d.Flush()) + + tableInfos, err := d.SSTables(WithKeyRangeFilter([]byte("/Table/5"), []byte("/Table/6"))) + require.NoError(t, err) + + totalTables := 0 + for _, levelTables := range tableInfos { + totalTables += len(levelTables) + } + + // with filter second sstable should not be returned + require.EqualValues(t, 1, totalTables) + + tableInfos, err = d.SSTables() + require.NoError(t, err) + + totalTables = 0 + for _, levelTables := range tableInfos { + totalTables += len(levelTables) + } + + // without filter + require.EqualValues(t, 2, totalTables) +} + +func TestSSTables(t *testing.T) { + d, err := Open("", &Options{ + FS: vfs.NewMem(), + }) + require.NoError(t, err) + defer func() { + if d != nil { + require.NoError(t, d.Close()) + } + }() + + // Create two sstables. 
+ require.NoError(t, d.Set([]byte("hello"), nil, nil)) + require.NoError(t, d.Flush()) + require.NoError(t, d.Set([]byte("world"), nil, nil)) + require.NoError(t, d.Flush()) + + // by default returned table infos should not contain Properties + tableInfos, err := d.SSTables() + require.NoError(t, err) + for _, levelTables := range tableInfos { + for _, info := range levelTables { + require.Nil(t, info.Properties) + } + } + + // with opt `WithProperties()` the `Properties` in table info should not be nil + tableInfos, err = d.SSTables(WithProperties()) + require.NoError(t, err) + for _, levelTables := range tableInfos { + for _, info := range levelTables { + require.NotNil(t, info.Properties) + } + } +} + +type testTracer struct { + enabledOnlyForNonBackgroundContext bool + buf strings.Builder +} + +func (t *testTracer) Infof(format string, args ...interface{}) {} +func (t *testTracer) Errorf(format string, args ...interface{}) {} +func (t *testTracer) Fatalf(format string, args ...interface{}) {} + +func (t *testTracer) Eventf(ctx context.Context, format string, args ...interface{}) { + if t.enabledOnlyForNonBackgroundContext && ctx == context.Background() { + return + } + fmt.Fprintf(&t.buf, format, args...) + fmt.Fprint(&t.buf, "\n") +} + +func (t *testTracer) IsTracingEnabled(ctx context.Context) bool { + if t.enabledOnlyForNonBackgroundContext && ctx == context.Background() { + return false + } + return true +} + +func TestTracing(t *testing.T) { + if !invariants.Enabled { + // The test relies on timing behavior injected when invariants.Enabled. + return + } + var tracer testTracer + c := NewCache(0) + defer c.Unref() + d, err := Open("", &Options{ + FS: vfs.NewMem(), + Cache: c, + LoggerAndTracer: &tracer, + }) + require.NoError(t, err) + defer func() { + require.NoError(t, d.Close()) + }() + + // Create a sstable. 
+ require.NoError(t, d.Set([]byte("hello"), nil, nil)) + require.NoError(t, d.Flush()) + _, closer, err := d.Get([]byte("hello")) + require.NoError(t, err) + closer.Close() + readerInitTraceString := "reading 37 bytes took 5ms\nreading 628 bytes took 5ms\n" + iterTraceString := "reading 27 bytes took 5ms\nreading 29 bytes took 5ms\n" + require.Equal(t, readerInitTraceString+iterTraceString, tracer.buf.String()) + + // Get again, but since it currently uses context.Background(), no trace + // output is produced. + tracer.buf.Reset() + tracer.enabledOnlyForNonBackgroundContext = true + _, closer, err = d.Get([]byte("hello")) + require.NoError(t, err) + closer.Close() + require.Equal(t, "", tracer.buf.String()) + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + iter, _ := d.NewIterWithContext(ctx, nil) + iter.SeekGE([]byte("hello")) + iter.Close() + require.Equal(t, iterTraceString, tracer.buf.String()) + + tracer.buf.Reset() + snap := d.NewSnapshot() + iter, _ = snap.NewIterWithContext(ctx, nil) + iter.SeekGE([]byte("hello")) + iter.Close() + require.Equal(t, iterTraceString, tracer.buf.String()) + snap.Close() + + tracer.buf.Reset() + b := d.NewIndexedBatch() + iter, err = b.NewIterWithContext(ctx, nil) + require.NoError(t, err) + iter.SeekGE([]byte("hello")) + iter.Close() + require.Equal(t, iterTraceString, tracer.buf.String()) + b.Close() +} + +func TestMemtableIngestInversion(t *testing.T) { + memFS := vfs.NewMem() + opts := &Options{ + FS: memFS, + MemTableSize: 256 << 10, // 4KB + MemTableStopWritesThreshold: 1000, + L0StopWritesThreshold: 1000, + L0CompactionThreshold: 2, + MaxConcurrentCompactions: func() int { + return 1000 + }, + } + + const channelTimeout = 5 * time.Second + + // We induce delay in compactions by passing in an EventListener that stalls on + // the first TableCreated event for a compaction job we want to block. 
+ // FlushBegin and CompactionBegin has info on compaction start/output levels + // which is what we need to identify what compactions to block. However + // FlushBegin and CompactionBegin are called while holding db.mu, so we cannot + // block those events forever. Instead, we grab the job ID from those events + // and store it. Then during TableCreated, we check if we're creating an output + // for a job we have identified earlier as one to block, and then hold on a + // semaphore there until there's a signal from the test code to resume with the + // compaction. + // + // If nextBlockedCompaction is non-zero, we must block the next compaction + // out of the nextBlockedCompaction - 3 start level. 1 means block the next + // intra-L0 compaction and 2 means block the next flush (as flushes have + // a -1 start level). + var nextBlockedCompaction, blockedJobID int + var blockedCompactionsMu sync.Mutex // protects the above two variables. + nextSem := make(chan chan struct{}, 1) + var el EventListener + el.EnsureDefaults(testLogger{t: t}) + el.FlushBegin = func(info FlushInfo) { + blockedCompactionsMu.Lock() + defer blockedCompactionsMu.Unlock() + if nextBlockedCompaction == 2 { + nextBlockedCompaction = 0 + blockedJobID = info.JobID + } + } + el.CompactionBegin = func(info CompactionInfo) { + // 0 = block nothing, 1 = block intra-L0 compaction, 2 = block flush, + // 3 = block L0 -> LBase compaction, 4 = block compaction out of L1, and so on. + blockedCompactionsMu.Lock() + defer blockedCompactionsMu.Unlock() + blockValue := info.Input[0].Level + 3 + if info.Input[0].Level == 0 && info.Output.Level == 0 { + // Intra L0 compaction, denoted by casValue of 1. 
+ blockValue = 1 + } + if nextBlockedCompaction == blockValue { + nextBlockedCompaction = 0 + blockedJobID = info.JobID + } + } + el.TableCreated = func(info TableCreateInfo) { + blockedCompactionsMu.Lock() + if info.JobID != blockedJobID { + blockedCompactionsMu.Unlock() + return + } + blockedJobID = 0 + blockedCompactionsMu.Unlock() + sem := make(chan struct{}) + nextSem <- sem + <-sem + } + tel := TeeEventListener(MakeLoggingEventListener(testLogger{t: t}), el) + opts.EventListener = &tel + opts.Experimental.L0CompactionConcurrency = 1 + d, err := Open("", opts) + require.NoError(t, err) + defer func() { + if d != nil { + require.NoError(t, d.Close()) + } + }() + + printLSM := func() { + d.mu.Lock() + s := d.mu.versions.currentVersion().String() + d.mu.Unlock() + t.Logf("%s", s) + } + + // Create some sstables. These should go into L6. These are irrelevant for + // the rest of the test. + require.NoError(t, d.Set([]byte("b"), []byte("foo"), nil)) + require.NoError(t, d.Flush()) + require.NoError(t, d.Set([]byte("d"), []byte("bar"), nil)) + require.NoError(t, d.Flush()) + require.NoError(t, d.Compact([]byte("a"), []byte("z"), true)) + + var baseCompactionSem, flushSem, intraL0Sem chan struct{} + // Block an L0 -> LBase compaction. This is necessary to induce intra-L0 + // compactions later on. + blockedCompactionsMu.Lock() + nextBlockedCompaction = 3 + blockedCompactionsMu.Unlock() + timeoutSem := time.After(channelTimeout) + t.Log("blocking an L0 -> LBase compaction") + // Write an sstable to L0 until we're blocked on an L0 -> LBase compaction. 
+ breakLoop := false + for !breakLoop { + select { + case sem := <-nextSem: + baseCompactionSem = sem + breakLoop = true + case <-timeoutSem: + t.Fatal("did not get blocked on an LBase compaction") + default: + require.NoError(t, d.Set([]byte("b"), []byte("foo"), nil)) + require.NoError(t, d.Set([]byte("g"), []byte("bar"), nil)) + require.NoError(t, d.Flush()) + time.Sleep(100 * time.Millisecond) + } + } + printLSM() + + // Do 4 ingests, one with the key cc, one with bb and cc, and two with just bb. + // The purpose of the sstable containing cc is to inflate the L0 sublevel + // count of the interval at cc, as that's where we want the intra-L0 compaction + // to be seeded. However we also need a file left of that interval to have + // the same (or higher) sublevel to trigger the bug in + // cockroachdb/cockroach#101896. That's why we ingest a file after it to + // "bridge" the bb/cc intervals, and then ingest a file at bb. These go + // into sublevels like this: + // + // bb + // bb + // bb-----cc + // cc + // + // Eventually, we'll drop an ingested file containing a range del starting at + // cc around here: + // + // bb + // bb cc---... 
+ // bb-----cc + // cc + { + path := "ingest1.sst" + f, err := memFS.Create(path) + require.NoError(t, err) + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ + TableFormat: d.FormatMajorVersion().MaxTableFormat(), + }) + require.NoError(t, w.Set([]byte("cc"), []byte("foo"))) + require.NoError(t, w.Close()) + require.NoError(t, d.Ingest([]string{path})) + } + { + path := "ingest2.sst" + f, err := memFS.Create(path) + require.NoError(t, err) + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ + TableFormat: d.FormatMajorVersion().MaxTableFormat(), + }) + require.NoError(t, w.Set([]byte("bb"), []byte("foo2"))) + require.NoError(t, w.Set([]byte("cc"), []byte("foo2"))) + require.NoError(t, w.Close()) + require.NoError(t, d.Ingest([]string{path})) + } + { + path := "ingest3.sst" + f, err := memFS.Create(path) + require.NoError(t, err) + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ + TableFormat: d.FormatMajorVersion().MaxTableFormat(), + }) + require.NoError(t, w.Set([]byte("bb"), []byte("foo3"))) + require.NoError(t, w.Close()) + require.NoError(t, d.Ingest([]string{path})) + } + { + path := "ingest4.sst" + f, err := memFS.Create(path) + require.NoError(t, err) + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ + TableFormat: d.FormatMajorVersion().MaxTableFormat(), + }) + require.NoError(t, w.Set([]byte("bb"), []byte("foo4"))) + require.NoError(t, w.Close()) + require.NoError(t, d.Ingest([]string{path})) + } + + // We now have a base compaction blocked. Block a memtable flush to cause + // memtables to queue up. 
+ // + // Memtable (stuck): + // + // b-----------------g + // + // Relevant L0 ssstables + // + // bb + // bb + // bb-----cc + // cc + blockedCompactionsMu.Lock() + nextBlockedCompaction = 2 + blockedCompactionsMu.Unlock() + t.Log("blocking a flush") + require.NoError(t, d.Set([]byte("b"), []byte("foo2"), nil)) + require.NoError(t, d.Set([]byte("g"), []byte("bar2"), nil)) + _, _ = d.AsyncFlush() + select { + case sem := <-nextSem: + flushSem = sem + case <-time.After(channelTimeout): + t.Fatal("did not get blocked on a flush") + } + // Add one memtable to flush queue, and finish it off. + // + // Memtables (stuck): + // + // b-----------------g (waiting to flush) + // b-----------------g (flushing, blocked) + // + // Relevant L0 ssstables + // + // bb + // bb + // bb-----cc + // cc + require.NoError(t, d.Set([]byte("b"), []byte("foo3"), nil)) + require.NoError(t, d.Set([]byte("g"), []byte("bar3"), nil)) + // note: this flush will wait for the earlier, blocked flush, but it closes + // off the memtable which is what we want. + _, _ = d.AsyncFlush() + + // Open a new mutable memtable. This gets us an earlier earlierUnflushedSeqNum + // than the ingest below it. + require.NoError(t, d.Set([]byte("c"), []byte("somethingbigishappening"), nil)) + // Block an intra-L0 compaction, as one might happen around this time. + blockedCompactionsMu.Lock() + nextBlockedCompaction = 1 + blockedCompactionsMu.Unlock() + t.Log("blocking an intra-L0 compaction") + // Ingest a file containing a cc-e rangedel. 
+ // + // Memtables: + // + // c (mutable) + // b-----------------g (waiting to flush) + // b-----------------g (flushing, blocked) + // + // Relevant L0 ssstables + // + // bb + // bb cc-----e (just ingested) + // bb-----cc + // cc + { + path := "ingest5.sst" + f, err := memFS.Create(path) + require.NoError(t, err) + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ + TableFormat: d.FormatMajorVersion().MaxTableFormat(), + }) + require.NoError(t, w.DeleteRange([]byte("cc"), []byte("e"))) + require.NoError(t, w.Close()) + require.NoError(t, d.Ingest([]string{path})) + } + t.Log("main ingest complete") + printLSM() + t.Logf("%s", d.Metrics().String()) + + require.NoError(t, d.Set([]byte("d"), []byte("ThisShouldNotBeDeleted"), nil)) + + // Do another ingest with a seqnum newer than d. The purpose of this is to + // increase the LargestSeqNum of the intra-L0 compaction output *beyond* + // the flush that contains d=ThisShouldNotBeDeleted, therefore causing + // that point key to be deleted (in the buggy code). + // + // Memtables: + // + // c-----d (mutable) + // b-----------------g (waiting to flush) + // b-----------------g (flushing, blocked) + // + // Relevant L0 ssstables + // + // bb cc + // bb cc-----e (just ingested) + // bb-----cc + // cc + { + path := "ingest6.sst" + f, err := memFS.Create(path) + require.NoError(t, err) + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ + TableFormat: d.FormatMajorVersion().MaxTableFormat(), + }) + require.NoError(t, w.Set([]byte("cc"), []byte("doesntmatter"))) + require.NoError(t, w.Close()) + require.NoError(t, d.Ingest([]string{path})) + } + + // Unblock earlier flushes. 
We will first finish flushing the blocked + // memtable, and end up in this state: + // + // Memtables: + // + // c-----d (mutable) + // b-----------------g (waiting to flush) + // + // Relevant L0 ssstables + // + // b-------------------g (irrelevant, just flushed) + // bb cc (has LargestSeqNum > earliestUnflushedSeqNum) + // bb cc-----e (has a rangedel) + // bb-----cc + // cc + // + // Note that while b----g is relatively old (and so has a low LargestSeqNum), + // it bridges a bunch of intervals. Had we regenerated sublevels from scratch, + // it'd have gone below the cc-e sstable. But due to #101896, we just slapped + // it on top. Now, as long as our seed interval is the one at cc and our seed + // file is the just-flushed L0 sstable, we will go down and include anything + // in that interval even if it has a LargestSeqNum > earliestUnflushedSeqNum. + // + // All asterisked L0 sstables should now get picked in an intra-L0 compaction + // right after the flush finishes, that we then block: + // + // b-------------------g* + // bb* cc* + // bb* cc-----e* + // bb-----cc* + // cc* + t.Log("unblocking flush") + flushSem <- struct{}{} + printLSM() + + select { + case sem := <-nextSem: + intraL0Sem = sem + case <-time.After(channelTimeout): + t.Fatal("did not get blocked on an intra L0 compaction") + } + + // Ensure all memtables are flushed. This will mean d=ThisShouldNotBeDeleted + // will land in L0 and since that was the last key written to a memtable, + // and the ingestion at cc came after it, the output of the intra-L0 + // compaction will elevate the cc-e rangedel above it and delete it + // (if #101896 is not fixed). + ch, _ := d.AsyncFlush() + <-ch + + // Unblock earlier intra-L0 compaction. + t.Log("unblocking intraL0") + intraL0Sem <- struct{}{} + printLSM() + + // Try reading d a couple times. 
+ for i := 0; i < 2; i++ { + val, closer, err := d.Get([]byte("d")) + require.NoError(t, err) + require.Equal(t, []byte("ThisShouldNotBeDeleted"), val) + if closer != nil { + closer.Close() + } + time.Sleep(100 * time.Millisecond) + } + + // Unblock everything. + baseCompactionSem <- struct{}{} +} + +func BenchmarkDelete(b *testing.B) { + rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) + const keyCount = 10000 + var keys [keyCount][]byte + for i := 0; i < keyCount; i++ { + keys[i] = []byte(strconv.Itoa(rng.Int())) + } + val := bytes.Repeat([]byte("x"), 10) + + benchmark := func(b *testing.B, useSingleDelete bool) { + d, err := Open( + "", + &Options{ + FS: vfs.NewMem(), + }) + if err != nil { + b.Fatal(err) + } + defer func() { + if err := d.Close(); err != nil { + b.Fatal(err) + } + }() + + b.StartTimer() + for _, key := range keys { + _ = d.Set(key, val, nil) + if useSingleDelete { + _ = d.SingleDelete(key, nil) + } else { + _ = d.Delete(key, nil) + } + } + // Manually flush as it is flushing/compaction where SingleDelete + // performance shows up. With SingleDelete, we can elide all of the + // SingleDelete and Set records. 
+ if err := d.Flush(); err != nil { + b.Fatal(err) + } + b.StopTimer() + } + + b.Run("delete", func(b *testing.B) { + for i := 0; i < b.N; i++ { + benchmark(b, false) + } + }) + + b.Run("single-delete", func(b *testing.B) { + for i := 0; i < b.N; i++ { + benchmark(b, true) + } + }) +} + +func BenchmarkNewIterReadAmp(b *testing.B) { + for _, readAmp := range []int{10, 100, 1000} { + b.Run(strconv.Itoa(readAmp), func(b *testing.B) { + opts := &Options{ + FS: vfs.NewMem(), + L0StopWritesThreshold: 1000, + } + opts.DisableAutomaticCompactions = true + + d, err := Open("", opts) + require.NoError(b, err) + + for i := 0; i < readAmp; i++ { + require.NoError(b, d.Set([]byte("a"), []byte("b"), NoSync)) + require.NoError(b, d.Flush()) + } + + require.Equal(b, d.Metrics().ReadAmp(), readAmp) + + b.StopTimer() + b.ResetTimer() + for i := 0; i < b.N; i++ { + b.StartTimer() + iter, _ := d.NewIter(nil) + b.StopTimer() + require.NoError(b, iter.Close()) + } + + require.NoError(b, d.Close()) + }) + } +} + +func verifyGet(t *testing.T, r Reader, key, expected []byte) { + val, closer, err := r.Get(key) + require.NoError(t, err) + if !bytes.Equal(expected, val) { + t.Fatalf("expected %s, but got %s", expected, val) + } + closer.Close() +} + +func verifyGetNotFound(t *testing.T, r Reader, key []byte) { + val, _, err := r.Get(key) + if err != base.ErrNotFound { + t.Fatalf("expected nil, but got %s", val) + } +} + +func BenchmarkRotateMemtables(b *testing.B) { + o := &Options{FS: vfs.NewMem(), MemTableSize: 64 << 20 /* 64 MB */} + d, err := Open("", o) + require.NoError(b, err) + + // We want to jump to full-sized memtables. 
+	d.mu.Lock()
+	d.mu.mem.nextSize = o.MemTableSize
+	d.mu.Unlock()
+	require.NoError(b, d.Flush())
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		if err := d.Flush(); err != nil {
+			b.Fatal(err)
+		}
+	}
+}
diff --git a/pebble/docs/RFCS/20211018_range_keys.md b/pebble/docs/RFCS/20211018_range_keys.md
new file mode 100644
index 0000000..890fa58
--- /dev/null
+++ b/pebble/docs/RFCS/20211018_range_keys.md
@@ -0,0 +1,961 @@
+- Feature Name: Range Keys
+- Status: draft
+- Start Date: 2021-10-18
+- Authors: Sumeer Bhola, Jackson Owens
+- RFC PR: #1341
+- Pebble Issues:
+  https://github.com/cockroachdb/pebble/issues/1339
+- Cockroach Issues:
+  https://github.com/cockroachdb/cockroach/issues/70429
+  https://github.com/cockroachdb/cockroach/issues/70412
+
+** Design Draft**
+
+# Summary
+
+An ongoing effort within CockroachDB to preserve MVCC history across all SQL
+operations (see cockroachdb/cockroach#69380) requires a more efficient method of
+deleting ranges of MVCC history.
+
+This document describes an extension to Pebble introducing first-class support
+for range keys. Range keys map a range of keyspace to a value. Optionally, the
+key range may include a suffix encoding a version (eg, MVCC timestamp). Pebble
+iterators may be configured to surface range keys during iteration, or to mask
+point keys at lower MVCC timestamps covered by range keys.
+
+CockroachDB will make use of these range keys to enable history-preserving
+removal of contiguous ranges of MVCC keys with constant writes, and efficient
+iteration past deleted versions.
+
+# Background
+
+A previous CockroachDB RFC cockroachdb/cockroach#69380 describes the motivation
+for the larger project of migrating MVCC-noncompliant operations into MVCC
+compliance. Implemented with the existing MVCC primitives, some operations like
+removal of an index or table would require performing writes linearly
+proportional to the size of the table. 
Dropping a large table using existing +MVCC point-delete primitives would be prohibitively expensive. The desire for a +sublinear delete of an MVCC range motivates this work. + +The detailed design for MVCC compliant bulk operations ([high-level +description](https://github.com/cockroachdb/cockroach/blob/master/docs/RFCS/20210825_mvcc_bulk_ops.md); +detailed design draft for DeleteRange in internal +[doc](https://docs.google.com/document/d/1ItxpitNwuaEnwv95RJORLCGuOczuS2y_GoM2ckJCnFs/edit#heading=h.x6oktstoeb9t)), +ran into complexity by placing range operations above the Pebble layer, such +that Pebble sees these as points. The complexity causes are various: (a) which +key (start or end) to anchor this range on, when represented as a point (there +are performance consequences), (b) rewriting on CockroachDB range splits (and +concerns about rewrite volume), (c) fragmentation on writes and complexity +thereof (and performance concerns for reads when not fragmenting), (d) inability +to efficiently skip older MVCC versions that are masked by a `[k1,k2)@ts` (where +ts is the MVCC timestamp). + +Pebble currently has only one kind of key that is associated with a range: +`RANGEDEL [k1, k2)#seq`, where [k1, k2) is supplied by the caller, and is used +to efficiently remove a set of point keys. + +First-class support for range keys in Pebble eliminates all these issues. +Additionally, it allows for future extensions like efficient transactional range +operations. This issue describes how this feature would work from the +perspective of a user of Pebble (like CockroachDB), and sketches some +implementation details. + +# Design + +## Interface + +### New `Comparer` requirements + +The Pebble `Comparer` type allows users to optionally specify a `Split` function +that splits a user key into a prefix and a suffix. 
This Split allows users +implementing MVCC (Multi-Version Concurrency Control) to inform Pebble which +part of the key encodes the user key and which part of the key encodes the +version (eg, a timestamp). Pebble does not dictate the encoding of an MVCC +version, only that the version form a suffix on keys. + +The range keys design described in this RFC introduces stricter requirements for +user-provided `Split` implementations and the ordering of keys: + +1. The user key consisting of just a key prefix `k` must sort before all + other user keys containing that prefix. Specifically + `Compare(k[:Split(k)], k) < 0` where `Split(k) < len(k)`. +2. A key consisting of a bare suffix must be a valid key and comparable. The + ordering of the empty key prefix with any suffixes must be consistent with + the ordering of those same suffixes applied to any other key prefix. + Specifically `Compare(k[Split(k):], k2[Split(k2):]) == Compare(k, k2)` where + `Compare(k[:Split(k)], k2[:Split(k2)]) == 0`. + +The details of why these new requirements are necessary are explained in the +implementation section. + +### Writes + +This design introduces three new write operations: + +- `RangeKeySet([k1, k2), [optional suffix], )`: This represents the + mapping `[k1, k2)@suffix => value`. Keys `k1` and `k2` must not contain a + suffix (i.e., `Split(k1)==len(k1)` and `Split(k2)==len(k2))`. + +- `RangeKeyUnset([k1, k2), [optional suffix])`: This removes a mapping + previously applied by `RangeKeySet`. The unset may use a smaller key range + than the original `RangeKeySet`, in which case only part of the range is + deleted. The unset only applies to range keys with a matching optional suffix. + If the optional suffix is absent in both the RangeKeySet and RangeKeyUnset, + they are considered matching. + +- `RangeKeyDelete([k1, k2))`: This removes all range keys within the provided + key span. It behaves like an `Unset` unencumbered by suffix restrictions. 
+ +For example, consider `RangeKeySet([a,d), foo)` (i.e., no suffix). If +there is a later call `RangeKeyUnset([b,c))`, the resulting state seen by +a reader is `[a,b) => foo`, `[c,d) => foo`. Note that the value is not +modified when the key is fragmented. + +Partially overlapping `RangeKeySet`s with the same suffix overwrite one +another. For example, consider `RangeKeySet([a,d), foo)`, followed by +`RangeKeySet([c,e), bar)`. The resulting state is `[a,c) => foo`, `[c,e) +=> bar`. + +Point keys (eg, traditional keys defined at a singular byte string key) and +range keys do not overwrite one another. They have a parallel existence. Point +deletes only apply to points. Range unsets only apply to range keys. However, +users may configure iterators to mask point keys covered by newer range keys. +This masking behavior is explicitly requested by the user in the context of the +iteration. Masking is described in more detail below. + +There exist separate range delete operations for point keys and range keys. A +`RangeKeyDelete` may remove part of a range key, just like the new +`RangeKeyUnset` operation introduced earlier. `RangeKeyDelete`s differ from +`RangeKeyUnset`s, because the latter requires that the suffix matches and +applies only to range keys. `RangeKeyDelete`s completely clear all existing +range keys within their span at all suffix values. + +The optional suffix in `RangeKeySet` and `RangeKeyUnset` operations is related +to the pebble `Comparer.Split` operation which is explicitly documented as being +for [MVCC +keys](https://github.com/cockroachdb/pebble/blob/e95e73745ce8a85d605ef311d29a6574db8ed3bf/internal/base/comparer.go#L69-L88), +without mandating exactly how the versions are represented. `RangeKeySet` and +`RangeKeyUnset` keys with different suffixes do not interact logically, although +Pebble will observably fragment ranges at intersection points. 
+ +### Iteration + +A user iterating over a key interval [k1,k2) can request: + +- **[I1]** An iterator over only point keys. + +- **[I2]** A combined iterator over point and range keys. This is what + we mainly discuss below in the implementation discussion. + +- **[I3]** An iterator over only range keys. In the CockroachDB use + case, range keys will need to be subject to MVCC GC just like + point keys — this iterator may be useful for that purpose. + +The `pebble.Iterator` type will be extended to provide accessors for +range keys for use in the combined and exclusively range iteration +modes. + +``` +// HasPointAndRange indicates whether there exists a point key, a range key or +// both at the current iterator position. +HasPointAndRange() (hasPoint, hasRange bool) + +// RangeKeyChanged indicates whether the most recent iterator positioning +// operation resulted in the iterator stepping into or out of a new range key. +// If true previously returned range key bounds and data has been invalidated. +// If false, previously obtained range key bounds, suffix and value slices are +// still valid and may continue to be read. +RangeKeyChanged() bool + +// Key returns the key of the current key/value pair, or nil if done. If +// positioned at an iterator position that only holds a range key, Key() +// always returns the start bound of the range key. Otherwise, it returns +// the point key's key. +Key() []byte + +// RangeBounds returns the start (inclusive) and end (exclusive) bounds of the +// range key covering the current iterator position. RangeBounds returns nil +// bounds if there is no range key covering the current iterator position, or +// the iterator is not configured to surface range keys. +// +// If valid, the returned start bound is less than or equal to Key() and the +// returned end bound is greater than Key(). +RangeBounds() (start, end []byte) + +// Value returns the value of the current key/value pair, or nil if done. 
+// The caller should not modify the contents of the returned slice, and +// its contents may change on the next call to Next. +// +// Only valid if HasPointAndRange() returns true for hasPoint. +Value() []byte + +// RangeKeys returns the range key values and their suffixes covering the +// current iterator position. The range bounds may be retrieved separately +// through RangeBounds(). +RangeKeys() []RangeKey + +type RangeKey struct { + Suffix []byte + Value []byte +} +``` + +When a combined iterator exposes range keys, it exposes all the range +keys covering `Key`. During iteration with a combined iterator, an +iteration position may surface just a point key, just a range key or +both at the currently-positioned `Key`. + +Described another way, a Pebble combined iterator guarantees that it +will stop at all positions within the keyspace where: +1. There exists a point key at that position. +2. There exists a range key that logically begins at that postition. + +In addition to the above positions, a Pebble iterator may also stop at keys +in-between the above positions due to fragmentation. Range keys are defined over +continuous spans of keyspace. Range keys with different suffix values may +overlap each other arbitrarily. To surface these arbitrarily overlapping spans +in an understandable and efficient way, the Pebble iterator surfaces range keys +fragmented at intersection points. Consider the following sequence of writes: + +``` + RangeKeySet([a,z), @1, 'apple') + RangeKeySet([c,e), @3, 'banana') + RangeKeySet([e,m), @5, 'orange') + RangeKeySet([b,k), @7, 'kiwi') +``` + +This yields a database containing overlapping range keys: +``` + @7 → kiwi |-----------------) + @5 → orange |---------------) + @3 → banana |---) + @1 → apple |-------------------------------------------------) + a b c d e f g h i j k l m n o p q r s t u v w x y z +``` + +During iteration, these range keys are surfaced using the bounds of their +intersection points. 
For example, a scan across the keyspace containing only +these range keys would observe the following iterator positions: + +``` + Key() = a RangeKeyBounds() = [a,b) RangeKeys() = {(@1,apple)} + Key() = b RangeKeyBounds() = [b,c) RangeKeys() = {(@7,kiwi), (@1,apple)} + Key() = c RangeKeyBounds() = [c,e) RangeKeys() = {(@7,kiwi), (@3,banana), (@1,apple)} + Key() = e RangeKeyBounds() = [e,k) RangeKeys() = {(@7,kiwi), (@5,orange), (@1,apple)} + Key() = k RangeKeyBounds() = [k,m) RangeKeys() = {(@5,orange), (@1,apple)} + Key() = m RangeKeyBounds() = [m,z) RangeKeys() = {(@1,apple)} +``` + +This fragmentation produces a more understandable interface, and avoids forcing +iterators to read all range keys within the bounds of the broadest range key. +Consider this example: + +``` + iterator pos [ ] - sstable bounds + | +L1: [a----v1@t2--|-h] [l-----unset@t1----u] +L2: [e---|------v1@t1----------r] + a b c d e f g h i j k l m n o p q r s t u v w x y z +``` + +If the iterator is positioned at a point key `g`, there are two overlapping +physical range keys: `[a,h)@t2→v1` and `[e,r)@t1→v1`. + +However, the `RangeKeyUnset([l,u), @t1)` removes part of the `[e,r)@t1→v1` range +key, truncating it to the bounds `[e,l)`. The iterator must return the truncated +bounds that correctly respect the `RangeKeyUnset`. However, when the range keys +are stored within a log-structured merge tree like Pebble, the `RangeKeyUnset` +may not be contained within the level's sstable that overlaps the current point +key. Searching for the unset could require reading an unbounded number of +sstables, losing the log-structured merge tree's property that bounds read +amplification to the number of levels in the tree. + +Fragmenting range keys to intersection points avoids this problem. The iterator +positioned at `g` only surfaces range key state with the bounds `[e,h)`, the +widest bounds in which it can guarantee t2→v1 and t1→v1 without loading +additional sstables. 
+ +#### Iteration order + +Recall that the user-provided `Comparer.Split(k)` function divides all user keys +into a prefix and a suffix, such that the prefix is `k[:Split(k)]`, and the +suffix is `k[Split(k):]`. If a key does not contain a suffix, the key equals the +prefix. + +An iterator that is configured to surface range keys alongside point keys will +surface all range keys covering the current `Key()` position. Revisiting an +earlier example with the addition of three new point key-value pairs: +a→artichoke, b@2→beet and t@3→turnip. Consider '@' to form the suffix +where present, with `` denoting a MVCC timestamp. Higher, more-recent +timestamps sort before lower, older timestamps. + +``` + . a → artichoke + @7 → kiwi |-----------------) + @5 → orange |---------------) + . b@2 b@2 → beet + @3 → banana |---) . t@3 t@3 → turnip + @1 → apple |-------------------------------------------------) + a b c d e f g h i j k l m n o p q r s t u v w x y z +``` + +An iterator configured to surface both point and range keys will visit the +following iterator positions during forward iteration: + +``` + Key() HasPointAndRange() Value() RangeKeyBounds() RangeKeys() + a (true, true) artichoke [a,b) {(@1,apple)} + b (false, true) - [b,c) {(@7,kiwi), (@1,apple)} + b@2 (true, true) beet [b,c) {(@7,kiwi), (@1,apple)} + c (false, true) - [c,e) {(@7,kiwi), (@3,banana), (@1,apple)} + e (false, true) - [e,k) {(@7,kiwi), (@5,orange), (@1,apple)} + k (false, true) - [k,m) {(@5,orange), (@1,apple)} + m (false, true) - [m,z) {(@1,apple)} + t@3 (true, true) turnip [m,z) {(@1,apple)} +``` + +Note that: + +- While positioned over a point key (eg, Key() = 'a', 'b@2' or t@3'), the + iterator exposes both the point key's value through Value() and the + overlapping range keys values through `RangeKeys()`. + +- There can be multiple range keys covering a `Key()`, each with a different + suffix. 
+ +- There cannot be multiple range keys covering a `Key()` with the same suffix, + since the most-recently committed one (eg, the one with the highest sequence + number) will win, just like for point keys. + +- If the iterator has configured lower and/or upper bounds, they will truncate + the range key to those bounds. For example, if the above iterator had an upper + bound 'y', the `[m,z)` range key would be surfaced with the bounds `[m,y)` + instead. + +#### Masking + +Range key masking provides additional, optional functionality designed +specifically for the use case of implementing a MVCC-compatible delete range. + +When constructing an iterator that iterators over both point and range keys, a +user may request that range keys mask point keys. Masking is configured with a +suffix parameter that determines which range keys may mask point keys. Only +range keys with suffixes that sort after the mask's suffix mask point keys. A +range key that meets this condition only masks points with suffixes that sort +after the range key's suffix. + +``` +type IterOptions struct { + // ... + RangeKeyMasking RangeKeyMasking +} + +// RangeKeyMasking configures automatic hiding of point keys by range keys. +// A non-nil Suffix enables range-key masking. When enabled, range keys with +// suffixes ≥ Suffix behave as masks. All point keys that are contained within +// a masking range key's bounds and have suffixes greater than the range key's +// suffix are automatically skipped. +// +// Specifically, when configured with a RangeKeyMasking.Suffix _s_, and there +// exists a range key with suffix _r_ covering a point key with suffix _p_, and +// +// _s_ ≤ _r_ < _p_ +// +// then the point key is elided. +// +// Range-key masking may only be used when iterating over both point keys and +// range keys. +type RangeKeyMasking struct { + // Suffix configures which range keys may mask point keys. 
Only range keys + // that are defined at suffixes greater than or equal to Suffix will mask + // point keys. + Suffix []byte + // Filter is an optional field that may be used to improve performance of + // range-key masking through a block-property filter defined over key + // suffixes. If non-nil, Filter is called by Pebble to construct a + // block-property filter mask at iterator creation. The filter is used to + // skip whole point-key blocks containing point keys with suffixes greater + // than a covering range-key's suffix. + // + // To use this functionality, the caller must create and configure (through + // Options.BlockPropertyCollectors) a block-property collector that records + // the maxmimum suffix contained within a block. The caller then must write + // and provide a BlockPropertyFilterMask implementation on that same + // property. See the BlockPropertyFilterMask type for more information. + Filter func() BlockPropertyFilterMask +} +``` + +Example: A user may construct an iterator with `RangeKeyMasking.Suffix` set to +`@50`. The range key `[a, c)@60` would mask nothing, because `@60` is a more +recent timestamp than `@50`. However a range key `[a,c)@30` would mask `a@20` +and `apple@10` but not `apple@40`. A range key can only mask keys with MVCC +timestamps older than the range key's own timestamp. Only range keys with +suffixes (eg, MVCC timestamps) may mask anything at all. + +The pebble Iterator surfaces all range keys when masking is enabled. Only point +keys are ever skipped, and only when they are contained within the bounds of a +range key with a more-recent suffix, and the range key's suffix is older than +the timestamp encoded in `RangeKeyMasking.Sufffix`. + +## Implementation + +### Write operations + +This design introduces three new Pebble write operations: `RangeKeySet`, +`RangeKeyUnset` and `RangeKeyDelete`. 
Internally, these operations are +represented as internal keys with new corresponding key kinds encoded as a part +of the key trailer. These keys are stored within special range key blocks +separate from point keys, but within the same sstable. The range key blocks hold +`RangeKeySet`, `RangeKeyUnset` and `RangeKeyDelete` keys, but do not hold keys +of any other kind. Within the memtables, these range keys are stored in a +separate skip list. + +- `RangeKeySet([k1,k2), @suffix, value)` is encoded as a `k1.RANGEKEYSET` key + with a value encoding the tuple `(k2,@suffix,value)`. +- `RangeKeyUnset([k1,k2), @suffix)` is encoded as a `k1.RANGEUNSET` key + with a value encoding the tuple `(k2,@suffix)`. +- `RangeKeyDelete([k1,k2)` is encoded as a `k1.RANGEKEYDELETE` key with a value + encoding `k2`. + +Range keys are physically fragmented as an artifact of the log-structured merge +tree structure and internal sstable boundaries. This fragmentation is essential +for preserving the performance characteristics of a log-structured merge tree. +Although the public interface operations for `RangeKeySet` and `RangeKeyUnset` +require both boundary keys `[k1,k2)` to always be bare prefixes (eg, to not have +a suffix), internally these keys may be fragmented to bounds containing +suffixes. + +Example: If a user attempts to write `RangeKeySet([a@v1, c@v2), @v3, value)`, +Pebble will return an error to the user. If a user writes `RangeKeySet([a, c), +@v3, value)`, Pebble will allow the write and may later internally fragment the +`RangeKeySet` into three internal keys: + - `RangeKeySet([a, a@v1), @v3, value)` + - `RangeKeySet([a@v1, c@v2), @v3, value)` + - `RangeKeySet([c@v2, c), @v3, value)` + +This fragmentation preserve log-structured merge tree performance +characteristics because it allows a range key to be split across many sstables, +while preserving locality between range keys and point keys. 
Consider a +`RangeKeySet([a,z), @1, foo)` on a database that contains millions of point keys +in the range [a,z). If the [a,z) range key was not permitted to be fragmented +internally, it would either need to be stored completely separately from the +point keys in a separate sstable or in a single intractably large sstable +containing all the overlapping point keys. Fragmentation allows locality, +ensuring point keys and range keys in the same region of the keyspace can be +stored in the same sstable. + +`RangeKeySet`, `RangeKeyUnset` and `RangeKeyDelete` keys are assigned sequence +numbers, like other internal keys. Log-structured merge tree level invariants +are valid across range key, point keys and between the two. That is: + + 1. The point key `k1#s2` cannot be at a lower level than `k2#s1` where + `k1==k2` and `s1 < s2`. This is the invariant implemented by all LSMs. + 2. `RangeKeySet([k1,k2))#s2` cannot be at a lower level than + `RangeKeySet([k3,k4))#s1` where `[k1,k2)` overlaps `[k3,k4)` and `s1 < s2`. + 3. `RangeKeySet([k1,k2))#s2` cannot be at a lower level than a point key + `k3#s1` where `k3 \in [k1,k2)` and `s1 < s2`. + +Like other tombstones, the `RangeKeyUnset` and `RangeKeyDelete` keys are elided +when they fall to the bottomost level of the LSM and there is no snapshot +preventing its elision. There is no additional garbage collection problem +introduced by these keys. + +There is no Merge operation that affects range keys. + +#### Physical representation + +`RangeKeySet`, `RangeKeyUnset` and `RangeKeyDelete` keys are keyed by their +start key. This poses an obstacle. We must be able to support multiple range +keys at the same sequence number, because all keys within an ingested sstable +adopt the same sequence number. Duplicate internal keys (keys with equal user +keys, sequence numbers and kinds) are prohibited within Pebble. 
To resolve this +issue, fragments with the same bounds are merged within snapshot stripes into a +single physical key-value, representing multiple logical key-value pairs: + +``` +k1.RangeKeySet#s2 → (k2,[(@t2,v2),(@t1,v1)]) +``` + +Within a physical key-value pair, suffix-value pairs are stored sorted by +suffix, descending. This has a minor advantage of reducing iteration-time +user-key comparisons when there exist multiple range keys in a table. + +Unlike other Pebble keys, the `RangeKeySet` and `RangeKeyUnset` keys have values +that encode fields of data known to Pebble. The value that the user sets in a +call to `RangeKeySet` is opaque to Pebble, but the physical representation of +the `RangeKeySet`'s value is known. This encoding is a sequence of fields: + +* End key, `varstring`, encodes the end user key of the fragment. +* A series of (suffix, value) tuples representing the logical range keys that + were merged into this one physical `RangeKeySet` key: + * Suffix, `varstring` + * Value, `varstring` + +Similarly, `RangeKeyUnset` keys are merged within snapshot stripes and have a +physical representation like: + +``` +k1.RangeKeyUnset#s2 → (k2,[(@t2),(@t1)]) +``` + +A `RangeKeyUnset` key's value is encoded as: +* End key, `varstring`, encodes the end user key of the fragment. +* A series of suffix `varstring`s. + +When `RangeKeySet` and `RangeKeyUnset` fragments with identical bounds meet +within the same snapshot stripe within a compaction, any of the +`RangeKeyUnset`'s suffixes that exist within the `RangeKeySet` key are removed. + +A `RangeKeyDelete` key has no additional data beyond its end key, which is +encoded directly in the value. + +NB: `RangeKeySet` and `RangeKeyUnset` keys are not merged within batches or the +memtable. That's okay, because batches are append-only and indexed batches will +refragment and merge the range keys on-demand. In the memtable, every key is +guaranteed to have a unique sequence number. 
+ +### Sequence numbers + +Like all Pebble keys, `RangeKeySet`, `RangeKeyUnset` and `RangeKeyDelete` are +assigned sequence numbers when committed. As described above, overlapping +`RangeKeySet`s and `RangeKeyUnset`s are fragmented to have matching start and +end bounds. Then the resulting exactly-overlapping range key fragments are +merged into a single internal key-value pair, within the same snapshot stripe +and sstable. The original, unmerged internal keys each have their own sequence +numbers, indicating the moment they were committed within the history of all +write operations. + +Recall that sequence numbers are used within Pebble to determine which keys +appear live to which iterators. When an iterator is constructed, it takes note +of the current _visible sequence number_, and for the lifetime of the iterator, +only surfaces keys less than that sequence number. Similarly, snapshots read the +current _visible sequence number_, remember it, but also leave a note asking +compactions to preserve history at that sequence number. The space between +snapshotted sequence numbers is referred to as a _snapshot stripe_, and +operations cannot drop or otherwise mutate keys unless they fall within the same +_snapshot stripe_. For example a `k.MERGE#5` key may not be merged with a +`k.MERGE#1` operation if there's an open snapshot at `#3`. + +The new `RangeKeySet`, `RangeKeyUnset` and `RangeKeyDelete` keys behave +similarly. Overlapping range keys won't be merged if there's an open snapshot +separating them. Consider a range key `a-z` written at sequence number `#1` and +a point key `d.SET#2`. A combined point-and-range iterator using a sequence +number `#3` and positioned at `d` will surface both the range key `a-z` and the +point key `d`. + +In the context of masking, the suffix-based masking of range keys can cause +potentially unexpected behavior. A range key `[a,z)@10` may be committed as +sequence number `#1`. Afterwards, a point key `d@5#2` may be committed. 
An +iterator that is configured with range-key masking with suffix `@20` would mask +the point key `d@5#2` because although `d@5#2`'s sequence number is higher, +range-key masking uses suffixes to impose order, not sequence numbers. + +### Boundaries for sstables + +Range keys follow the same relationship to sstable bounadries as the existing +`RANGEDEL` tombstones. The bounds of an internal range key are user keys. Every +range key is limited by its containing sstable's bounds. + +Consider these keys, annotated with sequence numbers: + +``` +Point keys: a#50, b#70, b#49, b#48, c#47, d#46, e#45, f#44 +Range key: [a,e)#60 +``` + +We have created three versions of `b` in this example. In previous versions, +Pebble could split output sstables during a compaction such that the different +`b` versions span more than one sstable. This creates problems for `RANGEDEL`s +which span these two sstables which are discussed in the section on [improperly +truncated RANGEDELS](https://github.com/cockroachdb/pebble/blob/master/docs/range_deletions.md#improperly-truncated-range-deletes). +We manage to tolerate this for `RANGEDEL`s since their semantics are defined by +the system, which is not true for these range keys where the actual semantics +are up to the user. + +Pebble now disallows such sstable split points. In this example, by postponing +the sstable split point to the user key c, we can cleanly split the range key +into `[a,c)#60` and `[c,e)#60`. The sstable end bound for the first sstable +(sstable bounds are inclusive) will be c#inf (where inf is the largest possible +seqnum, which is unused except for these cases), and sstable start bound for the +second sstable will be c#60. + +The above example deals exclusively with point and range keys without suffixes. 
+Consider this example with suffixed keys, and compaction outputs split in the +middle of the `b` prefix: + +``` +first sstable: points: a@100, a@30, b@100, b@40 ranges: [a,c)@50 +second sstable: points: b@30, c@40, d@40, e@30, ranges: [c,e)@50 +``` + +When the compaction code decides to defer `b@30` to the next sstable and finish +the first sstable, the range key `[a,c)@50` is sitting in the fragmenter. The +compaction must split the range key at the bounds determined by the user key. +The compaction uses the first point key of the next sstable, in this case +`b@30`, to truncate the range key. The compaction flushes the fragment +`[a,b@30)@50` to the first sstable and updates the existing fragment to begin at +`b@30`. + +If a range key extends into the next file, the range key's truncated end is used +for the purposes of determining the sstable end boundary. The first sstable's +end boundary becomes `b@30#inf`, signifying the range key does not cover `b@30`. +The second sstable's start boundary is `b@30`. + +### Block property collectors + +Separate block property collectors may be configured to collect separate +properties about range keys. This is necessary for CockroachDB's MVCC block +property collectors to ensure the sstable-level properties are correct. + +### Iteration + +This design extends the `*pebble.Iterator` with the ability to iterate over +exclusively range keys, range keys and point keys together or exclusively point +keys (the previous behavior). + +- Pebble already requires that the prefix `k` follows the same key validity + rules as `k@suffix`. + +- Previously, Pebble did not require that a user key consisting of just a prefix + `k` sort before the same prefix with a non-empty suffix. CockroachDB has + adopted this behavior since it results in the following clean behavior: + `RANGEDEL` over [k1, k2) deletes all versioned keys which have prefixes in the + interval [k1, k2). Pebble will now require this behavior for all users using + MVCC keys. 
Specifically, it must hold that `Compare(k[:Split(k)], k) < 0` if + `Split(k) < len(k)`. + +# TKTK: Discuss merging iterator + +#### Determinism + +Range keys will be split based on boundaries of sstables in an LSM. Users of an +LSM typically expect that two different LSMs with different sstable settings +that receive the same writes should output the same key-value pairs when +iterating. To provide this behavior, the iterator implementation may be +configured to defragment range keys during iteration time. The defragmentation +behavior would be: + +- Two visible ranges `[k1,k2)@suffix1=>val1`, `[k2,k3)@suffix2=>val2` are + defragmented if suffix1==suffix2 and val1==val2, and become [k1,k3). + +- Defragmentation during user iteration does not consider the sequence number. + This is necessary since LSM state can be exported to another LSM via the use + of sstable ingestion, which can collapse different seqnums to the same seqnum. + We would like both LSMs to look identical to the user when iterating. + +The above defragmentation is conceptually simple, but hard to implement +efficiently, since it requires stepping ahead from the current position to +defragment range keys. This stepping ahead could switch sstables while there are +still points to be consumed in a previous sstable. This determinism is useful +for testing and verification purposes: + +- Randomized and metamorphic testing is used extensively to reliably test + software including Pebble and CockroachDB. Defragmentation provides + the determinism necessary for this form of testing. + +- CockroachDB's replica divergence detector requires a consistent view of the + database on each replica. + +In order to provide determinism, Pebble constructs an internal range key +iterator stack that's separate from the point iterator stack, even when +performing combined iteration over both range and point keys. 
The separate range +key iterator allows the internal range key iterator to move independently of the +point key iterator. This allows the range key iterator to independently visit +adjacent sstables in order to defragment their range keys if necessary, without +repositioning the point iterator. + +Two spans [k1,k2) and [k3, k4) of range keys are defragmented if their bounds +abut and their user observable-state is identical. That is, `k2==k3` and each +spans' contains exactly the same set of range key (, ) pairs. In +order to support `RangeKeyUnset` and `RangeKeyDelete`, defragmentation must be +applied _after_ resolving unset and deletes. + +#### Merging iteration + +Recall that range keys are stored in the same sstables as point keys. In a +log-structured merge tree, these sstables are distributed across levels. Within +a level, sstables are non-overlapping but between levels sstables may overlap +arbitrarily. During iteration, keys across levels must be merged together. For +point keys, this is typically done with a heap. + +Range keys too must be merged across levels, and the earlier described +fragmentation at intersection boundaries must be applied. To implement this, a +range key merging iterator is defined. + +A merging iterator is initialized with an arbitrary number of child iterators +over fragmented spans. Each child iterator exposes fragmented range keys, such +that overlapping range keys are surfaced in a single span with a single set of +bounds. Range keys from one child iterator may overlap key spans from another +child iterator arbitrarily. The high-level algorithm is: + +1. Initialize a heap with bound keys from child iterators' range keys. +2. Find the next [or previous, if in reverse] two unique user keys' from bounds. +3. Consider the span formed between the two unique user keys a candidate span. +4. Determine if any of the child iterators' spans overlap the candidate span. + 4a. 
If any of the child iterator's current bounds are end keys (during + forward iteration) or start keys (during reverse iteration), then all the + spans with that bound overlap the candidate span. + 4b. If no spans overlap, forget the smallest (forward iteration) or largest + (reverse iteration) unique user key and advance the iterators to the next + unique user key. Start again from 3. + +Consider the example: + +``` + i0: b---d e-----h + i1: a---c h-----k + i2: a------------------------------p + +fragments: a-b-c-d-e-----h-----k----------p +``` + +None of the individual child iterators contain a span with the exact bounds +[c,d), but the merging iterator must produce a span [c,d). To accomplish this, +the merging iterator visits every span between unique boundary user keys. In the +above example, this is: + +``` +[a,b), [b,c), [c,d), [d,e), [e, h), [h, k), [k, p) +``` + +The merging iterator first initializes the heap to prepare for iteration. The +description below discusses the mechanics of forward iteration after a call to +First, but the mechanics are similar for reverse iteration and other positioning +methods. + +During a call to First, the heap is initialized by seeking every level to the +first bound of the first fragment. In the above example, this seeks the child +iterators to: + +``` +i0: (b, boundKindStart, [ [b,d) ]) +i1: (a, boundKindStart, [ [a,c) ]) +i2: (a, boundKindStart, [ [a,p) ]) +``` + +After fixing up the heap, the root of the heap is the bound with the smallest +user key ('a' in the example). During forward iteration, the root of the heap's +user key is the start key of next merged span. The merging iterator records this +key as the start key. The heap may contain other levels with range keys that +also have the same user key as a bound of a range key, so the merging iterator +pulls from the heap until it finds the first bound greater than the recorded +start key. 
+ +In the above example, this results in the bounds `[a,b)` and child iterators in +the following positions: + +``` +i0: (b, boundKindStart, [ [b,d) ]) +i1: (c, boundKindEnd, [ [a,c) ]) +i2: (p, boundKindEnd, [ [a,p) ]) +``` + +With the user key bounds of the next merged span established, the merging +iterator must determine which, if any, of the range keys overlap the span. +During forward iteration any child iterator that is now positioned at an end +boundary has an overlapping span. (Justification: The child iterator's end +boundary is ≥ the new end bound. The child iterator's range key's corresponding +start boundary must be ≤ the new start bound since there were no other user keys +between the new span's bounds. So the fragments associated with the iterator's +current end boundary have start and end bounds such that start ≤ < ≤ end). + +The merging iterator iterates over the levels, collecting keys from any child +iterators positioned at end boundaries. In the above example, i1 and i2 are +positioned at end boundaries, so the merging iterator collects the keys of [a,c) +and [a,p). These spans contain the merging iterator's [a,b) span, but they may +also extend beyond the new span's start and end. The merging iterator returns +the keys with the new start and end bounds, preserving the underlying keys' +sequence numbers, key kinds and values. + +It may be the case that the merging iterator finds no levels positioned at span +end boundaries in which case the span overlaps with nothing. In this case the +merging iterator loops, repeating the above process again until it finds a span +that does contain keys. 
+ +#### Efficient masking + +Recollect that in the earlier example from the iteration interface, during +forward iteration an iterator would output the following keys: + +``` + Key() HasPointAndRange() Value() RangeKeyBounds() RangeKeys() + a (true, true) artichoke [a,b) {(@1,apple)} + b (false, true) - [b,c) {(@7,kiwi), (@1,apple)} + b@2 (true, true) beet [b,c) {(@7,kiwi), (@1,apple)} + c (false, true) - [c,e) {(@7,kiwi), (@3,banana), (@1,apple)} + e (false, true) - [e,k) {(@7,kiwi), (@5,orange), (@1,apple)} + k (false, true) - [k,m) {(@5,orange), (@1,apple)} + m (false, true) - [m,z) {(@1,apple)} + t@3 (true, true) turnip [m,z) {(@1,apple)} +``` + +When implementing an MVCC "soft delete range" operation using range keys, the +range key `[b,k)@7→kiwi` may represent that all keys within the range [b,k) are +deleted at MVCC timestamp @7. During iteration, it would be desirable if the +caller could indicate that it does not want to observe any "soft deleted" point +keys, and the iterator can safely skip them. Note that in a MVCC system, whether +or not a key is soft deleted depends on the timestamp at which the database is +read. + +This is implemented through "range key masking," where a range key may act as a +mask, hiding point keys with MVCC timestamps beneath the range key. This +iterator option requires that the client configure the iterator with a MVCC +timestamp `suffix` representing the timestamp at which history should be read. +All range keys with suffixes (MVCC timestamps) less than or equal to the +configured suffix serve as masks. All point keys with suffixes (MVCC timestamps) +less than a covering, masking range key's suffix are hidden. + +Specifically, when configured with a RangeKeyMasking.Suffix _s_, and there +exists a range key with suffix _r_ covering a point key with suffix _p_, and _s_ +≤ _r_ < _p_ then the point key is elided. 
+ +In the above example, if `RangeKeyMasking.Suffix` is set to `@7`, every range +key serves as a mask and the point key `b@2` is hidden during iteration because +it's contained within the masking `[b,k)@7→kiwi` range key. Note that `t@3` +would _not_ be masked, because its timestamp `@3` is more recent than the only +range key that covers it (`[a,z)@1→apple`). + +If `RangeKeyMasking.Suffix` were set to `@6` (a historical, point-in-time read), +the `[b,k)@7→kiwi` range key would no longer serve as a mask, and `b@2` would be +visible. + +To efficiently implement masking, we cannot rely on the LSM invariant since +`b@100` can be at a lower level than `[a,e)@50`. Instead, we build on +block-property filters, supporting special use of a MVCC timestamp block +property in order to skip blocks wholly containing point keys that are masked by +a range key. The client may configure a block-property collector to record the +highest MVCC timestamps of point keys within blocks. + +During read time, when positioned within a range key with a suffix ≤ +`RangeKeyMasking.Suffix`, the iterator configures sstable readers to use a +block-property filter to skip any blocks for which the highest MVCC timestamp is +less than the provided suffix. Additionally, these iterators must consult index +block bounds to ensure the block-property filter is not applied beyond the +bounds of the masking range key. + +### CockroachDB use + +CockroachDB initially will only use range keys to represent MVCC range +tombstones. See the MVCC range tombstones tech note for more details: + +https://github.com/cockroachdb/cockroach/blob/master/docs/tech-notes/mvcc-range-tombstones.md + +### Alternatives + +#### A1. Automatic elision of range keys that don't cover keys + +We could decide that range keys: + +- Don't contribute to `MVCCStats` themselves. +- May be elided by Pebble when they cover zero point keys. 
+ +This means that CockroachDB garbage collection does not need to explicitly +remove the range keys, only the point keys they deleted. This option is clean +when paired with `RANGEDEL`s dropping both point and range keys. CockroachDB can +issue `RANGEDEL`s whenever it wants to drop a contiguous swath of points, and +not worry about the fact that it might also need to update the MVCC stats for +overlapping range keys. + +However, this option makes deterministic iteration over defragmented range keys +for replica divergence detection challenging, because internal fragmentation may +elide regions of a range key at any point. Producing a normalized form would +require storing state in the value (ie, the original start key) and +recalculating the smallest and largest extant covered point keys within the +range key and replica bounds. This would require maintaining _O_(range-keys) +state during the `storage.ComputeStatsForRange` pass over a replica's combined +point and range iterator. + +This likely forces replica divergence detection to use other means (eg, altering +the checksum of covered points) to incorporate MVCC range tombstone state. + +This option is also highly tailored to the MVCC Delete Range use case. Other +range key usages, like ranged intents, would not want this behavior, so we don't +consider it further. + +#### A2. Separate LSM of range keys + +There are two viable options for where to store range keys. They may be encoded +within the same sstables as points in separate blocks, or in separate sstables +forming a parallel range-key LSM. We examine the tradeoffs between storing range +keys in the same sstable in different blocks ("shared sstables") or separate +sstables forming a parallel LSM ("separate sstables"): + +- Storing range keys in separate sstables is possible because the only + iteractions between range keys and point keys happens at a global level. + Masking is defined over suffixes. 
It may be extended to be defined over + sequence numbers too (see 'Sequence numbers' section below), but that is + optional. Unlike range deletion tombstones, range keys have no effect on point + keys during compactions. + +- With separate sstables, reads may need to open additional sstable(s) and read + additional blocks. The number of additional sstables is the number of nonempty + levels in the range-key LSM, so it grows logarithmically with the number of + range keys. For each sstable, a read must read the index block and a data + block. + +- With our expectation of few range keys, the range-key LSM is expected to be + small, with one or two levels. Heuristics around sstable boundaries may + prevent unnecessary range-key reads when there is no covering range key. Range + key sstables and blocks are expected to have much higher table and block cache + hit rates, since they are orders of magnitude less dense. Reads in any + overlapping point sstables all access the same range key sstables. + +- With shared sstables, `SeekPrefixGE` cannot use bloom filters to entirely + eliminate sstables that contain range keys. Pebble does not always use bloom + filters in L6, so once a range key is compacted into L6 its impact to + `SeekPrefixGE` is lessened. With separate sstables, `SeekPrefixGE` can always + use bloom filters for point-key sstables. If there are any overlapping + range-key sstables, the read must read them. + +- With shared sstables, range keys create dense sstable boundaries. A range key + spanning an sstable boundary leaves no gap between the sstables' bounds. This + can force ingested sstables into higher levels of the LSM, even if the + sstables' point key spans don't overlap. This problem was previously observed + with wide `RANGEDEL` tombstones and was mitigated by prioritizing compaction + of sstables that contain `RANGEDEL` keys. We could do the same with range + keys, but the write amplification is expected to be much worse. 
The `RANGEDEL` + tombstones drop keys and eventually are dropped themselves as long as there is + not an open snapshot. Range keys do not drop data and are expected to persist + in L6 for long durations, always requiring ingested sstables to be inserted + into L5 or above. + +- With separate sstables, compaction logic is separate, which helps avoid + complexity of tricky sstable boundary conditions. Because there are expected + to be an order of magnitude fewer range keys, we could impose the constraint + that a prefix cannot be split across multiple range key sstables. The + simplified compaction logic comes at the cost of higher levels, iterators, etc + all needing to deal with the concept of two parallel LSMs. + +- With shared sstables, the LSM invariant is maintained between range keys and + point keys. For example, if the point key `b@20` is committed, and + subsequently a range key `RangeKey([a,c), @25, ...)` is committed, the range + key will never fall below the covered point `b@20` within the LSM. + +We decide to share sstables, because preserving the LSM invariant between range +keys and point keys is expected to be useful in the long-term. + +#### A3. Sequence number masking + +In the CockroachDB MVCC range tombstone use case, a point key should never be +written below an existing range key with a higher timestamp. The MVCC range +tombstone use case would allow us to dictate that an overlapping range key with +a higher sequence number always masks range keys with lower sequence numbers. +Adding this additional masking scope would avoid the comparatively costly suffix +comparison when a point key _is_ masked by a range key. We need to consider how +sequence number masking might be affected by the merging of range keys within +snapshot stripes. + +Consider the committing of range key `[a,z)@{t1}#10`, followed by point keys +`d@t2#11` and `m@t2#11`, followed by range key `[j,z)@{t3}#12`. 
This sequencing +respects the expected timestamp, sequence number relationship in CockroachDB's +use case. If all keys are flushed within the same sstable, fragmentation and +merging overlapping fragments yields range keys `[a,j)@{t1}#10`, +`[j,z)@{t3,t1}#12`. The key `d@t2#11` must not be masked because it's not +covered by the new range key, and indeed that's the case because the covering +range key's fragment is unchanged `[a,j)@{t1}#10`. + +For now we defer this optimization, with the expectation that we may not be able +to preserve this relationship between sequence numbers and suffixes in all range +key use cases. diff --git a/pebble/docs/RFCS/20220112_pebble_sstable_format_versions.md b/pebble/docs/RFCS/20220112_pebble_sstable_format_versions.md new file mode 100644 index 0000000..c2f792f --- /dev/null +++ b/pebble/docs/RFCS/20220112_pebble_sstable_format_versions.md @@ -0,0 +1,290 @@ +- Feature Name: Pebble SSTable Format Versions +- Status: completed +- Start Date: 2022-01-12 +- Authors: Nick Travers +- RFC PR: https://github.com/cockroachdb/pebble/pull/1450 +- Pebble Issues: + https://github.com/cockroachdb/pebble/issues/1409 + https://github.com/cockroachdb/pebble/issues/1339 +- Cockroach Issues: + +# Summary + +To safely support changes to the SSTable structure, a new versioning scheme +under a Pebble magic number is proposed. + +This RFC also outlines the relationship between the SSTable format version and +the existing Pebble format major version, in addition to how the two are to +be used in Cockroach for safely enabling new table format versions. + +# Motivation + +Pebble currently uses a "format major version" scheme for the store (or DB) +that indicates which Pebble features should be enabled when the store is first +opened, before any SSTables are opened. The versions indicate points of +backwards incompatibility for a store. 
For example, the introduction of the +`SetWithDelete` key kind is gated behind a version, as is block property +collection. This format major version scheme was introduced in +[#1227](https://github.com/cockroachdb/pebble/issues/1227). + +While Pebble can use the format major version to infer how to load and +interpret data in the LSM, the SSTables that make up the store itself have +their own notion of a "version". This "SSTable version" (also referred to as a +"table format") is written to the footer (or trailing section) of each SSTable +file and determines how the file is to be interpreted by Pebble. As of the time +of writing, Pebble supports two table formats - LevelDB's format, and RocksDB's +v2 format. Pebble inherited the latter as the default table format as it was +the version that RocksDB used at the time Pebble was being developed, and +remained the default to allow for a simpler migration path from Cockroach +clusters that were originally using RocksDB as the storage engine. The +RocksDBv2 table format adds various features on top of the LevelDB format, +including a two-level index, configurable checksum algorithms, and an explicit +versioning scheme to allow for the introduction of changes, amongst other +features. + +While the RocksDBv2 SSTable format has been sufficient for Pebble's needs since +inception, new Pebble features and potential backports from RocksDB itself +require that the SSTable format evolve over time and therefore that the table +format be updated. As the majority of new features added over time will be +specific to Pebble, it does not make sense to repurpose the RocksDB format +versions that exist upstream for use with Pebble features (at the time of +writing, RocksDB had added versions 3 and 4 on top of the version 2 in use by +Pebble). A new Pebble-specific table format scheme is proposed. + +In the context of a distributed system such as Cockroach, certain SSTable +features are backwards incompatible (e.g. 
the block property collection and +filtering feature extends the RocksDBv2 SSTable block index format to encode +various block properties, which is a breaking change). Participants must +_first_ ensure that their stores have the code-level features available to read +and write these newer SSTables (indicated by Pebble's format major version). +Once all stores agree that they are running the minimum Pebble format major +version and will not roll back (e.g. Cockroach cluster version finalization), +SSTables can be written and read using more recent table formats. The Pebble +"format major version" and "table format version" are therefore no longer +independent - the former implies an upper bound on the latter. + +Additionally, certain SSTable generation operations are independent of a +specific Pebble instance. For example, SSTable construction for the purposes of +backup and restore generates SSTables that are stored external to a specific +Pebble store (e.g. in cloud storage) and can be used at a later point in time to +restore a store. SSTables constructed for such purposes must be carefully +versioned to ensure compatibility with existing clusters that may run with a +mixture of Pebble versions. + +As a real-world example of the need for the above, consider two Cockroach nodes +each with a Pebble store, one at version A, the other at version B (version A +(newer) > B (older)). Store A constructs an SSTable for an external backup +containing a newer block index format (for block property collection). This +SSTable is then imported into store B. Store B fails to read the SSTable as it +is not running with a format major version recent enough to make sense of the +newer index format. The two stores require a method for agreeing on a minimum +supported table format. + +The remainder of this document outlines a new table format for Pebble. 
This new +table format will be used for new table-level features such as block properties +and range keys (see +[#1339](https://github.com/cockroachdb/pebble/issues/1339)), but also for +backporting table-level features from RocksDB that would be useful to Pebble +(e.g. version 3 avoids encoding sequence numbers in the index, and version 4 +uses delta encoding for the block offsets in the index, both of which are +useful for Pebble). + +# Technical design + +## Pebble magic number + +The last 8 bytes of an SSTable is referred to as the "magic number". + +LevelDB uses the first 8 bytes of the SHA1 hash of the string +`http://code.google.com/p/leveldb/` for the magic number. + +RocksDB uses its own magic number, which indicates the use of a slightly +different table layout - the footer (the name for the end of an SSTable) is +slightly larger to accommodate a 32-bit version number and 8 bits for a +checksum type to be used for all blocks in the SSTable. + +A new 8-byte magic number will be introduced for Pebble: + +``` +\xf0\x9f\xaa\xb3\xf0\x9f\xaa\xb3 // 🪳🪳 +``` + +## Pebble version scheme + +Tables with a Pebble magic number will use a dedicated versioning scheme, +starting with version `1`. No new versions other than version `2` will be +supported for tables containing the RocksDB magic number. + +The choice of switching to a Pebble versioning scheme starting `1` simplifies +the implementation. Essentially all existing Pebble stores are managed via +Cockroach, and were either previously using RocksDB and migrated to Pebble, or +were created with Pebble stores. In both situations the table format used is +RocksDB v2. + +Given that Pebble has not needed (and likely will not need) to support other +RocksDB table formats, it is reasonable to introduce a new magic number for +Pebble and reset the version counter to v1. 
+ +The following initial versions will correspond to the following new Pebble +features that have yet to be introduced to Cockroach clusters as of the time +of writing: + +- Version 1: block property collectors (block properties are encoded into the + block index) +- Version 2: range keys (a new block is present in the table for range keys). + +Subsequent alterations to the SSTable format should only increment the _Pebble +version number_. It should be noted that backported RocksDB table format +features (e.g. RocksDB versions 3 and 4) would use a different version number, +within the Pebble version sequence. While possibly confusing, the RocksDB +features are being "adopted" by Pebble, rather than directly ported, so a +Pebble specific version number is appropriate. + +An alternative would be to allow RocksDB table format features to be backported +into Pebble under their existing RocksDB magic number, _alongside_ +Pebble-specific features. The complexity required to determine the set of +characteristics to read and write to each SSTable would increase with such a +scheme, compared to the simpler "linear history" approach described above, +where new features simply ratchet the Pebble table format version number. + +## Footer format + +The footer format for SSTables with Pebble magic numbers _will remain the same_ +as the RocksDB footer format - specifically, the trailing 53-bytes of the +SSTable consisting of the following fields with the given indices, +little-endian encoded: + +- `0`: Checksum type +- `1-20`: Meta-index block handle +- `21-40`: Index block handle +- `41-44`: Version number +- `45-52`: Magic number + +## Changes / additions to `sstable.TableFormat` + +The `sstable.TableFormat` enum is a `uint32` representation of the tuple +`(magic number, format version)`. 
The current values are: + +```go +type TableFormat uint32 + +const ( + TableFormatRocksDBv2 TableFormat = iota + TableFormatLevelDB +) +``` + +It should be noted that this enum is _not_ persisted in the SSTable. It is +purely an internal type that represents the tuple that simplifies a number of +version checks when reading / writing an SSTable. The values are free to +change, provided care is taken with default values and existing usage. + +The existing `sstable.TableFormat` will be altered to reflect the "linear" +nature of the version history. New versions will be added with the next value +in the sequence. + +```go +const ( + TableFormatUnspecified TableFormat = iota + TableFormatLevelDB // The original LevelDB table format. + TableFormatRocksDBv2 // The current default table format. + TableFormatPebblev1 // Block properties. + TableFormatPebblev2 // Range keys. + ... + TableFormatPebbleDBvN +) +``` + +The introduction of `TableFormatUnspecified` can be used to ensure that where a +`sstable.TableFormat` is _not_ specified, Pebble can select a suitable default +for writing the table (most likely based on the format major version in use by +the store; more in the next section). + +## Interaction with the format major version + +The `FormatMajorVersion` type is used to determine the set of features the +store supports. + +A Pebble store may be read-from / written-to by a Pebble binary that supports +newer features, with more recent Pebble format major versions. These newer +features could include the ability to read and write more recent SSTables. +While the store _could_ read and write SSTables at the most recent version the +binary supports, it is not safe to do so, for reasons outlined earlier. + +The format major version will have a "maximum table format version" associated +with it that indicates the maximum `sstable.TableFormat` that can be safely +handled by the store. 
+ +When introducing a new _table format_ version, it should be gated behind an +associated `FormatMajorVersion` that has the new table format as its "maximum +table format version". + +For example: + +```go +// Existing versions. +FormatDefault.MaxTableFormat() // sstable.TableFormatRocksDBv2 +... +FormatSetWithDelete.MaxTableFormat() // sstable.TableFormatRocksDBv2 +// Proposed versions with Pebble version scheme. +FormatBlockPropertyCollector.MaxTableFormat() // sstable.TableFormatPebblev1 +FormatRangeKeys.MaxTableFormat() // sstable.TableFormatPebblev2 +``` + +## Usage in Cockroach + +The introduction of new SSTable format versions needs to be carefully +coordinated between stores to ensure there are no incompatibilities (i.e. newer +store writes an SSTable that cannot be understood by other stores). + +It is only safe to use a new table format when all nodes in a cluster have been +finalized. A newer Cockroach node, with newer Pebble code, should continue to +write SSTables with a table format version equal to or less than the smallest +table format version across all nodes in the cluster. Once the cluster version +has been finalized, and `(*DB).RatchetFormatMajorVersion(FormatMajorVersion)` +has been called, nodes are free to write SSTables at newer table format +versions. + +At runtime, Pebble exposes a `(*DB).FormatMajorVersion()` method, which may be +used to determine the current format major version of the store, and hence, the +associated table format version. + +In addition to the above, there are situations where SSTables are created for +consumption at a later point in time, independent of any Pebble store - +specifically backup and restore. 
Currently, Cockroach uses two functions in +`pkg/storage` to construct SSTables for both ingestion and backup +([here](https://github.com/cockroachdb/cockroach/blob/20eaf0b415f1df361246804e5d1d80c7a20a8eb6/pkg/storage/sst_writer.go#L57) +and +[here](https://github.com/cockroachdb/cockroach/blob/20eaf0b415f1df361246804e5d1d80c7a20a8eb6/pkg/storage/sst_writer.go#L78)). +Both will need to be updated to take into account the cluster version to ensure +that SSTables with newer versions are only written once the cluster version has +been finalized. + +### Cluster version migration sequencing + +Cockroach uses cluster versions as a guarantee that all nodes in a cluster are +running at a particular binary version, with a particular set of features +enabled. The Pebble store is ratcheted as the cluster version passes certain +versions that correspond to new Pebble functionality. Care must be taken to +prevent subtle race conditions while the cluster version is being updated +across all nodes in a cluster. + +Consider a cluster at cluster version `n-1` with corresponding Pebble format +major version `A`. A new cluster version `n` introduces a new Pebble format +major version `B` with new table level features. One by one, nodes will bump +their format major versions from `A` to `B` as they are upgraded to cluster +version `n`. There exists a period of time where nodes in a cluster are split +between cluster versions `n-1` and `n`, and Pebble format major versions `A` +and `B`. If version `B` introduces SSTable level features that nodes with +stores at format major version `A` do not yet understand, there exists the risk +for runtime incompatibilities. + +To guard against the window of incompatibility, _two_ cluster versions are +employed when bumping Pebble format major versions that correspond to new +SSTable level features. The first cluster version is used to synchronize all +stores at the same Pebble format major version (and therefore table format +version). 
The second cluster version is used as a feature gate that enables +Cockroach nodes to make use of the newer table format, relying on the guarantee +that if a node is at version `n + 1`, then all other nodes in the cluster must +all be at least at version `n`, and therefore have Pebble stores at format +major version `B`. diff --git a/pebble/docs/RFCS/20220311_pebble_flushable_ingested_sstable.md b/pebble/docs/RFCS/20220311_pebble_flushable_ingested_sstable.md new file mode 100644 index 0000000..fb6ff7f --- /dev/null +++ b/pebble/docs/RFCS/20220311_pebble_flushable_ingested_sstable.md @@ -0,0 +1,280 @@ +- Feature Name: Flushable Ingested SSTable +- Status: in-progress +- Start Date: 2022-03-11 +- Authors: Mufeez Amjad +- RFC PR: [#1586](https://github.com/cockroachdb/pebble/pull/1586) +- Pebble Issues: [#25](https://github.com/cockroachdb/pebble/issues/25) +- Cockroach Issues: + +## Summary + +To avoid a forced flush when ingesting SSTables that have an overlap with a +memtable, we "lazily" add the SSTs to the LSM as a `*flushableEntry` to +`d.mu.mem.queue`. In comparison to a regular ingest which adds the SSTs to the +lowest possible level, the SSTs will get placed in the memtable queue before +they are eventually flushed (to the lowest level possible). This state is only +persisted in memory until a flush occurs, thus we require a WAL entry to replay +the ingestion in the event of a crash. + +## Motivation + +Currently, if any of the SSTs that need to be ingested have an overlap with a +memtable, we +[wait](https://github.com/cockroachdb/pebble/blob/56c5aebe151977964db7e464bb6c87ebd3451bd5/ingest.go#L671) +for the memtable to be flushed before the ingestion can proceed. This is to +satisfy the invariant that newer entries (those in the ingested SSTs) in the LSM +have a higher sequence number than old entries (those in the memtables). 
This +problem is also present for subsequent normal writes that are blocked behind the +ingest waiting for their sequence number to be made visible. + +## Technical Design + +The proposed design is mostly taken from Peter's suggestion in #25. The core +requirements are: +1. Replayable WAL entry for the ingest. +2. Implementation of the `flushable` interface for a new `ingestedSSTables` struct. +3. Lazily adding the ingested SSTs to the LSM. +4. Flushing logic to move SSTs into L0-L6. + +
+ +### 1. WAL Entry + +We require a WAL entry to make the ingestion into the flushable queue +replayable, and there is a need for a new type of WAL entry that does not get +applied to the memtable. 2 approaches were considered: +1. Using `seqnum=0` to differentiate this new WAL entry. +2. Introduce a new `InternalKeyKind` for the new WAL entry, + `InternalKeyKindIngestSST`. + +We believe the second approach is better because it avoids modifying batch +headers which can be messy/hacky and because `seqnum=0` is already used for +unapplied batches. The second approach also gives way for a simpler/cleaner +implementation because it utilizes the extensibility of `InternalKeyKind` and is +similar to the treatment of `InternalKeyKindLogData`. It also follows the +correct seqnum semantics for SSTable ingestion in the event of a WAL replay — +each SST in the ingestion batch already gets its own sequence number. + +This change will need to be gated on a `FormatMajorVersion` because if the store +is opened with an older version of Pebble, Pebble will not understand any WAL +entry that contains the new `InternalKeyKind`. + +
+ +When performing an ingest (with overlap), we create a batch with the header: + +``` ++-------------+------------+--- ... ---+ +| SeqNum (8B) | Count (4B) | Entries | ++-------------+------------+--- ... ---+ +``` + +where`SeqNum` is the current running sequence number in the WAL, `Count` is the +number of ingested SSTs, and each entry has the form: + +``` ++-----------+-----------------+-------------------+ +| Kind (1B) | Key (varstring) | Value (varstring) | ++-----------+-----------------+-------------------+ +``` + +where `Kind` is `InternalKeyKindIngestSST`, and `Key` is a path to the +ingested SST on disk. + +When replaying the WAL, we check every batch's first entry and if `keykind == +InternalKeyKindIngestSSTs` then we continue reading the rest of the entries in +the batch of SSTs and replay the ingestion steps - we construct a +`flushableEntry` and add it to the flushable queue: + +```go +b = Batch{db: d} +b.SetRepr(buf.Bytes()) +seqNum := b.SeqNum() +maxSeqNum = seqNum + uint64(b.Count()) +br := b.Reader() +if kind, _, _, _ := br.Next(); kind == InternalKeyKindIngestSST { + // Continue reading the rest of the batch and construct flushable + // of sstables with correct seqnum and add to queue. + buf.Reset() + continue +} +``` + + +### 2. `flushable` Implementation + +Introduce a new flushable type: `ingestedSSTables`. + +```go +type ingestedSSTables struct { + files []*fileMetadata + size uint64 + + cmp Compare + newIters tableNewIters +} +``` +which implements the following functions from the `flushable` interface: + +#### 1. `newIter(o *IterOptions) internalIterator` + +We return a `levelIter` since the ingested SSTables have no overlap, and we can +treat them like a level in the LSM. + +```go +levelSlice := manifest.NewLevelSliceKeySorted(s.cmp, s.files) +return newLevelIter(*o, s.cmp, nil, s.newIters, levelSlice.Iter(), 0, nil) +``` + +
+ +On the client-side, this iterator would have to be used like this: +```go +var iter internalIteratorWithStats +var rangeDelIter keyspan.FragmentIterator +iter = base.WrapIterWithStats(mem.newIter(&dbi.opts)) +switch mem.flushable.(type) { +case *ingestedSSTables: + iter.(*levelIter).initRangeDel(&rangeDelIter) +default: + rangeDelIter = mem.newRangeDelIter(&dbi.opts) +} + +mlevels = append(mlevels, mergingIterLevel{ + iter: iter, + rangeDelIter: rangeDelIter, +}) +``` + +#### 2. `newFlushIter(o *IterOptions, bytesFlushed *uint64) internalIterator` + +#### 3. `newRangeDelIter(o *IterOptions) keyspan.FragmentIterator` + +The above two methods would return `nil`. By doing so, in `c.newInputIter()`: +```go +if flushIter := f.newFlushIter(nil, &c.bytesIterated); flushIter != nil { + iters = append(iters, flushIter) +} +if rangeDelIter := f.newRangeDelIter(nil); rangeDelIter != nil { + iters = append(iters, rangeDelIter) +} +``` +we ensure that no iterators on `ingestedSSTables` will be used while flushing in +`c.runCompaction()`. + +The special-cased flush process for this flushable is described in [Section +4](#4-flushing-logic-to-move-ssts-into-l0). + +#### 4. `newRangeKeyIter(o *IterOptions) keyspan.FragmentIterator` + +Will wait on range key support in `levelIter` to land before implementing. + +#### 5. `inuseBytes() uint64` and `totalBytes() uint64` + +For both functions, we return 0. + +Returning 0 for `inuseBytes()` means that the calculation of `c.maxOverlapBytes` +is not affected by the SSTs (the ingested SSTs don't participate in the +compaction). + +We don't want the size of the ingested SSTs to contribute to the size of the +memtable when determining whether or not to stall writes +(`MemTableStopWritesThreshold`); they should contribute to the L0 read-amp +instead (`L0StopWritesThreshold`). Thus, we'll have to special case for ingested +SSTs in `d.makeRoomForWrite()` to address this detail. 
+ +`totalBytes()` represents the number of bytes allocated by the flushable, which +in our case is 0. A consequence for this is that the size of the SSTs do not +count towards the flush threshold calculation. However, by setting +`flushableEntry.flushForced` we can achieve the same behaviour. + +#### 6. `readyForFlush() bool` + +The flushable of ingested SSTs can always be flushed because the files are +already on disk, so we return true. + +### 3. Lazily adding the ingested SSTs to the LSM + +The steps to add the ingested SSTs to the flushable queue are: +1. Detect an overlap exists (existing logic). + +Add a check that falls back to the old ingestion logic of blocking the ingest on +the flush when `len(d.mu.mem.queue) >= MemtablesStopWritesThreshold - 1`. This +reduces the chance that many short, overlapping, and successive ingestions cause +a memtable write stall. + +Additionally, to mitigate the hiccup on subsequent normal writes, we could wait +before the call to `d.commit.AllocateSeqNum` until: +1. the number of immutable memtables and `ingestedSSTs` in the flushable queue + is below a certain threshold (to prevent building up too many sublevels) +2. the number of immutable memtables is low. This could lead to starvation if + there is a high rate of normal writes. + +2. Create a batch with the list of ingested SSTs. +```go +b := newBatch() +for _, path := range paths: + b.IngestSSTs([]byte(path), nil) +``` +3. Apply the batch. + +In the call to `d.commit.AllocateSeqNum`, `b.count` sequence numbers are already +allocated before the `prepare` step. When we identify a memtable overlap, we +commit the batch to the WAL manually (through logic similar to +`commitPipeline.prepare`). The `apply` step would be a no-op if we performed a +WAL write in the `prepare` step. We would also need to truncate the memtable/WAL +after this step. + +5. Create `ingestedSSTables` flushable and `flushableEntry`. 
+ +We'd need to call `ingestUpdateSeqNum` on these SSTs before adding them to the +flushable. This is to respect the sequence number ordering invariant while the +SSTs reside in the flushable queue. + +6. Add to flushable queue. + +Pebble requires that the last entry in `d.mu.mem.queue` is the mutable memtable +with value `d.mu.mem.mutable`. When adding a `flushableEntry` to the queue, we +want to maintain this invariant. To do this we pass `nil` as the batch to +`d.makeRoomForWrite()`. The result is + +``` +| immutable old memtable | mutable new memtable | +``` + +We then append our new `flushableEntry`, and swap the last two elements in +`d.mu.mem.queue`: + +``` +| immutable old memtable | ingestedSSTables | mutable new memtable | +``` + +Because we add the ingested SSTs to the flushable queue when there is overlap, +and are skipping applying the version edit through the `apply` step of the +ingestion, we ensure that the SSTs are only added to the LSM once. + +7. Call `d.maybeScheduleFlush()`. + +Because we've added an immutable memtable to the flushable queue and set +`flushForced` on the `flushableEntry`, this will surely result in a flush. This +call can be done asynchronously. + +We can then return to caller without waiting for the flush to finish. + +### 4. Flushing logic to move SSTs into L0-L6 + +By returning `nil` for both `flushable.newFlushIter()` and +`flushable.newRangeDelIter()`, the `ingestedSSTables` flushable will not be +flushed normally. + +The suggestion in issue #25 is to move the SSTs from the flushable queue into +L0. However, only the tables that overlap with the memtable will need to target +L0 (because they will likely overlap with L0 post flush), the others can be +moved to lower levels in the LSM. We can use the existing logic in +`ingestTargetLevel` to determine which level to move the ingested SSTables to +during `c.runCompaction()`. 
However, it's important to do this step after the +memtable has been flushed to use the correct `version` when determining overlap. + +The flushable of ingested SSTs should not influence the bounds on the +compaction, so we will have to skip updating `c.smallest` and `c.largest` in +`d.newFlush()` for this flushable. diff --git a/pebble/docs/RFCS/20221122_virtual_sstable.md b/pebble/docs/RFCS/20221122_virtual_sstable.md new file mode 100644 index 0000000..168dbf6 --- /dev/null +++ b/pebble/docs/RFCS/20221122_virtual_sstable.md @@ -0,0 +1,366 @@ +- Feature Name: Virtual sstables +- Status: in-progress +- Start Date: 2022-10-27 +- Authors: Arjun Nair +- RFC PR: https://github.com/cockroachdb/pebble/pull/2116 +- Pebble Issues: + https://github.com/cockroachdb/pebble/issues/1683 + + +** Design Draft** + +# Summary + +The RFC outlines the design to enable virtualizing of physical sstables +in Pebble. + +A virtual sstable has no associated physical data on disk, and is instead backed +by an existing physical sstable. Each physical sstable may be shared by one, or +more than one virtual sstable. + +Initially, the design will be used to lower the read-amp and the write-amp +caused by certain ingestions. Sometimes, ingestions are unable to place incoming +files, which have no data overlap with other files in the lsm, lower in the lsm +because of file boundary overlap with files in the lsm. In this case, we are +forced to place files higher in the lsm, sometimes in L0, which can cause higher +read-amp and unnecessary write-amp as the file is moved lower down the lsm. See +https://github.com/cockroachdb/cockroach/issues/80589 for the problem occurring +in practice. + +Eventually, the design will also be used for the disaggregated storage masking +use-case: https://github.com/cockroachdb/cockroach/pull/70419/files. + +This document describes the design of virtual sstables in Pebble with enough +detail to aid the implementation and code review. 
+ +# Design + +### Ingestion + +When an sstable is ingested into Pebble, we try to place it in the lowest level +without any data overlap, or any file boundary overlap. We can make use of +virtual sstables in the cases where we're forced to place the ingested sstable +at a higher level due to file boundary overlap, but no data overlap. + +``` + s2 +ingest: [i-j-------n] + s1 +L6: [e---g-----------------p---r] + a b c d e f g h i j k l m n o p q r s t u v w x y z +``` + +Consider the sstable s1 in L6 and the ingesting sstable s2. It is clear that +the file boundaries of s1 and s2 overlap, but there is no data overlap as shown +in the diagram. Currently, we will be forced to ingest the sstable s2 into a +level higher than L6. With virtual sstables, we can split the existing sstable +s1 into two sstables s3 and s4 as shown in the following diagram. + +``` + s3 s2 s4 +L6: [e---g]-[i-j-------n]-[p---r] + a b c d e f g h i j k l m n o p q r s t u v w x y z +``` + +The sstable s1 will be deleted from the lsm. If s1 was a physical sstable, then +we will keep the file on disk as long as we need to so that it can back the +virtual sstables. + +There are cases where the ingesting sstables have no data overlap with existing +sstables, but we can't make use of virtual sstables. Consider: +``` + s2 +ingest: [f-----i-j-------n] + s1 +L6: [e---g-----------------p---r] + a b c d e f g h i j k l m n o p q r s t u v w x y z +``` +We cannot use virtual sstables in the above scenario for two reasons: +1. We don't have a quick method of detecting no data overlap. +2. We will be forced to split the sstable in L6 into more than two virtual + sstables, but we want to avoid many small virtual sstables in the lsm. + +Note that in Cockroach, the easier-to-solve case happens very regularly when an +sstable spans a range boundary (which pebble has no knowledge of), and we ingest +a snapshot of a range in between the two already-present ranges. 
+ +This case, where the ingested sstable can slide in between two existing +sstables, is more likely to happen. It occurs when we ingest a snapshot of a +range in between two already-present ranges. + +`ingestFindTargetLevel` changes: +- The `ingestFindTargetLevel` function is used to determine the target level + of the file which is being ingested. Currently, this function returns an `int` + which is the target level for the ingesting file. Two additional return + parameters, `[]manifest.NewFileEntry` and `*manifest.DeletedFileEntry`, will be + added to the function. +- If `ingestFindTargetLevel` decides to split an existing sstable into virtual + sstables, then it will return new and deleted entries. Otherwise, it will only + return the target level of the ingesting file. +- Within the `ingestFindTargetLevel` function, the `overlapWithIterator` + function is used to quickly detect data overlap. In the case with file + boundary overlap, but no data overlap, in the lowest possible level, we will + split the existing sstable into virtual sstables and generate the + `NewFileEntry`s and the `DeletedFileEntry`. The `FilemetaData` section + describes how the various fields in the `FilemetaData` will be computed for + the newly created virtual sstables. + +- Note that we will not split physical sstables into virtual sstables in L0 for + the use case described in this RFC. The benefit of doing so would be to reduce + the number of L0 sublevels, but the cost would be additional implementation + complexity (see the `FilemetaData` section). We also want to avoid too many + virtual sstables in the lsm as they can lead to space amp (see `Compaction` + section). However, in the future, for the disaggregated storage masking case, + we would need to support ingestion and use of virtual sstables in L0. + +- Note that we may need an upper bound on the number of times an sstable is + split into smaller virtual sstables. We can further reduce the risk of many + small sstables: + 1. 
For CockroachDB's snapshot ingestion, there is one large sst (up to 512MB) + and many tiny ones. We can choose to apply this splitting logic only for + the large sst. It is ok for the tiny ssts to be ingested into L0. + 2. Split only if the ingested sst is at least half the size of the sst being + split. So if we have a smaller ingested sst, we will pick a higher level to + split at (where the ssts are smaller). The lifetime of virtual ssts at a + higher level is smaller, so there is lower risk of littering the LSM with + long-lived small virtual ssts. + 3. For disaggregated storage implementation, we can avoid masking for tiny + sstables being ingested and instead write a range delete like we currently + do. Precise details on the masking use case are out of the scope of this + RFC. + +`ingestApply` changes: +- The new and deleted file entries returned by the `ingestFindTargetLevel` + function will be added to the version edit in `ingestApply`. +- We will appropriately update the `levelMetrics` based on the new information + returned by `ingestFindTargetLevel`. + + +### `FilemetaData` changes + +Each virtual sstable will have a unique file metadata value associated with it. +The metadata may be borrowed from the backing physical sstable, or it may be +unique to the virtual sstable. + +This RFC lists out the fields in the `FileMetadata` struct with information on +how each field will be populated. + +`Atomic.AllowedSeeks`: Field is used for read triggered compactions, and we can +populate this field for each virtual sstable since virtual sstables can be +picked for compactions. + +`Atomic.statsValid`: We can set this to true (`1`) when the virtual sstable is +created. On virtual sstable creation we will estimate the table stats of the +virtual sstable based on the table stats of the physical sstable. We can also +set this to `0` and let the table stats job asynchronously compute the stats. 
+
+`refs`: This will be turned into a pointer which will be shared by the
+virtual/physical sstables. See the deletion section of the RFC to learn how the
+`refs` count will be used.
+
+`FileNum`: We could give each virtual sstable its own file number or share
+the file number between all the virtual sstables. In the former case, the virtual
+sstables will be distinguished by the file number, and will have an additional
+metadata field to indicate the file number of the parent sstable. In the latter
+case, we can use a few of the most significant bits of the 64 bit file number to
+distinguish the virtual sstables.
+
+The benefit of using a single file number for each virtual sstable is that we
+don't need to use additional space to store the file number of the backing
+physical sstable.
+
+It might make sense to give each virtual sstable its own file number. Virtual
+sstables are picked for compactions, and compactions and compaction picking
+expect a unique file number for each of the files which it is compacting.
+For example, read compactions will use the file number of the file to determine
+if a file picked for compaction has already been compacted, the version edit
+will expect a different file number for each virtual sstable, etc.
+
+There are direct references to the `FilemetaData.FileNum` throughout Pebble. For
+example, the file number is accessed when the `DB.Checkpoint` function is
+called. This function iterates through the files in each level of the lsm,
+constructs the filepath using the file number, and reads the file from disk. In
+such cases, it is important to exclude virtual sstables.
+
+`Size`: We compute this using linear interpolation on the number of blocks in
+the parent sstable and the number of blocks in the newly created virtual sstable. 
+
+`SmallestSeqNum/LargestSeqNum`: These fields depend on the parent sstable,
+but we would need to perform a scan of the physical sstable to compute these
+accurately for the virtual sstable upon creation. Instead, we could convert
+these fields into lower and upper bounds of the sequence numbers in a file.
+
+These fields are used for L0 sublevels, pebble tooling, delete compaction hints,
+and a lot of plumbing. We don't need to worry about the L0 sublevels use case
+because we won't have virtual sstables in L0 for the use case in this RFC. For
+the rest of the use cases we can use a lower bound for the smallest seq number,
+and an upper bound for the largest seq number.
+
+TODO(bananabrick): Add more detail for any delete compaction hint changes if
+necessary.
+
+`Smallest/Largest`: These, along with the smallest/largest ranges for the range
+and point keys, can be computed upon virtual sstable creation. Precisely, these
+can be computed when we try and detect data overlap in the `overlapWithIterator`
+function during ingestion.
+
+`Stats`: `TableStats` will either be computed upon virtual sstable creation
+using linear interpolation on the block counts of the virtual/physical sstables
+or asynchronously using the file bounds of the virtual sstable.
+
+`PhysicalState`: We can add an additional struct with state associated with
+physical ssts which have been virtualized.
+
+```
+type PhysicalState struct {
+ // Total refs across all virtual ssts * versions. That is, if the same virtual
+ // sst is present in multiple versions, it may have multiple refs, if the
+ // btree node is not the same.
+ totalRefs int32
+
+ // Number of virtual ssts in the latest version that refer to this physical
+ // SST. Will be 1 if there is only a physical sst, or there is only 1 virtual
+ // sst referencing this physical sst.
+ // INVARIANT: refsInLatestVersion <= totalRefs
+ // refsInLatestVersion == 0 is a zombie sstable. 
+ refsInLatestVersion int32
+
+ fileSize uint64
+
+ // If sst is not virtualized and in latest version
+ // virtualSizeSumInLatestVersion == fileSize. If
+ // virtualSizeSumInLatestVersion > 0 and
+ // virtualSizeSumInLatestVersion/fileSize is very small, the corresponding
+ // virtual sst(s) should be candidates for compaction. These candidates can be
+ // tracked via btree annotations. Incrementally updated in
+ // BulkVersionEdit.Apply, when updating refsInLatestVersion.
+ virtualSizeSumInLatestVersion uint64
+}
+```
+
+The `Deletion` section and the `Compactions` section describe why we need to
+store the `PhysicalState`.
+
+### Deletion of physical and virtual sstables
+
+We want to ensure that the physical sstable is only deleted from disk when no
+version references it, and when there are no virtual sstables which are backed
+by the physical sstable.
+
+Since `FilemetaData.refs` is a pointer which is shared by the physical and
+virtual sstables, the physical sstable won't be deleted when it is removed
+from the latest version as the `FilemetaData.refs` will have been increased
+when the virtual sstable is added to a version. Therefore, we only need to
+ensure that the physical sstable is eventually deleted when there are no
+versions which reference it.
+
+Sstables are deleted from disk by the `DB.doDeleteObsoleteFiles` function which
+looks for files to delete in the `DB.mu.versions.obsoleteTables` slice.
+So we need to ensure that any physical sstable which was virtualized is added to
+the obsolete tables list iff `FilemetaData.refs` is 0.
+
+Sstables are added to the obsolete file list when a `Version` is unrefed and
+when `DB.scanObsoleteFiles` is called when Pebble is opened.
+
+When a `Version` is unrefed, sstables referenced by it are only added to the
+obsolete table list if the `FilemetaData.refs` hits 0 for the sstable. 
With +virtual sstables, we can have a case where the last version which directly +references a physical sstable is unrefed, but the physical sstable is not added +to the obsolete table list because its `FilemetaData.refs` count is not 0 +because of indirect references through virtual sstables. Since the last Version +which directly references the physical sstable is deleted, the physical sstable +will never get added to the obsolete table list. Since virtual sstables keep +track of their parent physical sstable, we can just add the physical sstable to +the obsolete table list when the last virtual sstable which references it is +deleted. + +`DB.scanObsoleteFiles` will delete any file which isn't referenced by the +`VersionSet.versions` list. So, it's possible that a physical sstable associated +with a virtual sstable will be deleted. This problem can be fixed by a small +tweak in the `d.mu.versions.addLiveFileNums` to treat the parent sstable of +a virtual sstable as a live file. + +Deleted files still referenced by older versions are considered zombie sstables. +We can extend the definition of zombie sstables to be any sstable which is not +directly, or indirectly through virtual sstables, referenced by the latest +version. See the `PhysicalState` subsection of the `FilemetaData` section +where we describe how the references in the latest version will be tracked. + + +### Reading from virtual sstables + +Since virtual sstables do not exist on disk, we will have to redirect reads +to the physical sstable which backs the virtual sstable. + +All reads to the physical files go through the table cache which opens the file +on disk and creates a `Reader` for the reads. The table cache currently creates +a `FileNum` -> `Reader` mapping for the physical sstables. + +Most of the functions in table cache API take the file metadata of the file as +a parameter. Examples include `newIters`, `newRangeKeyIter`, `withReader`, etc. 
+Each of these functions then calls a subsequent function on the sstable
+`Reader`.
+
+In the `Reader` API, some functions only really need to be called on physical
+sstables, whereas some functions need to be called on both physical and virtual
+sstables. For example, the `Reader.EstimateDiskUsage` function, or the
+`Reader.Layout` function only need to be called on physical sstables, whereas
+some functions like `Reader.NewIter`, and `Reader.NewCompactionIter` need to
+work with virtual sstables.
+
+We could either have an abstraction over the physical sstable `Reader` per
+virtual sstable, or update the `Reader` API to accept file bounds of the
+sstable. In the latter case, we would create one `Reader` on the physical
+sstable for all of the virtual sstables, and update the `Reader` API to accept
+the file bounds of the sstable.
+
+Changes required to share a `Reader` on the physical sstable among the virtual
+sstables:
+- If the file metadata of the virtual sstable is passed into the table cache, on
+ a table cache miss, the table cache will load the Reader for the physical
+ sstable. This step can be performed in the `tableCacheValue.load` function. On
+ a table cache hit, the file number of the parent sstable will be used to fetch
+ the appropriate sstable `Reader`.
+- The `Reader` API will be updated to support reads from virtual sstables. For
+ example, the `NewCompactionIter` function will take additional
+ `lower,upper []byte` parameters.
+
+Updates to iterators:
+- `Reader.NewIter` already has `lower,upper []byte` parameters so this requires
+ no change.
+- Add `lower,upper` fields to the `Reader.NewCompactionIter`. The function
+ initializes single level and two level iterators, and we can pass in the
+ `lower,upper` values to those. TODO(bananabrick): Make sure that the value
+ of `bytesIterated` in the compaction iterator is still accurate.
+- `Reader.NewRawRangeKeyIter/NewRawRangeDelIter`: We need to add `lower/upper`
+ fields to the functions. 
Both iterators make use of a `fragmentBlockIter`. We
+ could filter keys above the `fragmentBlockIter` or add filtering within the
+ `fragmentBlockIter`. To add filtering within the `fragmentBlockIter` we will
+ initialize it with two additional `lower/upper []byte` fields.
+- We would need to update the `SetBounds` logic for the sstable iterators to
+ never set bounds for the iterators outside the virtual sstable bounds. This
+ could lead to keys outside the virtual sstable bounds, but inside the physical
+ sstable bounds, being surfaced.
+
+TODO(bananabrick): Add a section about sstable properties, if necessary.
+
+### Compactions
+
+Virtual sstables can be picked for compactions. If the `FilemetaData` and the
+iterator stack changes work, then compaction shouldn't require much, if any,
+additional work.
+
+Virtual sstables which are picked for compactions may cause space amplification.
+For example, if we have two virtual sstables `a` and `b` in L5, backed by a
+physical sstable `c`, and the sstable `a` is picked for a compaction, we will
+write some additional data into L6, but we won't delete sstable `c` because
+sstable `b` still refers to it. In the worst case, sstable `b` will never be
+picked for compaction and will never be compacted into and we'll have permanent
+space amplification. We should try to prioritize compaction of sstable `b` to
+prevent such a scenario.
+
+See the `PhysicalState` subsection in the `FilemetaData` section to see how
+we'll store compaction picking metrics to reduce virtual sstable space-amp.
+
+### `VersionEdit` decode/encode
+Any additional fields added to the `FilemetaData` need to be supported in the
+version edit `decode/encode` functions. 
diff --git a/pebble/docs/css/app.css b/pebble/docs/css/app.css new file mode 100644 index 0000000..71e1527 --- /dev/null +++ b/pebble/docs/css/app.css @@ -0,0 +1,117 @@ +body { + margin: 10; + background-color: #fff; + font: 10pt -apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji; +} + +.divider { + border-top: 1px solid #eee; +} + +.columns { + display: flex; + flex-direction: row; + align-items: baseline; + justify-content: space-between; +} + +.rows { + display: flex; + flex-direction: column; + flex-wrap: wrap; +} + +.center { + margin: auto; + width: 90%; + min-width: 400px; + max-width: 1200px; +} + +.section { + flex: 100%; + margin-top: 10px; + overflow: auto; +} + +.title { + font-size: 24pt; + font-weight: bold; +} + +.subtitle { + font-size: 12pt; + font-weight: bold; +} + +.updated { + font-size: 9pt; + text-align: right; +} + +div.annotation { + display: none; +} + +.code { + font-family: SFMono-Regular,Consolas,Liberation Mono,Menlo,monospace; +} + +.overview { + max-width: 800px; +} + +.controls { + margin: 5px; +} + +a { + text-decoration: none; +} + +.selected { + font-weight: bold; + text-decoration: underline; +} + +path.line1 { + fill: none; + stroke-width: 1.5px; +} + +path.line2 { + fill: none; + stroke-width: 1.5px; +} + +svg.chart { + flex: 50%; + height: 200px; +} + +.write-throughput { + flex: 50%; + height: 300px; +} + +.write-throughput-detail { + flex: 50%; + height: 300px; +} + +text.hover { + font-size: 8pt; + font-weight: bold; + filter: url(#textBackground); +} + +@media only screen and (max-width: 900px) { + .columns { + flex-direction: column; + } + + svg.chart { + width: 100%; + flex: auto; + } +} diff --git a/pebble/docs/index.html b/pebble/docs/index.html new file mode 100644 index 0000000..cc70942 --- /dev/null +++ b/pebble/docs/index.html @@ -0,0 +1,166 @@ + + + + + + + Pebble Benchmarks + + +
+
+
+
Pebble Benchmarks
+
Last updated
+
+
+
+
+ Benchmarks are run nightly using pebble + bench ycsb on AWS m6id.4xlarge machines equipped with + local SSD storage. The AWS instances show remarkably high + instance to instance performance variability. In order to + smooth out that variability the benchmarks are run multiple + times each (using different instances) and outliers are + excluded. +
+
+
+ Detail: + Bytes Read | + Bytes Written | + Read Amp | + Write Amp +
+
+ Options: + Local scale +
+
+
+
+
L0-sublevels and + flush-splits enabled
+
Increased + LogWriter free blocks 4->16
+
Began tracking + ycsb/E read-amp
+
Level metadata + switched to use a B-Tree
+
Enabled + read-triggered compactions
+
Readahead + and preallocation bug fixed
+
Removed excess + read samples for read-triggered compactions
+
Switched to Ubuntu + 20.04.2 LTS AMI
+
Read compaction fixes
+
Bumped benchmark + runtime to 90 minutes
+
Data quality issue introduced (YCSB A only)
+
Data quality issue fixed (YCSB A only)
+
Began zeroing reused + iterator structs (#1822)
+
Grandparent boundary + compaction splitting
+
Infrastructure + change (#2578)
+
ycsb/F sampling bug
+
Switched to m6id.4xlarge + (from 5d.4xlarge)
+
+
+
+ YCSB A + (50% reads, 50% updates, zipf key distribution) +
+
+ + +
+
+
+
+ YCSB B + (95 reads, 5% updates, zipf key distribution) +
+
+ + +
+
+
+
+ YCSB C + (100% reads, zipf key distribution) +
+
+ + +
+
+
+
+ YCSB D + (95% reads, 5% updates, uniform key distribution) +
+
+ + +
+
+
+
+ YCSB E + (95% scans, 5% updates, zipf key distribution) +
+
+ + +
+
+
+
+ Insert-only + (100% inserts, zipf key distribution) +
+
+ + +
+
+
+
+
+
+
+ Write throughput + (100% inserts, zipf key distribution) +
+
+
+ This benchmark attempts to find the optimal write throughput by + driving more and more load against the DB until a target heuristic + fails (currently a mixture of number of L0 sublevels, L0 files, and + whether the DB has experienced a write stall). These benchmarks are + run nightly using pebble + bench write on GCP n2-standard-32 machines equipped with 16 local + SSDs in a RAID 0 array. +
+
+
+
+ + +
+
+
+ + + + + + diff --git a/pebble/docs/io_profiling.md b/pebble/docs/io_profiling.md new file mode 100644 index 0000000..8fd6230 --- /dev/null +++ b/pebble/docs/io_profiling.md @@ -0,0 +1,231 @@ +# I/O Profiling + +Linux provide extensive kernel profiling capabilities, including the +ability to trace operations at the block I/O layer. These tools are +incredibly powerful, though sometimes overwhelming in their +flexibility. This document captures some common recipes for profiling +Linux I/O. + +* [Perf](#perf) +* [Blktrace](#blktrace) + +## Perf + +The Linux `perf` command can instrument CPU performance counters, and +the extensive set of kernel trace points. A great place to get started +understanding `perf` are Brendan Gregg's [perf +examples](http://www.brendangregg.com/perf.html). + +The two modes of operation are "live" reporting via `perf top`, and +record and report via `perf record` and `perf +{report,script}`. + +Recording the stack traces for `block:block_rq_insert` event allows +determination of what Pebble level code is generating block requests. + +### Installation + +Ubuntu AWS installation: + +``` +sudo apt-get install linux-tools-common linux-tools-4.4.0-1049-aws linux-cloud-tools-4.4.0-1049-aws +``` + +### Recording + +`perf record` (and `perf top`) requires read and write access to +`/sys/kernel/debug/tracing`. Running as root as an easiest way to get +the right permissions. + +``` +# Trace all block device (disk I/O) requests with stack traces, until Ctrl-C. +sudo perf record -e block:block_rq_insert -ag + +# Trace all block device (disk I/O) issues and completions with stack traces, until Ctrl-C. +sudo perf record -e block:block_rq_issue -e block:block_rq_complete -ag +``` + +The `-a` flag records events on all CPUs (almost always desirable). + +The `-g` flag records call graphs (a.k.a stack traces). Capturing the +stack trace makes the recording somewhat more expensive, but it +enables determining the originator of the event. 
Note the stack traces +include both the kernel and application code, allowing pinpointing the +source of I/O as due to flush, compaction, WAL writes, etc. + +The `-e` flag controls which events are instrumented. The list of +`perf` events is enormous. See `sudo perf list`. + +The `-o` flag controls where output is recorded. The default is +`perf.data`. + +In order to record events for a specific duration, you can append `-- +sleep ` to the command line. + +``` +# Trace all block device (disk I/O) requests with stack traces for 10s. +sudo perf record -e block:block_rq_insert -ag -- sleep 10 +``` + +### Reporting + +The recorded perf data (`perf.data`) can be explored using `perf +report` and `perf script`. + +``` +# Show perf.data in an ncurses browser. +sudo perf report + +# Show perf.data as a text report. +sudo perf report --stdio +``` + +As an example, `perf report --stdio` from perf data gathered using +`perf record -e block:block_rq_insert -ag` will show something like: + +``` + 96.76% 0.00% pebble pebble [.] 
runtime.goexit + | + ---runtime.goexit + | + |--85.58%-- github.com/cockroachdb/pebble/internal/record.NewLogWriter.func2 + | runtime/pprof.Do + | github.com/cockroachdb/pebble/internal/record.(*LogWriter).flushLoop-fm + | github.com/cockroachdb/pebble/internal/record.(*LogWriter).flushLoop + | github.com/cockroachdb/pebble/internal/record.(*LogWriter).flushPending + | github.com/cockroachdb/pebble/vfs.(*syncingFile).Sync + | github.com/cockroachdb/pebble/vfs.(*syncingFile).syncFdatasync-fm + | github.com/cockroachdb/pebble/vfs.(*syncingFile).syncFdatasync + | syscall.Syscall + | entry_SYSCALL_64_fastpath + | sys_fdatasync + | do_fsync + | vfs_fsync_range + | ext4_sync_file + | filemap_write_and_wait_range + | __filemap_fdatawrite_range + | do_writepages + | ext4_writepages + | blk_finish_plug + | blk_flush_plug_list + | blk_mq_flush_plug_list + | blk_mq_insert_requests +``` + +This is showing that `96.76%` of block device requests on the entire +system were generated by the `pebble` process, and `85.58%` of the +block device requests on the entire system were generated from WAL +syncing within this `pebble` process. + +The `perf script` command provides access to the raw request +data. While there are various pre-recorded scripts that can be +executed, it is primarily useful for seeing call stacks along with the +"trace" data. For block requests, the trace data shows the device, the +operation type, the offset, and the size. + +``` +# List all events from perf.data with recommended header and fields. +sudo perf script --header -F comm,pid,tid,cpu,time,event,ip,sym,dso,trace +... 
+pebble 6019/6019 [008] 16492.555957: block:block_rq_insert: 259,0 WS 0 () 3970952 + 256 [pebble] + 7fff813d791a blk_mq_insert_requests + 7fff813d8878 blk_mq_flush_plug_list + 7fff813ccc96 blk_flush_plug_list + 7fff813cd20c blk_finish_plug + 7fff812a143d ext4_writepages + 7fff8119ea1e do_writepages + 7fff81191746 __filemap_fdatawrite_range + 7fff8119188a filemap_write_and_wait_range + 7fff81297c41 ext4_sync_file + 7fff81244ecb vfs_fsync_range + 7fff81244f8d do_fsync + 7fff81245243 sys_fdatasync + 7fff8181ae6d entry_SYSCALL_64_fastpath + 3145e0 syscall.Syscall + 6eddf3 github.com/cockroachdb/pebble/vfs.(*syncingFile).syncFdatasync + 6f069a github.com/cockroachdb/pebble/vfs.(*syncingFile).syncFdatasync-fm + 6ed8d2 github.com/cockroachdb/pebble/vfs.(*syncingFile).Sync + 72542f github.com/cockroachdb/pebble/internal/record.(*LogWriter).flushPending + 724f5c github.com/cockroachdb/pebble/internal/record.(*LogWriter).flushLoop + 72855e github.com/cockroachdb/pebble/internal/record.(*LogWriter).flushLoop-fm + 7231d8 runtime/pprof.Do + 727b09 github.com/cockroachdb/pebble/internal/record.NewLogWriter.func2 + 2c0281 runtime.goexit +``` + +Let's break down the trace data: + +``` +259,0 WS 0 () 3970952 + 256 + | | | | + | | | + size (sectors) + | | | + | | + offset (sectors) + | | + | +- flags: R(ead), W(rite), B(arrier), S(ync), D(iscard), N(one) + | + +- device: , +``` + +The above is indicating that a synchronous write of `256` sectors was +performed starting at sector `3970952`. The sector size is device +dependent and can be determined with `blockdev --report `, +though it is almost always `512` bytes. In this case, the sector size +is `512` bytes indicating that this is a write of 128 KB. + +## Blktrace + +The `blktrace` tool records similar info to `perf`, but is targeted to +the block layer instead of being general purpose. The `blktrace` +command records data, while the `blkparse` command parses and displays +data. 
The `btrace` command is a shortcut for piping the output from +`blktrace` directly into `blkparse. + +### Installation + +Ubuntu AWS installation: + +``` +sudo apt-get install blktrace +``` + +## Usage + +``` +# Pipe the output of blktrace directly into blkparse. +sudo blktrace -d /dev/nvme1n1 -o - | blkparse -i - + +# Equivalently. +sudo btrace /dev/nvme1n1 +``` + +The information captured by `blktrace` is similar to what `perf` captures: + +``` +sudo btrace /dev/nvme1n1 +... +259,0 4 186 0.016411295 11538 Q WS 129341760 + 296 [pebble] +259,0 4 187 0.016412100 11538 Q WS 129342016 + 40 [pebble] +259,0 4 188 0.016412200 11538 G WS 129341760 + 256 [pebble] +259,0 4 189 0.016412714 11538 G WS 129342016 + 40 [pebble] +259,0 4 190 0.016413148 11538 U N [pebble] 2 +259,0 4 191 0.016413255 11538 I WS 129341760 + 256 [pebble] +259,0 4 192 0.016413321 11538 I WS 129342016 + 40 [pebble] +259,0 4 193 0.016414271 11538 D WS 129341760 + 256 [pebble] +259,0 4 194 0.016414860 11538 D WS 129342016 + 40 [pebble] +259,0 12 217 0.016687595 0 C WS 129341760 + 256 [0] +259,0 12 218 0.016700021 0 C WS 129342016 + 40 [0] +``` + +The standard format is: + +``` + + [] +``` + +See `man blkparse` for an explanation of the actions. + +The `blktrace` output can be used to highlight problematic I/O +patterns. For example, it can be used to determine there are an +excessive number of small sequential read I/Os indicating that dynamic +readahead is not working correctly. diff --git a/pebble/docs/js/app.js b/pebble/docs/js/app.js new file mode 100644 index 0000000..d944350 --- /dev/null +++ b/pebble/docs/js/app.js @@ -0,0 +1,695 @@ +// TODO(peter) +// - Save pan/zoom settings in query params +// +// TODO(travers): There exists an awkward ordering script loading issue where +// write-throughput.js is loaded first, but contains references to functions +// defined in this file. Work out a better way of modularizing this code. 
+ +const parseTime = d3.timeParse("%Y%m%d"); +const formatTime = d3.timeFormat("%b %d"); +const dateBisector = d3.bisector(d => d.date).left; + +let minDate; +let max = { + date: new Date(), + perChart: {}, + opsSec: 0, + readBytes: 0, + writeBytes: 0, + readAmp: 0, + writeAmp: 0 +}; +let usePerChartMax = false; +let detail; +let detailName; +let detailFormat; + +let annotations = []; + +function getMaxes(chartKey) { + return usePerChartMax ? max.perChart[chartKey] : max; +} + +function styleWidth(e) { + const width = +e.style("width").slice(0, -2); + return Math.round(Number(width)); +} + +function styleHeight(e) { + const height = +e.style("height").slice(0, -2); + return Math.round(Number(height)); +} + +function pathGetY(path, x) { + // Walk along the path using binary search to locate the point + // with the supplied x value. + let start = 0; + let end = path.getTotalLength(); + while (start < end) { + const target = (start + end) / 2; + const pos = path.getPointAtLength(target); + if (Math.abs(pos.x - x) < 0.01) { + // Close enough. + return pos.y; + } else if (pos.x > x) { + end = target; + } else { + start = target; + } + } + return path.getPointAtLength(start).y; +} + +// Pretty formatting of a number in human readable units. +function humanize(s) { + const iecSuffixes = [" B", " KB", " MB", " GB", " TB", " PB", " EB"]; + if (s < 10) { + return "" + s; + } + let e = Math.floor(Math.log(s) / Math.log(1024)); + let suffix = iecSuffixes[Math.floor(e)]; + let val = Math.floor(s / Math.pow(1024, e) * 10 + 0.5) / 10; + return val.toFixed(val < 10 ? 
1 : 0) + suffix; +} + +function dirname(path) { + return path.match(/.*\//)[0]; +} + +function equalDay(d1, d2) { + return ( + d1.getYear() == d2.getYear() && + d1.getMonth() == d2.getMonth() && + d1.getDate() == d2.getDate() + ); +} + +function computeSegments(data) { + return data.reduce(function(segments, d) { + if (segments.length == 0) { + segments.push([d]); + return segments; + } + + const lastSegment = segments[segments.length - 1]; + const lastDatum = lastSegment[lastSegment.length - 1]; + const days = Math.round( + (d.date.getTime() - lastDatum.date.getTime()) / + (24 * 60 * 60 * 1000) + ); + if (days == 1) { + lastSegment.push(d); + } else { + segments.push([d]); + } + return segments; + }, []); +} + +function computeGaps(segments) { + let gaps = []; + for (let i = 1; i < segments.length; ++i) { + const last = segments[i - 1]; + const cur = segments[i]; + gaps.push([last[last.length - 1], cur[0]]); + } + + // If the last day is not equal to the current day, add a gap that + // spans to the current day. 
+ const last = segments[segments.length - 1]; + const lastDay = last[last.length - 1]; + if (!equalDay(lastDay.date, max.date)) { + const maxDay = Object.assign({}, lastDay); + maxDay.date = max.date; + gaps.push([lastDay, maxDay]); + } + return gaps; +} + +function renderChart(chart) { + const chartKey = chart.attr("data-key"); + const vals = data[chartKey]; + + const svg = chart.html(""); + + const margin = { top: 25, right: 60, bottom: 25, left: 60 }; + + const width = styleWidth(svg) - margin.left - margin.right, + height = styleHeight(svg) - margin.top - margin.bottom; + + const defs = svg.append("defs"); + const filter = defs + .append("filter") + .attr("id", "textBackground") + .attr("x", 0) + .attr("y", 0) + .attr("width", 1) + .attr("height", 1); + filter.append("feFlood").attr("flood-color", "white"); + filter.append("feComposite").attr("in", "SourceGraphic"); + + defs + .append("clipPath") + .attr("id", chartKey) + .append("rect") + .attr("x", 0) + .attr("y", -margin.top) + .attr("width", width) + .attr("height", margin.top + height + 10); + + const title = svg + .append("text") + .attr("class", "chart-title") + .attr("x", margin.left + width / 2) + .attr("y", 15) + .style("text-anchor", "middle") + .style("font", "8pt sans-serif") + .text(chartKey); + + const g = svg + .append("g") + .attr("transform", "translate(" + margin.left + "," + margin.top + ")"); + + const x = d3.scaleTime().range([0, width]); + const x2 = d3.scaleTime().range([0, width]); + const y1 = d3.scaleLinear().range([height, 0]); + const z = d3.scaleOrdinal(d3.schemeCategory10); + const xFormat = formatTime; + + x.domain([minDate, max.date]); + x2.domain([minDate, max.date]); + + y1.domain([0, getMaxes(chartKey).opsSec]); + + const xAxis = d3.axisBottom(x).ticks(5); + + g + .append("g") + .attr("class", "axis axis--x") + .attr("transform", "translate(0," + height + ")") + .call(xAxis); + g + .append("g") + .attr("class", "axis axis--y") + .call(d3.axisLeft(y1).ticks(5)); + + if (!vals) 
{ + // That's all we can draw for an empty chart. + svg + .append("text") + .attr("x", margin.left + width / 2) + .attr("y", margin.top + height / 2) + .style("text-anchor", "middle") + .style("font", "8pt sans-serif") + .text("No data"); + return; + } + + const view = g + .append("g") + .attr("class", "view") + .attr("clip-path", "url(#" + chartKey + ")"); + + const triangle = d3 + .symbol() + .type(d3.symbolTriangle) + .size(12); + view + .selectAll("path.annotation") + .data(annotations) + .enter() + .append("path") + .attr("class", "annotation") + .attr("d", triangle) + .attr("stroke", "#2b2") + .attr("fill", "#2b2") + .attr( + "transform", + d => "translate(" + (x(d.date) + "," + (height + 5) + ")") + ); + + view + .selectAll("line.annotation") + .data(annotations) + .enter() + .append("line") + .attr("class", "annotation") + .attr("fill", "none") + .attr("stroke", "#2b2") + .attr("stroke-width", "1px") + .attr("stroke-dasharray", "1 2") + .attr("x1", d => x(d.date)) + .attr("x2", d => x(d.date)) + .attr("y1", 0) + .attr("y2", height); + + // Divide the data into contiguous days so that we can avoid + // interpolating days where there is missing data. 
+ const segments = computeSegments(vals); + const gaps = computeGaps(segments); + + const line1 = d3 + .line() + .x(d => x(d.date)) + .y(d => y1(d.opsSec)); + const path1 = view + .selectAll(".line1") + .data(segments) + .enter() + .append("path") + .attr("class", "line1") + .attr("d", line1) + .style("stroke", d => z(0)); + + view + .selectAll(".line1-gaps") + .data(gaps) + .enter() + .append("path") + .attr("class", "line1-gaps") + .attr("d", line1) + .attr("opacity", 0.8) + .style("stroke", d => z(0)) + .style("stroke-dasharray", "1 2"); + + let y2 = d3.scaleLinear().range([height, 0]); + let line2; + let path2; + if (detail) { + y2 = d3.scaleLinear().range([height, 0]); + y2.domain([0, detail(getMaxes(chartKey))]); + g + .append("g") + .attr("class", "axis axis--y") + .attr("transform", "translate(" + width + ",0)") + .call( + d3 + .axisRight(y2) + .ticks(5) + .tickFormat(detailFormat) + ); + + line2 = d3 + .line() + .x(d => x(d.date)) + .y(d => y2(detail(d))); + path2 = view + .selectAll(".line2") + .data(segments) + .enter() + .append("path") + .attr("class", "line2") + .attr("d", line2) + .style("stroke", d => z(1)); + view + .selectAll(".line2-gaps") + .data(gaps) + .enter() + .append("path") + .attr("class", "line2-gaps") + .attr("d", line2) + .attr("opacity", 0.8) + .style("stroke", d => z(1)) + .style("stroke-dasharray", "1 2"); + } + + const updateZoom = function(t) { + x.domain(t.rescaleX(x2).domain()); + g.select(".axis--x").call(xAxis); + g.selectAll(".line1").attr("d", line1); + g.selectAll(".line1-gaps").attr("d", line1); + if (detail) { + g.selectAll(".line2").attr("d", line2); + g.selectAll(".line2-gaps").attr("d", line2); + } + g + .selectAll("path.annotation") + .attr( + "transform", + d => "translate(" + (x(d.date) + "," + (height + 5) + ")") + ); + g + .selectAll("line.annotation") + .attr("x1", d => x(d.date)) + .attr("x2", d => x(d.date)); + }; + svg.node().updateZoom = updateZoom; + + const hoverSeries = function(mouse) { + if (!detail) { 
+ return 1; + } + const mousex = mouse[0]; + const mousey = mouse[1] - margin.top; + const path1Y = pathGetY(path1.node(), mousex); + const path2Y = pathGetY(path2.node(), mousex); + return Math.abs(mousey - path1Y) < Math.abs(mousey - path2Y) ? 1 : 2; + }; + + // This is a bit funky: initDate() initializes the date range to + // [today-90,today]. We then allow zooming out by 4x which will + // give a maximum range of 360 days. We limit translation to the + // 360 day period. The funkiness is that it would be more natural + // to start at the maximum zoomed amount and then initialize the + // zoom. But that doesn't work because we want to maintain the + // existing zoom settings whenever we have to (re-)render(). + const zoom = d3 + .zoom() + .scaleExtent([0.25, 2]) + .translateExtent([[-width * 3, 0], [width, 1]]) + .extent([[0, 0], [width, 1]]) + .on("zoom", function() { + const t = d3.event.transform; + if (!d3.event.sourceEvent) { + updateZoom(t); + return; + } + + d3.selectAll(".chart").each(function() { + if (this.updateZoom != null) { + this.updateZoom(t); + } + }); + + d3.selectAll(".chart").each(function() { + this.__zoom = t.translate(0, 0); + }); + + const mouse = d3.mouse(this); + if (mouse) { + mouse[0] -= margin.left; // adjust for rect.mouse position + const date = x.invert(mouse[0]); + const hover = hoverSeries(mouse); + d3.selectAll(".chart.ycsb").each(function() { + this.updateMouse(mouse, date, hover); + }); + } + }); + + svg.call(zoom); + svg.call(zoom.transform, d3.zoomTransform(svg.node())); + + const lineHover = g + .append("line") + .attr("class", "hover") + .style("fill", "none") + .style("stroke", "#f99") + .style("stroke-width", "1px"); + + const dateHover = g + .append("text") + .attr("class", "hover") + .attr("fill", "#f22") + .attr("text-anchor", "middle") + .attr("alignment-baseline", "hanging") + .attr("transform", "translate(0, 0)"); + + const opsHover = g + .append("text") + .attr("class", "hover") + .attr("fill", "#f22") + 
.attr("text-anchor", "middle") + .attr("transform", "translate(0, 0)"); + + const marker = g + .append("circle") + .attr("class", "hover") + .attr("r", 3) + .style("opacity", "0") + .style("stroke", "#f22") + .style("fill", "#f22"); + + svg.node().updateMouse = function(mouse, date, hover) { + const mousex = mouse[0]; + const mousey = mouse[1]; + const i = dateBisector(vals, date, 1); + const v = + i == vals.length + ? vals[i - 1] + : mousex - x(vals[i - 1].date) < x(vals[i].date) - mousex + ? vals[i - 1] + : vals[i]; + const noData = mousex < x(vals[0].date); + + let lineY = height; + if (!noData) { + if (hover == 1) { + lineY = pathGetY(path1.node(), mousex); + } else { + lineY = pathGetY(path2.node(), mousex); + } + } + + let val, valY, valFormat; + if (hover == 1) { + val = v.opsSec; + valY = y1(val); + valFormat = d3.format(",.0f"); + } else { + val = detail(v); + valY = y2(val); + valFormat = detailFormat; + } + + lineHover + .attr("x1", mousex) + .attr("x2", mousex) + .attr("y1", lineY) + .attr("y2", height); + marker.attr("transform", "translate(" + x(v.date) + "," + valY + ")"); + dateHover + .attr("transform", "translate(" + mousex + "," + (height + 8) + ")") + .text(xFormat(date)); + opsHover + .attr( + "transform", + "translate(" + x(v.date) + "," + (valY - 7) + ")" + ) + .text(valFormat(val)); + }; + + const rect = svg + .append("rect") + .attr("class", "mouse") + .attr("cursor", "move") + .attr("fill", "none") + .attr("pointer-events", "all") + .attr("width", width) + .attr("height", height + margin.top + margin.bottom) + .attr("transform", "translate(" + margin.left + "," + 0 + ")") + .on("mousemove", function() { + const mouse = d3.mouse(this); + const date = x.invert(mouse[0]); + const hover = hoverSeries(mouse); + + let resetTitle = true; + for (let i in annotations) { + if (Math.abs(mouse[0] - x(annotations[i].date)) <= 5) { + title + .style("font-size", "9pt") + .text(annotations[i].message); + resetTitle = false; + break; + } + } + if 
(resetTitle) { + title.style("font-size", "8pt").text(chartKey); + } + + d3.selectAll(".chart").each(function() { + if (this.updateMouse != null) { + this.updateMouse(mouse, date, hover); + } + }); + }) + .on("mouseover", function() { + d3 + .selectAll(".chart") + .selectAll(".hover") + .style("opacity", 1.0); + }) + .on("mouseout", function() { + d3 + .selectAll(".chart") + .selectAll(".hover") + .style("opacity", 0); + }); +} + +function renderYCSB() { + d3.selectAll(".chart.ycsb").each(function(d, i) { + renderChart(d3.select(this)); + }); +} + +function initData() { + for (key in data) { + data[key] = d3.csvParseRows(data[key], function(d, i) { + return { + date: parseTime(d[0]), + opsSec: +d[1], + readBytes: +d[2], + writeBytes: +d[3], + readAmp: +d[4], + writeAmp: +d[5] + }; + }); + + const vals = data[key]; + max.perChart[key] = { + opsSec: d3.max(vals, d => d.opsSec), + readBytes: d3.max(vals, d => d.readBytes), + writeBytes: d3.max(vals, d => d.writeBytes), + readAmp: d3.max(vals, d => d.readAmp), + writeAmp: d3.max(vals, d => d.writeAmp), + } + max.opsSec = Math.max(max.opsSec, max.perChart[key].opsSec); + max.readBytes = Math.max(max.readBytes, max.perChart[key].readBytes); + max.writeBytes = Math.max( + max.writeBytes, + max.perChart[key].writeBytes, + ); + max.readAmp = Math.max(max.readAmp, max.perChart[key].readAmp); + max.writeAmp = Math.max(max.writeAmp, max.perChart[key].writeAmp); + } + + // Load the write-throughput data and merge with the existing data. We + // return a promise here to allow us to continue to make progress elsewhere. 
+ return fetch(writeThroughputSummaryURL()) + .then(response => response.json()) + .then(wtData => { + for (let key in wtData) { + data[key] = wtData[key]; + } + }); +} + +function initDateRange() { + max.date.setHours(0, 0, 0, 0); + minDate = new Date(new Date().setDate(max.date.getDate() - 90)); +} + +function initAnnotations() { + d3.selectAll(".annotation").each(function() { + const annotation = d3.select(this); + const date = parseTime(annotation.attr("data-date")); + annotations.push({ date: date, message: annotation.text() }); + }); +} + +function setQueryParams() { + var params = new URLSearchParams(); + if (detailName) { + params.set("detail", detailName); + } + if (usePerChartMax) { + params.set("max", "local"); + } + var search = "?" + params; + if (window.location.search != search) { + window.history.pushState(null, null, search); + } +} + +function setDetail(name) { + detail = undefined; + detailFormat = undefined; + detailName = name; + + switch (detailName) { + case "readBytes": + detail = d => d.readBytes; + detailFormat = humanize; + break; + case "writeBytes": + detail = d => d.writeBytes; + detailFormat = humanize; + break; + case "readAmp": + detail = d => d.readAmp; + detailFormat = d3.format(",.1f"); + break; + case "writeAmp": + detail = d => d.writeAmp; + detailFormat = d3.format(",.1f"); + break; + } + + d3.selectAll(".toggle").classed("selected", false); + d3.select("#" + detailName).classed("selected", detail != null); +} + +function initQueryParams() { + var params = new URLSearchParams(window.location.search.substring(1)); + setDetail(params.get("detail")); + usePerChartMax = params.get("max") == "local"; + d3.select("#localMax").classed("selected", usePerChartMax); +} + +function toggleDetail(name) { + const link = d3.select("#" + name); + const selected = !link.classed("selected"); + link.classed("selected", selected); + if (selected) { + setDetail(name); + } else { + setDetail(null); + } + setQueryParams(); + renderYCSB(); +} + 
+function toggleLocalMax() { + const link = d3.select("#localMax"); + const selected = !link.classed("selected"); + link.classed("selected", selected); + usePerChartMax = selected; + setQueryParams(); + renderYCSB(); +} + +window.onload = function init() { + d3.selectAll(".toggle").each(function() { + const link = d3.select(this); + link.attr("href", 'javascript:toggleDetail("' + link.attr("id") + '")'); + }); + d3.selectAll("#localMax").each(function() { + const link = d3.select(this); + link.attr("href", 'javascript:toggleLocalMax()'); + }); + + initData().then(_ => { + initDateRange(); + initAnnotations(); + initQueryParams(); + + renderYCSB(); + renderWriteThroughputSummary(data); + + // Use the max date to bisect into the workload data to pluck out the + // correct datapoint. + let workloadData = data[writeThroughputWorkload]; + bisectAndRenderWriteThroughputDetail(workloadData, max.date); + + let lastUpdate; + for (let key in data) { + const max = d3.max(data[key], d => d.date); + if (!lastUpdate || lastUpdate < max) { + lastUpdate = max; + } + } + d3.selectAll(".updated") + .text("Last updated: " + d3.timeFormat("%b %e, %Y")(lastUpdate)); + }) + + // By default, display each panel with its local max, which makes spotting + // regressions simpler. + toggleLocalMax(); +}; + +window.onpopstate = function() { + initQueryParams(); + renderYCSB(); +}; + +window.addEventListener("resize", renderYCSB); diff --git a/pebble/docs/js/d3.v5.min.js b/pebble/docs/js/d3.v5.min.js new file mode 100644 index 0000000..a75674c --- /dev/null +++ b/pebble/docs/js/d3.v5.min.js @@ -0,0 +1,2 @@ +// https://d3js.org Version 5.1.0. Copyright 2018 Mike Bostock. 
+(function(t,n){"object"==typeof exports&&"undefined"!=typeof module?n(exports):"function"==typeof define&&define.amd?define(["exports"],n):n(t.d3=t.d3||{})})(this,function(t){"use strict";function n(t,n){return tn?1:t>=n?0:NaN}function e(t){return 1===t.length&&(t=function(t){return function(e,r){return n(t(e),r)}}(t)),{left:function(n,e,r,i){for(null==r&&(r=0),null==i&&(i=n.length);r>>1;t(n[o],e)<0?r=o+1:i=o}return r},right:function(n,e,r,i){for(null==r&&(r=0),null==i&&(i=n.length);r>>1;t(n[o],e)>0?i=o:r=o+1}return r}}}function r(t,n){return[t,n]}function i(t){return null===t?NaN:+t}function o(t,n){var e,r,o=t.length,a=0,u=-1,f=0,c=0;if(null==n)for(;++u1)return c/(a-1)}function a(t,n){var e=o(t,n);return e?Math.sqrt(e):e}function u(t,n){var e,r,i,o=t.length,a=-1;if(null==n){for(;++a=e)for(r=i=e;++ae&&(r=e),i=e)for(r=i=e;++ae&&(r=e),i0)return[t];if((r=n0)for(t=Math.ceil(t/a),n=Math.floor(n/a),o=new Array(i=Math.ceil(n-t+1));++u=0?(o>=es?10:o>=rs?5:o>=is?2:1)*Math.pow(10,i):-Math.pow(10,-i)/(o>=es?10:o>=rs?5:o>=is?2:1)}function d(t,n,e){var r=Math.abs(n-t)/Math.max(0,e),i=Math.pow(10,Math.floor(Math.log(r)/Math.LN10)),o=r/i;return o>=es?i*=10:o>=rs?i*=5:o>=is&&(i*=2),n=1)return+e(t[r-1],r-1,t);var r,o=(r-1)*n,a=Math.floor(o),u=+e(t[a],a,t);return u+(+e(t[a+1],a+1,t)-u)*(o-a)}}function g(t,n){var e,r,i=t.length,o=-1;if(null==n){for(;++o=e)for(r=e;++or&&(r=e)}else for(;++o=e)for(r=e;++or&&(r=e);return r}function y(t){for(var n,e,r,i=t.length,o=-1,a=0;++o=0;)for(n=(r=t[i]).length;--n>=0;)e[--a]=r[n];return e}function _(t,n){var e,r,i=t.length,o=-1;if(null==n){for(;++o=e)for(r=e;++oe&&(r=e)}else for(;++o=e)for(r=e;++oe&&(r=e);return r}function b(t){if(!(i=t.length))return[];for(var n=-1,e=_(t,m),r=new Array(e);++n=0&&"xmlns"!==(n=t.slice(0,e))&&(t=t.slice(e+1)),ds.hasOwnProperty(n)?{space:ds[n],local:t}:t}function C(t){var n=k(t);return(n.local?function(t){return function(){return this.ownerDocument.createElementNS(t.space,t.local)}}:function(t){return function(){var 
n=this.ownerDocument,e=this.namespaceURI;return e===hs&&n.documentElement.namespaceURI===hs?n.createElement(t):n.createElementNS(e,t)}})(n)}function P(){}function z(t){return null==t?P:function(){return this.querySelector(t)}}function R(){return[]}function L(t){return null==t?R:function(){return this.querySelectorAll(t)}}function D(t){return new Array(t.length)}function U(t,n){this.ownerDocument=t.ownerDocument,this.namespaceURI=t.namespaceURI,this._next=null,this._parent=t,this.__data__=n}function q(t,n,e,r,i,o){for(var a,u=0,f=n.length,c=o.length;un?1:t>=n?0:NaN}function B(t){return t.ownerDocument&&t.ownerDocument.defaultView||t.document&&t||t.defaultView}function F(t,n){return t.style.getPropertyValue(n)||B(t).getComputedStyle(t,null).getPropertyValue(n)}function I(t){return t.trim().split(/^|\s+/)}function j(t){return t.classList||new H(t)}function H(t){this._node=t,this._names=I(t.getAttribute("class")||"")}function X(t,n){for(var e=j(t),r=-1,i=n.length;++r>8&15|n>>4&240,n>>4&15|240&n,(15&n)<<4|15&n,1)):(n=Ns.exec(t))?Ct(parseInt(n[1],16)):(n=Ss.exec(t))?new Lt(n[1],n[2],n[3],1):(n=Es.exec(t))?new Lt(255*n[1]/100,255*n[2]/100,255*n[3]/100,1):(n=ks.exec(t))?Pt(n[1],n[2],n[3],n[4]):(n=Cs.exec(t))?Pt(255*n[1]/100,255*n[2]/100,255*n[3]/100,n[4]):(n=Ps.exec(t))?Dt(n[1],n[2]/100,n[3]/100,1):(n=zs.exec(t))?Dt(n[1],n[2]/100,n[3]/100,n[4]):Rs.hasOwnProperty(t)?Ct(Rs[t]):"transparent"===t?new Lt(NaN,NaN,NaN,0):null}function Ct(t){return new Lt(t>>16&255,t>>8&255,255&t,1)}function Pt(t,n,e,r){return r<=0&&(t=n=e=NaN),new Lt(t,n,e,r)}function zt(t){return t instanceof Et||(t=kt(t)),t?(t=t.rgb(),new Lt(t.r,t.g,t.b,t.opacity)):new Lt}function Rt(t,n,e,r){return 1===arguments.length?zt(t):new Lt(t,n,e,null==r?1:r)}function Lt(t,n,e,r){this.r=+t,this.g=+n,this.b=+e,this.opacity=+r}function Dt(t,n,e,r){return r<=0?t=n=e=NaN:e<=0||e>=1?t=n=NaN:n<=0&&(t=NaN),new qt(t,n,e,r)}function Ut(t,n,e,r){return 1===arguments.length?function(t){if(t instanceof qt)return new 
qt(t.h,t.s,t.l,t.opacity);if(t instanceof Et||(t=kt(t)),!t)return new qt;if(t instanceof qt)return t;var n=(t=t.rgb()).r/255,e=t.g/255,r=t.b/255,i=Math.min(n,e,r),o=Math.max(n,e,r),a=NaN,u=o-i,f=(o+i)/2;return u?(a=n===o?(e-r)/u+6*(e0&&f<1?0:a,new qt(a,u,f,t.opacity)}(t):new qt(t,n,e,null==r?1:r)}function qt(t,n,e,r){this.h=+t,this.s=+n,this.l=+e,this.opacity=+r}function Ot(t,n,e){return 255*(t<60?n+(e-n)*t/60:t<180?e:t<240?n+(e-n)*(240-t)/60:n)}function Yt(t){if(t instanceof Ft)return new Ft(t.l,t.a,t.b,t.opacity);if(t instanceof $t){if(isNaN(t.h))return new Ft(t.l,0,0,t.opacity);var n=t.h*Ls;return new Ft(t.l,Math.cos(n)*t.c,Math.sin(n)*t.c,t.opacity)}t instanceof Lt||(t=zt(t));var e,r,i=Xt(t.r),o=Xt(t.g),a=Xt(t.b),u=It((.2225045*i+.7168786*o+.0606169*a)/qs);return i===o&&o===a?e=r=u:(e=It((.4360747*i+.3850649*o+.1430804*a)/Us),r=It((.0139322*i+.0971045*o+.7141733*a)/Os)),new Ft(116*u-16,500*(e-u),200*(u-r),t.opacity)}function Bt(t,n,e,r){return 1===arguments.length?Yt(t):new Ft(t,n,e,null==r?1:r)}function Ft(t,n,e,r){this.l=+t,this.a=+n,this.b=+e,this.opacity=+r}function It(t){return t>Is?Math.pow(t,1/3):t/Fs+Ys}function jt(t){return t>Bs?t*t*t:Fs*(t-Ys)}function Ht(t){return 255*(t<=.0031308?12.92*t:1.055*Math.pow(t,1/2.4)-.055)}function Xt(t){return(t/=255)<=.04045?t/12.92:Math.pow((t+.055)/1.055,2.4)}function Gt(t){if(t instanceof $t)return new $t(t.h,t.c,t.l,t.opacity);if(t instanceof Ft||(t=Yt(t)),0===t.a&&0===t.b)return new $t(NaN,0,t.l,t.opacity);var n=Math.atan2(t.b,t.a)*Ds;return new $t(n<0?n+360:n,Math.sqrt(t.a*t.a+t.b*t.b),t.l,t.opacity)}function Vt(t,n,e,r){return 1===arguments.length?Gt(t):new $t(t,n,e,null==r?1:r)}function $t(t,n,e,r){this.h=+t,this.c=+n,this.l=+e,this.opacity=+r}function Wt(t,n,e,r){return 1===arguments.length?function(t){if(t instanceof Zt)return new Zt(t.h,t.s,t.l,t.opacity);t instanceof Lt||(t=zt(t));var 
n=t.r/255,e=t.g/255,r=t.b/255,i=($s*r+Gs*n-Vs*e)/($s+Gs-Vs),o=r-i,a=(Xs*(e-i)-js*o)/Hs,u=Math.sqrt(a*a+o*o)/(Xs*i*(1-i)),f=u?Math.atan2(a,o)*Ds-120:NaN;return new Zt(f<0?f+360:f,u,i,t.opacity)}(t):new Zt(t,n,e,null==r?1:r)}function Zt(t,n,e,r){this.h=+t,this.s=+n,this.l=+e,this.opacity=+r}function Qt(t,n,e,r,i){var o=t*t,a=o*t;return((1-3*t+3*o-a)*n+(4-6*o+3*a)*e+(1+3*t+3*o-3*a)*r+a*i)/6}function Jt(t){var n=t.length-1;return function(e){var r=e<=0?e=0:e>=1?(e=1,n-1):Math.floor(e*n),i=t[r],o=t[r+1],a=r>0?t[r-1]:2*i-o,u=r180||e<-180?e-360*Math.round(e/360):e):tn(isNaN(t)?n:t)}function rn(t){return 1==(t=+t)?on:function(n,e){return e-n?function(t,n,e){return t=Math.pow(t,e),n=Math.pow(n,e)-t,e=1/e,function(r){return Math.pow(t+r*n,e)}}(n,e,t):tn(isNaN(n)?e:n)}}function on(t,n){var e=n-t;return e?nn(t,e):tn(isNaN(t)?n:t)}function an(t){return function(n){var e,r,i=n.length,o=new Array(i),a=new Array(i),u=new Array(i);for(e=0;eo&&(i=n.slice(o,i),u[a]?u[a]+=i:u[++a]=i),(e=e[0])===(r=r[0])?u[a]?u[a]+=r:u[++a]=r:(u[++a]=null,f.push({i:a,x:cn(e,r)})),o=ol.lastIndex;return o180?n+=360:n-t>180&&(t+=360),o.push({i:e.push(i(e)+"rotate(",null,r)-2,x:cn(t,n)})):n&&e.push(i(e)+"rotate("+n+r)}(o.rotate,a.rotate,u,f),function(t,n,e,o){t!==n?o.push({i:e.push(i(e)+"skewX(",null,r)-2,x:cn(t,n)}):n&&e.push(i(e)+"skewX("+n+r)}(o.skewX,a.skewX,u,f),function(t,n,e,r,o,a){if(t!==e||n!==r){var u=o.push(i(o)+"scale(",null,",",null,")");a.push({i:u-4,x:cn(t,e)},{i:u-2,x:cn(n,r)})}else 1===e&&1===r||o.push(i(o)+"scale("+e+","+r+")")}(o.scaleX,o.scaleY,a.scaleX,a.scaleY,u,f),o=a=null,function(t){for(var n,e=-1,r=f.length;++e=0&&n._call.call(null,t),n=n._next;--ml}function Nn(){Tl=(Al=Sl.now())+Nl,ml=xl=0;try{Tn()}finally{ml=0,function(){var t,n,e=Ks,r=1/0;for(;e;)e._call?(r>e._time&&(r=e._time),t=e,e=e._next):(n=e._next,e._next=null,e=t?t._next=n:Ks=n);tl=t,En(r)}(),Tl=0}}function Sn(){var t=Sl.now(),n=t-Al;n>Ml&&(Nl-=n,Al=t)}function 
En(t){if(!ml){xl&&(xl=clearTimeout(xl));t-Tl>24?(t<1/0&&(xl=setTimeout(Nn,t-Sl.now()-Nl)),wl&&(wl=clearInterval(wl))):(wl||(Al=Sl.now(),wl=setInterval(Sn,Ml)),ml=1,El(Nn))}}function kn(t,n,e){var r=new Mn;return n=null==n?0:+n,r.restart(function(e){r.stop(),t(e+n)},n,e),r}function Cn(t,n,e,r,i,o){var a=t.__transition;if(a){if(e in a)return}else t.__transition={};(function(t,n,e){function r(f){var c,s,l,h;if(e.state!==zl)return o();for(c in u)if((h=u[c]).name===e.name){if(h.state===Ll)return kn(r);h.state===Dl?(h.state=ql,h.timer.stop(),h.on.call("interrupt",t,t.__data__,h.index,h.group),delete u[c]):+cPl)throw new Error("too late; already scheduled");return e}function zn(t,n){var e=Rn(t,n);if(e.state>Rl)throw new Error("too late; already started");return e}function Rn(t,n){var e=t.__transition;if(!e||!(e=e[n]))throw new Error("transition not found");return e}function Ln(t,n){var e,r,i,o=t.__transition,a=!0;if(o){n=null==n?null:n+"";for(i in o)(e=o[i]).name===n?(r=e.state>Rl&&e.stateMath.abs(t[1]-U[1])?x=!0:m=!0),U=t,b=!0,Wn(),o()}function o(){var t;switch(y=U[0]-D[0],_=U[1]-D[1],A){case hh:case lh:T&&(y=Math.max(C-u,Math.min(z-d,y)),c=u+y,p=d+y),N&&(_=Math.max(P-l,Math.min(R-v,_)),h=l+_,g=v+_);break;case dh:T<0?(y=Math.max(C-u,Math.min(z-u,y)),c=u+y,p=d):T>0&&(y=Math.max(C-d,Math.min(z-d,y)),c=u,p=d+y),N<0?(_=Math.max(P-l,Math.min(R-l,_)),h=l+_,g=v):N>0&&(_=Math.max(P-v,Math.min(R-v,_)),h=l,g=v+_);break;case ph:T&&(c=Math.max(C,Math.min(z,u-y*T)),p=Math.max(C,Math.min(z,d+y*T))),N&&(h=Math.max(P,Math.min(R,l-_*N)),g=Math.max(P,Math.min(R,v+_*N)))}p0&&(u=c-y),N<0?v=g-_:N>0&&(l=h-_),A=hh,Y.attr("cursor",_h.selection),o());break;default:return}Wn()},!0).on("keyup.brush",function(){switch(t.event.keyCode){case 16:L&&(m=x=L=!1,o());break;case 18:A===ph&&(T<0?d=p:T>0&&(u=c),N<0?v=g:N>0&&(l=h),A=dh,o());break;case 
32:A===hh&&(t.event.altKey?(T&&(d=p-y*T,u=c+y*T),N&&(v=g-_*N,l=h+_*N),A=ph):(T<0?d=p:T>0&&(u=c),N<0?v=g:N>0&&(l=h),A=dh),Y.attr("cursor",_h[M]),o());break;default:return}Wn()},!0).on("mousemove.brush",e,!0).on("mouseup.brush",a,!0);_t(t.event.view)}$n(),Ln(w),r.call(w),q.start()}}function u(){var t=this.__brush||{selection:null};return t.extent=c.apply(this,arguments),t.dim=n,t}var f,c=Jn,s=Qn,l=N(e,"start","brush","end"),h=6;return e.move=function(t,e){t.selection?t.on("start.brush",function(){i(this,arguments).beforestart().start()}).on("interrupt.brush end.brush",function(){i(this,arguments).end()}).tween("brush",function(){function t(t){a.selection=1===t&&te(c)?null:s(t),r.call(o),u.brush()}var o=this,a=o.__brush,u=i(o,arguments),f=a.selection,c=n.input("function"==typeof e?e.apply(this,arguments):e,a.extent),s=hn(f,c);return f&&c?t:t(1)}):t.each(function(){var t=arguments,o=this.__brush,a=n.input("function"==typeof e?e.apply(this,t):e,o.extent),u=i(this,t).beforestart();Ln(this),o.selection=null==a||te(a)?null:a,r.call(this),u.start().brush().end()})},o.prototype={beforestart:function(){return 1==++this.active&&(this.state.emitter=this,this.starting=!0),this},start:function(){return this.starting&&(this.starting=!1,this.emit("start")),this},brush:function(){return this.emit("brush"),this},end:function(){return 0==--this.active&&(delete this.state.emitter,this.emit("end")),this},emit:function(t){ot(new function(t,n,e){this.target=t,this.type=n,this.selection=e}(e,t,n.output(this.state.selection)),l.apply,l,[t,this.that,this.args])}},e.extent=function(t){return arguments.length?(c="function"==typeof t?t:Vn([[+t[0][0],+t[0][1]],[+t[1][0],+t[1][1]]]),e):c},e.filter=function(t){return arguments.length?(s="function"==typeof t?t:Vn(!!t),e):s},e.handleSize=function(t){return arguments.length?(h=+t,e):h},e.on=function(){var t=l.on.apply(l,arguments);return t===l?e:t},e}function ee(t){return function(){return t}}function 
re(){this._x0=this._y0=this._x1=this._y1=null,this._=""}function ie(){return new re}function oe(t){return t.source}function ae(t){return t.target}function ue(t){return t.radius}function fe(t){return t.startAngle}function ce(t){return t.endAngle}function se(){}function le(t,n){var e=new se;if(t instanceof se)t.each(function(t,n){e.set(n,t)});else if(Array.isArray(t)){var r,i=-1,o=t.length;if(null==n)for(;++ir!=d>r&&e<(h-c)*(r-s)/(d-s)+c&&(i=-i)}return i}(t,n[r]))return e;return 0}function xe(){}function we(){function t(t){var e=a(t);if(Array.isArray(e))e=e.slice().sort(_e);else{var r=u(t),i=r[0],o=r[1];e=d(i,o,e),e=s(Math.floor(i/e)*e,Math.floor(o/e)*e,e)}return e.map(function(e){return n(t,e)})}function n(t,n){var r=[],a=[];return function(t,n,r){function a(t){var n,i,o=[t[0][0]+u,t[0][1]+f],a=[t[1][0]+u,t[1][1]+f],c=e(o),s=e(a);(n=p[c])?(i=d[s])?(delete p[n.end],delete d[i.start],n===i?(n.ring.push(a),r(n.ring)):d[n.start]=p[i.end]={start:n.start,end:i.end,ring:n.ring.concat(i.ring)}):(delete p[n.end],n.ring.push(a),p[n.end=s]=n):(n=d[s])?(i=p[c])?(delete d[n.start],delete p[i.end],n===i?(n.ring.push(a),r(n.ring)):d[i.start]=p[n.end]={start:i.start,end:n.end,ring:i.ring.concat(n.ring)}):(delete d[n.start],n.ring.unshift(o),d[n.start=c]=n):d[c]=p[s]={start:c,end:s,ring:[o,a]}}var u,f,c,s,l,h,d=new Array,p=new Array;u=f=-1,s=t[0]>=n,Dh[s<<1].forEach(a);for(;++u=n,Dh[c|s<<1].forEach(a);Dh[s<<0].forEach(a);for(;++f=n,l=t[f*i]>=n,Dh[s<<1|l<<2].forEach(a);++u=n,h=l,l=t[f*i+u+1]>=n,Dh[c|s<<1|l<<2|h<<3].forEach(a);Dh[s|l<<3].forEach(a)}u=-1,l=t[f*i]>=n,Dh[l<<2].forEach(a);for(;++u=n,Dh[l<<2|h<<3].forEach(a);Dh[l<<3].forEach(a)}(t,n,function(e){f(e,t,n),function(t){for(var n=0,e=t.length,r=t[e-1][1]*t[0][0]-t[e-1][0]*t[0][1];++n0?r.push([e]):a.push(e)}),a.forEach(function(t){for(var n,e=0,i=r.length;e0&&a0&&u0&&r>0))throw new Error("invalid size");return i=e,o=r,t},t.thresholds=function(n){return arguments.length?(a="function"==typeof 
n?n:Array.isArray(n)?be(Lh.call(n)):be(n),t):a},t.smooth=function(n){return arguments.length?(f=n?r:xe,t):f===r},t}function Me(t,n,e){for(var r=t.width,i=t.height,o=1+(e<<1),a=0;a=e&&(u>=o&&(f-=t.data[u-o+a*r]),n.data[u-e+a*r]=f/Math.min(u+1,r-1+o-u,o))}function Ae(t,n,e){for(var r=t.width,i=t.height,o=1+(e<<1),a=0;a=e&&(u>=o&&(f-=t.data[a+(u-o)*r]),n.data[a+(u-e)*r]=f/Math.min(u+1,i-1+o-u,o))}function Te(t){return t[0]}function Ne(t){return t[1]}function Se(t){return new Function("d","return {"+t.map(function(t,n){return JSON.stringify(t)+": d["+n+"]"}).join(",")+"}")}function Ee(t){function n(t,n){function e(){if(c)return qh;if(s)return s=!1,Uh;var n,e,r=u;if(t.charCodeAt(r)===Oh){for(;u++=a?c=!0:(e=t.charCodeAt(u++))===Yh?s=!0:e===Bh&&(s=!0,t.charCodeAt(u)===Yh&&++u),t.slice(r+1,n-1).replace(/""/g,'"')}for(;u=(o=(v+y)/2))?v=o:y=o,(s=e>=(a=(g+_)/2))?g=a:_=a,i=d,!(d=d[l=s<<1|c]))return i[l]=p,t;if(u=+t._x.call(null,d.data),f=+t._y.call(null,d.data),n===u&&e===f)return p.next=d,i?i[l]=p:t._root=p,t;do{i=i?i[l]=new Array(4):t._root=new Array(4),(c=n>=(o=(v+y)/2))?v=o:y=o,(s=e>=(a=(g+_)/2))?g=a:_=a}while((l=s<<1|c)==(h=(f>=a)<<1|u>=o));return i[h]=d,i[l]=p,t}function Ye(t,n,e,r,i){this.node=t,this.x0=n,this.y0=e,this.x1=r,this.y1=i}function Be(t){return t[0]}function Fe(t){return t[1]}function Ie(t,n,e){var r=new je(null==n?Be:n,null==e?Fe:e,NaN,NaN,NaN,NaN);return null==t?r:r.addAll(t)}function je(t,n,e,r,i,o){this._x=t,this._y=n,this._x0=e,this._y0=r,this._x1=i,this._y1=o,this._root=void 0}function He(t){for(var n={data:t.data},e=n;t=t.next;)e=e.next={data:t.data};return n}function Xe(t){return t.x+t.vx}function Ge(t){return t.y+t.vy}function Ve(t){return t.index}function $e(t,n){var e=t.get(n);if(!e)throw new Error("missing: "+n);return e}function We(t){return t.x}function Ze(t){return t.y}function Qe(t,n){if((e=(t=n?t.toExponential(n-1):t.toExponential()).indexOf("e"))<0)return null;var e,r=t.slice(0,e);return[r.length>1?r[0]+r.slice(2):r,+t.slice(e+1)]}function 
Je(t){return(t=Qe(Math.abs(t)))?t[1]:NaN}function Ke(t,n){var e=Qe(t,n);if(!e)return t+"";var r=e[0],i=e[1];return i<0?"0."+new Array(-i).join("0")+r:r.length>i+1?r.slice(0,i+1)+"."+r.slice(i+1):r+new Array(i-r.length+2).join("0")}function tr(t){return new nr(t)}function nr(t){if(!(n=ud.exec(t)))throw new Error("invalid format: "+t);var n,e=n[1]||" ",r=n[2]||">",i=n[3]||"-",o=n[4]||"",a=!!n[5],u=n[6]&&+n[6],f=!!n[7],c=n[8]&&+n[8].slice(1),s=n[9]||"";"n"===s?(f=!0,s="g"):ad[s]||(s=""),(a||"0"===e&&"="===r)&&(a=!0,e="0",r="="),this.fill=e,this.align=r,this.sign=i,this.symbol=o,this.zero=a,this.width=u,this.comma=f,this.precision=c,this.type=s}function er(t){return t}function rr(t){function n(t){function n(t){var n,r,a,s=g,m=y;if("c"===v)m=_(t)+m,t="";else{var x=(t=+t)<0;if(t=_(Math.abs(t),p),x&&0==+t&&(x=!1),s=(x?"("===c?c:"-":"-"===c||"("===c?"":c)+s,m=("s"===v?cd[8+rd/3]:"")+m+(x&&"("===c?")":""),b)for(n=-1,r=t.length;++n(a=t.charCodeAt(n))||a>57){m=(46===a?i+t.slice(n+1):t.slice(n))+m,t=t.slice(0,n);break}}d&&!l&&(t=e(t,1/0));var w=s.length+t.length+m.length,M=w>1)+s+t+m+M.slice(w);break;default:t=M+s+t+m}return o(t)}var u=(t=tr(t)).fill,f=t.align,c=t.sign,s=t.symbol,l=t.zero,h=t.width,d=t.comma,p=t.precision,v=t.type,g="$"===s?r[0]:"#"===s&&/[boxX]/.test(v)?"0"+v.toLowerCase():"",y="$"===s?r[1]:/[%p]/.test(v)?a:"",_=ad[v],b=!v||/[defgprs%]/.test(v);return p=null==p?v?6:12:/[gprs]/.test(v)?Math.max(1,Math.min(21,p)):Math.max(0,Math.min(20,p)),n.toString=function(){return t+""},n}var e=t.grouping&&t.thousands?function(t,n){return function(e,r){for(var i=e.length,o=[],a=0,u=t[0],f=0;i>0&&u>0&&(f+u+1>r&&(u=Math.max(1,r-f)),o.push(e.substring(i-=u,i+u)),!((f+=u+1)>r));)u=t[a=(a+1)%t.length];return o.reverse().join(n)}}(t.grouping,t.thousands):er,r=t.currency,i=t.decimal,o=t.numerals?function(t){return function(n){return n.replace(/[0-9]/g,function(n){return t[+n]})}}(t.numerals):er,a=t.percent||"%";return{format:n,formatPrefix:function(t,e){var 
r=n((t=tr(t),t.type="f",t)),i=3*Math.max(-8,Math.min(8,Math.floor(Je(e)/3))),o=Math.pow(10,-i),a=cd[8+i/3];return function(t){return r(o*t)+a}}}}function ir(n){return fd=rr(n),t.format=fd.format,t.formatPrefix=fd.formatPrefix,fd}function or(t){return Math.max(0,-Je(Math.abs(t)))}function ar(t,n){return Math.max(0,3*Math.max(-8,Math.min(8,Math.floor(Je(n)/3)))-Je(Math.abs(t)))}function ur(t,n){return t=Math.abs(t),n=Math.abs(n)-t,Math.max(0,Je(n)-Je(t))+1}function fr(){return new cr}function cr(){this.reset()}function sr(t,n,e){var r=t.s=n+e,i=r-n,o=r-i;t.t=n-o+(e-i)}function lr(t){return t>1?0:t<-1?Hd:Math.acos(t)}function hr(t){return t>1?Xd:t<-1?-Xd:Math.asin(t)}function dr(t){return(t=ip(t/2))*t}function pr(){}function vr(t,n){t&&cp.hasOwnProperty(t.type)&&cp[t.type](t,n)}function gr(t,n,e){var r,i=-1,o=t.length-e;for(n.lineStart();++i=0?1:-1,i=r*e,o=Kd(n),a=ip(n),u=pd*a,f=dd*o+u*Kd(i),c=u*r*ip(i);sp.add(Jd(c,f)),hd=t,dd=o,pd=a}function Mr(t){return[Jd(t[1],t[0]),hr(t[2])]}function Ar(t){var n=t[0],e=t[1],r=Kd(e);return[r*Kd(n),r*ip(n),ip(e)]}function Tr(t,n){return t[0]*n[0]+t[1]*n[1]+t[2]*n[2]}function Nr(t,n){return[t[1]*n[2]-t[2]*n[1],t[2]*n[0]-t[0]*n[2],t[0]*n[1]-t[1]*n[0]]}function Sr(t,n){t[0]+=n[0],t[1]+=n[1],t[2]+=n[2]}function Er(t,n){return[t[0]*n,t[1]*n,t[2]*n]}function kr(t){var n=ap(t[0]*t[0]+t[1]*t[1]+t[2]*t[2]);t[0]/=n,t[1]/=n,t[2]/=n}function Cr(t,n){Md.push(Ad=[vd=t,yd=t]),n_d&&(_d=n)}function Pr(t,n){var e=Ar([t*Wd,n*Wd]);if(wd){var r=Nr(wd,e),i=Nr([r[1],-r[0],0],r);kr(i),i=Mr(i);var o,a=t-bd,u=a>0?1:-1,f=i[0]*$d*u,c=Zd(a)>180;c^(u*bd_d&&(_d=o):(f=(f+360)%360-180,c^(u*bd_d&&(_d=n))),c?tqr(vd,yd)&&(yd=t):qr(t,yd)>qr(vd,yd)&&(vd=t):yd>=vd?(tyd&&(yd=t)):t>bd?qr(vd,t)>qr(vd,yd)&&(yd=t):qr(t,yd)>qr(vd,yd)&&(vd=t)}else Md.push(Ad=[vd=t,yd=t]);n_d&&(_d=n),wd=e,bd=t}function zr(){pp.point=Pr}function Rr(){Ad[0]=vd,Ad[1]=yd,pp.point=Cr,wd=null}function Lr(t,n){if(wd){var e=t-bd;dp.add(Zd(e)>180?e+(e>0?360:-360):e)}else 
md=t,xd=n;hp.point(t,n),Pr(t,n)}function Dr(){hp.lineStart()}function Ur(){Lr(md,xd),hp.lineEnd(),Zd(dp)>Id&&(vd=-(yd=180)),Ad[0]=vd,Ad[1]=yd,wd=null}function qr(t,n){return(n-=t)<0?n+360:n}function Or(t,n){return t[0]-n[0]}function Yr(t,n){return t[0]<=t[1]?t[0]<=n&&n<=t[1]:nHd?t-Vd:t<-Hd?t+Vd:t,n]}function Kr(t,n,e){return(t%=Vd)?n||e?Qr(ni(t),ei(n,e)):ni(t):n||e?ei(n,e):Jr}function ti(t){return function(n,e){return n+=t,[n>Hd?n-Vd:n<-Hd?n+Vd:n,e]}}function ni(t){var n=ti(t);return n.invert=ti(-t),n}function ei(t,n){function e(t,n){var e=Kd(n),u=Kd(t)*e,f=ip(t)*e,c=ip(n),s=c*r+u*i;return[Jd(f*o-s*a,u*r-c*i),hr(s*o+f*a)]}var r=Kd(t),i=ip(t),o=Kd(n),a=ip(n);return e.invert=function(t,n){var e=Kd(n),u=Kd(t)*e,f=ip(t)*e,c=ip(n),s=c*o-f*a;return[Jd(f*o+c*a,u*r+s*i),hr(s*r-u*i)]},e}function ri(t){function n(n){return n=t(n[0]*Wd,n[1]*Wd),n[0]*=$d,n[1]*=$d,n}return t=Kr(t[0]*Wd,t[1]*Wd,t.length>2?t[2]*Wd:0),n.invert=function(n){return n=t.invert(n[0]*Wd,n[1]*Wd),n[0]*=$d,n[1]*=$d,n},n}function ii(t,n,e,r,i,o){if(e){var a=Kd(n),u=ip(n),f=r*e;null==i?(i=n+r*Vd,o=n-f/2):(i=oi(a,i),o=oi(a,o),(r>0?io)&&(i+=r*Vd));for(var c,s=i;r>0?s>o:s1&&n.push(n.pop().concat(n.shift()))},result:function(){var e=n;return n=[],t=null,e}}}function ui(t,n){return Zd(t[0]-n[0])=0;--o)i.point((s=c[o])[0],s[1]);else r(h.x,h.p.x,-1,i);h=h.p}c=(h=h.o).z,d=!d}while(!h.v);i.lineEnd()}}}function si(t){if(n=t.length){for(var n,e,r=0,i=t[0];++r=0?1:-1,T=A*M,N=T>Hd,S=v*x;if(Sp.add(Jd(S*A*ip(T),g*w+S*Kd(T))),a+=N?M+A*Vd:M,N^d>=e^b>=e){var E=Nr(Ar(h),Ar(_));kr(E);var k=Nr(o,E);kr(k);var C=(N^M>=0?-1:1)*hr(k[2]);(r>C||r===C&&(E[0]||E[1]))&&(u+=N^M>=0?1:-1)}}return(a<-Id||a0){for(b||(i.polygonStart(),b=!0),i.lineStart(),t=0;t1&&2&o&&a.push(a.pop().concat(a.shift())),d.push(a.filter(di))}var h,d,p,v=n(i),g=ai(),_=n(g),b=!1,m={point:o,lineStart:u,lineEnd:f,polygonStart:function(){m.point=c,m.lineStart=s,m.lineEnd=l,d=[],h=[]},polygonEnd:function(){m.point=o,m.lineStart=u,m.lineEnd=f,d=y(d);var 
t=li(h,r);d.length?(b||(i.polygonStart(),b=!0),ci(d,pi,t,e,i)):t&&(b||(i.polygonStart(),b=!0),i.lineStart(),e(null,null,1,i),i.lineEnd()),b&&(i.polygonEnd(),b=!1),d=h=null},sphere:function(){i.polygonStart(),i.lineStart(),e(null,null,1,i),i.lineEnd(),i.polygonEnd()}};return m}}function di(t){return t.length>1}function pi(t,n){return((t=t.x)[0]<0?t[1]-Xd-Id:Xd-t[1])-((n=n.x)[0]<0?n[1]-Xd-Id:Xd-n[1])}function vi(t){function n(t,n){return Kd(t)*Kd(n)>i}function e(t,n,e){var r=[1,0,0],o=Nr(Ar(t),Ar(n)),a=Tr(o,o),u=o[0],f=a-u*u;if(!f)return!e&&t;var c=i*a/f,s=-i*u/f,l=Nr(r,o),h=Er(r,c);Sr(h,Er(o,s));var d=l,p=Tr(h,d),v=Tr(d,d),g=p*p-v*(Tr(h,h)-1);if(!(g<0)){var y=ap(g),_=Er(d,(-p-y)/v);if(Sr(_,h),_=Mr(_),!e)return _;var b,m=t[0],x=n[0],w=t[1],M=n[1];x0^_[1]<(Zd(_[0]-m)Hd^(m<=_[0]&&_[0]<=x)){var N=Er(d,(-p+y)/v);return Sr(N,h),[_,Mr(N)]}}}function r(n,e){var r=a?t:Hd-t,i=0;return n<-r?i|=1:n>r&&(i|=2),e<-r?i|=4:e>r&&(i|=8),i}var i=Kd(t),o=6*Wd,a=i>0,u=Zd(i)>Id;return hi(n,function(t){var i,o,f,c,s;return{lineStart:function(){c=f=!1,s=1},point:function(l,h){var d,p=[l,h],v=n(l,h),g=a?v?0:r(l,h):v?r(l+(l<0?Hd:-Hd),h):0;if(!i&&(c=f=v)&&t.lineStart(),v!==f&&(!(d=e(i,p))||ui(i,d)||ui(p,d))&&(p[0]+=Id,p[1]+=Id,v=n(p[0],p[1])),v!==f)s=0,v?(t.lineStart(),d=e(p,i),t.point(d[0],d[1])):(d=e(i,p),t.point(d[0],d[1]),t.lineEnd()),i=d;else if(u&&i&&a^v){var y;g&o||!(y=e(p,i,!0))||(s=0,a?(t.lineStart(),t.point(y[0][0],y[0][1]),t.point(y[1][0],y[1][1]),t.lineEnd()):(t.point(y[1][0],y[1][1]),t.lineEnd(),t.lineStart(),t.point(y[0][0],y[0][1])))}!v||i&&ui(i,p)||t.point(p[0],p[1]),i=p,f=v,o=g},lineEnd:function(){f&&t.lineEnd(),i=null},clean:function(){return s|(c&&f)<<1}}},function(n,e,r,i){ii(i,t,o,r,n,e)},a?[0,-t]:[-Hd,t-Hd])}function gi(t,n,e,r){function i(i,o){return t<=i&&i<=e&&n<=o&&o<=r}function o(i,o,u,c){var s=0,l=0;if(null==i||(s=a(i,u))!==(l=a(o,u))||f(i,o)<0^u>0)do{c.point(0===s||3===s?t:e,s>1?r:n)}while((s=(s+u+4)%4)!==l);else c.point(o[0],o[1])}function a(r,i){return 
Zd(r[0]-t)0?0:3:Zd(r[0]-e)0?2:1:Zd(r[1]-n)0?1:0:i>0?3:2}function u(t,n){return f(t.x,n.x)}function f(t,n){var e=a(t,1),r=a(n,1);return e!==r?e-r:0===e?n[1]-t[1]:1===e?t[0]-n[0]:2===e?t[1]-n[1]:n[0]-t[0]}return function(a){function f(t,n){i(t,n)&&w.point(t,n)}function c(o,a){var u=i(o,a);if(l&&h.push([o,a]),m)d=o,p=a,v=u,m=!1,u&&(w.lineStart(),w.point(o,a));else if(u&&b)w.point(o,a);else{var f=[g=Math.max(Cp,Math.min(kp,g)),_=Math.max(Cp,Math.min(kp,_))],c=[o=Math.max(Cp,Math.min(kp,o)),a=Math.max(Cp,Math.min(kp,a))];!function(t,n,e,r,i,o){var a,u=t[0],f=t[1],c=0,s=1,l=n[0]-u,h=n[1]-f;if(a=e-u,l||!(a>0)){if(a/=l,l<0){if(a0){if(a>s)return;a>c&&(c=a)}if(a=i-u,l||!(a<0)){if(a/=l,l<0){if(a>s)return;a>c&&(c=a)}else if(l>0){if(a0)){if(a/=h,h<0){if(a0){if(a>s)return;a>c&&(c=a)}if(a=o-f,h||!(a<0)){if(a/=h,h<0){if(a>s)return;a>c&&(c=a)}else if(h>0){if(a0&&(t[0]=u+c*l,t[1]=f+c*h),s<1&&(n[0]=u+s*l,n[1]=f+s*h),!0}}}}}(f,c,t,n,e,r)?u&&(w.lineStart(),w.point(o,a),x=!1):(b||(w.lineStart(),w.point(f[0],f[1])),w.point(c[0],c[1]),u||w.lineEnd(),x=!1)}g=o,_=a,b=u}var s,l,h,d,p,v,g,_,b,m,x,w=a,M=ai(),A={point:f,lineStart:function(){A.point=c,l&&l.push(h=[]),m=!0,b=!1,g=_=NaN},lineEnd:function(){s&&(c(d,p),v&&b&&M.rejoin(),s.push(M.result())),A.point=f,b&&w.lineEnd()},polygonStart:function(){w=M,s=[],l=[],x=!0},polygonEnd:function(){var n=function(){for(var n=0,e=0,i=l.length;er&&(h-o)*(r-a)>(d-a)*(t-o)&&++n:d<=r&&(h-o)*(r-a)<(d-a)*(t-o)&&--n;return n}(),e=x&&n,i=(s=y(s)).length;(e||i)&&(a.polygonStart(),e&&(a.lineStart(),o(null,null,1,a),a.lineEnd()),i&&ci(s,u,n,o,a),a.polygonEnd()),w=a,s=l=h=null}};return A}}function yi(){zp.point=zp.lineEnd=pr}function _i(t,n){gp=t*=Wd,yp=ip(n*=Wd),_p=Kd(n),zp.point=bi}function bi(t,n){t*=Wd;var e=ip(n*=Wd),r=Kd(n),i=Zd(t-gp),o=Kd(i),a=r*ip(i),u=_p*e-yp*r*o,f=yp*e+_p*r*o;Pp.add(Jd(ap(a*a+u*u),f)),gp=t,yp=e,_p=r}function mi(t){return Pp.reset(),_r(t,zp),+Pp}function xi(t,n){return Rp[0]=t,Rp[1]=n,mi(Lp)}function 
wi(t,n){return!(!t||!Up.hasOwnProperty(t.type))&&Up[t.type](t,n)}function Mi(t,n){return 0===xi(t,n)}function Ai(t,n){var e=xi(t[0],t[1]);return xi(t[0],n)+xi(n,t[1])<=e+Id}function Ti(t,n){return!!li(t.map(Ni),Si(n))}function Ni(t){return(t=t.map(Si)).pop(),t}function Si(t){return[t[0]*Wd,t[1]*Wd]}function Ei(t,n,e){var r=s(t,n-Id,e).concat(n);return function(t){return r.map(function(n){return[t,n]})}}function ki(t,n,e){var r=s(t,n-Id,e).concat(n);return function(t){return r.map(function(n){return[n,t]})}}function Ci(){function t(){return{type:"MultiLineString",coordinates:n()}}function n(){return s(tp(o/y)*y,i,y).map(d).concat(s(tp(c/_)*_,f,_).map(p)).concat(s(tp(r/v)*v,e,v).filter(function(t){return Zd(t%y)>Id}).map(l)).concat(s(tp(u/g)*g,a,g).filter(function(t){return Zd(t%_)>Id}).map(h))}var e,r,i,o,a,u,f,c,l,h,d,p,v=10,g=v,y=90,_=360,b=2.5;return t.lines=function(){return n().map(function(t){return{type:"LineString",coordinates:t}})},t.outline=function(){return{type:"Polygon",coordinates:[d(o).concat(p(f).slice(1),d(i).reverse().slice(1),p(c).reverse().slice(1))]}},t.extent=function(n){return arguments.length?t.extentMajor(n).extentMinor(n):t.extentMinor()},t.extentMajor=function(n){return arguments.length?(o=+n[0][0],i=+n[1][0],c=+n[0][1],f=+n[1][1],o>i&&(n=o,o=i,i=n),c>f&&(n=c,c=f,f=n),t.precision(b)):[[o,c],[i,f]]},t.extentMinor=function(n){return arguments.length?(r=+n[0][0],e=+n[1][0],u=+n[0][1],a=+n[1][1],r>e&&(n=r,r=e,e=n),u>a&&(n=u,u=a,a=n),t.precision(b)):[[r,u],[e,a]]},t.step=function(n){return arguments.length?t.stepMajor(n).stepMinor(n):t.stepMinor()},t.stepMajor=function(n){return arguments.length?(y=+n[0],_=+n[1],t):[y,_]},t.stepMinor=function(n){return arguments.length?(v=+n[0],g=+n[1],t):[v,g]},t.precision=function(n){return arguments.length?(b=+n,l=Ei(u,a,90),h=ki(r,e,b),d=Ei(c,f,90),p=ki(o,i,b),t):b},t.extentMajor([[-180,-90+Id],[180,90-Id]]).extentMinor([[-180,-80-Id],[180,80+Id]])}function Pi(t){return t}function zi(){Yp.point=Ri}function 
Ri(t,n){Yp.point=Li,bp=xp=t,mp=wp=n}function Li(t,n){Op.add(wp*t-xp*n),xp=t,wp=n}function Di(){Li(bp,mp)}function Ui(t,n){Xp+=t,Gp+=n,++Vp}function qi(){tv.point=Oi}function Oi(t,n){tv.point=Yi,Ui(Tp=t,Np=n)}function Yi(t,n){var e=t-Tp,r=n-Np,i=ap(e*e+r*r);$p+=i*(Tp+t)/2,Wp+=i*(Np+n)/2,Zp+=i,Ui(Tp=t,Np=n)}function Bi(){tv.point=Ui}function Fi(){tv.point=ji}function Ii(){Hi(Mp,Ap)}function ji(t,n){tv.point=Hi,Ui(Mp=Tp=t,Ap=Np=n)}function Hi(t,n){var e=t-Tp,r=n-Np,i=ap(e*e+r*r);$p+=i*(Tp+t)/2,Wp+=i*(Np+n)/2,Zp+=i,Qp+=(i=Np*t-Tp*n)*(Tp+t),Jp+=i*(Np+n),Kp+=3*i,Ui(Tp=t,Np=n)}function Xi(t){this._context=t}function Gi(t,n){uv.point=Vi,ev=iv=t,rv=ov=n}function Vi(t,n){iv-=t,ov-=n,av.add(ap(iv*iv+ov*ov)),iv=t,ov=n}function $i(){this._string=[]}function Wi(t){return"m0,"+t+"a"+t+","+t+" 0 1,1 0,"+-2*t+"a"+t+","+t+" 0 1,1 0,"+2*t+"z"}function Zi(t){return function(n){var e=new Qi;for(var r in t)e[r]=t[r];return e.stream=n,e}}function Qi(){}function Ji(t,n,e){var r=t.clipExtent&&t.clipExtent();return t.scale(150).translate([0,0]),null!=r&&t.clipExtent(null),_r(e,t.stream(Hp)),n(Hp.result()),null!=r&&t.clipExtent(r),t}function Ki(t,n,e){return Ji(t,function(e){var r=n[1][0]-n[0][0],i=n[1][1]-n[0][1],o=Math.min(r/(e[1][0]-e[0][0]),i/(e[1][1]-e[0][1])),a=+n[0][0]+(r-o*(e[1][0]+e[0][0]))/2,u=+n[0][1]+(i-o*(e[1][1]+e[0][1]))/2;t.scale(150*o).translate([a,u])},e)}function to(t,n,e){return Ki(t,[[0,0],n],e)}function no(t,n,e){return Ji(t,function(e){var r=+n,i=r/(e[1][0]-e[0][0]),o=(r-i*(e[1][0]+e[0][0]))/2,a=-i*e[0][1];t.scale(150*i).translate([o,a])},e)}function eo(t,n,e){return Ji(t,function(e){var r=+n,i=r/(e[1][1]-e[0][1]),o=-i*e[0][0],a=(r-i*(e[1][1]+e[0][1]))/2;t.scale(150*i).translate([o,a])},e)}function ro(t,n){return+n?function(t,n){function e(r,i,o,a,u,f,c,s,l,h,d,p,v,g){var y=c-r,_=s-i,b=y*y+_*_;if(b>4*n&&v--){var 
m=a+h,x=u+d,w=f+p,M=ap(m*m+x*x+w*w),A=hr(w/=M),T=Zd(Zd(w)-1)n||Zd((y*k+_*C)/b-.5)>.3||a*h+u*d+f*p2?t[2]%360*Wd:0,e()):[b*$d,m*$d,x*$d]},n.angle=function(t){return arguments.length?(w=t%360*Wd,e()):w*$d},n.precision=function(t){return arguments.length?(c=ro(s,S=t*t),r()):ap(S)},n.fitExtent=function(t,e){return Ki(n,t,e)},n.fitSize=function(t,e){return to(n,t,e)},n.fitWidth=function(t,e){return no(n,t,e)},n.fitHeight=function(t,e){return eo(n,t,e)},function(){return i=t.apply(this,arguments),n.invert=i.invert&&function(t){return(t=l.invert(t[0],t[1]))&&[t[0]*$d,t[1]*$d]},e()}}function uo(t){var n=0,e=Hd/3,r=ao(t),i=r(n,e);return i.parallels=function(t){return arguments.length?r(n=t[0]*Wd,e=t[1]*Wd):[n*$d,e*$d]},i}function fo(t,n){function e(t,n){var e=ap(o-2*i*ip(n))/i;return[e*ip(t*=i),a-e*Kd(t)]}var r=ip(t),i=(r+ip(n))/2;if(Zd(i)0?n<-Xd+Id&&(n=-Xd+Id):n>Xd-Id&&(n=Xd-Id);var e=o/rp(go(n),i);return[e*ip(i*t),o-e*Kd(i*t)]}var r=Kd(t),i=t===n?ip(t):ep(r/Kd(n))/ep(go(n)/go(t)),o=r*rp(go(t),i)/i;return i?(e.invert=function(t,n){var e=o-n,r=op(i)*ap(t*t+e*e);return[Jd(t,Zd(e))/i*op(e),2*Qd(rp(o/r,1/i))-Xd]},e):po}function _o(t,n){return[t,n]}function bo(t,n){function e(t,n){var e=o-n,r=i*t;return[e*ip(r),o-e*Kd(r)]}var r=Kd(t),i=t===n?ip(t):(r-Kd(n))/(n-t),o=r/i+t;return Zd(i)=0;)n+=e[r].value;else n=1;t.value=n}function Co(t,n){var e,r,i,o,a,u=new Lo(t),f=+t.value&&(u.value=t.value),c=[u];for(null==n&&(n=Po);e=c.pop();)if(f&&(e.value=+e.data.value),(i=n(e.data))&&(a=i.length))for(e.children=new Array(a),o=a-1;o>=0;--o)c.push(r=e.children[o]=new Lo(i[o])),r.parent=e,r.depth=e.depth+1;return u.eachBefore(Ro)}function Po(t){return t.children}function zo(t){t.data=t.data.data}function Ro(t){var n=0;do{t.height=n}while((t=t.parent)&&t.height<++n)}function Lo(t){this.data=t,this.depth=this.height=0,this.parent=null}function Do(t){for(var n,e,r=0,i=(t=function(t){for(var n,e,r=t.length;r;)e=Math.random()*r--|0,n=t[r],t[r]=t[e],t[e]=n;return 
t}(dv.call(t))).length,o=[];r0&&e*e>r*r+i*i}function Oo(t,n){for(var e=0;e(a*=a)?(r=(c+a-i)/(2*c),o=Math.sqrt(Math.max(0,a/c-r*r)),e.x=t.x-r*u-o*f,e.y=t.y-r*f+o*u):(r=(c+i-a)/(2*c),o=Math.sqrt(Math.max(0,i/c-r*r)),e.x=n.x+r*u-o*f,e.y=n.y+r*f+o*u)):(e.x=n.x+e.r,e.y=n.y)}function Io(t,n){var e=t.r+n.r-1e-6,r=n.x-t.x,i=n.y-t.y;return e>0&&e*e>r*r+i*i}function jo(t){var n=t._,e=t.next._,r=n.r+e.r,i=(n.x*e.r+e.x*n.r)/r,o=(n.y*e.r+e.y*n.r)/r;return i*i+o*o}function Ho(t){this._=t,this.next=null,this.previous=null}function Xo(t){if(!(i=t.length))return 0;var n,e,r,i,o,a,u,f,c,s,l;if(n=t[0],n.x=0,n.y=0,!(i>1))return n.r;if(e=t[1],n.x=-e.r,e.x=n.r,e.y=0,!(i>2))return n.r+e.r;Fo(e,n,r=t[2]),n=new Ho(n),e=new Ho(e),r=new Ho(r),n.next=r.previous=e,e.next=n.previous=r,r.next=e.previous=n;t:for(u=3;uh&&(h=u),g=s*s*v,(d=Math.max(h/g,g/l))>p){s-=u;break}p=d}y.push(a={value:s,dice:f1&&la(t[e[r-2]],t[e[r-1]],t[i])<=0;)--r;e[r++]=i}return e.slice(0,r)}function pa(){return Math.random()}function va(t){function n(n){var o=n+"",a=e.get(o);if(!a){if(i!==kv)return i;e.set(o,a=r.push(n))}return t[(a-1)%t.length]}var e=le(),r=[],i=kv;return t=null==t?[]:Ev.call(t),n.domain=function(t){if(!arguments.length)return r.slice();r=[],e=le();for(var i,o,a=-1,u=t.length;++a2?wa:xa,o=a=null,r}function r(n){return(o||(o=i(u,f,s?function(t){return function(n,e){var r=t(n=+n,e=+e);return function(t){return t<=n?0:t>=e?1:r(t)}}}(t):t,c)))(+n)}var i,o,a,u=Cv,f=Cv,c=hn,s=!1;return r.invert=function(t){return(a||(a=i(f,u,ma,s?function(t){return function(n,e){var r=t(n=+n,e=+e);return function(t){return t<=0?n:t>=1?e:r(t)}}}(n):n)))(+t)},r.domain=function(t){return arguments.length?(u=Sv.call(t,ba),e()):u.slice()},r.range=function(t){return arguments.length?(f=Ev.call(t),e()):f.slice()},r.rangeRound=function(t){return f=Ev.call(t),c=dn,e()},r.clamp=function(t){return arguments.length?(s=!!t,e()):s},r.interpolate=function(t){return arguments.length?(c=t,e()):c},e()}function Ta(n){var e=n.domain;return 
n.ticks=function(t){var n=e();return l(n[0],n[n.length-1],null==t?10:t)},n.tickFormat=function(n,r){return function(n,e,r){var i,o=n[0],a=n[n.length-1],u=d(o,a,null==e?10:e);switch((r=tr(null==r?",f":r)).type){case"s":var f=Math.max(Math.abs(o),Math.abs(a));return null!=r.precision||isNaN(i=ar(u,f))||(r.precision=i),t.formatPrefix(r,f);case"":case"e":case"g":case"p":case"r":null!=r.precision||isNaN(i=ur(u,Math.max(Math.abs(o),Math.abs(a))))||(r.precision=i-("e"===r.type));break;case"f":case"%":null!=r.precision||isNaN(i=or(u))||(r.precision=i-2*("%"===r.type))}return t.format(r)}(e(),n,r)},n.nice=function(t){null==t&&(t=10);var r,i=e(),o=0,a=i.length-1,u=i[o],f=i[a];return f0?r=h(u=Math.floor(u/r)*r,f=Math.ceil(f/r)*r,t):r<0&&(r=h(u=Math.ceil(u*r)/r,f=Math.floor(f*r)/r,t)),r>0?(i[o]=Math.floor(u/r)*r,i[a]=Math.ceil(f/r)*r,e(i)):r<0&&(i[o]=Math.ceil(u*r)/r,i[a]=Math.floor(f*r)/r,e(i)),n},n}function Na(){var t=Aa(ma,cn);return t.copy=function(){return Ma(t,Na())},Ta(t)}function Sa(){function t(t){return+t}var n=[0,1];return t.invert=t,t.domain=t.range=function(e){return arguments.length?(n=Sv.call(e,ba),t):n.slice()},t.copy=function(){return Sa().domain(n)},Ta(t)}function Ea(t,n){var e,r=0,i=(t=t.slice()).length-1,o=t[r],a=t[i];return a0){for(;df)break;g.push(h)}}else for(;d=1;--s)if(!((h=c*s)f)break;g.push(h)}}else g=l(d,p,Math.min(p-d,v)).map(a);return n?g.reverse():g},e.tickFormat=function(n,r){if(null==r&&(r=10===i?".0e":","),"function"!=typeof r&&(r=t.format(r)),n===1/0)return r;null==n&&(n=10);var u=Math.max(1,i*n/e.ticks().length);return function(t){var n=t/a(Math.round(o(t)));return n*i0?o[n-1]:r[0],n=i?[o[i-1],r]:[o[n-1],o[n]]},t.copy=function(){return Ya().domain([e,r]).range(a)},Ta(t)}function Ba(){function t(t){if(t<=t)return e[Qc(n,t,0,r)]}var n=[.5],e=[0,1],r=1;return t.domain=function(i){return arguments.length?(n=Ev.call(i),r=Math.min(n.length,e.length-1),t):n.slice()},t.range=function(i){return 
arguments.length?(e=Ev.call(i),r=Math.min(n.length,e.length-1),t):e.slice()},t.invertExtent=function(t){var r=e.indexOf(t);return[n[r-1],n[r]]},t.copy=function(){return Ba().domain(n).range(e)},t}function Fa(t,n,e,r){function i(n){return t(n=new Date(+n)),n}return i.floor=i,i.ceil=function(e){return t(e=new Date(e-1)),n(e,1),t(e),e},i.round=function(t){var n=i(t),e=i.ceil(t);return t-n0))return u;do{u.push(a=new Date(+e)),n(e,o),t(e)}while(a=n)for(;t(n),!e(n);)n.setTime(n-1)},function(t,r){if(t>=t)if(r<0)for(;++r<=0;)for(;n(t,-1),!e(t););else for(;--r>=0;)for(;n(t,1),!e(t););})},e&&(i.count=function(n,r){return Pv.setTime(+n),zv.setTime(+r),t(Pv),t(zv),Math.floor(e(Pv,zv))},i.every=function(t){return t=Math.floor(t),isFinite(t)&&t>0?t>1?i.filter(r?function(n){return r(n)%t==0}:function(n){return i.count(0,n)%t==0}):i:null}),i}function Ia(t){return Fa(function(n){n.setDate(n.getDate()-(n.getDay()+7-t)%7),n.setHours(0,0,0,0)},function(t,n){t.setDate(t.getDate()+7*n)},function(t,n){return(n-t-(n.getTimezoneOffset()-t.getTimezoneOffset())*Dv)/Uv})}function ja(t){return Fa(function(n){n.setUTCDate(n.getUTCDate()-(n.getUTCDay()+7-t)%7),n.setUTCHours(0,0,0,0)},function(t,n){t.setUTCDate(t.getUTCDate()+7*n)},function(t,n){return(n-t)/Uv})}function Ha(t){if(0<=t.y&&t.y<100){var n=new Date(-1,t.m,t.d,t.H,t.M,t.S,t.L);return n.setFullYear(t.y),n}return new Date(t.y,t.m,t.d,t.H,t.M,t.S,t.L)}function Xa(t){if(0<=t.y&&t.y<100){var n=new Date(Date.UTC(-1,t.m,t.d,t.H,t.M,t.S,t.L));return n.setUTCFullYear(t.y),n}return new Date(Date.UTC(t.y,t.m,t.d,t.H,t.M,t.S,t.L))}function Ga(t){return{y:t,m:0,d:1,H:0,M:0,S:0,L:0}}function Va(t){function n(t,n){return function(e){var r,i,o,a=[],u=-1,f=0,c=t.length;for(e instanceof Date||(e=new Date(+e));++u53)return null;"w"in a||(a.w=1),"Z"in 
a?(i=(o=(i=Xa(Ga(a.y))).getUTCDay())>4||0===o?gg.ceil(i):gg(i),i=dg.offset(i,7*(a.V-1)),a.y=i.getUTCFullYear(),a.m=i.getUTCMonth(),a.d=i.getUTCDate()+(a.w+6)%7):(i=(o=(i=n(Ga(a.y))).getDay())>4||0===o?Gv.ceil(i):Gv(i),i=jv.offset(i,7*(a.V-1)),a.y=i.getFullYear(),a.m=i.getMonth(),a.d=i.getDate()+(a.w+6)%7)}else("W"in a||"U"in a)&&("w"in a||(a.w="u"in a?a.u%7:"W"in a?1:0),o="Z"in a?Xa(Ga(a.y)).getUTCDay():n(Ga(a.y)).getDay(),a.m=0,a.d="W"in a?(a.w+6)%7+7*a.W-(o+5)%7:a.w+7*a.U-(o+6)%7);return"Z"in a?(a.H+=a.Z/100|0,a.M+=a.Z%100,Xa(a)):n(a)}}function r(t,n,e,r){for(var i,o,a=0,u=n.length,f=e.length;a=f)return-1;if(37===(i=n.charCodeAt(a++))){if(i=n.charAt(a++),!(o=A[i in Lg?n.charAt(a++):i])||(r=o(t,e,r))<0)return-1}else if(i!=e.charCodeAt(r++))return-1}return r}var i=t.dateTime,o=t.date,a=t.time,u=t.periods,f=t.days,c=t.shortDays,s=t.months,l=t.shortMonths,h=Za(u),d=Qa(u),p=Za(f),v=Qa(f),g=Za(c),y=Qa(c),_=Za(s),b=Qa(s),m=Za(l),x=Qa(l),w={a:function(t){return c[t.getDay()]},A:function(t){return f[t.getDay()]},b:function(t){return l[t.getMonth()]},B:function(t){return s[t.getMonth()]},c:null,d:yu,e:yu,f:wu,H:_u,I:bu,j:mu,L:xu,m:Mu,M:Au,p:function(t){return u[+(t.getHours()>=12)]},Q:Ju,s:Ku,S:Tu,u:Nu,U:Su,V:Eu,w:ku,W:Cu,x:null,X:null,y:Pu,Y:zu,Z:Ru,"%":Qu},M={a:function(t){return c[t.getUTCDay()]},A:function(t){return f[t.getUTCDay()]},b:function(t){return l[t.getUTCMonth()]},B:function(t){return s[t.getUTCMonth()]},c:null,d:Lu,e:Lu,f:Yu,H:Du,I:Uu,j:qu,L:Ou,m:Bu,M:Fu,p:function(t){return u[+(t.getUTCHours()>=12)]},Q:Ju,s:Ku,S:Iu,u:ju,U:Hu,V:Xu,w:Gu,W:Vu,x:null,X:null,y:$u,Y:Wu,Z:Zu,"%":Qu},A={a:function(t,n,e){var r=g.exec(n.slice(e));return r?(t.w=y[r[0].toLowerCase()],e+r[0].length):-1},A:function(t,n,e){var r=p.exec(n.slice(e));return r?(t.w=v[r[0].toLowerCase()],e+r[0].length):-1},b:function(t,n,e){var r=m.exec(n.slice(e));return r?(t.m=x[r[0].toLowerCase()],e+r[0].length):-1},B:function(t,n,e){var r=_.exec(n.slice(e));return 
r?(t.m=b[r[0].toLowerCase()],e+r[0].length):-1},c:function(t,n,e){return r(t,i,n,e)},d:uu,e:uu,f:du,H:cu,I:cu,j:fu,L:hu,m:au,M:su,p:function(t,n,e){var r=h.exec(n.slice(e));return r?(t.p=d[r[0].toLowerCase()],e+r[0].length):-1},Q:vu,s:gu,S:lu,u:Ka,U:tu,V:nu,w:Ja,W:eu,x:function(t,n,e){return r(t,o,n,e)},X:function(t,n,e){return r(t,a,n,e)},y:iu,Y:ru,Z:ou,"%":pu};return w.x=n(o,w),w.X=n(a,w),w.c=n(i,w),M.x=n(o,M),M.X=n(a,M),M.c=n(i,M),{format:function(t){var e=n(t+="",w);return e.toString=function(){return t},e},parse:function(t){var n=e(t+="",Ha);return n.toString=function(){return t},n},utcFormat:function(t){var e=n(t+="",M);return e.toString=function(){return t},e},utcParse:function(t){var n=e(t,Xa);return n.toString=function(){return t},n}}}function $a(t,n,e){var r=t<0?"-":"",i=(r?-t:t)+"",o=i.length;return r+(o68?1900:2e3),e+r[0].length):-1}function ou(t,n,e){var r=/^(Z)|([+-]\d\d)(?::?(\d\d))?/.exec(n.slice(e,e+6));return r?(t.Z=r[1]?0:-(r[2]+(r[3]||"00")),e+r[0].length):-1}function au(t,n,e){var r=Dg.exec(n.slice(e,e+2));return r?(t.m=r[0]-1,e+r[0].length):-1}function uu(t,n,e){var r=Dg.exec(n.slice(e,e+2));return r?(t.d=+r[0],e+r[0].length):-1}function fu(t,n,e){var r=Dg.exec(n.slice(e,e+3));return r?(t.m=0,t.d=+r[0],e+r[0].length):-1}function cu(t,n,e){var r=Dg.exec(n.slice(e,e+2));return r?(t.H=+r[0],e+r[0].length):-1}function su(t,n,e){var r=Dg.exec(n.slice(e,e+2));return r?(t.M=+r[0],e+r[0].length):-1}function lu(t,n,e){var r=Dg.exec(n.slice(e,e+2));return r?(t.S=+r[0],e+r[0].length):-1}function hu(t,n,e){var r=Dg.exec(n.slice(e,e+3));return r?(t.L=+r[0],e+r[0].length):-1}function du(t,n,e){var r=Dg.exec(n.slice(e,e+6));return r?(t.L=Math.floor(r[0]/1e3),e+r[0].length):-1}function pu(t,n,e){var r=Ug.exec(n.slice(e,e+1));return r?e+r[0].length:-1}function vu(t,n,e){var r=Dg.exec(n.slice(e));return r?(t.Q=+r[0],e+r[0].length):-1}function gu(t,n,e){var r=Dg.exec(n.slice(e));return r?(t.Q=1e3*+r[0],e+r[0].length):-1}function yu(t,n){return 
$a(t.getDate(),n,2)}function _u(t,n){return $a(t.getHours(),n,2)}function bu(t,n){return $a(t.getHours()%12||12,n,2)}function mu(t,n){return $a(1+jv.count(ug(t),t),n,3)}function xu(t,n){return $a(t.getMilliseconds(),n,3)}function wu(t,n){return xu(t,n)+"000"}function Mu(t,n){return $a(t.getMonth()+1,n,2)}function Au(t,n){return $a(t.getMinutes(),n,2)}function Tu(t,n){return $a(t.getSeconds(),n,2)}function Nu(t){var n=t.getDay();return 0===n?7:n}function Su(t,n){return $a(Xv.count(ug(t),t),n,2)}function Eu(t,n){var e=t.getDay();return t=e>=4||0===e?Wv(t):Wv.ceil(t),$a(Wv.count(ug(t),t)+(4===ug(t).getDay()),n,2)}function ku(t){return t.getDay()}function Cu(t,n){return $a(Gv.count(ug(t),t),n,2)}function Pu(t,n){return $a(t.getFullYear()%100,n,2)}function zu(t,n){return $a(t.getFullYear()%1e4,n,4)}function Ru(t){var n=t.getTimezoneOffset();return(n>0?"-":(n*=-1,"+"))+$a(n/60|0,"0",2)+$a(n%60,"0",2)}function Lu(t,n){return $a(t.getUTCDate(),n,2)}function Du(t,n){return $a(t.getUTCHours(),n,2)}function Uu(t,n){return $a(t.getUTCHours()%12||12,n,2)}function qu(t,n){return $a(1+dg.count(Pg(t),t),n,3)}function Ou(t,n){return $a(t.getUTCMilliseconds(),n,3)}function Yu(t,n){return Ou(t,n)+"000"}function Bu(t,n){return $a(t.getUTCMonth()+1,n,2)}function Fu(t,n){return $a(t.getUTCMinutes(),n,2)}function Iu(t,n){return $a(t.getUTCSeconds(),n,2)}function ju(t){var n=t.getUTCDay();return 0===n?7:n}function Hu(t,n){return $a(vg.count(Pg(t),t),n,2)}function Xu(t,n){var e=t.getUTCDay();return t=e>=4||0===e?bg(t):bg.ceil(t),$a(bg.count(Pg(t),t)+(4===Pg(t).getUTCDay()),n,2)}function Gu(t){return t.getUTCDay()}function Vu(t,n){return $a(gg.count(Pg(t),t),n,2)}function $u(t,n){return $a(t.getUTCFullYear()%100,n,2)}function Wu(t,n){return $a(t.getUTCFullYear()%1e4,n,4)}function Zu(){return"+0000"}function Qu(){return"%"}function Ju(t){return+t}function Ku(t){return Math.floor(+t/1e3)}function tf(n){return 
zg=Va(n),t.timeFormat=zg.format,t.timeParse=zg.parse,t.utcFormat=zg.utcFormat,t.utcParse=zg.utcParse,zg}function nf(t){return new Date(t)}function ef(t){return t instanceof Date?+t:+new Date(+t)}function rf(t,n,r,i,o,a,u,f,c){function s(e){return(u(e)=1?m_:t<=-1?-m_:Math.asin(t)}function lf(t){return t.innerRadius}function hf(t){return t.outerRadius}function df(t){return t.startAngle}function pf(t){return t.endAngle}function vf(t){return t&&t.padAngle}function gf(t,n,e,r,i,o,a){var u=t-e,f=n-r,c=(a?o:-o)/y_(u*u+f*f),s=c*f,l=-c*u,h=t+s,d=n+l,p=e+s,v=r+l,g=(h+p)/2,y=(d+v)/2,_=p-h,b=v-d,m=_*_+b*b,x=i-o,w=h*v-p*d,M=(b<0?-1:1)*y_(p_(0,x*x*m-w*w)),A=(w*b-_*M)/m,T=(-w*_-b*M)/m,N=(w*b+_*M)/m,S=(-w*_+b*M)/m,E=A-g,k=T-y,C=N-g,P=S-y;return E*E+k*k>C*C+P*P&&(A=N,T=S),{cx:A,cy:T,x01:-s,y01:-l,x11:A*(i/x-1),y11:T*(i/x-1)}}function yf(t){this._context=t}function _f(t){return new yf(t)}function bf(t){return t[0]}function mf(t){return t[1]}function xf(){function t(t){var u,f,c,s=t.length,l=!1;for(null==i&&(a=o(c=ie())),u=0;u<=s;++u)!(u=s;--l)c.point(g[l],y[l]);c.lineEnd(),c.areaEnd()}v&&(g[n]=+e(h,n,t),y[n]=+i(h,n,t),c.point(r?+r(h,n,t):g[n],o?+o(h,n,t):y[n]))}if(d)return c=null,d+""||null}function n(){return xf().defined(a).curve(f).context(u)}var e=bf,r=null,i=cf(0),o=mf,a=cf(!0),u=null,f=_f,c=null;return t.x=function(n){return arguments.length?(e="function"==typeof n?n:cf(+n),r=null,t):e},t.x0=function(n){return arguments.length?(e="function"==typeof n?n:cf(+n),t):e},t.x1=function(n){return arguments.length?(r=null==n?null:"function"==typeof n?n:cf(+n),t):r},t.y=function(n){return arguments.length?(i="function"==typeof n?n:cf(+n),o=null,t):i},t.y0=function(n){return arguments.length?(i="function"==typeof n?n:cf(+n),t):i},t.y1=function(n){return arguments.length?(o=null==n?null:"function"==typeof n?n:cf(+n),t):o},t.lineX0=t.lineY0=function(){return n().x(e).y(i)},t.lineY1=function(){return n().x(e).y(o)},t.lineX1=function(){return n().x(r).y(i)},t.defined=function(n){return 
arguments.length?(a="function"==typeof n?n:cf(!!n),t):a},t.curve=function(n){return arguments.length?(f=n,null!=u&&(c=f(u)),t):f},t.context=function(n){return arguments.length?(null==n?u=c=null:c=f(u=n),t):u},t}function Mf(t,n){return nt?1:n>=t?0:NaN}function Af(t){return t}function Tf(t){this._curve=t}function Nf(t){function n(n){return new Tf(t(n))}return n._curve=t,n}function Sf(t){var n=t.curve;return t.angle=t.x,delete t.x,t.radius=t.y,delete t.y,t.curve=function(t){return arguments.length?n(Nf(t)):n()._curve},t}function Ef(){return Sf(xf().curve(w_))}function kf(){var t=wf().curve(w_),n=t.curve,e=t.lineX0,r=t.lineX1,i=t.lineY0,o=t.lineY1;return t.angle=t.x,delete t.x,t.startAngle=t.x0,delete t.x0,t.endAngle=t.x1,delete t.x1,t.radius=t.y,delete t.y,t.innerRadius=t.y0,delete t.y0,t.outerRadius=t.y1,delete t.y1,t.lineStartAngle=function(){return Sf(e())},delete t.lineX0,t.lineEndAngle=function(){return Sf(r())},delete t.lineX1,t.lineInnerRadius=function(){return Sf(i())},delete t.lineY0,t.lineOuterRadius=function(){return Sf(o())},delete t.lineY1,t.curve=function(t){return arguments.length?n(Nf(t)):n()._curve},t}function Cf(t,n){return[(n=+n)*Math.cos(t-=Math.PI/2),n*Math.sin(t)]}function Pf(t){return t.source}function zf(t){return t.target}function Rf(t){function n(){var n,u=M_.call(arguments),f=e.apply(this,u),c=r.apply(this,u);if(a||(a=n=ie()),t(a,+i.apply(this,(u[0]=f,u)),+o.apply(this,u),+i.apply(this,(u[0]=c,u)),+o.apply(this,u)),n)return a=null,n+""||null}var e=Pf,r=zf,i=bf,o=mf,a=null;return n.source=function(t){return arguments.length?(e=t,n):e},n.target=function(t){return arguments.length?(r=t,n):r},n.x=function(t){return arguments.length?(i="function"==typeof t?t:cf(+t),n):i},n.y=function(t){return arguments.length?(o="function"==typeof t?t:cf(+t),n):o},n.context=function(t){return arguments.length?(a=null==t?null:t,n):a},n}function Lf(t,n,e,r,i){t.moveTo(n,e),t.bezierCurveTo(n=(n+r)/2,e,n,i,r,i)}function 
Df(t,n,e,r,i){t.moveTo(n,e),t.bezierCurveTo(n,e=(e+i)/2,r,e,r,i)}function Uf(t,n,e,r,i){var o=Cf(n,e),a=Cf(n,e=(e+i)/2),u=Cf(r,e),f=Cf(r,i);t.moveTo(o[0],o[1]),t.bezierCurveTo(a[0],a[1],u[0],u[1],f[0],f[1])}function qf(){}function Of(t,n,e){t._context.bezierCurveTo((2*t._x0+t._x1)/3,(2*t._y0+t._y1)/3,(t._x0+2*t._x1)/3,(t._y0+2*t._y1)/3,(t._x0+4*t._x1+n)/6,(t._y0+4*t._y1+e)/6)}function Yf(t){this._context=t}function Bf(t){this._context=t}function Ff(t){this._context=t}function If(t,n){this._basis=new Yf(t),this._beta=n}function jf(t,n,e){t._context.bezierCurveTo(t._x1+t._k*(t._x2-t._x0),t._y1+t._k*(t._y2-t._y0),t._x2+t._k*(t._x1-n),t._y2+t._k*(t._y1-e),t._x2,t._y2)}function Hf(t,n){this._context=t,this._k=(1-n)/6}function Xf(t,n){this._context=t,this._k=(1-n)/6}function Gf(t,n){this._context=t,this._k=(1-n)/6}function Vf(t,n,e){var r=t._x1,i=t._y1,o=t._x2,a=t._y2;if(t._l01_a>__){var u=2*t._l01_2a+3*t._l01_a*t._l12_a+t._l12_2a,f=3*t._l01_a*(t._l01_a+t._l12_a);r=(r*u-t._x0*t._l12_2a+t._x2*t._l01_2a)/f,i=(i*u-t._y0*t._l12_2a+t._y2*t._l01_2a)/f}if(t._l23_a>__){var c=2*t._l23_2a+3*t._l23_a*t._l12_a+t._l12_2a,s=3*t._l23_a*(t._l23_a+t._l12_a);o=(o*c+t._x1*t._l23_2a-n*t._l12_2a)/s,a=(a*c+t._y1*t._l23_2a-e*t._l12_2a)/s}t._context.bezierCurveTo(r,i,o,a,t._x2,t._y2)}function $f(t,n){this._context=t,this._alpha=n}function Wf(t,n){this._context=t,this._alpha=n}function Zf(t,n){this._context=t,this._alpha=n}function Qf(t){this._context=t}function Jf(t){return t<0?-1:1}function Kf(t,n,e){var r=t._x1-t._x0,i=n-t._x1,o=(t._y1-t._y0)/(r||i<0&&-0),a=(e-t._y1)/(i||r<0&&-0),u=(o*i+a*r)/(r+i);return(Jf(o)+Jf(a))*Math.min(Math.abs(o),Math.abs(a),.5*Math.abs(u))||0}function tc(t,n){var e=t._x1-t._x0;return e?(3*(t._y1-t._y0)/e-n)/2:n}function nc(t,n,e){var r=t._x0,i=t._y0,o=t._x1,a=t._y1,u=(o-r)/3;t._context.bezierCurveTo(r+u,i+u*n,o-u,a-u*e,o,a)}function ec(t){this._context=t}function rc(t){this._context=new ic(t)}function ic(t){this._context=t}function oc(t){this._context=t}function 
ac(t){var n,e,r=t.length-1,i=new Array(r),o=new Array(r),a=new Array(r);for(i[0]=0,o[0]=2,a[0]=t[0]+2*t[1],n=1;n=0;--n)i[n]=(a[n]-i[n+1])/o[n];for(o[r-1]=(t[r]+i[r-1])/2,n=0;n1)for(var e,r,i,o=1,a=t[n[0]],u=a.length;o=0;)e[n]=n;return e}function sc(t,n){return t[n]}function lc(t){var n=t.map(hc);return cc(t).sort(function(t,e){return n[t]-n[e]})}function hc(t){for(var n,e=0,r=-1,i=t.length;++r0)){if(o/=h,h<0){if(o0){if(o>l)return;o>s&&(s=o)}if(o=r-f,h||!(o<0)){if(o/=h,h<0){if(o>l)return;o>s&&(s=o)}else if(h>0){if(o0)){if(o/=d,d<0){if(o0){if(o>l)return;o>s&&(s=o)}if(o=i-c,d||!(o<0)){if(o/=d,d<0){if(o>l)return;o>s&&(s=o)}else if(d>0){if(o0||l<1)||(s>0&&(t[0]=[f+s*h,c+s*d]),l<1&&(t[1]=[f+l*h,c+l*d]),!0)}}}}}function Tc(t,n,e,r,i){var o=t[1];if(o)return!0;var a,u,f=t[0],c=t.left,s=t.right,l=c[0],h=c[1],d=s[0],p=s[1],v=(l+d)/2,g=(h+p)/2;if(p===h){if(v=r)return;if(l>d){if(f){if(f[1]>=i)return}else f=[v,e];o=[v,i]}else{if(f){if(f[1]1)if(l>d){if(f){if(f[1]>=i)return}else f=[(e-u)/a,e];o=[(i-u)/a,i]}else{if(f){if(f[1]=r)return}else f=[n,a*n+u];o=[r,a*r+u]}else{if(f){if(f[0]=-eb)){var d=f*f+c*c,p=s*s+l*l,v=(l*d-c*p)/h,g=(f*p-s*d)/h,y=K_.pop()||new function(){yc(this),this.x=this.y=this.arc=this.site=this.cy=null};y.arc=t,y.site=i,y.x=v+a,y.y=(y.cy=g+u)+Math.sqrt(v*v+g*g),t.circle=y;for(var _=null,b=Q_._;b;)if(y.ynb)u=u.L;else{if(!((i=o-function(t,n){var e=t.N;if(e)return Dc(e,n);var r=t.site;return r[1]===n?r[0]:1/0}(u,a))>nb)){r>-nb?(n=u.P,e=u):i>-nb?(n=u,e=u.N):n=e=u;break}if(!u.R){n=u;break}u=u.R}(function(t){Z_[t.index]={site:t,halfedges:[]}})(t);var f=Pc(t);if(W_.insert(n,f),n||e){if(n===e)return Cc(n),e=Pc(n.site),W_.insert(f,e),f.edge=e.edge=xc(n.site,f.site),kc(n),void kc(e);if(e){Cc(n),Cc(e);var c=n.site,s=c[0],l=c[1],h=t[0]-s,d=t[1]-l,p=e.site,v=p[0]-s,g=p[1]-l,y=2*(h*g-d*v),_=h*h+d*d,b=v*v+g*g,m=[(g*_-d*b)/y+s,(h*b-v*_)/y+l];Mc(e.edge,c,p,m),f.edge=xc(c,t,null,m),e.edge=xc(t,p,null,m),kc(n),kc(e)}else f.edge=xc(n.site,f.site)}}function Dc(t,n){var 
e=t.site,r=e[0],i=e[1],o=i-n;if(!o)return r;var a=t.P;if(!a)return-1/0;var u=(e=a.site)[0],f=e[1],c=f-n;if(!c)return u;var s=u-r,l=1/o-1/c,h=s/c;return l?(-h+Math.sqrt(h*h-2*l*(s*s/(-2*c)-f+c/2+i-o/2)))/l+r:(r+u)/2}function Uc(t,n,e){return(t[0]-e[0])*(n[1]-t[1])-(t[0]-n[0])*(e[1]-t[1])}function qc(t,n){return n[1]-t[1]||n[0]-t[0]}function Oc(t,n){var e,r,i,o=t.sort(qc).pop();for(J_=[],Z_=new Array(t.length),W_=new gc,Q_=new gc;;)if(i=$_,o&&(!i||o[1]nb||Math.abs(i[0][1]-i[1][1])>nb)||delete J_[o]})(a,u,f,c),function(t,n,e,r){var i,o,a,u,f,c,s,l,h,d,p,v,g=Z_.length,y=!0;for(i=0;inb||Math.abs(v-h)>nb)&&(f.splice(u,0,J_.push(wc(a,d,Math.abs(p-t)nb?[t,Math.abs(l-t)nb?[Math.abs(h-r)nb?[e,Math.abs(l-e)nb?[Math.abs(h-n)r?(r+i)/2:Math.min(0,r)||Math.max(0,i),a>o?(o+a)/2:Math.min(0,o)||Math.max(0,a))}var Zc=e(n),Qc=Zc.right,Jc=Zc.left,Kc=Array.prototype,ts=Kc.slice,ns=Kc.map,es=Math.sqrt(50),rs=Math.sqrt(10),is=Math.sqrt(2),os=Array.prototype.slice,as=1,us=2,fs=3,cs=4,ss=1e-6,ls={value:function(){}};S.prototype=N.prototype={constructor:S,on:function(t,n){var e,r=this._,i=function(t,n){return t.trim().split(/^|\s+/).map(function(t){var e="",r=t.indexOf(".");if(r>=0&&(e=t.slice(r+1),t=t.slice(0,r)),t&&!n.hasOwnProperty(t))throw new Error("unknown type: "+t);return{type:t,name:e}})}(t+"",r),o=-1,a=i.length;{if(!(arguments.length<2)){if(null!=n&&"function"!=typeof n)throw new Error("invalid callback: "+n);for(;++o0)for(var e,r,i=new Array(e),o=0;o=0&&(this._names.splice(n,1),this._node.setAttribute("class",this._names.join(" ")))},contains:function(t){return this._names.indexOf(t)>=0}};var bs={};if(t.event=null,"undefined"!=typeof document){"onmouseenter"in document.documentElement||(bs={mouseenter:"mouseover",mouseleave:"mouseout"})}var ms=[null];ut.prototype=ft.prototype={constructor:ut,select:function(t){"function"!=typeof t&&(t=z(t));for(var n=this._groups,e=n.length,r=new 
Array(e),i=0;i=m&&(m=b+1);!(_=g[m])&&++m=0;)(r=i[o])&&(a&&a!==r.nextSibling&&a.parentNode.insertBefore(r,a),a=r);return this},sort:function(t){function n(n,e){return n&&e?t(n.__data__,e.__data__):!n-!e}t||(t=Y);for(var e=this._groups,r=e.length,i=new Array(r),o=0;o1?this.each((null==n?function(t){return function(){this.style.removeProperty(t)}}:"function"==typeof n?function(t,n,e){return function(){var r=n.apply(this,arguments);null==r?this.style.removeProperty(t):this.style.setProperty(t,r,e)}}:function(t,n,e){return function(){this.style.setProperty(t,n,e)}})(t,n,null==e?"":e)):F(this.node(),t)},property:function(t,n){return arguments.length>1?this.each((null==n?function(t){return function(){delete this[t]}}:"function"==typeof n?function(t,n){return function(){var e=n.apply(this,arguments);null==e?delete this[t]:this[t]=e}}:function(t,n){return function(){this[t]=n}})(t,n)):this.node()[t]},classed:function(t,n){var e=I(t+"");if(arguments.length<2){for(var r=j(this.node()),i=-1,o=e.length;++i=0&&(n=t.slice(e+1),t=t.slice(0,e)),{type:t,name:n}})}(t+""),a=o.length;if(!(arguments.length<2)){for(u=n?it:rt,null==e&&(e=!1),r=0;r=240?t-240:t+120,i,r),Ot(t,i,r),Ot(t<120?t+240:t-120,i,r),this.opacity)},displayable:function(){return(0<=this.s&&this.s<=1||isNaN(this.s))&&0<=this.l&&this.l<=1&&0<=this.opacity&&this.opacity<=1}}));var Ls=Math.PI/180,Ds=180/Math.PI,Us=.96422,qs=1,Os=.82521,Ys=4/29,Bs=6/29,Fs=3*Bs*Bs,Is=Bs*Bs*Bs;Nt(Ft,Bt,St(Et,{brighter:function(t){return new Ft(this.l+18*(null==t?1:t),this.a,this.b,this.opacity)},darker:function(t){return new Ft(this.l-18*(null==t?1:t),this.a,this.b,this.opacity)},rgb:function(){var t=(this.l+16)/116,n=isNaN(this.a)?t:t+this.a/500,e=isNaN(this.b)?t:t-this.b/200;return n=Us*jt(n),t=qs*jt(t),e=Os*jt(e),new Lt(Ht(3.1338561*n-1.6168667*t-.4906146*e),Ht(-.9787684*n+1.9161415*t+.033454*e),Ht(.0719453*n-.2289914*t+1.4052427*e),this.opacity)}})),Nt($t,Vt,St(Et,{brighter:function(t){return new 
$t(this.h,this.c,this.l+18*(null==t?1:t),this.opacity)},darker:function(t){return new $t(this.h,this.c,this.l-18*(null==t?1:t),this.opacity)},rgb:function(){return Yt(this).rgb()}}));var js=-.29227,Hs=-.90649,Xs=1.97294,Gs=Xs*Hs,Vs=1.78277*Xs,$s=1.78277*js- -.14861*Hs;Nt(Zt,Wt,St(Et,{brighter:function(t){return t=null==t?1/.7:Math.pow(1/.7,t),new Zt(this.h,this.s,this.l*t,this.opacity)},darker:function(t){return t=null==t?.7:Math.pow(.7,t),new Zt(this.h,this.s,this.l*t,this.opacity)},rgb:function(){var t=isNaN(this.h)?0:(this.h+120)*Ls,n=+this.l,e=isNaN(this.s)?0:this.s*n*(1-n),r=Math.cos(t),i=Math.sin(t);return new Lt(255*(n+e*(-.14861*r+1.78277*i)),255*(n+e*(js*r+Hs*i)),255*(n+e*(Xs*r)),this.opacity)}}));var Ws,Zs,Qs,Js,Ks,tl,nl=function t(n){function e(t,n){var e=r((t=Rt(t)).r,(n=Rt(n)).r),i=r(t.g,n.g),o=r(t.b,n.b),a=on(t.opacity,n.opacity);return function(n){return t.r=e(n),t.g=i(n),t.b=o(n),t.opacity=a(n),t+""}}var r=rn(n);return e.gamma=t,e}(1),el=an(Jt),rl=an(Kt),il=/[-+]?(?:\d+\.?\d*|\.?\d+)(?:[eE][-+]?\d+)?/g,ol=new RegExp(il.source,"g"),al=180/Math.PI,ul={translateX:0,translateY:0,rotate:0,skewX:0,scaleX:1,scaleY:1},fl=vn(function(t){return"none"===t?ul:(Ws||(Ws=document.createElement("DIV"),Zs=document.documentElement,Qs=document.defaultView),Ws.style.transform=t,t=Qs.getComputedStyle(Zs.appendChild(Ws),null).getPropertyValue("transform"),Zs.removeChild(Ws),t=t.slice(7,-1).split(","),pn(+t[0],+t[1],+t[2],+t[3],+t[4],+t[5]))},"px, ","px)","deg)"),cl=vn(function(t){return null==t?ul:(Js||(Js=document.createElementNS("http://www.w3.org/2000/svg","g")),Js.setAttribute("transform",t),(t=Js.transform.baseVal.consolidate())?(t=t.matrix,pn(t.a,t.b,t.c,t.d,t.e,t.f)):ul)},", ",")",")"),sl=Math.SQRT2,ll=2,hl=4,dl=1e-12,pl=_n(en),vl=_n(on),gl=bn(en),yl=bn(on),_l=mn(en),bl=mn(on),ml=0,xl=0,wl=0,Ml=1e3,Al=0,Tl=0,Nl=0,Sl="object"==typeof performance&&performance.now?performance:Date,El="object"==typeof 
window&&window.requestAnimationFrame?window.requestAnimationFrame.bind(window):function(t){setTimeout(t,17)};Mn.prototype=An.prototype={constructor:Mn,restart:function(t,n,e){if("function"!=typeof t)throw new TypeError("callback is not a function");e=(null==e?xn():+e)+(null==n?0:+n),this._next||tl===this||(tl?tl._next=this:Ks=this,tl=this),this._call=t,this._time=e,En()},stop:function(){this._call&&(this._call=null,this._time=1/0,En())}};var kl=N("start","end","interrupt"),Cl=[],Pl=0,zl=1,Rl=2,Ll=3,Dl=4,Ul=5,ql=6,Ol=ft.prototype.constructor,Yl=0,Bl=ft.prototype;qn.prototype=On.prototype={constructor:qn,select:function(t){var n=this._name,e=this._id;"function"!=typeof t&&(t=z(t));for(var r=this._groups,i=r.length,o=new Array(i),a=0;a=0&&(t=t.slice(0,n)),!t||"start"===t})}(n)?Pn:zn;return function(){var a=o(this,t),u=a.on;u!==r&&(i=(r=u).copy()).on(n,e),a.on=i}}(e,t,n))},attr:function(t,n){var e=k(t),r="transform"===e?cl:Un;return this.attrTween(t,"function"==typeof n?(e.local?function(t,n,e){var r,i,o;return function(){var a,u=e(this);if(null!=u)return(a=this.getAttributeNS(t.space,t.local))===u?null:a===r&&u===i?o:o=n(r=a,i=u);this.removeAttributeNS(t.space,t.local)}}:function(t,n,e){var r,i,o;return function(){var a,u=e(this);if(null!=u)return(a=this.getAttribute(t))===u?null:a===r&&u===i?o:o=n(r=a,i=u);this.removeAttribute(t)}})(e,r,Dn(this,"attr."+t,n)):null==n?(e.local?function(t){return function(){this.removeAttributeNS(t.space,t.local)}}:function(t){return function(){this.removeAttribute(t)}})(e):(e.local?function(t,n,e){var r,i;return function(){var o=this.getAttributeNS(t.space,t.local);return o===e?null:o===r?i:i=n(r=o,e)}}:function(t,n,e){var r,i;return function(){var o=this.getAttribute(t);return o===e?null:o===r?i:i=n(r=o,e)}})(e,r,n+""))},attrTween:function(t,n){var e="attr."+t;if(arguments.length<2)return(e=this.tween(e))&&e._value;if(null==n)return this.tween(e,null);if("function"!=typeof n)throw new Error;var r=k(t);return 
this.tween(e,(r.local?function(t,n){function e(){var e=this,r=n.apply(e,arguments);return r&&function(n){e.setAttributeNS(t.space,t.local,r(n))}}return e._value=n,e}:function(t,n){function e(){var e=this,r=n.apply(e,arguments);return r&&function(n){e.setAttribute(t,r(n))}}return e._value=n,e})(r,n))},style:function(t,n,e){var r="transform"==(t+="")?fl:Un;return null==n?this.styleTween(t,function(t,n){var e,r,i;return function(){var o=F(this,t),a=(this.style.removeProperty(t),F(this,t));return o===a?null:o===e&&a===r?i:i=n(e=o,r=a)}}(t,r)).on("end.style."+t,function(t){return function(){this.style.removeProperty(t)}}(t)):this.styleTween(t,"function"==typeof n?function(t,n,e){var r,i,o;return function(){var a=F(this,t),u=e(this);return null==u&&(this.style.removeProperty(t),u=F(this,t)),a===u?null:a===r&&u===i?o:o=n(r=a,i=u)}}(t,r,Dn(this,"style."+t,n)):function(t,n,e){var r,i;return function(){var o=F(this,t);return o===e?null:o===r?i:i=n(r=o,e)}}(t,r,n+""),e)},styleTween:function(t,n,e){var r="style."+(t+="");if(arguments.length<2)return(r=this.tween(r))&&r._value;if(null==n)return this.tween(r,null);if("function"!=typeof n)throw new Error;return this.tween(r,function(t,n,e){function r(){var r=this,i=n.apply(r,arguments);return i&&function(n){r.style.setProperty(t,i(n),e)}}return r._value=n,r}(t,n,null==e?"":e))},text:function(t){return this.tween("text","function"==typeof t?function(t){return function(){var n=t(this);this.textContent=null==n?"":n}}(Dn(this,"text",t)):function(t){return function(){this.textContent=t}}(null==t?"":t+""))},remove:function(){return this.on("end.remove",function(t){return function(){var n=this.parentNode;for(var e in this.__transition)if(+e!==t)return;n&&n.removeChild(this)}}(this._id))},tween:function(t,n){var e=this._id;if(t+="",arguments.length<2){for(var r,i=Rn(this.node(),e).tween,o=0,a=i.length;o1e-6)if(Math.abs(s*u-f*c)>1e-6&&i){var 
h=e-o,d=r-a,p=u*u+f*f,v=h*h+d*d,g=Math.sqrt(p),y=Math.sqrt(l),_=i*Math.tan((Ch-Math.acos((p+l-v)/(2*g*y)))/2),b=_/y,m=_/g;Math.abs(b-1)>1e-6&&(this._+="L"+(t+b*c)+","+(n+b*s)),this._+="A"+i+","+i+",0,0,"+ +(s*h>c*d)+","+(this._x1=t+m*u)+","+(this._y1=n+m*f)}else this._+="L"+(this._x1=t)+","+(this._y1=n);else;},arc:function(t,n,e,r,i,o){t=+t,n=+n;var a=(e=+e)*Math.cos(r),u=e*Math.sin(r),f=t+a,c=n+u,s=1^o,l=o?r-i:i-r;if(e<0)throw new Error("negative radius: "+e);null===this._x1?this._+="M"+f+","+c:(Math.abs(this._x1-f)>1e-6||Math.abs(this._y1-c)>1e-6)&&(this._+="L"+f+","+c),e&&(l<0&&(l=l%Ph+Ph),l>zh?this._+="A"+e+","+e+",0,1,"+s+","+(t-a)+","+(n-u)+"A"+e+","+e+",0,1,"+s+","+(this._x1=f)+","+(this._y1=c):l>1e-6&&(this._+="A"+e+","+e+",0,"+ +(l>=Ch)+","+s+","+(this._x1=t+e*Math.cos(i))+","+(this._y1=n+e*Math.sin(i))))},rect:function(t,n,e,r){this._+="M"+(this._x0=this._x1=+t)+","+(this._y0=this._y1=+n)+"h"+ +e+"v"+ +r+"h"+-e+"Z"},toString:function(){return this._}};se.prototype=le.prototype={constructor:se,has:function(t){return"$"+t in this},get:function(t){return this["$"+t]},set:function(t,n){return this["$"+t]=n,this},remove:function(t){var n="$"+t;return n in this&&delete this[n]},clear:function(){for(var t in this)"$"===t[0]&&delete this[t]},keys:function(){var t=[];for(var n in this)"$"===n[0]&&t.push(n.slice(1));return t},values:function(){var t=[];for(var n in this)"$"===n[0]&&t.push(this[n]);return t},entries:function(){var t=[];for(var n in this)"$"===n[0]&&t.push({key:n.slice(1),value:this[n]});return t},size:function(){var t=0;for(var n in this)"$"===n[0]&&++t;return t},empty:function(){for(var t in this)if("$"===t[0])return!1;return!0},each:function(t){for(var n in this)"$"===n[0]&&t(this[n],n.slice(1),this)}};var Rh=le.prototype;ge.prototype=ye.prototype={constructor:ge,has:Rh.has,add:function(t){return t+="",this["$"+t]=t,this},remove:Rh.remove,clear:Rh.clear,values:Rh.keys,size:Rh.size,empty:Rh.empty,each:Rh.each};var 
Lh=Array.prototype.slice,Dh=[[],[[[1,1.5],[.5,1]]],[[[1.5,1],[1,1.5]]],[[[1.5,1],[.5,1]]],[[[1,.5],[1.5,1]]],[[[1,1.5],[.5,1]],[[1,.5],[1.5,1]]],[[[1,.5],[1,1.5]]],[[[1,.5],[.5,1]]],[[[.5,1],[1,.5]]],[[[1,1.5],[1,.5]]],[[[.5,1],[1,.5]],[[1.5,1],[1,1.5]]],[[[1.5,1],[1,.5]]],[[[.5,1],[1.5,1]]],[[[1,1.5],[1.5,1]]],[[[.5,1],[1,1.5]]],[]],Uh={},qh={},Oh=34,Yh=10,Bh=13,Fh=Ee(","),Ih=Fh.parse,jh=Fh.parseRows,Hh=Fh.format,Xh=Fh.formatRows,Gh=Ee("\t"),Vh=Gh.parse,$h=Gh.parseRows,Wh=Gh.format,Zh=Gh.formatRows,Qh=Re(Ih),Jh=Re(Vh),Kh=De("application/xml"),td=De("text/html"),nd=De("image/svg+xml"),ed=Ie.prototype=je.prototype;ed.copy=function(){var t,n,e=new je(this._x,this._y,this._x0,this._y0,this._x1,this._y1),r=this._root;if(!r)return e;if(!r.length)return e._root=He(r),e;for(t=[{source:r,target:e._root=new Array(4)}];r=t.pop();)for(var i=0;i<4;++i)(n=r.source[i])&&(n.length?t.push({source:n,target:r.target[i]=new Array(4)}):r.target[i]=He(n));return e},ed.add=function(t){var n=+this._x.call(null,t),e=+this._y.call(null,t);return Oe(this.cover(n,e),n,e,t)},ed.addAll=function(t){var n,e,r,i,o=t.length,a=new Array(o),u=new Array(o),f=1/0,c=1/0,s=-1/0,l=-1/0;for(e=0;es&&(s=r),il&&(l=i));for(st||t>i||r>n||n>o))return this;var a,u,f=i-e,c=this._root;switch(u=(n<(r+o)/2)<<1|t<(e+i)/2){case 0:do{a=new Array(4),a[u]=c,c=a}while(f*=2,i=e+f,o=r+f,t>i||n>o);break;case 1:do{a=new Array(4),a[u]=c,c=a}while(f*=2,e=i-f,o=r+f,e>t||n>o);break;case 2:do{a=new Array(4),a[u]=c,c=a}while(f*=2,i=e+f,r=o-f,t>i||r>n);break;case 3:do{a=new Array(4),a[u]=c,c=a}while(f*=2,e=i-f,r=o-f,e>t||r>n)}this._root&&this._root.length&&(this._root=c)}return this._x0=e,this._y0=r,this._x1=i,this._y1=o,this},ed.data=function(){var t=[];return this.visit(function(n){if(!n.length)do{t.push(n.data)}while(n=n.next)}),t},ed.extent=function(t){return arguments.length?this.cover(+t[0][0],+t[0][1]).cover(+t[1][0],+t[1][1]):isNaN(this._x0)?void 0:[[this._x0,this._y0],[this._x1,this._y1]]},ed.find=function(t,n,e){var 
r,i,o,a,u,f,c,s=this._x0,l=this._y0,h=this._x1,d=this._y1,p=[],v=this._root;for(v&&p.push(new Ye(v,s,l,h,d)),null==e?e=1/0:(s=t-e,l=n-e,h=t+e,d=n+e,e*=e);f=p.pop();)if(!(!(v=f.node)||(i=f.x0)>h||(o=f.y0)>d||(a=f.x1)=y)<<1|t>=g)&&(f=p[p.length-1],p[p.length-1]=p[p.length-1-c],p[p.length-1-c]=f)}else{var _=t-+this._x.call(null,v.data),b=n-+this._y.call(null,v.data),m=_*_+b*b;if(m=(u=(p+g)/2))?p=u:g=u,(s=a>=(f=(v+y)/2))?v=f:y=f,n=d,!(d=d[l=s<<1|c]))return this;if(!d.length)break;(n[l+1&3]||n[l+2&3]||n[l+3&3])&&(e=n,h=l)}for(;d.data!==t;)if(r=d,!(d=d.next))return this;return(i=d.next)&&delete d.next,r?(i?r.next=i:delete r.next,this):n?(i?n[l]=i:delete n[l],(d=n[0]||n[1]||n[2]||n[3])&&d===(n[3]||n[2]||n[1]||n[0])&&!d.length&&(e?e[h]=d:this._root=d),this):(this._root=i,this)},ed.removeAll=function(t){for(var n=0,e=t.length;n0&&(o=0)}return o>0?t.slice(0,o)+t.slice(e+1):t},"%":function(t,n){return(100*t).toFixed(n)},b:function(t){return Math.round(t).toString(2)},c:function(t){return t+""},d:function(t){return Math.round(t).toString(10)},e:function(t,n){return t.toExponential(n)},f:function(t,n){return t.toFixed(n)},g:function(t,n){return t.toPrecision(n)},o:function(t){return Math.round(t).toString(8)},p:function(t,n){return Ke(100*t,n)},r:Ke,s:function(t,n){var e=Qe(t,n);if(!e)return t+"";var r=e[0],i=e[1],o=i-(rd=3*Math.max(-8,Math.min(8,Math.floor(i/3))))+1,a=r.length;return o===a?r:o>a?r+new Array(o-a+1).join("0"):o>0?r.slice(0,o)+"."+r.slice(o):"0."+new Array(1-o).join("0")+Qe(t,Math.max(0,n+o-1))[0]},X:function(t){return Math.round(t).toString(16).toUpperCase()},x:function(t){return Math.round(t).toString(16)}},ud=/^(?:(.)?([<>=^]))?([+\-\( ])?([$#])?(0)?(\d+)?(,)?(\.\d+)?([a-z%])?$/i;tr.prototype=nr.prototype,nr.prototype.toString=function(){return this.fill+this.align+this.sign+this.symbol+(this.zero?"0":"")+(null==this.width?"":Math.max(1,0|this.width))+(this.comma?",":"")+(null==this.precision?"":"."+Math.max(0,0|this.precision))+this.type};var 
fd,cd=["y","z","a","f","p","n","µ","m","","k","M","G","T","P","E","Z","Y"];ir({decimal:".",thousands:",",grouping:[3],currency:["$",""]}),cr.prototype={constructor:cr,reset:function(){this.s=this.t=0},add:function(t){sr(Fd,t,this.t),sr(this,Fd.s,this.s),this.s?this.t+=Fd.t:this.s=Fd.t},valueOf:function(){return this.s}};var sd,ld,hd,dd,pd,vd,gd,yd,_d,bd,md,xd,wd,Md,Ad,Td,Nd,Sd,Ed,kd,Cd,Pd,zd,Rd,Ld,Dd,Ud,qd,Od,Yd,Bd,Fd=new cr,Id=1e-6,jd=1e-12,Hd=Math.PI,Xd=Hd/2,Gd=Hd/4,Vd=2*Hd,$d=180/Hd,Wd=Hd/180,Zd=Math.abs,Qd=Math.atan,Jd=Math.atan2,Kd=Math.cos,tp=Math.ceil,np=Math.exp,ep=Math.log,rp=Math.pow,ip=Math.sin,op=Math.sign||function(t){return t>0?1:t<0?-1:0},ap=Math.sqrt,up=Math.tan,fp={Feature:function(t,n){vr(t.geometry,n)},FeatureCollection:function(t,n){for(var e=t.features,r=-1,i=e.length;++rId?_d=90:dp<-Id&&(gd=-90),Ad[0]=vd,Ad[1]=yd}},vp={sphere:pr,point:Br,lineStart:Ir,lineEnd:Xr,polygonStart:function(){vp.lineStart=Gr,vp.lineEnd=Vr},polygonEnd:function(){vp.lineStart=Ir,vp.lineEnd=Xr}};Jr.invert=Jr;var gp,yp,_p,bp,mp,xp,wp,Mp,Ap,Tp,Np,Sp=fr(),Ep=hi(function(){return!0},function(t){var n,e=NaN,r=NaN,i=NaN;return{lineStart:function(){t.lineStart(),n=1},point:function(o,a){var u=o>0?Hd:-Hd,f=Zd(o-e);Zd(f-Hd)0?Xd:-Xd),t.point(i,r),t.lineEnd(),t.lineStart(),t.point(u,r),t.point(o,r),n=0):i!==u&&f>=Hd&&(Zd(e-i)Id?Qd((ip(n)*(o=Kd(r))*ip(e)-ip(r)*(i=Kd(n))*ip(t))/(i*o*a)):(n+r)/2}(e,r,o,a),t.point(i,r),t.lineEnd(),t.lineStart(),t.point(u,r),n=0),t.point(e=o,r=a),i=u},lineEnd:function(){t.lineEnd(),e=r=NaN},clean:function(){return 2-n}}},function(t,n,e,r){var i;if(null==t)i=e*Xd,r.point(-Hd,i),r.point(0,i),r.point(Hd,i),r.point(Hd,0),r.point(Hd,-i),r.point(0,-i),r.point(-Hd,-i),r.point(-Hd,0),r.point(-Hd,i);else if(Zd(t[0]-n[0])>Id){var o=t[0]Ip&&(Ip=t),njp&&(jp=n)},lineStart:pr,lineEnd:pr,polygonStart:pr,polygonEnd:pr,result:function(){var t=[[Bp,Fp],[Ip,jp]];return 
Ip=jp=-(Fp=Bp=1/0),t}},Xp=0,Gp=0,Vp=0,$p=0,Wp=0,Zp=0,Qp=0,Jp=0,Kp=0,tv={point:Ui,lineStart:qi,lineEnd:Bi,polygonStart:function(){tv.lineStart=Fi,tv.lineEnd=Ii},polygonEnd:function(){tv.point=Ui,tv.lineStart=qi,tv.lineEnd=Bi},result:function(){var t=Kp?[Qp/Kp,Jp/Kp]:Zp?[$p/Zp,Wp/Zp]:Vp?[Xp/Vp,Gp/Vp]:[NaN,NaN];return Xp=Gp=Vp=$p=Wp=Zp=Qp=Jp=Kp=0,t}};Xi.prototype={_radius:4.5,pointRadius:function(t){return this._radius=t,this},polygonStart:function(){this._line=0},polygonEnd:function(){this._line=NaN},lineStart:function(){this._point=0},lineEnd:function(){0===this._line&&this._context.closePath(),this._point=NaN},point:function(t,n){switch(this._point){case 0:this._context.moveTo(t,n),this._point=1;break;case 1:this._context.lineTo(t,n);break;default:this._context.moveTo(t+this._radius,n),this._context.arc(t,n,this._radius,0,Vd)}},result:pr};var nv,ev,rv,iv,ov,av=fr(),uv={point:pr,lineStart:function(){uv.point=Gi},lineEnd:function(){nv&&Vi(ev,rv),uv.point=pr},polygonStart:function(){nv=!0},polygonEnd:function(){nv=null},result:function(){var t=+av;return av.reset(),t}};$i.prototype={_radius:4.5,_circle:Wi(4.5),pointRadius:function(t){return(t=+t)!==this._radius&&(this._radius=t,this._circle=null),this},polygonStart:function(){this._line=0},polygonEnd:function(){this._line=NaN},lineStart:function(){this._point=0},lineEnd:function(){0===this._line&&this._string.push("Z"),this._point=NaN},point:function(t,n){switch(this._point){case 0:this._string.push("M",t,",",n),this._point=1;break;case 1:this._string.push("L",t,",",n);break;default:null==this._circle&&(this._circle=Wi(this._radius)),this._string.push("M",t,",",n,this._circle)}},result:function(){if(this._string.length){var t=this._string.join("");return this._string=[],t}return 
null}},Qi.prototype={constructor:Qi,point:function(t,n){this.stream.point(t,n)},sphere:function(){this.stream.sphere()},lineStart:function(){this.stream.lineStart()},lineEnd:function(){this.stream.lineEnd()},polygonStart:function(){this.stream.polygonStart()},polygonEnd:function(){this.stream.polygonEnd()}};var fv=16,cv=Kd(30*Wd),sv=Zi({point:function(t,n){this.stream.point(t*Wd,n*Wd)}}),lv=lo(function(t){return ap(2/(1+t))});lv.invert=ho(function(t){return 2*hr(t/2)});var hv=lo(function(t){return(t=lr(t))&&t/ip(t)});hv.invert=ho(function(t){return t}),po.invert=function(t,n){return[t,2*Qd(np(n))-Xd]},_o.invert=_o,mo.invert=ho(Qd),wo.invert=function(t,n){var e,r=n,i=25;do{var o=r*r,a=o*o;r-=e=(r*(1.007226+o*(.015085+a*(.028874*o-.044475-.005916*a)))-n)/(1.007226+o*(.045255+a*(.259866*o-.311325-.005916*11*a)))}while(Zd(e)>Id&&--i>0);return[t/(.8707+(o=r*r)*(o*(o*o*o*(.003971-.001529*o)-.013791)-.131979)),r]},Mo.invert=ho(hr),Ao.invert=ho(function(t){return 2*Qd(t)}),To.invert=function(t,n){return[-n,2*Qd(np(t))-Xd]},Lo.prototype=Co.prototype={constructor:Lo,count:function(){return this.eachAfter(ko)},each:function(t){var n,e,r,i,o=this,a=[o];do{for(n=a.reverse(),a=[];o=n.pop();)if(t(o),e=o.children)for(r=0,i=e.length;r=0;--e)i.push(n[e]);return this},sum:function(t){return this.eachAfter(function(n){for(var e=+t(n.data)||0,r=n.children,i=r&&r.length;--i>=0;)e+=r[i].value;n.value=e})},sort:function(t){return this.eachBefore(function(n){n.children&&n.children.sort(t)})},path:function(t){for(var n=this,e=function(t,n){if(t===n)return t;var e=t.ancestors(),r=n.ancestors(),i=null;for(t=e.pop(),n=r.pop();t===n;)i=t,t=e.pop(),n=r.pop();return i}(n,t),r=[n];n!==e;)n=n.parent,r.push(n);for(var i=r.length;t!==e;)r.splice(i,0,t),t=t.parent;return r},ancestors:function(){for(var t=this,n=[t];t=t.parent;)n.push(t);return n},descendants:function(){var t=[];return this.each(function(n){t.push(n)}),t},leaves:function(){var t=[];return 
this.eachBefore(function(n){n.children||t.push(n)}),t},links:function(){var t=this,n=[];return t.each(function(e){e!==t&&n.push({source:e.parent,target:e})}),n},copy:function(){return Co(this).eachBefore(zo)}};var dv=Array.prototype.slice,pv="$",vv={depth:-1},gv={};fa.prototype=Object.create(Lo.prototype);var yv=(1+Math.sqrt(5))/2,_v=function t(n){function e(t,e,r,i,o){sa(n,t,e,r,i,o)}return e.ratio=function(n){return t((n=+n)>1?n:1)},e}(yv),bv=function t(n){function e(t,e,r,i,o){if((a=t._squarify)&&a.ratio===n)for(var a,u,f,c,s,l=-1,h=a.length,d=t.value;++l1?n:1)},e}(yv),mv=function t(n){function e(t,e){return t=null==t?0:+t,e=null==e?1:+e,1===arguments.length?(e=t,t=0):e-=t,function(){return n()*e+t}}return e.source=t,e}(pa),xv=function t(n){function e(t,e){var r,i;return t=null==t?0:+t,e=null==e?1:+e,function(){var o;if(null!=r)o=r,r=null;else do{r=2*n()-1,o=2*n()-1,i=r*r+o*o}while(!i||i>1);return t+e*o*Math.sqrt(-2*Math.log(i)/i)}}return e.source=t,e}(pa),wv=function t(n){function e(){var t=xv.source(n).apply(this,arguments);return function(){return Math.exp(t())}}return e.source=t,e}(pa),Mv=function t(n){function e(t){return function(){for(var e=0,r=0;r0?t>1?Fa(function(n){n.setTime(Math.floor(n/t)*t)},function(n,e){n.setTime(+n+e*t)},function(n,e){return(e-n)/t}):Rv:null};var Lv=Rv.range,Dv=6e4,Uv=6048e5,qv=Fa(function(t){t.setTime(1e3*Math.floor(t/1e3))},function(t,n){t.setTime(+t+1e3*n)},function(t,n){return(n-t)/1e3},function(t){return t.getUTCSeconds()}),Ov=qv.range,Yv=Fa(function(t){t.setTime(Math.floor(t/Dv)*Dv)},function(t,n){t.setTime(+t+n*Dv)},function(t,n){return(n-t)/Dv},function(t){return t.getMinutes()}),Bv=Yv.range,Fv=Fa(function(t){var n=t.getTimezoneOffset()*Dv%36e5;n<0&&(n+=36e5),t.setTime(36e5*Math.floor((+t-n)/36e5)+n)},function(t,n){t.setTime(+t+36e5*n)},function(t,n){return(n-t)/36e5},function(t){return 
t.getHours()}),Iv=Fv.range,jv=Fa(function(t){t.setHours(0,0,0,0)},function(t,n){t.setDate(t.getDate()+n)},function(t,n){return(n-t-(n.getTimezoneOffset()-t.getTimezoneOffset())*Dv)/864e5},function(t){return t.getDate()-1}),Hv=jv.range,Xv=Ia(0),Gv=Ia(1),Vv=Ia(2),$v=Ia(3),Wv=Ia(4),Zv=Ia(5),Qv=Ia(6),Jv=Xv.range,Kv=Gv.range,tg=Vv.range,ng=$v.range,eg=Wv.range,rg=Zv.range,ig=Qv.range,og=Fa(function(t){t.setDate(1),t.setHours(0,0,0,0)},function(t,n){t.setMonth(t.getMonth()+n)},function(t,n){return n.getMonth()-t.getMonth()+12*(n.getFullYear()-t.getFullYear())},function(t){return t.getMonth()}),ag=og.range,ug=Fa(function(t){t.setMonth(0,1),t.setHours(0,0,0,0)},function(t,n){t.setFullYear(t.getFullYear()+n)},function(t,n){return n.getFullYear()-t.getFullYear()},function(t){return t.getFullYear()});ug.every=function(t){return isFinite(t=Math.floor(t))&&t>0?Fa(function(n){n.setFullYear(Math.floor(n.getFullYear()/t)*t),n.setMonth(0,1),n.setHours(0,0,0,0)},function(n,e){n.setFullYear(n.getFullYear()+e*t)}):null};var fg=ug.range,cg=Fa(function(t){t.setUTCSeconds(0,0)},function(t,n){t.setTime(+t+n*Dv)},function(t,n){return(n-t)/Dv},function(t){return t.getUTCMinutes()}),sg=cg.range,lg=Fa(function(t){t.setUTCMinutes(0,0,0)},function(t,n){t.setTime(+t+36e5*n)},function(t,n){return(n-t)/36e5},function(t){return t.getUTCHours()}),hg=lg.range,dg=Fa(function(t){t.setUTCHours(0,0,0,0)},function(t,n){t.setUTCDate(t.getUTCDate()+n)},function(t,n){return(n-t)/864e5},function(t){return t.getUTCDate()-1}),pg=dg.range,vg=ja(0),gg=ja(1),yg=ja(2),_g=ja(3),bg=ja(4),mg=ja(5),xg=ja(6),wg=vg.range,Mg=gg.range,Ag=yg.range,Tg=_g.range,Ng=bg.range,Sg=mg.range,Eg=xg.range,kg=Fa(function(t){t.setUTCDate(1),t.setUTCHours(0,0,0,0)},function(t,n){t.setUTCMonth(t.getUTCMonth()+n)},function(t,n){return n.getUTCMonth()-t.getUTCMonth()+12*(n.getUTCFullYear()-t.getUTCFullYear())},function(t){return 
t.getUTCMonth()}),Cg=kg.range,Pg=Fa(function(t){t.setUTCMonth(0,1),t.setUTCHours(0,0,0,0)},function(t,n){t.setUTCFullYear(t.getUTCFullYear()+n)},function(t,n){return n.getUTCFullYear()-t.getUTCFullYear()},function(t){return t.getUTCFullYear()});Pg.every=function(t){return isFinite(t=Math.floor(t))&&t>0?Fa(function(n){n.setUTCFullYear(Math.floor(n.getUTCFullYear()/t)*t),n.setUTCMonth(0,1),n.setUTCHours(0,0,0,0)},function(n,e){n.setUTCFullYear(n.getUTCFullYear()+e*t)}):null};var zg,Rg=Pg.range,Lg={"-":"",_:" ",0:"0"},Dg=/^\s*\d+/,Ug=/^%/,qg=/[\\^$*+?|[\]().{}]/g;tf({dateTime:"%x, %X",date:"%-m/%-d/%Y",time:"%-I:%M:%S %p",periods:["AM","PM"],days:["Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday"],shortDays:["Sun","Mon","Tue","Wed","Thu","Fri","Sat"],months:["January","February","March","April","May","June","July","August","September","October","November","December"],shortMonths:["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]});var Og="%Y-%m-%dT%H:%M:%S.%LZ",Yg=Date.prototype.toISOString?function(t){return t.toISOString()}:t.utcFormat(Og),Bg=+new Date("2000-01-01T00:00:00.000Z")?function(t){var n=new Date(t);return isNaN(n)?null:n}:t.utcParse(Og),Fg=1e3,Ig=60*Fg,jg=60*Ig,Hg=24*jg,Xg=7*Hg,Gg=30*Hg,Vg=365*Hg,$g=af("1f77b4ff7f0e2ca02cd627289467bd8c564be377c27f7f7fbcbd2217becf"),Wg=af("7fc97fbeaed4fdc086ffff99386cb0f0027fbf5b17666666"),Zg=af("1b9e77d95f027570b3e7298a66a61ee6ab02a6761d666666"),Qg=af("a6cee31f78b4b2df8a33a02cfb9a99e31a1cfdbf6fff7f00cab2d66a3d9affff99b15928"),Jg=af("fbb4aeb3cde3ccebc5decbe4fed9a6ffffcce5d8bdfddaecf2f2f2"),Kg=af("b3e2cdfdcdaccbd5e8f4cae4e6f5c9fff2aef1e2cccccccc"),ty=af("e41a1c377eb84daf4a984ea3ff7f00ffff33a65628f781bf999999"),ny=af("66c2a5fc8d628da0cbe78ac3a6d854ffd92fe5c494b3b3b3"),ey=af("8dd3c7ffffb3bebadafb807280b1d3fdb462b3de69fccde5d9d9d9bc80bdccebc5ffed6f"),ry=new 
Array(3).concat("d8b365f5f5f55ab4ac","a6611adfc27d80cdc1018571","a6611adfc27df5f5f580cdc1018571","8c510ad8b365f6e8c3c7eae55ab4ac01665e","8c510ad8b365f6e8c3f5f5f5c7eae55ab4ac01665e","8c510abf812ddfc27df6e8c3c7eae580cdc135978f01665e","8c510abf812ddfc27df6e8c3f5f5f5c7eae580cdc135978f01665e","5430058c510abf812ddfc27df6e8c3c7eae580cdc135978f01665e003c30","5430058c510abf812ddfc27df6e8c3f5f5f5c7eae580cdc135978f01665e003c30").map(af),iy=uf(ry),oy=new Array(3).concat("af8dc3f7f7f77fbf7b","7b3294c2a5cfa6dba0008837","7b3294c2a5cff7f7f7a6dba0008837","762a83af8dc3e7d4e8d9f0d37fbf7b1b7837","762a83af8dc3e7d4e8f7f7f7d9f0d37fbf7b1b7837","762a839970abc2a5cfe7d4e8d9f0d3a6dba05aae611b7837","762a839970abc2a5cfe7d4e8f7f7f7d9f0d3a6dba05aae611b7837","40004b762a839970abc2a5cfe7d4e8d9f0d3a6dba05aae611b783700441b","40004b762a839970abc2a5cfe7d4e8f7f7f7d9f0d3a6dba05aae611b783700441b").map(af),ay=uf(oy),uy=new Array(3).concat("e9a3c9f7f7f7a1d76a","d01c8bf1b6dab8e1864dac26","d01c8bf1b6daf7f7f7b8e1864dac26","c51b7de9a3c9fde0efe6f5d0a1d76a4d9221","c51b7de9a3c9fde0eff7f7f7e6f5d0a1d76a4d9221","c51b7dde77aef1b6dafde0efe6f5d0b8e1867fbc414d9221","c51b7dde77aef1b6dafde0eff7f7f7e6f5d0b8e1867fbc414d9221","8e0152c51b7dde77aef1b6dafde0efe6f5d0b8e1867fbc414d9221276419","8e0152c51b7dde77aef1b6dafde0eff7f7f7e6f5d0b8e1867fbc414d9221276419").map(af),fy=uf(uy),cy=new Array(3).concat("998ec3f7f7f7f1a340","5e3c99b2abd2fdb863e66101","5e3c99b2abd2f7f7f7fdb863e66101","542788998ec3d8daebfee0b6f1a340b35806","542788998ec3d8daebf7f7f7fee0b6f1a340b35806","5427888073acb2abd2d8daebfee0b6fdb863e08214b35806","5427888073acb2abd2d8daebf7f7f7fee0b6fdb863e08214b35806","2d004b5427888073acb2abd2d8daebfee0b6fdb863e08214b358067f3b08","2d004b5427888073acb2abd2d8daebf7f7f7fee0b6fdb863e08214b358067f3b08").map(af),sy=uf(cy),ly=new 
Array(3).concat("ef8a62f7f7f767a9cf","ca0020f4a58292c5de0571b0","ca0020f4a582f7f7f792c5de0571b0","b2182bef8a62fddbc7d1e5f067a9cf2166ac","b2182bef8a62fddbc7f7f7f7d1e5f067a9cf2166ac","b2182bd6604df4a582fddbc7d1e5f092c5de4393c32166ac","b2182bd6604df4a582fddbc7f7f7f7d1e5f092c5de4393c32166ac","67001fb2182bd6604df4a582fddbc7d1e5f092c5de4393c32166ac053061","67001fb2182bd6604df4a582fddbc7f7f7f7d1e5f092c5de4393c32166ac053061").map(af),hy=uf(ly),dy=new Array(3).concat("ef8a62ffffff999999","ca0020f4a582bababa404040","ca0020f4a582ffffffbababa404040","b2182bef8a62fddbc7e0e0e09999994d4d4d","b2182bef8a62fddbc7ffffffe0e0e09999994d4d4d","b2182bd6604df4a582fddbc7e0e0e0bababa8787874d4d4d","b2182bd6604df4a582fddbc7ffffffe0e0e0bababa8787874d4d4d","67001fb2182bd6604df4a582fddbc7e0e0e0bababa8787874d4d4d1a1a1a","67001fb2182bd6604df4a582fddbc7ffffffe0e0e0bababa8787874d4d4d1a1a1a").map(af),py=uf(dy),vy=new Array(3).concat("fc8d59ffffbf91bfdb","d7191cfdae61abd9e92c7bb6","d7191cfdae61ffffbfabd9e92c7bb6","d73027fc8d59fee090e0f3f891bfdb4575b4","d73027fc8d59fee090ffffbfe0f3f891bfdb4575b4","d73027f46d43fdae61fee090e0f3f8abd9e974add14575b4","d73027f46d43fdae61fee090ffffbfe0f3f8abd9e974add14575b4","a50026d73027f46d43fdae61fee090e0f3f8abd9e974add14575b4313695","a50026d73027f46d43fdae61fee090ffffbfe0f3f8abd9e974add14575b4313695").map(af),gy=uf(vy),yy=new Array(3).concat("fc8d59ffffbf91cf60","d7191cfdae61a6d96a1a9641","d7191cfdae61ffffbfa6d96a1a9641","d73027fc8d59fee08bd9ef8b91cf601a9850","d73027fc8d59fee08bffffbfd9ef8b91cf601a9850","d73027f46d43fdae61fee08bd9ef8ba6d96a66bd631a9850","d73027f46d43fdae61fee08bffffbfd9ef8ba6d96a66bd631a9850","a50026d73027f46d43fdae61fee08bd9ef8ba6d96a66bd631a9850006837","a50026d73027f46d43fdae61fee08bffffbfd9ef8ba6d96a66bd631a9850006837").map(af),_y=uf(yy),by=new 
Array(3).concat("fc8d59ffffbf99d594","d7191cfdae61abdda42b83ba","d7191cfdae61ffffbfabdda42b83ba","d53e4ffc8d59fee08be6f59899d5943288bd","d53e4ffc8d59fee08bffffbfe6f59899d5943288bd","d53e4ff46d43fdae61fee08be6f598abdda466c2a53288bd","d53e4ff46d43fdae61fee08bffffbfe6f598abdda466c2a53288bd","9e0142d53e4ff46d43fdae61fee08be6f598abdda466c2a53288bd5e4fa2","9e0142d53e4ff46d43fdae61fee08bffffbfe6f598abdda466c2a53288bd5e4fa2").map(af),my=uf(by),xy=new Array(3).concat("e5f5f999d8c92ca25f","edf8fbb2e2e266c2a4238b45","edf8fbb2e2e266c2a42ca25f006d2c","edf8fbccece699d8c966c2a42ca25f006d2c","edf8fbccece699d8c966c2a441ae76238b45005824","f7fcfde5f5f9ccece699d8c966c2a441ae76238b45005824","f7fcfde5f5f9ccece699d8c966c2a441ae76238b45006d2c00441b").map(af),wy=uf(xy),My=new Array(3).concat("e0ecf49ebcda8856a7","edf8fbb3cde38c96c688419d","edf8fbb3cde38c96c68856a7810f7c","edf8fbbfd3e69ebcda8c96c68856a7810f7c","edf8fbbfd3e69ebcda8c96c68c6bb188419d6e016b","f7fcfde0ecf4bfd3e69ebcda8c96c68c6bb188419d6e016b","f7fcfde0ecf4bfd3e69ebcda8c96c68c6bb188419d810f7c4d004b").map(af),Ay=uf(My),Ty=new Array(3).concat("e0f3dba8ddb543a2ca","f0f9e8bae4bc7bccc42b8cbe","f0f9e8bae4bc7bccc443a2ca0868ac","f0f9e8ccebc5a8ddb57bccc443a2ca0868ac","f0f9e8ccebc5a8ddb57bccc44eb3d32b8cbe08589e","f7fcf0e0f3dbccebc5a8ddb57bccc44eb3d32b8cbe08589e","f7fcf0e0f3dbccebc5a8ddb57bccc44eb3d32b8cbe0868ac084081").map(af),Ny=uf(Ty),Sy=new Array(3).concat("fee8c8fdbb84e34a33","fef0d9fdcc8afc8d59d7301f","fef0d9fdcc8afc8d59e34a33b30000","fef0d9fdd49efdbb84fc8d59e34a33b30000","fef0d9fdd49efdbb84fc8d59ef6548d7301f990000","fff7ecfee8c8fdd49efdbb84fc8d59ef6548d7301f990000","fff7ecfee8c8fdd49efdbb84fc8d59ef6548d7301fb300007f0000").map(af),Ey=uf(Sy),ky=new 
Array(3).concat("ece2f0a6bddb1c9099","f6eff7bdc9e167a9cf02818a","f6eff7bdc9e167a9cf1c9099016c59","f6eff7d0d1e6a6bddb67a9cf1c9099016c59","f6eff7d0d1e6a6bddb67a9cf3690c002818a016450","fff7fbece2f0d0d1e6a6bddb67a9cf3690c002818a016450","fff7fbece2f0d0d1e6a6bddb67a9cf3690c002818a016c59014636").map(af),Cy=uf(ky),Py=new Array(3).concat("ece7f2a6bddb2b8cbe","f1eef6bdc9e174a9cf0570b0","f1eef6bdc9e174a9cf2b8cbe045a8d","f1eef6d0d1e6a6bddb74a9cf2b8cbe045a8d","f1eef6d0d1e6a6bddb74a9cf3690c00570b0034e7b","fff7fbece7f2d0d1e6a6bddb74a9cf3690c00570b0034e7b","fff7fbece7f2d0d1e6a6bddb74a9cf3690c00570b0045a8d023858").map(af),zy=uf(Py),Ry=new Array(3).concat("e7e1efc994c7dd1c77","f1eef6d7b5d8df65b0ce1256","f1eef6d7b5d8df65b0dd1c77980043","f1eef6d4b9dac994c7df65b0dd1c77980043","f1eef6d4b9dac994c7df65b0e7298ace125691003f","f7f4f9e7e1efd4b9dac994c7df65b0e7298ace125691003f","f7f4f9e7e1efd4b9dac994c7df65b0e7298ace125698004367001f").map(af),Ly=uf(Ry),Dy=new Array(3).concat("fde0ddfa9fb5c51b8a","feebe2fbb4b9f768a1ae017e","feebe2fbb4b9f768a1c51b8a7a0177","feebe2fcc5c0fa9fb5f768a1c51b8a7a0177","feebe2fcc5c0fa9fb5f768a1dd3497ae017e7a0177","fff7f3fde0ddfcc5c0fa9fb5f768a1dd3497ae017e7a0177","fff7f3fde0ddfcc5c0fa9fb5f768a1dd3497ae017e7a017749006a").map(af),Uy=uf(Dy),qy=new Array(3).concat("edf8b17fcdbb2c7fb8","ffffcca1dab441b6c4225ea8","ffffcca1dab441b6c42c7fb8253494","ffffccc7e9b47fcdbb41b6c42c7fb8253494","ffffccc7e9b47fcdbb41b6c41d91c0225ea80c2c84","ffffd9edf8b1c7e9b47fcdbb41b6c41d91c0225ea80c2c84","ffffd9edf8b1c7e9b47fcdbb41b6c41d91c0225ea8253494081d58").map(af),Oy=uf(qy),Yy=new Array(3).concat("f7fcb9addd8e31a354","ffffccc2e69978c679238443","ffffccc2e69978c67931a354006837","ffffccd9f0a3addd8e78c67931a354006837","ffffccd9f0a3addd8e78c67941ab5d238443005a32","ffffe5f7fcb9d9f0a3addd8e78c67941ab5d238443005a32","ffffe5f7fcb9d9f0a3addd8e78c67941ab5d238443006837004529").map(af),By=uf(Yy),Fy=new 
Array(3).concat("fff7bcfec44fd95f0e","ffffd4fed98efe9929cc4c02","ffffd4fed98efe9929d95f0e993404","ffffd4fee391fec44ffe9929d95f0e993404","ffffd4fee391fec44ffe9929ec7014cc4c028c2d04","ffffe5fff7bcfee391fec44ffe9929ec7014cc4c028c2d04","ffffe5fff7bcfee391fec44ffe9929ec7014cc4c02993404662506").map(af),Iy=uf(Fy),jy=new Array(3).concat("ffeda0feb24cf03b20","ffffb2fecc5cfd8d3ce31a1c","ffffb2fecc5cfd8d3cf03b20bd0026","ffffb2fed976feb24cfd8d3cf03b20bd0026","ffffb2fed976feb24cfd8d3cfc4e2ae31a1cb10026","ffffccffeda0fed976feb24cfd8d3cfc4e2ae31a1cb10026","ffffccffeda0fed976feb24cfd8d3cfc4e2ae31a1cbd0026800026").map(af),Hy=uf(jy),Xy=new Array(3).concat("deebf79ecae13182bd","eff3ffbdd7e76baed62171b5","eff3ffbdd7e76baed63182bd08519c","eff3ffc6dbef9ecae16baed63182bd08519c","eff3ffc6dbef9ecae16baed64292c62171b5084594","f7fbffdeebf7c6dbef9ecae16baed64292c62171b5084594","f7fbffdeebf7c6dbef9ecae16baed64292c62171b508519c08306b").map(af),Gy=uf(Xy),Vy=new Array(3).concat("e5f5e0a1d99b31a354","edf8e9bae4b374c476238b45","edf8e9bae4b374c47631a354006d2c","edf8e9c7e9c0a1d99b74c47631a354006d2c","edf8e9c7e9c0a1d99b74c47641ab5d238b45005a32","f7fcf5e5f5e0c7e9c0a1d99b74c47641ab5d238b45005a32","f7fcf5e5f5e0c7e9c0a1d99b74c47641ab5d238b45006d2c00441b").map(af),$y=uf(Vy),Wy=new Array(3).concat("f0f0f0bdbdbd636363","f7f7f7cccccc969696525252","f7f7f7cccccc969696636363252525","f7f7f7d9d9d9bdbdbd969696636363252525","f7f7f7d9d9d9bdbdbd969696737373525252252525","fffffff0f0f0d9d9d9bdbdbd969696737373525252252525","fffffff0f0f0d9d9d9bdbdbd969696737373525252252525000000").map(af),Zy=uf(Wy),Qy=new Array(3).concat("efedf5bcbddc756bb1","f2f0f7cbc9e29e9ac86a51a3","f2f0f7cbc9e29e9ac8756bb154278f","f2f0f7dadaebbcbddc9e9ac8756bb154278f","f2f0f7dadaebbcbddc9e9ac8807dba6a51a34a1486","fcfbfdefedf5dadaebbcbddc9e9ac8807dba6a51a34a1486","fcfbfdefedf5dadaebbcbddc9e9ac8807dba6a51a354278f3f007d").map(af),Jy=uf(Qy),Ky=new 
Array(3).concat("fee0d2fc9272de2d26","fee5d9fcae91fb6a4acb181d","fee5d9fcae91fb6a4ade2d26a50f15","fee5d9fcbba1fc9272fb6a4ade2d26a50f15","fee5d9fcbba1fc9272fb6a4aef3b2ccb181d99000d","fff5f0fee0d2fcbba1fc9272fb6a4aef3b2ccb181d99000d","fff5f0fee0d2fcbba1fc9272fb6a4aef3b2ccb181da50f1567000d").map(af),t_=uf(Ky),n_=new Array(3).concat("fee6cefdae6be6550d","feeddefdbe85fd8d3cd94701","feeddefdbe85fd8d3ce6550da63603","feeddefdd0a2fdae6bfd8d3ce6550da63603","feeddefdd0a2fdae6bfd8d3cf16913d948018c2d04","fff5ebfee6cefdd0a2fdae6bfd8d3cf16913d948018c2d04","fff5ebfee6cefdd0a2fdae6bfd8d3cf16913d94801a636037f2704").map(af),e_=uf(n_),r_=bl(Wt(300,.5,0),Wt(-240,.5,1)),i_=bl(Wt(-100,.75,.35),Wt(80,1.5,.8)),o_=bl(Wt(260,.75,.35),Wt(80,1.5,.8)),a_=Wt(),u_=ff(af("44015444025645045745055946075a46085c460a5d460b5e470d60470e6147106347116447136548146748166848176948186a481a6c481b6d481c6e481d6f481f70482071482173482374482475482576482677482878482979472a7a472c7a472d7b472e7c472f7d46307e46327e46337f463480453581453781453882443983443a83443b84433d84433e85423f854240864241864142874144874045884046883f47883f48893e49893e4a893e4c8a3d4d8a3d4e8a3c4f8a3c508b3b518b3b528b3a538b3a548c39558c39568c38588c38598c375a8c375b8d365c8d365d8d355e8d355f8d34608d34618d33628d33638d32648e32658e31668e31678e31688e30698e306a8e2f6b8e2f6c8e2e6d8e2e6e8e2e6f8e2d708e2d718e2c718e2c728e2c738e2b748e2b758e2a768e2a778e2a788e29798e297a8e297b8e287c8e287d8e277e8e277f8e27808e26818e26828e26828e25838e25848e25858e24868e24878e23888e23898e238a8d228b8d228c8d228d8d218e8d218f8d21908d21918c20928c20928c20938c1f948c1f958b1f968b1f978b1f988b1f998a1f9a8a1e9b8a1e9c891e9d891f9e891f9f881fa0881fa1881fa1871fa28720a38620a48621a58521a68522a78522a88423a98324aa8325ab8225ac8226ad8127ad8128ae8029af7f2ab07f2cb17e2db27d2eb37c2fb47c31b57b32b67a34b67935b77937b87838b9773aba763bbb753dbc743fbc7340bd7242be7144bf7046c06f48c16e4ac16d4cc26c4ec36b50c46a52c56954c56856c66758c7655ac8645cc8635ec96260ca6063cb5f65cb5e67cc5c69cd5b6ccd5a6ece5870cf5773d05675d05477d1537ad1517cd2507fd34e81d34d84
d44b86d54989d5488bd6468ed64590d74393d74195d84098d83e9bd93c9dd93ba0da39a2da37a5db36a8db34aadc32addc30b0dd2fb2dd2db5de2bb8de29bade28bddf26c0df25c2df23c5e021c8e020cae11fcde11dd0e11cd2e21bd5e21ad8e219dae319dde318dfe318e2e418e5e419e7e419eae51aece51befe51cf1e51df4e61ef6e620f8e621fbe723fde725")),f_=ff(af("00000401000501010601010802010902020b02020d03030f03031204041405041606051806051a07061c08071e0907200a08220b09240c09260d0a290e0b2b100b2d110c2f120d31130d34140e36150e38160f3b180f3d19103f1a10421c10441d11471e114920114b21114e22115024125325125527125829115a2a115c2c115f2d11612f116331116533106734106936106b38106c390f6e3b0f703d0f713f0f72400f74420f75440f764510774710784910784a10794c117a4e117b4f127b51127c52137c54137d56147d57157e59157e5a167e5c167f5d177f5f187f601880621980641a80651a80671b80681c816a1c816b1d816d1d816e1e81701f81721f817320817521817621817822817922827b23827c23827e24828025828125818326818426818627818827818928818b29818c29818e2a81902a81912b81932b80942c80962c80982d80992d809b2e7f9c2e7f9e2f7fa02f7fa1307ea3307ea5317ea6317da8327daa337dab337cad347cae347bb0357bb2357bb3367ab5367ab73779b83779ba3878bc3978bd3977bf3a77c03a76c23b75c43c75c53c74c73d73c83e73ca3e72cc3f71cd4071cf4070d0416fd2426fd3436ed5446dd6456cd8456cd9466bdb476adc4869de4968df4a68e04c67e24d66e34e65e44f64e55064e75263e85362e95462ea5661eb5760ec5860ed5a5fee5b5eef5d5ef05f5ef1605df2625df2645cf3655cf4675cf4695cf56b5cf66c5cf66e5cf7705cf7725cf8745cf8765cf9785df9795df97b5dfa7d5efa7f5efa815ffb835ffb8560fb8761fc8961fc8a62fc8c63fc8e64fc9065fd9266fd9467fd9668fd9869fd9a6afd9b6bfe9d6cfe9f6dfea16efea36ffea571fea772fea973feaa74feac76feae77feb078feb27afeb47bfeb67cfeb77efeb97ffebb81febd82febf84fec185fec287fec488fec68afec88cfeca8dfecc8ffecd90fecf92fed194fed395fed597fed799fed89afdda9cfddc9efddea0fde0a1fde2a3fde3a5fde5a7fde7a9fde9aafdebacfcecaefceeb0fcf0b2fcf2b4fcf4b6fcf6b8fcf7b9fcf9bbfcfbbdfcfdbf")),c_=ff(af("00000401000501010601010802010a02020c02020e03021004031204031405041706041907051b08051d09061f0a07220b07240c08260d08290e092b10092d110a30120a32140b34150b37
160b39180c3c190c3e1b0c411c0c431e0c451f0c48210c4a230c4c240c4f260c51280b53290b552b0b572d0b592f0a5b310a5c320a5e340a5f3609613809623909633b09643d09653e0966400a67420a68440a68450a69470b6a490b6a4a0c6b4c0c6b4d0d6c4f0d6c510e6c520e6d540f6d550f6d57106e59106e5a116e5c126e5d126e5f136e61136e62146e64156e65156e67166e69166e6a176e6c186e6d186e6f196e71196e721a6e741a6e751b6e771c6d781c6d7a1d6d7c1d6d7d1e6d7f1e6c801f6c82206c84206b85216b87216b88226a8a226a8c23698d23698f24699025689225689326679526679727669827669a28659b29649d29649f2a63a02a63a22b62a32c61a52c60a62d60a82e5fa92e5eab2f5ead305dae305cb0315bb1325ab3325ab43359b63458b73557b93556ba3655bc3754bd3853bf3952c03a51c13a50c33b4fc43c4ec63d4dc73e4cc83f4bca404acb4149cc4248ce4347cf4446d04545d24644d34743d44842d54a41d74b3fd84c3ed94d3dda4e3cdb503bdd513ade5238df5337e05536e15635e25734e35933e45a31e55c30e65d2fe75e2ee8602de9612bea632aeb6429eb6628ec6726ed6925ee6a24ef6c23ef6e21f06f20f1711ff1731df2741cf3761bf37819f47918f57b17f57d15f67e14f68013f78212f78410f8850ff8870ef8890cf98b0bf98c0af98e09fa9008fa9207fa9407fb9606fb9706fb9906fb9b06fb9d07fc9f07fca108fca309fca50afca60cfca80dfcaa0ffcac11fcae12fcb014fcb216fcb418fbb61afbb81dfbba1ffbbc21fbbe23fac026fac228fac42afac62df9c72ff9c932f9cb35f8cd37f8cf3af7d13df7d340f6d543f6d746f5d949f5db4cf4dd4ff4df53f4e156f3e35af3e55df2e661f2e865f2ea69f1ec6df1ed71f1ef75f1f179f2f27df2f482f3f586f3f68af4f88ef5f992f6fa96f8fb9af9fc9dfafda1fcffa4")),s_=ff(af("0d088710078813078916078a19068c1b068d1d068e20068f2206902406912605912805922a05932c05942e05952f059631059733059735049837049938049a3a049a3c049b3e049c3f049c41049d43039e44039e46039f48039f4903a04b03a14c02a14e02a25002a25102a35302a35502a45601a45801a45901a55b01a55c01a65e01a66001a66100a76300a76400a76600a76700a86900a86a00a86c00a86e00a86f00a87100a87201a87401a87501a87701a87801a87a02a87b02a87d03a87e03a88004a88104a78305a78405a78606a68707a68808a68a09a58b0aa58d0ba58e0ca48f0da4910ea3920fa39410a29511a19613a19814a099159f9a169f9c179e9d189d9e199da01a9ca11b9ba21d9aa31e9aa51f99a62098a72197a82296aa2395ab2494ac2694ad2793
ae2892b02991b12a90b22b8fb32c8eb42e8db52f8cb6308bb7318ab83289ba3388bb3488bc3587bd3786be3885bf3984c03a83c13b82c23c81c33d80c43e7fc5407ec6417dc7427cc8437bc9447aca457acb4679cc4778cc4977cd4a76ce4b75cf4c74d04d73d14e72d24f71d35171d45270d5536fd5546ed6556dd7566cd8576bd9586ada5a6ada5b69db5c68dc5d67dd5e66de5f65de6164df6263e06363e16462e26561e26660e3685fe4695ee56a5de56b5de66c5ce76e5be76f5ae87059e97158e97257ea7457eb7556eb7655ec7754ed7953ed7a52ee7b51ef7c51ef7e50f07f4ff0804ef1814df1834cf2844bf3854bf3874af48849f48948f58b47f58c46f68d45f68f44f79044f79143f79342f89441f89540f9973ff9983ef99a3efa9b3dfa9c3cfa9e3bfb9f3afba139fba238fca338fca537fca636fca835fca934fdab33fdac33fdae32fdaf31fdb130fdb22ffdb42ffdb52efeb72dfeb82cfeba2cfebb2bfebd2afebe2afec029fdc229fdc328fdc527fdc627fdc827fdca26fdcb26fccd25fcce25fcd025fcd225fbd324fbd524fbd724fad824fada24f9dc24f9dd25f8df25f8e125f7e225f7e425f6e626f6e826f5e926f5eb27f4ed27f3ee27f3f027f2f227f1f426f1f525f0f724f0f921")),l_=Math.abs,h_=Math.atan2,d_=Math.cos,p_=Math.max,v_=Math.min,g_=Math.sin,y_=Math.sqrt,__=1e-12,b_=Math.PI,m_=b_/2,x_=2*b_;yf.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._point=0},lineEnd:function(){(this._line||0!==this._line&&1===this._point)&&this._context.closePath(),this._line=1-this._line},point:function(t,n){switch(t=+t,n=+n,this._point){case 0:this._point=1,this._line?this._context.lineTo(t,n):this._context.moveTo(t,n);break;case 1:this._point=2;default:this._context.lineTo(t,n)}}};var w_=Nf(_f);Tf.prototype={areaStart:function(){this._curve.areaStart()},areaEnd:function(){this._curve.areaEnd()},lineStart:function(){this._curve.lineStart()},lineEnd:function(){this._curve.lineEnd()},point:function(t,n){this._curve.point(n*Math.sin(t),n*-Math.cos(t))}};var M_=Array.prototype.slice,A_={draw:function(t,n){var e=Math.sqrt(n/b_);t.moveTo(e,0),t.arc(0,0,e,0,x_)}},T_={draw:function(t,n){var 
e=Math.sqrt(n/5)/2;t.moveTo(-3*e,-e),t.lineTo(-e,-e),t.lineTo(-e,-3*e),t.lineTo(e,-3*e),t.lineTo(e,-e),t.lineTo(3*e,-e),t.lineTo(3*e,e),t.lineTo(e,e),t.lineTo(e,3*e),t.lineTo(-e,3*e),t.lineTo(-e,e),t.lineTo(-3*e,e),t.closePath()}},N_=Math.sqrt(1/3),S_=2*N_,E_={draw:function(t,n){var e=Math.sqrt(n/S_),r=e*N_;t.moveTo(0,-e),t.lineTo(r,0),t.lineTo(0,e),t.lineTo(-r,0),t.closePath()}},k_=Math.sin(b_/10)/Math.sin(7*b_/10),C_=Math.sin(x_/10)*k_,P_=-Math.cos(x_/10)*k_,z_={draw:function(t,n){var e=Math.sqrt(.8908130915292852*n),r=C_*e,i=P_*e;t.moveTo(0,-e),t.lineTo(r,i);for(var o=1;o<5;++o){var a=x_*o/5,u=Math.cos(a),f=Math.sin(a);t.lineTo(f*e,-u*e),t.lineTo(u*r-f*i,f*r+u*i)}t.closePath()}},R_={draw:function(t,n){var e=Math.sqrt(n),r=-e/2;t.rect(r,r,e,e)}},L_=Math.sqrt(3),D_={draw:function(t,n){var e=-Math.sqrt(n/(3*L_));t.moveTo(0,2*e),t.lineTo(-L_*e,-e),t.lineTo(L_*e,-e),t.closePath()}},U_=Math.sqrt(3)/2,q_=1/Math.sqrt(12),O_=3*(q_/2+1),Y_={draw:function(t,n){var e=Math.sqrt(n/O_),r=e/2,i=e*q_,o=r,a=e*q_+e,u=-o,f=a;t.moveTo(r,i),t.lineTo(o,a),t.lineTo(u,f),t.lineTo(-.5*r-U_*i,U_*r+-.5*i),t.lineTo(-.5*o-U_*a,U_*o+-.5*a),t.lineTo(-.5*u-U_*f,U_*u+-.5*f),t.lineTo(-.5*r+U_*i,-.5*i-U_*r),t.lineTo(-.5*o+U_*a,-.5*a-U_*o),t.lineTo(-.5*u+U_*f,-.5*f-U_*u),t.closePath()}},B_=[A_,T_,E_,R_,z_,D_,Y_];Yf.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x0=this._x1=this._y0=this._y1=NaN,this._point=0},lineEnd:function(){switch(this._point){case 3:Of(this,this._x1,this._y1);case 2:this._context.lineTo(this._x1,this._y1)}(this._line||0!==this._line&&1===this._point)&&this._context.closePath(),this._line=1-this._line},point:function(t,n){switch(t=+t,n=+n,this._point){case 0:this._point=1,this._line?this._context.lineTo(t,n):this._context.moveTo(t,n);break;case 1:this._point=2;break;case 
2:this._point=3,this._context.lineTo((5*this._x0+this._x1)/6,(5*this._y0+this._y1)/6);default:Of(this,t,n)}this._x0=this._x1,this._x1=t,this._y0=this._y1,this._y1=n}},Bf.prototype={areaStart:qf,areaEnd:qf,lineStart:function(){this._x0=this._x1=this._x2=this._x3=this._x4=this._y0=this._y1=this._y2=this._y3=this._y4=NaN,this._point=0},lineEnd:function(){switch(this._point){case 1:this._context.moveTo(this._x2,this._y2),this._context.closePath();break;case 2:this._context.moveTo((this._x2+2*this._x3)/3,(this._y2+2*this._y3)/3),this._context.lineTo((this._x3+2*this._x2)/3,(this._y3+2*this._y2)/3),this._context.closePath();break;case 3:this.point(this._x2,this._y2),this.point(this._x3,this._y3),this.point(this._x4,this._y4)}},point:function(t,n){switch(t=+t,n=+n,this._point){case 0:this._point=1,this._x2=t,this._y2=n;break;case 1:this._point=2,this._x3=t,this._y3=n;break;case 2:this._point=3,this._x4=t,this._y4=n,this._context.moveTo((this._x0+4*this._x1+t)/6,(this._y0+4*this._y1+n)/6);break;default:Of(this,t,n)}this._x0=this._x1,this._x1=t,this._y0=this._y1,this._y1=n}},Ff.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x0=this._x1=this._y0=this._y1=NaN,this._point=0},lineEnd:function(){(this._line||0!==this._line&&3===this._point)&&this._context.closePath(),this._line=1-this._line},point:function(t,n){switch(t=+t,n=+n,this._point){case 0:this._point=1;break;case 1:this._point=2;break;case 2:this._point=3;var e=(this._x0+4*this._x1+t)/6,r=(this._y0+4*this._y1+n)/6;this._line?this._context.lineTo(e,r):this._context.moveTo(e,r);break;case 3:this._point=4;default:Of(this,t,n)}this._x0=this._x1,this._x1=t,this._y0=this._y1,this._y1=n}},If.prototype={lineStart:function(){this._x=[],this._y=[],this._basis.lineStart()},lineEnd:function(){var t=this._x,n=this._y,e=t.length-1;if(e>0)for(var 
r,i=t[0],o=n[0],a=t[e]-i,u=n[e]-o,f=-1;++f<=e;)r=f/e,this._basis.point(this._beta*t[f]+(1-this._beta)*(i+r*a),this._beta*n[f]+(1-this._beta)*(o+r*u));this._x=this._y=null,this._basis.lineEnd()},point:function(t,n){this._x.push(+t),this._y.push(+n)}};var F_=function t(n){function e(t){return 1===n?new Yf(t):new If(t,n)}return e.beta=function(n){return t(+n)},e}(.85);Hf.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x0=this._x1=this._x2=this._y0=this._y1=this._y2=NaN,this._point=0},lineEnd:function(){switch(this._point){case 2:this._context.lineTo(this._x2,this._y2);break;case 3:jf(this,this._x1,this._y1)}(this._line||0!==this._line&&1===this._point)&&this._context.closePath(),this._line=1-this._line},point:function(t,n){switch(t=+t,n=+n,this._point){case 0:this._point=1,this._line?this._context.lineTo(t,n):this._context.moveTo(t,n);break;case 1:this._point=2,this._x1=t,this._y1=n;break;case 2:this._point=3;default:jf(this,t,n)}this._x0=this._x1,this._x1=this._x2,this._x2=t,this._y0=this._y1,this._y1=this._y2,this._y2=n}};var I_=function t(n){function e(t){return new Hf(t,n)}return e.tension=function(n){return t(+n)},e}(0);Xf.prototype={areaStart:qf,areaEnd:qf,lineStart:function(){this._x0=this._x1=this._x2=this._x3=this._x4=this._x5=this._y0=this._y1=this._y2=this._y3=this._y4=this._y5=NaN,this._point=0},lineEnd:function(){switch(this._point){case 1:this._context.moveTo(this._x3,this._y3),this._context.closePath();break;case 2:this._context.lineTo(this._x3,this._y3),this._context.closePath();break;case 3:this.point(this._x3,this._y3),this.point(this._x4,this._y4),this.point(this._x5,this._y5)}},point:function(t,n){switch(t=+t,n=+n,this._point){case 0:this._point=1,this._x3=t,this._y3=n;break;case 1:this._point=2,this._context.moveTo(this._x4=t,this._y4=n);break;case 
2:this._point=3,this._x5=t,this._y5=n;break;default:jf(this,t,n)}this._x0=this._x1,this._x1=this._x2,this._x2=t,this._y0=this._y1,this._y1=this._y2,this._y2=n}};var j_=function t(n){function e(t){return new Xf(t,n)}return e.tension=function(n){return t(+n)},e}(0);Gf.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x0=this._x1=this._x2=this._y0=this._y1=this._y2=NaN,this._point=0},lineEnd:function(){(this._line||0!==this._line&&3===this._point)&&this._context.closePath(),this._line=1-this._line},point:function(t,n){switch(t=+t,n=+n,this._point){case 0:this._point=1;break;case 1:this._point=2;break;case 2:this._point=3,this._line?this._context.lineTo(this._x2,this._y2):this._context.moveTo(this._x2,this._y2);break;case 3:this._point=4;default:jf(this,t,n)}this._x0=this._x1,this._x1=this._x2,this._x2=t,this._y0=this._y1,this._y1=this._y2,this._y2=n}};var H_=function t(n){function e(t){return new Gf(t,n)}return e.tension=function(n){return t(+n)},e}(0);$f.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x0=this._x1=this._x2=this._y0=this._y1=this._y2=NaN,this._l01_a=this._l12_a=this._l23_a=this._l01_2a=this._l12_2a=this._l23_2a=this._point=0},lineEnd:function(){switch(this._point){case 2:this._context.lineTo(this._x2,this._y2);break;case 3:this.point(this._x2,this._y2)}(this._line||0!==this._line&&1===this._point)&&this._context.closePath(),this._line=1-this._line},point:function(t,n){if(t=+t,n=+n,this._point){var e=this._x2-t,r=this._y2-n;this._l23_a=Math.sqrt(this._l23_2a=Math.pow(e*e+r*r,this._alpha))}switch(this._point){case 0:this._point=1,this._line?this._context.lineTo(t,n):this._context.moveTo(t,n);break;case 1:this._point=2;break;case 
2:this._point=3;default:Vf(this,t,n)}this._l01_a=this._l12_a,this._l12_a=this._l23_a,this._l01_2a=this._l12_2a,this._l12_2a=this._l23_2a,this._x0=this._x1,this._x1=this._x2,this._x2=t,this._y0=this._y1,this._y1=this._y2,this._y2=n}};var X_=function t(n){function e(t){return n?new $f(t,n):new Hf(t,0)}return e.alpha=function(n){return t(+n)},e}(.5);Wf.prototype={areaStart:qf,areaEnd:qf,lineStart:function(){this._x0=this._x1=this._x2=this._x3=this._x4=this._x5=this._y0=this._y1=this._y2=this._y3=this._y4=this._y5=NaN,this._l01_a=this._l12_a=this._l23_a=this._l01_2a=this._l12_2a=this._l23_2a=this._point=0},lineEnd:function(){switch(this._point){case 1:this._context.moveTo(this._x3,this._y3),this._context.closePath();break;case 2:this._context.lineTo(this._x3,this._y3),this._context.closePath();break;case 3:this.point(this._x3,this._y3),this.point(this._x4,this._y4),this.point(this._x5,this._y5)}},point:function(t,n){if(t=+t,n=+n,this._point){var e=this._x2-t,r=this._y2-n;this._l23_a=Math.sqrt(this._l23_2a=Math.pow(e*e+r*r,this._alpha))}switch(this._point){case 0:this._point=1,this._x3=t,this._y3=n;break;case 1:this._point=2,this._context.moveTo(this._x4=t,this._y4=n);break;case 2:this._point=3,this._x5=t,this._y5=n;break;default:Vf(this,t,n)}this._l01_a=this._l12_a,this._l12_a=this._l23_a,this._l01_2a=this._l12_2a,this._l12_2a=this._l23_2a,this._x0=this._x1,this._x1=this._x2,this._x2=t,this._y0=this._y1,this._y1=this._y2,this._y2=n}};var G_=function t(n){function e(t){return n?new Wf(t,n):new Xf(t,0)}return e.alpha=function(n){return t(+n)},e}(.5);Zf.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x0=this._x1=this._x2=this._y0=this._y1=this._y2=NaN,this._l01_a=this._l12_a=this._l23_a=this._l01_2a=this._l12_2a=this._l23_2a=this._point=0},lineEnd:function(){(this._line||0!==this._line&&3===this._point)&&this._context.closePath(),this._line=1-this._line},point:function(t,n){if(t=+t,n=+n,this._point){var 
e=this._x2-t,r=this._y2-n;this._l23_a=Math.sqrt(this._l23_2a=Math.pow(e*e+r*r,this._alpha))}switch(this._point){case 0:this._point=1;break;case 1:this._point=2;break;case 2:this._point=3,this._line?this._context.lineTo(this._x2,this._y2):this._context.moveTo(this._x2,this._y2);break;case 3:this._point=4;default:Vf(this,t,n)}this._l01_a=this._l12_a,this._l12_a=this._l23_a,this._l01_2a=this._l12_2a,this._l12_2a=this._l23_2a,this._x0=this._x1,this._x1=this._x2,this._x2=t,this._y0=this._y1,this._y1=this._y2,this._y2=n}};var V_=function t(n){function e(t){return n?new Zf(t,n):new Gf(t,0)}return e.alpha=function(n){return t(+n)},e}(.5);Qf.prototype={areaStart:qf,areaEnd:qf,lineStart:function(){this._point=0},lineEnd:function(){this._point&&this._context.closePath()},point:function(t,n){t=+t,n=+n,this._point?this._context.lineTo(t,n):(this._point=1,this._context.moveTo(t,n))}},ec.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x0=this._x1=this._y0=this._y1=this._t0=NaN,this._point=0},lineEnd:function(){switch(this._point){case 2:this._context.lineTo(this._x1,this._y1);break;case 3:nc(this,this._t0,tc(this,this._t0))}(this._line||0!==this._line&&1===this._point)&&this._context.closePath(),this._line=1-this._line},point:function(t,n){var e=NaN;if(t=+t,n=+n,t!==this._x1||n!==this._y1){switch(this._point){case 0:this._point=1,this._line?this._context.lineTo(t,n):this._context.moveTo(t,n);break;case 1:this._point=2;break;case 
2:this._point=3,nc(this,tc(this,e=Kf(this,t,n)),e);break;default:nc(this,this._t0,e=Kf(this,t,n))}this._x0=this._x1,this._x1=t,this._y0=this._y1,this._y1=n,this._t0=e}}},(rc.prototype=Object.create(ec.prototype)).point=function(t,n){ec.prototype.point.call(this,n,t)},ic.prototype={moveTo:function(t,n){this._context.moveTo(n,t)},closePath:function(){this._context.closePath()},lineTo:function(t,n){this._context.lineTo(n,t)},bezierCurveTo:function(t,n,e,r,i,o){this._context.bezierCurveTo(n,t,r,e,o,i)}},oc.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x=[],this._y=[]},lineEnd:function(){var t=this._x,n=this._y,e=t.length;if(e)if(this._line?this._context.lineTo(t[0],n[0]):this._context.moveTo(t[0],n[0]),2===e)this._context.lineTo(t[1],n[1]);else for(var r=ac(t),i=ac(n),o=0,a=1;a=0&&(this._t=1-this._t,this._line=1-this._line)},point:function(t,n){switch(t=+t,n=+n,this._point){case 0:this._point=1,this._line?this._context.lineTo(t,n):this._context.moveTo(t,n);break;case 1:this._point=2;default:if(this._t<=0)this._context.lineTo(this._x,n),this._context.lineTo(t,n);else{var e=this._x*(1-this._t)+t*this._t;this._context.lineTo(e,this._y),this._context.lineTo(e,n)}}this._x=t,this._y=n}},gc.prototype={constructor:gc,insert:function(t,n){var e,r,i;if(t){if(n.P=t,n.N=t.N,t.N&&(t.N.P=n),t.N=n,t.R){for(t=t.R;t.L;)t=t.L;t.L=n}else t.R=n;e=t}else this._?(t=mc(this._),n.P=null,n.N=t,t.P=t.L=n,e=t):(n.P=n.N=null,this._=n,e=null);for(n.L=n.R=null,n.U=e,n.C=!0,t=n;e&&e.C;)e===(r=e.U).L?(i=r.R)&&i.C?(e.C=i.C=!1,r.C=!0,t=r):(t===e.R&&(_c(this,e),e=(t=e).U),e.C=!1,r.C=!0,bc(this,r)):(i=r.L)&&i.C?(e.C=i.C=!1,r.C=!0,t=r):(t===e.L&&(bc(this,e),e=(t=e).U),e.C=!1,r.C=!0,_c(this,r)),e=t.U;this._.C=!1},remove:function(t){t.N&&(t.N.P=t.P),t.P&&(t.P.N=t.N),t.N=t.P=null;var 
n,e,r,i=t.U,o=t.L,a=t.R;if(e=o?a?mc(a):o:a,i?i.L===t?i.L=e:i.R=e:this._=e,o&&a?(r=e.C,e.C=t.C,e.L=o,o.U=e,e!==a?(i=e.U,e.U=t.U,t=e.R,i.L=t,e.R=a,a.U=e):(e.U=i,i=e,t=e.R)):(r=t.C,t=e),t&&(t.U=i),!r)if(t&&t.C)t.C=!1;else{do{if(t===this._)break;if(t===i.L){if((n=i.R).C&&(n.C=!1,i.C=!0,_c(this,i),n=i.R),n.L&&n.L.C||n.R&&n.R.C){n.R&&n.R.C||(n.L.C=!1,n.C=!0,bc(this,n),n=i.R),n.C=i.C,i.C=n.R.C=!1,_c(this,i),t=this._;break}}else if((n=i.L).C&&(n.C=!1,i.C=!0,bc(this,i),n=i.L),n.L&&n.L.C||n.R&&n.R.C){n.L&&n.L.C||(n.R.C=!1,n.C=!0,_c(this,n),n=i.L),n.C=i.C,i.C=n.L.C=!1,bc(this,i),t=this._;break}n.C=!0,t=i,i=i.U}while(!t.C);t&&(t.C=!1)}}};var $_,W_,Z_,Q_,J_,K_=[],tb=[],nb=1e-6,eb=1e-12;Oc.prototype={constructor:Oc,polygons:function(){var t=this.edges;return this.cells.map(function(n){var e=n.halfedges.map(function(e){return Sc(n,t[e])});return e.data=n.site.data,e})},triangles:function(){var t=[],n=this.edges;return this.cells.forEach(function(e,r){if(o=(i=e.halfedges).length)for(var i,o,a,u=e.site,f=-1,c=n[i[o-1]],s=c.left===u?c.right:c.left;++f=u)return null;var f=t-i.site[0],c=n-i.site[1],s=f*f+c*c;do{i=o.cells[r=a],a=null,i.halfedges.forEach(function(e){var r=o.edges[e],u=r.left;if(u!==i.site&&u||(u=r.right)){var f=t-u[0],c=n-u[1],l=f*f+c*c;lt?1:n>=t?0:NaN},t.deviation=a,t.extent=u,t.histogram=function(){function t(t){var i,o,a=t.length,u=new Array(a);for(i=0;il;)h.pop(),--p;var v,g=new Array(p+1);for(i=0;i<=p;++i)(v=g[i]=[]).x0=i>0?h[i-1]:c,v.x1=i=o.length)return null!=e&&n.sort(e),null!=r?r(n):n;for(var f,c,s,l=-1,h=n.length,d=o[i++],p=le(),v=a();++lo.length)return t;var i,u=a[e-1];return null!=r&&e>=o.length?i=t.entries():(i=[],t.each(function(t,r){i.push({key:r,values:n(t,e)})})),null!=u?i.sort(function(t,n){return u(t.key,n.key)}):i}var e,r,i,o=[],a=[];return i={object:function(n){return t(n,0,he,de)},map:function(n){return t(n,0,pe,ve)},entries:function(e){return n(t(e,0,pe,ve),0)},key:function(t){return o.push(t),i},sortKeys:function(t){return 
a[o.length-1]=t,i},sortValues:function(t){return e=t,i},rollup:function(t){return r=t,i}}},t.set=ye,t.map=le,t.keys=function(t){var n=[];for(var e in t)n.push(e);return n},t.values=function(t){var n=[];for(var e in t)n.push(t[e]);return n},t.entries=function(t){var n=[];for(var e in t)n.push({key:e,value:t[e]});return n},t.color=kt,t.rgb=Rt,t.hsl=Ut,t.lab=Bt,t.hcl=Vt,t.lch=function(t,n,e,r){return 1===arguments.length?Gt(t):new $t(e,n,t,null==r?1:r)},t.gray=function(t,n){return new Ft(t,0,0,null==n?1:n)},t.cubehelix=Wt,t.contours=we,t.contourDensity=function(){function t(t){var e=new Float32Array(v*y),r=new Float32Array(v*y);t.forEach(function(t,n,r){var i=a(t,n,r)+p>>h,o=u(t,n,r)+p>>h;i>=0&&i=0&&o>h),Ae({width:v,height:y,data:r},{width:v,height:y,data:e},l>>h),Me({width:v,height:y,data:e},{width:v,height:y,data:r},l>>h),Ae({width:v,height:y,data:r},{width:v,height:y,data:e},l>>h),Me({width:v,height:y,data:e},{width:v,height:y,data:r},l>>h),Ae({width:v,height:y,data:r},{width:v,height:y,data:e},l>>h);var i=_(e);if(!Array.isArray(i)){var o=g(e);i=d(0,o,i),(i=s(0,Math.floor(o/i)*i,i)).shift()}return we().thresholds(i).size([v,y])(e).map(n)}function n(t){return t.value*=Math.pow(2,-2*h),t.coordinates.forEach(e),t}function e(t){t.forEach(r)}function r(t){t.forEach(i)}function i(t){t[0]=t[0]*Math.pow(2,h)-p,t[1]=t[1]*Math.pow(2,h)-p}function o(){return p=3*l,v=f+2*p>>h,y=c+2*p>>h,t}var a=Te,u=Ne,f=960,c=500,l=20,h=2,p=3*l,v=f+2*p>>h,y=c+2*p>>h,_=be(20);return t.x=function(n){return arguments.length?(a="function"==typeof n?n:be(+n),t):a},t.y=function(n){return arguments.length?(u="function"==typeof n?n:be(+n),t):u},t.size=function(t){if(!arguments.length)return[f,c];var n=Math.ceil(t[0]),e=Math.ceil(t[1]);if(!(n>=0||n>=0))throw new Error("invalid size");return f=n,c=e,o()},t.cellSize=function(t){if(!arguments.length)return 1<=1))throw new Error("invalid cell size");return h=Math.floor(Math.log(t)/Math.LN2),o()},t.thresholds=function(n){return 
arguments.length?(_="function"==typeof n?n:Array.isArray(n)?be(Lh.call(n)):be(n),t):_},t.bandwidth=function(t){if(!arguments.length)return Math.sqrt(l*(l+1));if(!((t=+t)>=0))throw new Error("invalid bandwidth");return l=Math.round((Math.sqrt(4*t*t+1)-1)/2),o()},t},t.dispatch=N,t.drag=function(){function n(t){t.on("mousedown.drag",e).filter(g).on("touchstart.drag",o).on("touchmove.drag",a).on("touchend.drag touchcancel.drag",u).style("touch-action","none").style("-webkit-tap-highlight-color","rgba(0,0,0,0)")}function e(){if(!h&&d.apply(this,arguments)){var n=f("mouse",p.apply(this,arguments),pt,this,arguments);n&&(ct(t.event.view).on("mousemove.drag",r,!0).on("mouseup.drag",i,!0),_t(t.event.view),gt(),l=!1,c=t.event.clientX,s=t.event.clientY,n("start"))}}function r(){if(yt(),!l){var n=t.event.clientX-c,e=t.event.clientY-s;l=n*n+e*e>m}y.mouse("drag")}function i(){ct(t.event.view).on("mousemove.drag mouseup.drag",null),bt(t.event.view,l),yt(),y.mouse("end")}function o(){if(d.apply(this,arguments)){var n,e,r=t.event.changedTouches,i=p.apply(this,arguments),o=r.length;for(n=0;nf+d||ic+d||or.index){var p=f-u.x-u.vx,v=c-u.y-u.vy,g=p*p+v*v;gt.r&&(t.r=t[n].r)}function r(){if(i){var n,e,r=i.length;for(o=new Array(r),n=0;n=s)){(t.data!==o||t.next)&&(0===i&&(i=qe(),d+=i*i),0===f&&(f=qe(),d+=f*f),d1?(null==n?l.remove(t):l.set(t,i(n)),o):l.get(t)},find:function(n,e,r){var i,o,a,u,f,c=0,s=t.length;for(null==r?r=1/0:r*=r,c=0;c1?(d.on(t,n),o):d.on(t)}}},t.forceX=function(t){function n(t){for(var n,e=0,a=r.length;eqr(r[0],r[1])&&(r[1]=i[1]),qr(i[0],r[1])>qr(r[0],r[1])&&(r[0]=i[0])):o.push(r=i);for(a=-1/0,n=0,r=o[e=o.length-1];n<=e;r=i,++n)i=o[n],(u=qr(r[1],i[0]))>a&&(a=u,vd=i[0],yd=r[1])}return Md=Ad=null,vd===1/0||gd===1/0?[[NaN,NaN],[NaN,NaN]]:[[vd,gd],[yd,_d]]},t.geoCentroid=function(t){Td=Nd=Sd=Ed=kd=Cd=Pd=zd=Rd=Ld=Dd=0,_r(t,vp);var n=Rd,e=Ld,r=Dd,i=n*n+e*e+r*r;return 
i=.12&&i<.234&&r>=-.425&&r<-.214?c:i>=.166&&i<.234&&r>=-.214&&r<-.115?s:f).invert(t)},t.stream=function(t){return e&&r===t?e:e=function(t){var n=t.length;return{point:function(e,r){for(var i=-1;++i2?t[2]+90:90]):(t=e(),[t[0],t[1],t[2]-90])},e([0,0,90]).scale(159.155)},t.geoTransverseMercatorRaw=To,t.geoRotation=ri,t.geoStream=_r,t.geoTransform=function(t){return{stream:Zi(t)}},t.cluster=function(){function t(t){var o,a=0;t.eachAfter(function(t){var e=t.children;e?(t.x=function(t){return t.reduce(So,0)/t.length}(e),t.y=function(t){return 1+t.reduce(Eo,0)}(e)):(t.x=o?a+=n(t,o):0,t.y=0,o=t)});var u=function(t){for(var n;n=t.children;)t=n[0];return t}(t),f=function(t){for(var n;n=t.children;)t=n[n.length-1];return t}(t),c=u.x-n(u,f)/2,s=f.x+n(f,u)/2;return t.eachAfter(i?function(n){n.x=(n.x-t.x)*e,n.y=(t.y-n.y)*r}:function(n){n.x=(n.x-c)/(s-c)*e,n.y=(1-(t.y?n.y/t.y:1))*r})}var n=No,e=1,r=1,i=!1;return t.separation=function(e){return arguments.length?(n=e,t):n},t.size=function(n){return arguments.length?(i=!1,e=+n[0],r=+n[1],t):i?null:[e,r]},t.nodeSize=function(n){return arguments.length?(i=!0,e=+n[0],r=+n[1],t):i?[e,r]:null},t},t.hierarchy=Co,t.pack=function(){function t(t){return t.x=e/2,t.y=r/2,n?t.eachBefore(Zo(n)).eachAfter(Qo(i,.5)).eachBefore(Jo(1)):t.eachBefore(Zo(Wo)).eachAfter(Qo(Vo,1)).eachAfter(Qo(i,t.r/Math.min(e,r))).eachBefore(Jo(Math.min(e,r)/(2*t.r))),t}var n=null,e=1,r=1,i=Vo;return t.radius=function(e){return arguments.length?(n=function(t){return null==t?null:Go(t)}(e),t):n},t.size=function(n){return arguments.length?(e=+n[0],r=+n[1],t):[e,r]},t.padding=function(n){return arguments.length?(i="function"==typeof n?n:$o(+n),t):i},t},t.packSiblings=function(t){return Xo(t),t},t.packEnclose=Do,t.partition=function(){function t(t){var o=t.height+1;return t.x0=t.y0=r,t.x1=n,t.y1=e/o,t.eachBefore(function(t,n){return function(e){e.children&&ta(e,e.x0,t*(e.depth+1)/n,e.x1,t*(e.depth+2)/n);var i=e.x0,o=e.y0,a=e.x1-r,u=e.y1-r;a0)throw new Error("cycle");return 
o}var n=na,e=ea;return t.id=function(e){return arguments.length?(n=Go(e),t):n},t.parentId=function(n){return arguments.length?(e=Go(n),t):e},t},t.tree=function(){function t(t){var f=function(t){for(var n,e,r,i,o,a=new fa(t,0),u=[a];n=u.pop();)if(r=n._.children)for(n.children=new Array(o=r.length),i=o-1;i>=0;--i)u.push(e=n.children[i]=new fa(r[i],i)),e.parent=n;return(a.parent=new fa(null,0)).children=[a],a}(t);if(f.eachAfter(n),f.parent.m=-f.z,f.eachBefore(e),u)t.eachBefore(r);else{var c=t,s=t,l=t;t.eachBefore(function(t){t.xs.x&&(s=t),t.depth>l.depth&&(l=t)});var h=c===s?1:i(c,s)/2,d=h-c.x,p=o/(s.x+h+d),v=a/(l.depth||1);t.eachBefore(function(t){t.x=(t.x+d)*p,t.y=t.depth*v})}return t}function n(t){var n=t.children,e=t.parent.children,r=t.i?e[t.i-1]:null;if(n){(function(t){for(var n,e=0,r=0,i=t.children,o=i.length;--o>=0;)(n=i[o]).z+=e,n.m+=e,e+=n.s+(r+=n.c)})(t);var o=(n[0].z+n[n.length-1].z)/2;r?(t.z=r.z+i(t._,r._),t.m=t.z-o):t.z=o}else r&&(t.z=r.z+i(t._,r._));t.parent.A=function(t,n,e){if(n){for(var r,o=t,a=t,u=n,f=o.parent.children[0],c=o.m,s=a.m,l=u.m,h=f.m;u=oa(u),o=ia(o),u&&o;)f=ia(f),(a=oa(a)).a=t,(r=u.z+l-o.z-c+i(u._,o._))>0&&(aa(ua(u,t,e),t,r),c+=r,s+=r),l+=u.m,c+=o.m,h+=f.m,s+=a.m;u&&!oa(a)&&(a.t=u,a.m+=l-s),o&&!ia(f)&&(f.t=o,f.m+=c-h,e=t)}return e}(t,r,t.parent.A||e[0])}function e(t){t._.x=t.z+t.parent.m,t.m+=t.parent.m}function r(t){t.x*=o,t.y=t.depth*a}var i=ra,o=1,a=1,u=null;return t.separation=function(n){return arguments.length?(i=n,t):i},t.size=function(n){return arguments.length?(u=!1,o=+n[0],a=+n[1],t):u?null:[o,a]},t.nodeSize=function(n){return arguments.length?(u=!0,o=+n[0],a=+n[1],t):u?[o,a]:null},t},t.treemap=function(){function t(t){return t.x0=t.y0=0,t.x1=i,t.y1=o,t.eachBefore(n),a=[0],r&&t.eachBefore(Ko),t}function n(t){var n=a[t.depth],r=t.x0+n,i=t.y0+n,o=t.x1-n,h=t.y1-n;o=n-1){var c=f[t];return c.x0=r,c.y0=i,c.x1=a,void(c.y1=u)}for(var l=s[t],h=e/2+l,d=t+1,p=n-1;d>>1;s[v]u-i){var _=(r*y+a*g)/e;o(t,d,g,r,i,_,u),o(d,n,y,_,i,a,u)}else{var 
b=(i*y+u*g)/e;o(t,d,g,r,i,a,b),o(d,n,y,r,b,a,u)}}var a,u,f=t.children,c=f.length,s=new Array(c+1);for(s[0]=u=a=0;a=0;--n)c.push(t[r[o[n]][2]]);for(n=+u;nu!=c>u&&a<(f-e)*(u-r)/(c-r)+e&&(s=!s),f=e,c=r;return s},t.polygonLength=function(t){for(var n,e,r=-1,i=t.length,o=t[i-1],a=o[0],u=o[1],f=0;++r1)&&(t-=Math.floor(t));var n=Math.abs(t-.5);return a_.h=360*t-100,a_.s=1.5-1.5*n,a_.l=.8-.9*n,a_+""},t.interpolateWarm=i_,t.interpolateCool=o_,t.interpolateViridis=u_,t.interpolateMagma=f_,t.interpolateInferno=c_,t.interpolatePlasma=s_,t.create=function(t){return ct(C(t).call(document.documentElement))},t.creator=C,t.local=st,t.matcher=ys,t.mouse=pt,t.namespace=k,t.namespaces=ds,t.clientPoint=dt,t.select=ct,t.selectAll=function(t){return"string"==typeof t?new ut([document.querySelectorAll(t)],[document.documentElement]):new ut([null==t?[]:t],ms)},t.selection=ft,t.selector=z,t.selectorAll=L,t.style=F,t.touch=vt,t.touches=function(t,n){null==n&&(n=ht().touches);for(var e=0,r=n?n.length:0,i=new Array(r);eh;if(f||(f=t=ie()),l__)if(p>x_-__)f.moveTo(l*d_(h),l*g_(h)),f.arc(0,0,l,h,d,!v),s>__&&(f.moveTo(s*d_(d),s*g_(d)),f.arc(0,0,s,d,h,v));else{var g,y,_=h,b=d,m=h,x=d,w=p,M=p,A=u.apply(this,arguments)/2,T=A>__&&(i?+i.apply(this,arguments):y_(s*s+l*l)),N=v_(l_(l-s)/2,+r.apply(this,arguments)),S=N,E=N;if(T>__){var k=sf(T/s*g_(A)),C=sf(T/l*g_(A));(w-=2*k)>__?(k*=v?1:-1,m+=k,x-=k):(w=0,m=x=(h+d)/2),(M-=2*C)>__?(C*=v?1:-1,_+=C,b-=C):(M=0,_=b=(h+d)/2)}var P=l*d_(_),z=l*g_(_),R=s*d_(x),L=s*g_(x);if(N>__){var D=l*d_(b),U=l*g_(b),q=s*d_(m),O=s*g_(m);if(p__?function(t,n,e,r,i,o,a,u){var f=e-t,c=r-n,s=a-i,l=u-o,h=(s*(n-o)-l*(t-i))/(l*f-s*c);return[t+h*f,n+h*c]}(P,z,q,O,D,U,R,L):[R,L],B=P-Y[0],F=z-Y[1],I=D-Y[0],j=U-Y[1],H=1/g_(function(t){return 
t>1?0:t<-1?b_:Math.acos(t)}((B*I+F*j)/(y_(B*B+F*F)*y_(I*I+j*j)))/2),X=y_(Y[0]*Y[0]+Y[1]*Y[1]);S=v_(N,(s-X)/(H-1)),E=v_(N,(l-X)/(H+1))}}M>__?E>__?(g=gf(q,O,P,z,l,E,v),y=gf(D,U,R,L,l,E,v),f.moveTo(g.cx+g.x01,g.cy+g.y01),E__&&w>__?S>__?(g=gf(R,L,D,U,s,-S,v),y=gf(P,z,q,O,s,-S,v),f.lineTo(g.cx+g.x01,g.cy+g.y01),S0&&(d+=l);for(null!=e?p.sort(function(t,n){return e(v[t],v[n])}):null!=r&&p.sort(function(n,e){return r(t[n],t[e])}),u=0,c=d?(y-h*b)/d:0;u0?l*c:0)+b,v[f]={data:t[f],index:u,value:l,startAngle:g,endAngle:s,padAngle:_};return v}var n=Af,e=Mf,r=null,i=cf(0),o=cf(x_),a=cf(0);return t.value=function(e){return arguments.length?(n="function"==typeof e?e:cf(+e),t):n},t.sortValues=function(n){return arguments.length?(e=n,r=null,t):e},t.sort=function(n){return arguments.length?(r=n,e=null,t):r},t.startAngle=function(n){return arguments.length?(i="function"==typeof n?n:cf(+n),t):i},t.endAngle=function(n){return arguments.length?(o="function"==typeof n?n:cf(+n),t):o},t.padAngle=function(n){return arguments.length?(a="function"==typeof n?n:cf(+n),t):a},t},t.areaRadial=kf,t.radialArea=kf,t.lineRadial=Ef,t.radialLine=Ef,t.pointRadial=Cf,t.linkHorizontal=function(){return Rf(Lf)},t.linkVertical=function(){return Rf(Df)},t.linkRadial=function(){var t=Rf(Uf);return t.angle=t.x,delete t.x,t.radius=t.y,delete t.y,t},t.symbol=function(){function t(){var t;if(r||(r=t=ie()),n.apply(this,arguments).draw(r,+e.apply(this,arguments)),t)return r=null,t+""||null}var n=cf(A_),e=cf(64),r=null;return t.type=function(e){return arguments.length?(n="function"==typeof e?e:cf(e),t):n},t.size=function(n){return arguments.length?(e="function"==typeof n?n:cf(+n),t):e},t.context=function(n){return arguments.length?(r=null==n?null:n,t):r},t},t.symbols=B_,t.symbolCircle=A_,t.symbolCross=T_,t.symbolDiamond=E_,t.symbolSquare=R_,t.symbolStar=z_,t.symbolTriangle=D_,t.symbolWye=Y_,t.curveBasisClosed=function(t){return new Bf(t)},t.curveBasisOpen=function(t){return new Ff(t)},t.curveBasis=function(t){return 
new Yf(t)},t.curveBundle=F_,t.curveCardinalClosed=j_,t.curveCardinalOpen=H_,t.curveCardinal=I_,t.curveCatmullRomClosed=G_,t.curveCatmullRomOpen=V_,t.curveCatmullRom=X_,t.curveLinearClosed=function(t){return new Qf(t)},t.curveLinear=_f,t.curveMonotoneX=function(t){return new ec(t)},t.curveMonotoneY=function(t){return new rc(t)},t.curveNatural=function(t){return new oc(t)},t.curveStep=function(t){return new uc(t,.5)},t.curveStepAfter=function(t){return new uc(t,1)},t.curveStepBefore=function(t){return new uc(t,0)},t.stack=function(){function t(t){var o,a,u=n.apply(this,arguments),f=t.length,c=u.length,s=new Array(c);for(o=0;o0){for(var e,r,i,o=0,a=t[0].length;o1)for(var e,r,i,o,a,u,f=0,c=t[n[0]].length;f=0?(r[0]=o,r[1]=o+=i):i<0?(r[1]=a,r[0]=a+=i):r[0]=o},t.stackOffsetNone=fc,t.stackOffsetSilhouette=function(t,n){if((e=t.length)>0){for(var e,r=0,i=t[n[0]],o=i.length;r0&&(r=(e=t[n[0]]).length)>0){for(var e,r,i,o=0,a=1;azl&&e.name===n)return new qn([[t]],sh,n,+r)}return null},t.interrupt=Ln,t.voronoi=function(){function t(t){return new Oc(t.map(function(r,i){var o=[Math.round(n(r,i,t)/nb)*nb,Math.round(e(r,i,t)/nb)*nb];return o.index=i,o.data=r,o}),r)}var n=pc,e=vc,r=null;return t.polygons=function(n){return t(n).polygons()},t.links=function(n){return t(n).links()},t.triangles=function(n){return t(n).triangles()},t.x=function(e){return arguments.length?(n="function"==typeof e?e:dc(+e),t):n},t.y=function(n){return arguments.length?(e="function"==typeof n?n:dc(+n),t):e},t.extent=function(n){return arguments.length?(r=null==n?null:[[+n[0][0],+n[0][1]],[+n[1][0],+n[1][1]]],t):r&&[[r[0][0],r[0][1]],[r[1][0],r[1][1]]]},t.size=function(n){return arguments.length?(r=null==n?null:[[0,0],[+n[0],+n[1]]],t):r&&[r[1][0]-r[0][0],r[1][1]-r[0][1]]},t},t.zoom=function(){function n(t){t.property("__zoom",Gc).on("wheel.zoom",f).on("mousedown.zoom",c).on("dblclick.zoom",s).filter(m).on("touchstart.zoom",l).on("touchmove.zoom",h).on("touchend.zoom 
touchcancel.zoom",d).style("touch-action","none").style("-webkit-tap-highlight-color","rgba(0,0,0,0)")}function e(t,n){return(n=Math.max(x[0],Math.min(x[1],n)))===t.k?t:new Bc(n,t.x,t.y)}function r(t,n,e){var r=n[0]-e[0]*t.k,i=n[1]-e[1]*t.k;return r===t.x&&i===t.y?t:new Bc(t.k,r,i)}function i(t){return[(+t[0][0]+ +t[1][0])/2,(+t[0][1]+ +t[1][1])/2]}function o(t,n,e){t.on("start.zoom",function(){a(this,arguments).start()}).on("interrupt.zoom end.zoom",function(){a(this,arguments).end()}).tween("zoom",function(){var t=arguments,r=a(this,t),o=y.apply(this,t),u=e||i(o),f=Math.max(o[1][0]-o[0][0],o[1][1]-o[0][1]),c=this.__zoom,s="function"==typeof n?n.apply(this,t):n,l=A(c.invert(u).concat(f/c.k),s.invert(u).concat(f/s.k));return function(t){if(1===t)t=s;else{var n=l(t),e=f/n[2];t=new Bc(e,u[0]-n[0]*e,u[1]-n[1]*e)}r.zoom(null,t)}})}function a(t,n){for(var e,r=0,i=T.length;rC}n.zoom("mouse",_(r(n.that.__zoom,n.mouse[0]=pt(n.that),n.mouse[1]),n.extent,w))},!0).on("mouseup.zoom",function(){e.on("mousemove.zoom mouseup.zoom",null),bt(t.event.view,n.moved),jc(),n.end()},!0),i=pt(this),o=t.event.clientX,u=t.event.clientY;_t(t.event.view),Ic(),n.mouse=[i,this.__zoom.invert(i)],Ln(this),n.start()}}function s(){if(g.apply(this,arguments)){var i=this.__zoom,a=pt(this),u=i.invert(a),f=i.k*(t.event.shiftKey?.5:2),c=_(r(e(i,f),a,u),y.apply(this,arguments),w);jc(),M>0?ct(this).transition().duration(M).call(o,c,a):ct(this).call(n.transform,c)}}function l(){if(g.apply(this,arguments)){var n,e,r,i,o=a(this,arguments),u=t.event.changedTouches,f=u.length;for(Ic(),e=0;e parseTime(d.date)).left; + let i = bisect(data, detailDate, 1); + + let workload = data[i]; + let date = workload.date; + let name = workload.name; + let opsSec = workload.opsSec; + let filename = workload.summaryPath; + + fetchWriteThroughputSummaryData(filename) + .then( + d => renderWriteThroughputSummaryDetail(name, date, opsSec, d), + _ => renderWriteThroughputSummaryDetail(name, date, opsSec, null), + ); +} + +/* + * 
Renders the write-throughput summary view, given the corresponding data.
+ *
+ * This function generates a time-series similar to the YCSB benchmark data.
+ * The x-axis represents the day on which the benchmark was run, and the y-axis
+ * represents the calculated "max sustainable throughput" in ops-second.
+ + const defs = svg.append("defs"); + + defs.append("clipPath") + .attr("id", dataKey) + .append("rect") + .attr("x", 0) + .attr("y", -margin.top) + .attr("width", width) + .attr("height", margin.top + height + 10); + + // Plot time-series. + + const view = g.append("g") + .attr("class", "view") + .attr("clip-path", "url(#" + dataKey + ")"); + + const line = d3.line() + .x(d => x(parseTime(d.date))) + .y(d => y(d.opsSec)); + + const path = view.selectAll(".line1") + .data([data]) + .enter() + .append("path") + .attr("class", "line1") + .attr("d", line) + .style("stroke", z(0)); + + // Hover to show labels. + + const lineHover = g + .append("line") + .attr("class", "hover") + .style("fill", "none") + .style("stroke", "#f99") + .style("stroke-width", "1px"); + + const dateHover = g + .append("text") + .attr("class", "hover") + .attr("fill", "#f22") + .attr("text-anchor", "middle") + .attr("alignment-baseline", "hanging") + .attr("transform", "translate(0, 0)"); + + const opsHover = g + .append("text") + .attr("class", "hover") + .attr("fill", "#f22") + .attr("text-anchor", "middle") + .attr("transform", "translate(0, 0)"); + + const marker = g + .append("circle") + .attr("class", "hover") + .attr("r", 3) + .style("opacity", "0") + .style("stroke", "#f22") + .style("fill", "#f22"); + + svg.node().updateMouse = function (mouse, date, hover) { + const mousex = mouse[0]; + const bisect = d3.bisector(d => parseTime(d.date)).left; + const i = bisect(data, date, 1); + const v = + i === data.length + ? data[i - 1] + : mousex - x(parseTime(data[i - 1].date)) < x(parseTime(data[i].date)) - mousex + ? 
data[i - 1] + : data[i]; + const noData = mousex < x(parseTime(data[0].date)); + + let lineY = height; + if (!noData) { + lineY = pathGetY(path.node(), mousex); + } + + let val, valY, valFormat; + val = v.opsSec; + valY = y(val); + valFormat = d3.format(",.0f"); + + lineHover + .attr("x1", mousex) + .attr("x2", mousex) + .attr("y1", lineY) + .attr("y2", height); + marker.attr("transform", "translate(" + x(parseTime(v.date)) + "," + valY + ")"); + dateHover + .attr("transform", "translate(" + mousex + "," + (height + 8) + ")") + .text(formatTime(date)); + opsHover + .attr("transform", "translate(" + x(parseTime(v.date)) + "," + (valY - 7) + ")") + .text(valFormat(val)); + }; + + // Panning and zooming. + + const updateZoom = function (t) { + x.domain(t.rescaleX(x2).domain()); + g.select(".axis--x").call(xAxis); + g.selectAll(".line1").attr("d", line); + }; + svg.node().updateZoom = updateZoom; + + const zoom = d3.zoom() + .extent([[0, 0], [width, 1]]) + .scaleExtent([0.25, 2]) // [45, 360] days + .translateExtent([[-width * 3, 0], [width, 1]]) // [today-360, today] + .on("zoom", function () { + const t = d3.event.transform; + if (!d3.event.sourceEvent) { + updateZoom(t); + return; + } + + d3.selectAll(".chart").each(function () { + if (this.updateZoom != null) { + this.updateZoom(t); + } + }); + + d3.selectAll(".chart").each(function () { + this.__zoom = t.translate(0, 0); + }); + }); + + svg.call(zoom); + svg.call(zoom.transform, d3.zoomTransform(svg.node())); + + svg.append("rect") + .attr("class", "mouse") + .attr("cursor", "move") + .attr("fill", "none") + .attr("pointer-events", "all") + .attr("width", width) + .attr("height", height + margin.top + margin.bottom) + .attr("transform", "translate(" + margin.left + "," + 0 + ")") + .on("mousemove", function () { + const mouse = d3.mouse(this); + const date = x.invert(mouse[0]); + + d3.selectAll(".chart").each(function () { + if (this.updateMouse != null) { + this.updateMouse(mouse, date, 1); + } + }); + }) + 
.on("mouseover", function () { + d3.selectAll(".chart") + .selectAll(".hover") + .style("opacity", 1.0); + }) + .on("mouseout", function () { + d3.selectAll(".chart") + .selectAll(".hover") + .style("opacity", 0); + }) + .on("click", function(d) { + // Use the date corresponding to the clicked data point to bisect + // into the workload data to pluck out the correct datapoint. + const mouse = d3.mouse(this); + let detailDate = d3.timeDay.floor(x.invert(mouse[0])); + bisectAndRenderWriteThroughputDetail(data, detailDate); + }); +} + +function fetchWriteThroughputSummaryData(file) { + return fetch(writeThroughputDetailURL(file)) + .then(response => response.json()) + .then(data => { + for (let key in data) { + let csvData = data[key].rawData; + data[key].data = d3.csvParseRows(csvData, function (d, i) { + return { + elapsed: +d[0], + opsSec: +d[1], + passed: d[2] === 'true', + size: +d[3], + levels: +d[4], + }; + }); + delete data[key].rawData; + } + return data; + }); +} + +/* + * Renders the write-throughput detail view, given the correspnding data, and + * the particular workload and date on which it was run. + * + * This function generates a series with the x-axis representing the elapsed + * time since the start of the benchmark, and the measured write load at that + * point in time (in ops/second). Each series is a worker that participated in + * the benchmark on the selected date. + */ +function renderWriteThroughputSummaryDetail(workload, date, opsSec, rawData) { + const svg = d3.select(".chart.write-throughput-detail"); + + // Remove anything that was previously on the canvas. This ensures that a + // user clicking multiple times does not keep adding data to the canvas. 
+ svg.selectAll("*").remove(); + + const margin = {top: 25, right: 60, bottom: 25, left: 60}; + let maxX = 0; + let maxY = 0; + for (let key in rawData) { + let run = rawData[key]; + maxX = Math.max(maxX, d3.max(run.data, d => d.elapsed)); + maxY = Math.max(maxY, d3.max(run.data, d => d.opsSec)); + } + + const width = styleWidth(svg) - margin.left - margin.right; + const height = styleHeight(svg) - margin.top - margin.bottom; + + // Panning and zooming. + // These callbacks are defined as they are called from the panning / + // zooming functions elsewhere, however, they are simply no-ops on this + // chart, as they x-axis is a measure of "elapsed time" rather than a date. + + svg.node().updateMouse = function (mouse, date, hover) {} + svg.node().updateZoom = function () {}; + + // Set up axes. + + const x = d3.scaleLinear() + .domain([0, 8.5 * 3600]) + .range([0, width]); + + const y = d3.scaleLinear() + .domain([0, maxY * 1.1]) + .range([height, 0]); + + const z = d3.scaleOrdinal(d3.schemeCategory10); + + const xAxis = d3.axisBottom(x) + .ticks(5) + .tickFormat(d => Math.floor(d / 3600) + "h"); + + const yAxis = d3.axisLeft(y) + .ticks(5); + + const g = svg + .append("g") + .attr("transform", "translate(" + margin.left + "," + margin.top + ")"); + + g.append("g") + .attr("class", "axis axis--x") + .attr("transform", "translate(0," + height + ")") + .call(xAxis); + + g.append("g") + .attr("class", "axis axis--y") + .call(yAxis); + + // If we get no data, we just render an empty chart. + if (rawData == null) { + g.append("text") + .attr("class", "chart-title") + .attr("x", margin.left + width / 2) + .attr("y", height / 2) + .style("text-anchor", "middle") + .style("font", "8pt sans-serif") + .text("Data unavailable"); + return; + } + + g.append("text") + .attr("class", "chart-title") + .attr("x", margin.left + width / 2) + .attr("y", 0) + .style("text-anchor", "middle") + .style("font", "8pt sans-serif") + .text("Ops/sec over time"); + + // Plot data. 
+ + const view = g.append("g") + .attr("class", "view"); + + let values = []; + for (let key in rawData) { + values.push({ + id: key, + values: rawData[key].data, + }); + } + + const line = d3.line() + .x(d => x(d.elapsed)) + .y(d => y(d.opsSec)); + + const path = view.selectAll(".line1") + .data(values) + .enter() + .append("path") + .attr("class", "line1") + .attr("d", d => line(d.values)) + .style("stroke", d => z(d.id)); + + // Draw a horizontal line for the calculated ops/sec average. + + view.append("path") + .attr("d", d3.line()([[x(0), y(opsSec)], [x(maxX), y(opsSec)]])) + .attr("stroke", "black") + .attr("stroke-width", "2") + .style("stroke-dasharray", ("2, 5")); +} diff --git a/pebble/docs/memory.md b/pebble/docs/memory.md new file mode 100644 index 0000000..b9f4a63 --- /dev/null +++ b/pebble/docs/memory.md @@ -0,0 +1,92 @@ +# Memory Management + +## Background + +Pebble has two significant sources of memory usage: MemTables and the +Block Cache. MemTables buffer data that has been written to the WAL +but not yet flushed to an SSTable. The Block Cache provides a cache of +uncompressed SSTable data blocks. + +Originally, Pebble used regular Go memory allocation for the memory +backing both MemTables and the Block Cache. This was problematic as it +put significant pressure on the Go GC. The higher the bandwidth of +memory allocations, the more work GC has to do to reclaim the +memory. In order to lessen the pressure on the Go GC, an "allocation +cache" was introduced to the Block Cache which allowed reusing the +memory backing cached blocks in most circumstances. This produced a +dramatic reduction in GC pressure and a measurable performance +improvement in CockroachDB workloads. + +Unfortunately, the use of Go allocated memory still caused a +problem. CockroachDB running on top of Pebble often resulted in an RSS +(resident set size) 2x what it was when using RocksDB. 
The cause of +this effect is due to the Go runtime's heuristic for triggering GC: + +> A collection is triggered when the ratio of freshly allocated data +> to live data remaining after the previous collection reaches this +> percentage. + +This percentage can be configured by the +[`GOGC`](https://golang.org/pkg/runtime/) environment variable or by +calling +[`debug.SetGCPercent`](https://golang.org/pkg/runtime/debug/#SetGCPercent). The +default value is `100`, which means that GC is triggered when the +freshly allocated data is equal to the amount of live data at the end +of the last collection period. This generally works well in practice, +but the Pebble Block Cache is often configured to be 10s of gigabytes +in size. Waiting for 10s of gigabytes of data to be allocated before +triggering a GC results in very large Go heap sizes. + +## Manual Memory Management + +Attempting to adjust `GOGC` to account for the significant amount of +memory used by the Block Cache is fraught. What value should be used? +`10%`? `20%`? Should the setting be tuned dynamically? Rather than +introducing a heuristic which may have cascading effects on the +application using Pebble, we decided to move the Block Cache and +MemTable memory out of the Go heap. This is done by using the C memory +allocator, though it could also be done by providing a simple memory +allocator in Go which uses `mmap` to allocate memory. + +In order to support manual memory management for the Block Cache and +MemTables, Pebble needs to precisely track their lifetime. This was +already being done for the MemTable in order to account for its memory +usage in metrics. It was mostly being done for the Block Cache. Values +stores in the Block Cache are reference counted and are returned to +the "alloc cache" when their reference count falls +to 0. Unfortunately, this tracking wasn't precise and there were +numerous cases where the cache values were being leaked. 
This was +acceptable in a world where the Go GC would clean up after us. It is +unacceptable if the leak becomes permanent. + +## Leak Detection + +In order to find all of the cache value leaks, Pebble has a leak +detection facility built on top of +[`runtime.SetFinalizer`](https://golang.org/pkg/runtime/#SetFinalizer). A +finalizer is a function associated with an object which is run when +the object is no longer reachable. On the surface, this sounds perfect +as a facility for performing all memory reclamation. Unfortunately, +finalizers are generally frowned upon by the Go implementors, and come +with very loose guarantees: + +> The finalizer is scheduled to run at some arbitrary time after the +> program can no longer reach the object to which obj points. There is +> no guarantee that finalizers will run before a program exits, so +> typically they are useful only for releasing non-memory resources +> associated with an object during a long-running program + +This language is somewhat frightening, but in practice finalizers are run at the +end of every GC period. Pebble primarily relies on finalizers for its leak +detection facility. In the block cache, a finalizer is associated with the Go +allocated `cache.Value` object. When the finalizer is run, it checks that the +buffer backing the `cache.Value` has been freed. This leak detection facility is +enabled by the `"invariants"` build tag which is enabled by the Pebble unit +tests. + +There also exists a very specific memory reclamation use case in the block cache +that ensures that structs with transitively reachable fields backed by manually +allocated memory that are pooled in a `sync.Pool` are freed correctly when their +parent struct is released from the pool and consequently garbage collected by +the Go runtime (see `cache/entry_normal.go`). The loose guarantees provided by +the runtime are reasonable to rely on in this case to prevent a memory leak. 
diff --git a/pebble/docs/range_deletions.md b/pebble/docs/range_deletions.md new file mode 100644 index 0000000..19b8f06 --- /dev/null +++ b/pebble/docs/range_deletions.md @@ -0,0 +1,471 @@ +# Range Deletions + +TODO: The following explanation of range deletions does not take into account +the recent change to prohibit splitting of a user key between sstables. This +change simplifies the logic, removing 'improperly truncated range tombstones.' + +TODO: The following explanation of range deletions ignores the +kind/trailer that appears at the end of keys after the sequence +number. This should be harmless but need to add a justification on why +it is harmless. + +## Background and Notation + +Range deletions are represented as `[start, end)#seqnum`. Points +(set/merge/...) are represented as `key#seqnum`. A range delete `[s, e)#n1` +deletes every point `k#n2` where `k \in [s, e)` and `n2 < n1`. +The inequality `n2 < n1` is to handle the case where a range delete and +a point have the same sequence number -- this happens during sstable +ingestion where the whole sstable is assigned a single sequence number +that applies to all the data in it. + +There is additionally an infinity sequence number, represented as +`inf`, which is not used for any point, that we can use for reasoning +about range deletes. + +It has been asked why range deletes use an exclusive end key instead +of an inclusive end key. For string keys, one can convert a desired +range delete on `[s, e]` into a range delete on `[s, ImmediateSuccessor(e))`. +For strings, the immediate successor of a key +is that key with a \0 appended to it. However one cannot go in the +other direction: if one could represent only inclusive end keys in a +range delete and one desires to delete a range with an exclusive end +key `[s, e)#n`, one needs to compute `ImmediatePredecessor(e)` which +is an infinite length string. For example, +`ImmediatePredecessor("ab")` is `"aa\xff\xff...."`. 
Additionally, +regardless of user needs, the exclusive end key helps with splitting a +range delete as we will see later. + +We will sometimes use ImmediatePredecessor and ImmediateSuccessor in +the following for illustrating an idea, but we do not rely on them as +something that is viable to produce for a particular kind of key. And +even if viable, these functions are not currently provided to +RockDB/Pebble. + +### Visualization + +If we consider a 2 dimensional space with increasing keys on the X +axis (with every possible user key represented) and increasing +sequence numbers on the Y axis, range deletes apply to a rectangle +whose bottom edge sits on the X axis. + +The actual space represented by the ordering in our sstables is a one +dimensional space where `k1#n1` is less than `k2#n2` if either of the +following holds: + +- k1 < k2 + +- k1 = k2 and n1 > n2 (under the assumption that no two points with +the same key have the same sequence number). + +``` + ^ + | . > . > . > yy + | . > . > . > . + | . > . > . > . +n | V > xx > . > V + | . > x. > x. > . + | . > x. > x. > . + | . > x. > x. > . + | .> x.> x.> . + ------------------------------------------> + k IS(k) IS(IS(k)) +``` + +The above figure uses `.` to represent points and the X axis is dense in +that it represents all possible keys. `xx` represents the start of a +range delete and `x.` are the points which it deletes. The arrows `V` and +`>` represent the ordering of the points in the one dimensional space. +`IS` is shorthand for `ImmediateSuccessor` and the range delete represented +there is `[k, IS(IS(k)))#n`. Ignore `yy` for now. + +The one dimensional space works fine in a world with only points. But +issues arise when storing range deletes, that represent an action in 2 +dimensional space, into this one dimensional space. + +## Range Delete Boundaries and the Simplest World + +RocksDB/Pebble store the inclusive bounds of each sstable in one dimensional +space. 
The range deletes' two dimensional behavior and exclusive end key need
+to be adapted to this requirement.
+ +If we had a system with one sstable per level, for each level lower +than L0, we are effectively done. We have represented the tight bounds +of each range delete and it is within the bounds of the sstable. This +works even with L0 => L0 compactions assuming they output exactly one +sstable. + +## The Mostly Simple World + +Here we have multiple files for levels lower than L0 that are non +overlapping in the file bounds. These multiple files occur because +compactions produce multiple files. This introduces the need to split a +range delete across the files being produced by a compaction. + +There is a clean way to split a range delete `[s, e)#n` into 2 parts +(which can be recursively applied to split into arbitrarily many +parts): split into `[s, m)#n` and `[m, e)#n`. These range deletes +apply to non-overlapping points and their tight bounds are `[s#m, +m#inf]`, `[m#n, e#inf]` which are also non-overlapping. + +Consider the following example of an input range delete `["c", "h")#10` and +the following two output files from a compaction: + +``` + sst1 sst2 +last point is "e"#7 | first point is "f"#20 +``` + +The range delete can be split into `["c", "f")#10` and `["f", +"h")#10`, by using the first point key of sst2 as the split +point. Then the bounds of sst1 and sst2 will be `[..., "f"#inf]` and +`["f"#20, ...]` which are non-overlapping. It is still possible to compute +the sstable bounds by looking at all the entries in the sstable. + +## The Real World + +Continuing with the same range delete `["c", "h")#10`, we can have the +following sstables produced during a compaction: + +``` + sst1 sst2 sst3 sst4 sst5 +points: "e"#7 | "f"#12 "f"#7 | "f"#4 "f"#3 | "f"#1 | "g"#15 +``` + +The range deletes written to these ssts are + +``` + sst1 sst2 sst3 sst4 sst5 +["c", "h")#10 | ["f", "h")#10 | ["f", "h")#10 | ["f", "h")#10 | ["g", "h")#10 +``` + +The Pebble code that achieves this effect is in +`rangedel.Fragmenter`. 
It is a code structuring artifact that sst1 +does not contain a range delete equal to `["c", "f")#10` and sst4 does +not contain `["f", "g")#10`. However for the range deletes in sst2 and +sst3 we cannot do any better because we don't know what the key +following "f" will be (the compaction cannot look ahead) and because +we don't have an `ImmediateSuccessor` function (otherwise we could +have written `["f", ImmediateSuccessor("f"))#10` to sst2, sst3). But +the code artifacts are not the ones introducing the real complexity. + +The range delete bounds are + +``` + sst1 sst2, sst3, sst4 sst5 +["c"#10, "h"#inf] ["f"#10, "h"#inf] ["g"#10, "h"#inf] + +``` + +We note the following: + +- The bounds of range deletes are overlapping since we have been + unable to split the range deletes. If these decide the sstable + bounds, the sstables will have overlapping bounds. This is not + permissible. + +- The range deletes included in each sstable result in that sstable + being "self-sufficient" wrt having the range delete that deletes + some of the points in the sstable (let us assume that the points in + this example have not been dropped from that sstable because of a + snapshot). + +- The transitions from sst1 to sst2 and sst4 to sst5 are **clean** in + that we can pretend that the range deletes in those files are actually: + +``` + sst1 sst2 sst3 sst4 sst5 +["c", "f")#10 | ["f", "g")#10 | ["f", "g")#10 | ["f", "g")#10 | ["g", "h")#10 +``` + +We could achieve some of these **clean** transitions (but not all) with a +code change. Also note that these better range deletes maintain the +"self-sufficient" property. + +### Making Non-overlapping SSTable bounds + +We force the sstable bounds to be non-overlapping by setting them to: + +``` + sst1 sst2 sst3 sst4 sst5 +["c"#10, "f"#inf] ["f"#12, "f"#7] ["f"#4, "f"#3] ["f"#1, "g"#inf] ["g"#15, "h"#inf] +``` + +Note that for sst1...sst4 the sstable bounds are smaller than the +bounds of the range deletes contained in them. 
The code that
+accomplishes this in Pebble is in `compaction.go`
+ +Luckily for us, this is harmless since these keys cannot have existed +in the system due to the levelling behavior: we cannot be writing +sst2...sst4 to level `i` if versions of `"f"` younger than `"f"#4` are +already in level `i` or version older than `"f"#7` have been left in +level i - 1. There is some trickery possible to prevent this "loss of +power" for queries (see the "Putting it together" section), but given +the history of bugs in this area, we should be cautious. + +### Improperly truncated Range Deletes + +We refer to range deletes that have experienced this "loss of power" +as **improper**. In the above example the range deletions in sst2, sst3, sst4 +are improper. The problem with improper range deletions occurs +when they need to participate in a future compaction: even though we +have restricted them to act-within their current sstable boundary, we +don't have a way of **"writing"** this restriction to a new sstable, +since they still need to be written in the `[s, e)#n` format. + +For example, sst2 has delete `["f", "h")#10` that must act-within +the bound `["f"#12, "f"#7]`. If sst2 was compacted down to the next +level into a new sstable (whose bounds we cannot predict because they +depend on other data written to that sstable) we need to be able to +write a range delete entry that follows the original restriction. But +the narrowest we can write is `["f", ImmediateSuccessor("f"))#10`. This +is an expansion of the act-within restriction with potentially +unintended consequences. In this case the expansion happened in the suffix. +For sst4, the range deletion `["f", "h")#10` must act-within `["f"#1, "g"#inf]`, +and we can precisely represent the constraint on the suffix by writing +`["f", "g")#10` but it does not precisely represent that this range delete +should not apply to `"f"#9`...`"f"#2`. + +In comparison, the sst1 range delete `["c", "h")#10` that must act-within +the bound `["c"#10, "f"#inf]` is not improper. 
This restriction can +be applied precisely to get a range delete `["c", "f")#10`. + +The solution to this is to note that while individual sstables have +improper range deletes, if we look at a collection of sstables we +can restore the improper range deletes spread across them to their proper self +(and their full power). To accurately find these improper range +deletes would require looking into the contents of a file, which is +expensive. But we can construct a pessimistic set based on +looking at the sequence of all files in a level and partitioning them: +adjacent files `f1`, `f2` with largest and smallest bound `k1#n1`, +`k2#n2` must be in the same partition if + +``` +k1 = k2 and n1 != inf +``` + +In the above example sst2, sst3, sst4 are one partition. The +**spanning bound** of this partition is `["f"#12, "g"#inf]` and the +range delete `["f", "h")#10` when constrained to act-within this +spanning bound is precisely the range delete `["f", +"g")#10`. Intuitively, the "loss of power" of this range delete has +been restored for the sake of making it proper, so it can be +accurately "written" in the output of the compaction (it may be +improperly fragmented again in the output, but we have already +discussed that). Such partitions are called "atomic compaction groups" +and must participate as a whole in a compaction (and a +compaction can use multiple atomic compaction groups as input). + +Consider another example: + +``` + sst1 sst2 +points: "e"#12 | "e"#10 +delete: ["c", "g")#8 | ["c", "g")#8 +bounds ["c"#8, "e"#12] | ["e"#10, "g"#inf] +``` + +sst1, sst2 are an atomic compaction group. Say we violated the +requirement that both be inputs in a compaction and only compacted +sst2 down to level `i + 1` and then down to level `i + 2`. Then we add +sst3 with bounds `["h"#10, "j"#5]` to level `i` and sst1 and sst3 are +compacted to level `i + 1` into a single sstable. 
This new sstable
+will have bounds `["c"#8, "j"#5]` so these bounds do not help with the
+original apply-within constraint on `["c", "g")#8` (that it should
+apply-within `["c"#8, "e"#12]`). The narrowest we can construct (if we had
+`ImmediateSuccessor`) would be `["c", ImmediateSuccessor("e"))#8`. Now we
+can incorrectly apply this range delete that is in level `i + 1` to `"e"#10`
+sitting in level `i + 2`. Note that this example can be made worse using
+sequence number zeroing -- `"e"#10` may have been rewritten to `"e"#0`.
+
+If a range delete `[s, e)#n` is in an atomic compaction group with
+spanning bounds `[k1#n1, k2#n2]` our construction above guarantees the
+following properties
+
+- `k1#n1 <= s#n`, so the bounds do not constrain the start of the
+  range delete.
+
+- `k2 >= e` or `n2 = inf`, so if `k2` is constraining the range delete
+  it will properly truncate the range delete.
+
+
+#### New sstable at sequence number 0
+
+A new sstable can be assigned sequence number 0 (and be written to L0)
+if the keys in the sstable are not in any other sstable. This
+comparison uses the keys and not key#seqnum, so the loss and
+restoration of power does not cause problems since that occurs within
+the versions of a single key.
+
+#### Flawed optimizations
+
+For the case where the atomic compaction group corresponds to the lower
+level of a compaction, it may initially seem to be correct to use only
+a prefix or suffix of that group in a compaction. In this case the
+prefix (suffix) will correspond to the largest key (smallest key) in
+the input sstables in the compaction and so can continue to constrain
+the range delete. 
For example, sst1 and sst2 are in the same atomic +compaction group + +``` + sst1 sst2 +points: "c"#10 "e"#12 | "e"#10 +delete: ["c", "g")#8 | ["c", "g")#8 +bounds ["c"#10, "e"#12] | ["e"#10, "g"#inf] +``` + +and this is the lower level of a compaction with + +``` + sst3 +points: "a"#14 "d"#15 +bounds ["a"#14, "d"#15] +``` + +we could allow for a compaction involving sst1 and sst3 which would produce + +``` + sst4 +points: "a"#14 "c"#10 "d"#15 "e"#12 +delete: ["c", "g")#8 +bounds ["a"#14, "e"#12] +``` + +and the range delete is still improper but its act-within constraint has +not expanded. + +But we have to be very careful to not have a more significant loss of power +of this range delete. Consider a situation where sst3 had a single delete +`"e"#16`. It still does not overlap in bounds with sst2 and we again pick +sst1 and sst3 for compaction. This single delete will cause `"e"#12` to be deleted +and sst4 bounds would be (unless we had complicated code preventing it): + +``` + sst4 +points: "a"#14 "c"#10 "d"#15 +delete: ["c", "g")#8 +bounds ["a"#14, "d"#15] +``` + +Now this delete cannot delete `"dd"#6` and we have lost the ability to know +that sst4 and sst2 are in the same atomic compaction group. + + +### Putting it together + +Summarizing the above, we have: + +- SStable bounds logic that ensures sstables are not +overlapping. These sstables contain range deletes that extend outside +these bounds. But these range deletes should **apply-within** the +sstable bounds. + +- Compactions: they need to constrain the range deletes in the inputs +to **apply-within**, but this can create problems with **writing** the +**improper** range deletes. The solution is to include the full +**atomic compaction group** in a compaction so we can restore the +**improper** range deletes to their **proper** self and then apply the +constraints of the atomic compaction group. + +- Queries: We need to act-within the file bound constraint on the range delete. 
+ Say the range delete is `[s, e)#n` and the file bound is `[b1#n1, + b2#n2]`. We are guaranteed that `b1#n1 <= s#n` so the only + constraint can come from `b2#n2`. + + - Deciding whether a range delete covers a key in the same or lower levels. + + - `b2 >= e`: there is no act-within constraint. + - `b2 < e`: to be precise we cannot let it delete `b2#n2-1` or + later keys. But it is likely that allowing it to delete up to + `b2#0` would be ok due to the atomic compaction group. This + would prevent the so-called "loss of power" discussed earlier if + one also includes the argument that the gap in the file bounds + that also represents the loss of power is harmless (the gap + exists within versions of key, and anyone doing a query for that + key will start from the sstable to the left of the gap). But it + may be better to be cautious. + + - For using the range delete to seek sstables at lower levels. + - `b2 >= e`: seek to `e` since there is no act-within constraint. + - `b2 < e`: seek to `b2`. We are ignoring that this range delete + is allowed to delete some versions of `b2` since this is just a + performance optimization. + + + + + + diff --git a/pebble/docs/rocksdb.md b/pebble/docs/rocksdb.md new file mode 100644 index 0000000..8cf7ae9 --- /dev/null +++ b/pebble/docs/rocksdb.md @@ -0,0 +1,757 @@ +# Pebble vs RocksDB: Implementation Differences + +RocksDB is a key-value store implemented using a Log-Structured +Merge-Tree (LSM). This document is not a primer on LSMs. There exist +some decent +[introductions](http://www.benstopford.com/2015/02/14/log-structured-merge-trees/) +on the web, or try chapter 3 of [Designing Data-Intensive +Applications](https://www.amazon.com/Designing-Data-Intensive-Applications-Reliable-Maintainable/dp/1449373321). 
+ +Pebble inherits the RocksDB file formats, has a similar API, and +shares many implementation details, but it also has many differences +that improve performance, reduce implementation complexity, or extend +functionality. This document highlights some of the more important +differences. + +* [Internal Keys](#internal-keys) +* [Indexed Batches](#indexed-batches) +* [Large Batches](#large-batches) +* [Commit Pipeline](#commit-pipeline) +* [Range Deletions](#range-deletions) +* [Flush and Compaction Pacing](#flush-and-compaction-pacing) +* [Write Throttling](#write-throttling) +* [Other Differences](#other-differences) + +## Internal Keys + +The external RocksDB API accepts keys and values. Due to the LSM +structure, keys are never updated in place, but overwritten with new +versions. Inside RocksDB, these versioned keys are known as Internal +Keys. An Internal Key is composed of the user specified key, a +sequence number and a kind. On disk, sstables always store Internal +Keys. + +``` + +-------------+------------+----------+ + | UserKey (N) | SeqNum (7) | Kind (1) | + +-------------+------------+----------+ +``` + +The `Kind` field indicates the type of key: set, merge, delete, etc. + +While Pebble inherits the Internal Key encoding for format +compatibility, it diverges from RocksDB in how it manages Internal +Keys in its implementation. In RocksDB, Internal Keys are represented +either in encoded form (as a string) or as a `ParsedInternalKey`. The +latter is a struct with the components of the Internal Key as three +separate fields. + +```c++ +struct ParsedInternalKey { + Slice user_key; + uint64 seqnum; + uint8 kind; +} +``` + +The component format is convenient: changing the `SeqNum` or `Kind` is +field assignment. Extracting the `UserKey` is a field +reference. However, RocksDB tends to only use `ParsedInternalKey` +locally. The major internal APIs, such as `InternalIterator`, operate +using encoded internal keys (i.e. 
strings) for parameters and return +values. + +To give a concrete example of the overhead this causes, consider +`Iterator::Seek(user_key)`. The external `Iterator` is implemented on +top of an `InternalIterator`. `Iterator::Seek` ends up calling +`InternalIterator::Seek`. Both Seek methods take a key, but +`InternalIterator::Seek` expects an encoded Internal Key. This is both +error prone and expensive. The key passed to `Iterator::Seek` needs to +be copied into a temporary string in order to append the `SeqNum` and +`Kind`. In Pebble, Internal Keys are represented in memory using an +`InternalKey` struct that is the analog of `ParsedInternalKey`. All +internal APIs use `InternalKeys`, with the exception of the lowest +level routines for decoding data from sstables. In Pebble, since the +interfaces all take and return the `InternalKey` struct, we don’t need +to allocate to construct the Internal Key from the User Key, but +RocksDB sometimes needs to allocate, and encode (i.e. make a +copy). The use of the encoded form also causes RocksDB to pass encoded +keys to the comparator routines, sometimes decoding the keys multiple +times during the course of processing. + +## Indexed Batches + +In RocksDB, a batch is the unit for all write operations. Even writing +a single key is transformed internally to a batch. The batch internal +representation is a contiguous byte buffer with a fixed 12-byte +header, followed by a series of records. + +``` + +------------+-----------+--- ... ---+ + | SeqNum (8) | Count (4) | Entries | + +------------+-----------+--- ... ---+ +``` + +Each record has a 1-byte kind tag prefix, followed by 1 or 2 length +prefixed strings (varstring): + +``` + +----------+-----------------+-------------------+ + | Kind (1) | Key (varstring) | Value (varstring) | + +----------+-----------------+-------------------+ +``` + +(The `Kind` indicates if there are 1 or 2 varstrings. `Set`, `Merge`, +and `DeleteRange` have 2 varstrings, while `Delete` has 1.) 
+
+Adding a mutation to a batch involves appending a new record to the
+buffer. This format is extremely fast for writes, but the lack of
+indexing makes it untenable to use directly for reads. In order to
+support iteration, a separate indexing structure is created. Both
+RocksDB and Pebble use a skiplist for the indexing structure, but with
+a clever twist. Rather than the skiplist storing a copy of the key, it
+simply stores the offset of the record within the mutation buffer. The
+result is that the skiplist acts as a multi-map (i.e. a map that can have
+duplicate entries for a given key). The iteration order for this map
+is constructed so that records sort on key, and for equal keys they
+sort on descending offset. Newer records for the same key appear
+before older records.
+
+While the indexing structure for batches is nearly identical between
+RocksDB and Pebble, how the index structure is used is completely
+different. In RocksDB, a batch is indexed using the
+`WriteBatchWithIndex` class. The `WriteBatchWithIndex` class provides
+a `NewIteratorWithBase` method that allows iteration over the merged
+view of the batch contents and an underlying "base" iterator created
+from the database. `BaseDeltaIterator` contains logic to iterate over
+the batch entries and the base iterator in parallel which allows us to
+perform reads on a snapshot of the database as though the batch had
+been applied to it. On the surface this sounds reasonable, yet the
+implementation is incomplete. Merge and DeleteRange operations are not
+supported. The reason they are not supported is because handling them
+is complex and requires duplicating logic that already exists inside
+RocksDB for normal iterator processing.
+
+Pebble takes a different approach to iterating over a merged view of a
+batch's contents and the underlying database: it treats the batch as
+another level in the LSM. Recall that an LSM is composed of zero or
+more memtable layers and zero or more sstable layers. 
Internally, both +RocksDB and Pebble contain a `MergingIterator` that knows how to merge +the operations from different levels, including processing overwritten +keys, merge operations, and delete range operations. The challenge +with treating the batch as another level to be used by a +`MergingIterator` is that the records in a batch do not have a +sequence number. The sequence number in the batch header is not +assigned until the batch is committed. The solution is to give the +batch records temporary sequence numbers. We need these temporary +sequence numbers to be larger than any other sequence number in the +database so that the records in the batch are considered newer than +any committed record. This is accomplished by reserving the high-bit +in the 56-bit sequence number for use as a marker for batch sequence +numbers. The sequence number for a record in an uncommitted batch is: + +``` + RecordOffset | (1<<55) +``` + +Newer records in a given batch will have a larger sequence number than +older records in the batch. And all of the records in a batch will +have larger sequence numbers than any committed record in the +database. + +The end result is that Pebble's batch iterators support all of the +functionality of regular database iterators with minimal additional +code. + +## Large Batches + +The size of a batch is limited only by available memory, yet the +required memory is not just the batch representation. When a batch is +committed, the commit operation iterates over the records in the batch +from oldest to newest and inserts them into the current memtable. The +memtable is an in-memory structure that buffers mutations that have +been committed (written to the Write Ahead Log), but not yet written +to an sstable. Internally, a memtable uses a skiplist to index +records. Each skiplist entry has overhead for the index links and +other metadata that is a dozen bytes at minimum. 
A large batch
+composed of many small records can require twice as much memory when
+inserted into a memtable as it required in the batch. And note that
+this causes a temporary increase in memory requirements because the
+batch memory is not freed until it is completely committed.
+
+A non-obvious implementation restriction present in both RocksDB and
+Pebble is that there is a one-to-one correspondence between WAL files
+and memtables. That is, a given WAL file has a single memtable
+associated with it and vice-versa. While this restriction could be
+removed, doing so is onerous and intricate. It should also be noted
+that committing a batch involves writing it to a single WAL file. The
+combination of restrictions results in a batch needing to be written
+entirely to a single memtable.
+
+What happens if a batch is too large to fit in a memtable? Memtables
+are generally considered to have a fixed size, yet this is not
+actually true in RocksDB. In RocksDB, the memtable skiplist is
+implemented on top of an arena structure. An arena is composed of a
+list of fixed size chunks, with no upper limit set for the number of
+chunks that can be associated with an arena. So RocksDB handles large
+batches by allowing a memtable to grow beyond its configured
+size. Concretely, while RocksDB may be configured with a 64MB memtable
+size, a 1GB batch will cause the memtable to grow to accommodate
+it. Functionally, this is good, though there is a practical problem: a
+large batch is first written to the WAL, and then added to the
+memtable. Adding the large batch to the memtable may consume so much
+memory that the system runs out of memory and is killed by the
+kernel. This can result in a death loop because upon restarting, the
+batch is read from the WAL and applied to the memtable again.
+
+In Pebble, the memtable is also implemented using a skiplist on top of
+an arena. Significantly, the Pebble arena is a fixed size. 
While the +RocksDB skiplist uses pointers, the Pebble skiplist uses offsets from +the start of the arena. The fixed size arena means that the Pebble +memtable cannot expand arbitrarily. A batch that is too large to fit +in the memtable causes the current mutable memtable to be marked as +immutable and the batch is wrapped in a `flushableBatch` structure and +added to the list of immutable memtables. Because the `flushableBatch` +is readable as another layer in the LSM, the batch commit can return +as soon as the `flushableBatch` has been added to the immutable +memtable list. + +Internally, a `flushableBatch` provides iterator support by sorting +the batch contents (the batch is sorted once, when it is added to the +memtable list). Sorting the batch contents and insertion of the +contents into a memtable have the same big-O time, but the constant +factor dominates here. Sorting is significantly faster and uses +significantly less memory due to not having to copy the batch records. + +Note that an effect of this large batch support is that Pebble can be +configured as an efficient on-disk sorter: specify a small memtable +size, disable the WAL, and set a large L0 compaction threshold. In +order to sort a large amount of data, create batches that are larger +than the memtable size and commit them. When committed these batches +will not be inserted into a memtable, but instead sorted and then +written out to L0. The fully sorted data can later be read and the +normal merging process will take care of the final ordering. + +## Commit Pipeline + +The commit pipeline is the component which manages the steps in +committing write batches, such as writing the batch to the WAL and +applying its contents to the memtable. While simple conceptually, the +commit pipeline is crucial for high performance. 
In the absence of +concurrency, commit performance is limited by how fast a batch can be +written (and synced) to the WAL and then added to the memtable, both +of which are outside of the purview of the commit pipeline. + +To understand the challenge here, it is useful to have a conception of +the WAL (write-ahead log). The WAL contains a record of all of the +batches that have been committed to the database. As a record is +written to the WAL it is added to the memtable. Each record is +assigned a sequence number which is used to distinguish newer updates +from older ones. Conceptually the WAL looks like: + +``` ++--------------------------------------+ +| Batch(SeqNum=1,Count=9,Records=...) | ++--------------------------------------+ +| Batch(SeqNum=10,Count=5,Records=...) | ++--------------------------------------+ +| Batch(SeqNum=15,Count=7,Records...) | ++--------------------------------------+ +| ... | ++--------------------------------------+ +``` + +Note that each WAL entry is precisely the batch representation +described earlier in the [Indexed Batches](#indexed-batches) +section. The monotonically increasing sequence numbers are a critical +component in allowing RocksDB and Pebble to provide fast snapshot +views of the database for reads. + +If concurrent performance was not a concern, the commit pipeline could +simply be a mutex which serialized writes to the WAL and application +of the batch records to the memtable. Concurrent performance is a +concern, though. + +The primary challenge in concurrent performance in the commit pipeline +is maintaining two invariants: + +1. Batches need to be written to the WAL in sequence number order. +2. Batches need to be made visible for reads in sequence number + order. This invariant arises from the use of a single sequence + number which indicates which mutations are visible. + +The second invariant deserves explanation. RocksDB and Pebble both +keep track of a visible sequence number. 
This is the sequence number +for which records in the database are visible during reads. The +visible sequence number exists because committing a batch is an atomic +operation, yet adding records to the memtable is done without an +exclusive lock (the skiplists used by both Pebble and RocksDB are +lock-free). When the records from a batch are being added to the +memtable, a concurrent read operation may see those records, but will +skip over them because they are newer than the visible sequence +number. Once all of the records in the batch have been added to the +memtable, the visible sequence number is atomically incremented. + +So we have four steps in committing a write batch: + +1. Write the batch to the WAL +2. Apply the mutations in the batch to the memtable +3. Bump the visible sequence number +4. (Optionally) sync the WAL + +Writing the batch to the WAL is actually very fast as it is just a +memory copy. Applying the mutations in the batch to the memtable is by +far the most CPU intensive part of the commit pipeline. Syncing the +WAL is the most expensive from a wall clock perspective. + +With that background out of the way, let's examine how RocksDB commits +batches. This description is of the traditional commit pipeline in +RocksDB (i.e. the one used by CockroachDB). + +RocksDB achieves concurrency in the commit pipeline by grouping +concurrently committed batches into a batch group. Each group is +assigned a "leader" which is the first batch to be added to the +group. The batch group is written atomically to the WAL by the leader +thread, and then the individual batches making up the group are +concurrently applied to the memtable. Lastly, the visible sequence +number is bumped such that all of the batches in the group become +visible in a single atomic step. While a batch group is being applied, +other concurrent commits are added to a waiting list. When the group +commit finishes, the waiting commits form the next group. 
+ +There are two criticisms of the batch grouping approach. The first is +that forming a batch group involves copying batch contents. RocksDB +partially alleviates this for large batches by placing a limit on the +total size of a group. A large batch will end up in its own group and +not be copied, but the criticism still applies for small batches. Note +that there are actually two copies here. The batch contents are +concatenated together to form the group, and then the group contents +are written into an in memory buffer for the WAL before being written +to disk. + +The second criticism is about the thread synchronization points. Let's +consider what happens to a commit which becomes the leader: + +1. Lock commit mutex +2. Wait to become leader +3. Form (concatenate) batch group and write to the WAL +4. Notify followers to apply their batch to the memtable +5. Apply own batch to memtable +6. Wait for followers to finish +7. Bump visible sequence number +8. Unlock commit mutex +9. Notify followers that the commit is complete + +The follower's set of operations looks like: + +1. Lock commit mutex +2. Wait to become follower +3. Wait to be notified that it is time to apply batch +4. Unlock commit mutex +5. Apply batch to memtable +6. Wait to be notified that commit is complete + +The thread synchronization points (all of the waits and notifies) are +overhead. Reducing that overhead can improve performance. + +The Pebble commit pipeline addresses both criticisms. The main +innovation is a commit queue that mirrors the commit order. The Pebble +commit pipeline looks like: + +1. Lock commit mutex + * Add batch to commit queue + * Assign batch sequence number + * Write batch to the WAL +2. Unlock commit mutex +3. Apply batch to memtable (concurrently) +4. Publish batch sequence number + +Pebble does not use the concept of a batch group. Each batch is +individually written to the WAL, but note that the WAL write is just a +memory copy into an internal buffer in the WAL. 
+
+Step 4 deserves further scrutiny as it is where the invariant on the
+visible batch sequence number is maintained. Publishing the batch
+sequence number cannot simply bump the visible sequence number because
+batches with earlier sequence numbers may still be applying to the
+memtable. If we were to ratchet the visible sequence number without
+waiting for those applies to finish, a concurrent reader could see
+partial batch contents. Note that RocksDB has experimented with
+allowing these semantics with its unordered writes option.
+
+We want to retain the atomic visibility of batch commits. The publish
+batch sequence number step needs to ensure that we don't ratchet the
+visible sequence number until all batches with earlier sequence
+numbers have applied. Enter the commit queue: a lock-free
+single-producer, multi-consumer queue. Batches are added to the commit
+queue with the commit mutex held, ensuring the same order as the
+sequence number assignment. After a batch finishes applying to the
+memtable, it atomically marks the batch as applied. It then removes
+the prefix of applied batches from the commit queue, bumping the
+visible sequence number, and marking the batch as committed (via a
+`sync.WaitGroup`). If the first batch in the commit queue has not been
+applied, we wait for our batch to be committed, relying on another
+concurrent committer to perform the visible sequence ratcheting for
+our batch. We know a concurrent commit is taking place because if
+there was only one batch committing it would be at the head of the
+commit queue.
+
+There are two possibilities when publishing a sequence number. The
+first is that there is an unapplied batch at the head of the
+queue. Consider the following scenario where we're trying to publish
+the sequence number for batch `B`.
+
+```
+ +---------------+-------------+---------------+-----+
+ | A (unapplied) | B (applied) | C (unapplied) | ... 
|
+ +---------------+-------------+---------------+-----+
+```
+
+The publish routine will see that `A` is unapplied and then simply
+wait for `B's` done `sync.WaitGroup` to be signalled. This is safe
+because `A` must still be committing. And if `A` has concurrently been
+marked as applied, the goroutine publishing `A` will then publish
+`B`. What happens when `A` publishes its sequence number? The commit
+queue state becomes:
+
+```
+ +-------------+-------------+---------------+-----+
+ | A (applied) | B (applied) | C (unapplied) | ... |
+ +-------------+-------------+---------------+-----+
+```
+
+The publish routine pops `A` from the queue, ratchets the sequence
+number, then pops `B` and ratchets the sequence number again, and then
+finds `C` and stops. A detail that it is important to notice is that
+the committer for batch `B` didn't have to do any more work. An
+alternative approach would be to have `B` wake up and ratchet its own
+sequence number, but that would serialize the remainder of the commit
+queue behind that goroutine waking up.
+
+The commit queue reduces the number of thread synchronization
+operations required to commit a batch. There is no leader to notify,
+or followers to wait for. A commit either publishes its own sequence
+number, or performs one synchronization operation to wait for a
+concurrent committer to publish its sequence number.
+
+## Range Deletions
+
+Deletion of an individual key in RocksDB and Pebble is accomplished by
+writing a deletion tombstone. A deletion tombstone shadows an existing
+value for a key, causing reads to treat the key as not present. The
+deletion tombstone mechanism works well for deleting small sets of
+keys, but what happens if you want to delete all of the keys within a range
+of keys that might number in the thousands or millions? A range
+deletion is an operation which deletes an entire range of keys with a
+single record. 
In contrast to a point deletion tombstone which +specifies a single key, a range deletion tombstone (a.k.a. range +tombstone) specifies a start key (inclusive) and an end key +(exclusive). This single record is much faster to write than thousands +or millions of point deletion tombstones, and can be done blindly -- +without iterating over the keys that need to be deleted. The downside +to range tombstones is that they require additional processing during +reads. How the processing of range tombstones is done significantly +affects both the complexity of the implementation, and the efficiency +of read operations in the presence of range tombstones. + +A range tombstone is composed of a start key, end key, and sequence +number. Any key that falls within the range is considered deleted if +the key's sequence number is less than the range tombstone's sequence +number. RocksDB stores range tombstones segregated from point +operations in a special range deletion block within each sstable. +Conceptually, the range tombstones stored within an sstable are +truncated to the boundaries of the sstable, though there are +complexities that cause this to not actually be physically true. + +In RocksDB, the main structure implementing range tombstone processing +is the `RangeDelAggregator`. Each read operation and iterator has its +own `RangeDelAggregator` configured for the sequence number the read +is taking place at. The initial implementation of `RangeDelAggregator` +built up a "skyline" for the range tombstones visible at the read +sequence number. + +``` +10 +---+ + 9 | | + 8 | | + 7 | +----+ + 6 | | + 5 +-+ | +----+ + 4 | | | | + 3 | | | +---+ + 2 | | | | + 1 | | | | + 0 | | | | + abcdefghijklmnopqrstuvwxyz +``` + +The above diagram shows the skyline created for the range tombstones +`[b,j)#5`, `[d,h)#10`, `[f,m)#7`, `[p,u)#5`, and `[t,y)#3`. The +skyline is queried for each key read to see if the key should be +considered deleted or not. 
The skyline structure is stored in a binary +tree, making the queries an O(logn) operation in the number of +tombstones, though there is an optimization to make this O(1) for +`next`/`prev` iteration. Note that the skyline representation loses +information about the range tombstones. This requires the structure to +be rebuilt on every read which has a significant performance impact. + +The initial skyline range tombstone implementation has since been +replaced with a more efficient lookup structure. See the +[DeleteRange](https://rocksdb.org/blog/2018/11/21/delete-range.html) +blog post for a good description of both the original implementation +and the new (v2) implementation. The key change in the new +implementation is to "fragment" the range tombstones that are stored +in an sstable. The fragmented range tombstones provide the same +benefit as the skyline representation: the ability to binary search +the fragments in order to find the tombstone covering a key. But +unlike the skyline approach, the fragmented tombstones can be cached +on a per-sstable basis. In the v2 approach, `RangeDelAggregator` keeps +track of the fragmented range tombstones for each sstable encountered +during a read or iterator, and logically merges them together. + +Fragmenting range tombstones involves splitting range tombstones at +overlap points. Let's consider the tombstones in the skyline example +above: + +``` +10: d---h + 7: f------m + 5: b-------j p----u + 3: t----y +``` + +Fragmenting the range tombstones at the overlap points creates a +larger number of range tombstones: + +``` +10: d-f-h + 7: f-h-j--m + 5: b-d-f-h-j p---tu + 3: tu---y +``` + +While the number of tombstones is larger there is a significant +advantage: we can order the tombstones by their start key and then +binary search to find the set of tombstones overlapping a particular +point. 
This is possible because due to the fragmenting, all the +tombstones that overlap a range of keys will have the same start and +end key. The v2 `RangeDelAggregator` and associated classes perform +fragmentation of range tombstones stored in each sstable and those +fragmented tombstones are then cached. + +In summary, in RocksDB `RangeDelAggregator` acts as an oracle for +answering whether a key is deleted at a particular sequence +number. Due to caching of fragmented tombstones, the v2 implementation +of `RangeDelAggregator` implementation is significantly faster to +populate than v1, yet the overall approach to processing range +tombstones remains similar. + +Pebble takes a different approach: it integrates range tombstones +processing directly into the `mergingIter` structure. `mergingIter` is +the internal structure which provides a merged view of the levels in +an LSM. RocksDB has a similar class named +`MergingIterator`. Internally, `mergingIter` maintains a heap over the +levels in the LSM (note that each memtable and L0 table is a separate +"level" in `mergingIter`). In RocksDB, `MergingIterator` knows nothing +about range tombstones, and it is thus up to higher-level code to +process range tombstones using `RangeDelAggregator`. + +While the separation of `MergingIterator` and range tombstones seems +reasonable at first glance, there is an optimization that RocksDB does +not perform which is awkward with the `RangeDelAggregator` approach: +skipping swaths of deleted keys. A range tombstone often shadows more +than one key. Rather than iterating over the deleted keys, it is much +quicker to seek to the end point of the range tombstone. The challenge +in implementing this optimization is that a key might be newer than +the range tombstone and thus shouldn't be skipped. An insight to be +utilized is that the level structure itself provides sufficient +information. A range tombstone at `Ln` is guaranteed to be newer than +any key it overlaps in `Ln+1`. 
+ +Pebble utilizes the insight above to integrate range deletion +processing with `mergingIter`. A `mergingIter` maintains a point +iterator and a range deletion iterator per level in the LSM. In this +context, every L0 table is a separate level, as is every +memtable. Within a level, when a range deletion contains a point +operation the sequence numbers must be checked to determine if the +point operation is newer or older than the range deletion +tombstone. The `mergingIter` maintains the invariant that the range +deletion iterators for all levels newer than the current iteration key +are positioned at the next (or previous during reverse iteration) +range deletion tombstone. We know those levels don't contain a range +deletion tombstone that covers the current key because if they did the +current key would be deleted. The range deletion iterator for the +current key's level is positioned at a range tombstone covering or +past the current key. The position of all of the other range deletion +iterators is unspecified. Whenever a key from those levels becomes the +current key, their range deletion iterators need to be +positioned. This lazy positioning avoids seeking the range deletion +iterators for keys that are never considered. + +For a full example, consider the following setup: + +``` + p0: o + r0: m---q + + p1: n p + r1: g---k + + p2: b d i + r2: a---e q----v + + p3: e + r3: +``` + +The diagram above is showing 4 levels, with `pX` indicating the +point operations in a level and `rX` indicating the range tombstones. + +If we start iterating from the beginning, the first key we encounter +is `b` in `p2`. When the mergingIter is pointing at a valid entry, the +range deletion iterators for all of the levels less than the current +key's level are positioned at the next range tombstone past the +current key. So `r0` will point at `[m,q)` and `r1` at `[g,k)`. 
When +the key `b` is encountered, we check to see if the current tombstone +for `r0` or `r1` contains it, and whether the tombstone for `r2`, +`[a,e)`, contains and is newer than `b`. + +Advancing the iterator finds the next key at `d`. This is in the same +level as the previous key `b` so we don't have to reposition any of +the range deletion iterators, but merely check whether `d` is now +contained by any of the range tombstones at higher levels or has +stepped past the range tombstone in its own level. In this case, there +is nothing to be done. + +Advancing the iterator again finds `e`. Since `e` comes from `p3`, we +have to position the `r3` range deletion iterator, which is empty. `e` +is past the `r2` tombstone of `[a,e)` so we need to advance the `r2` +range deletion iterator to `[q,v)`. + +The next key is `i`. Because this key is in `p2`, a level above `e`, +we don't have to reposition any range deletion iterators and instead +see that `i` is covered by the range tombstone `[g,k)`. The iterator +is immediately advanced to `n` which is covered by the range tombstone +`[m,q)` causing the iterator to advance to `o` which is visible. + +## Flush and Compaction Pacing + +Flushes and compactions in LSM trees are problematic because they +contend with foreground traffic, resulting in write and read latency +spikes. Without throttling the rate of flushes and compactions, they +occur "as fast as possible" (which is not entirely true, since we +have a `bytes_per_sync` option). This instantaneous usage of CPU and +disk IO results in potentially huge latency spikes for writes and +reads which occur in parallel to the flushes and compactions. + +RocksDB attempts to solve this issue by offering an option to limit +the speed of flushes and compactions. A maximum `bytes/sec` can be +specified through the options, and background IO usage will be limited +to the specified amount. Flushes are given priority over compactions, +but they still use the same rate limiter. 
Though simple to implement +and understand, this option is fragile for various reasons. + +1) If the rate limit is configured too low, the DB will stall and +write throughput will be affected. +2) If the rate limit is configured too high, the write and read +latency spikes will persist. +3) A different configuration is needed per system depending on the +speed of the storage device. +4) Write rates typically do not stay the same throughout the lifetime +of the DB (higher throughput during certain times of the day, etc) but +the rate limit cannot be configured during runtime. + +RocksDB also offers an +["auto-tuned" rate limiter](https://rocksdb.org/blog/2017/12/18/17-auto-tuned-rate-limiter.html) +which uses a simple multiplicative-increase, multiplicative-decrease +algorithm to dynamically adjust the background IO rate limit depending +on how much of the rate limiter has been exhausted in an interval. +This solves the problem of having a static rate limit, but Pebble +attempts to improve on this with a different pacing mechanism. + +Pebble's pacing mechanism uses separate rate limiters for flushes and +compactions. Both the flush and compaction pacing mechanisms work by +attempting to flush and compact only as fast as needed and no faster. +This is achieved differently for flushes versus compactions. + +For flush pacing, Pebble keeps the rate at which the memtable is +flushed at the same rate as user writes. This ensures that disk IO +used by flushes remains steady. When a mutable memtable becomes full +and is marked immutable, it is typically flushed as fast as possible. +Instead of flushing as fast as possible, what we do is look at the +total number of bytes in all the memtables (mutable + queue of +immutables) and subtract the number of bytes that have been flushed in +the current flush. This number gives us the total number of bytes +which remain to be flushed. 
If we keep this number steady at a constant +level, we have the invariant that the flush rate is equal to the write +rate. + +When the number of bytes remaining to be flushed falls below our +target level, we slow down the speed of flushing. We keep a minimum +rate at which the memtable is flushed so that flushes proceed even if +writes have stopped. When the number of bytes remaining to be flushed +goes above our target level, we allow the flush to proceed as fast as +possible, without applying any rate limiting. However, note that the +second case would indicate that writes are occurring faster than the +memtable can flush, which would be an unsustainable rate. The LSM +would soon hit the memtable count stall condition and writes would be +completely stopped. + +For compaction pacing, Pebble uses an estimation of compaction debt, +which is the number of bytes which need to be compacted before no +further compactions are needed. This estimation is calculated by +looking at the number of bytes that have been flushed by the current +flush routine, adding those bytes to the size of the level 0 sstables, +then seeing how many bytes exceed the target number of bytes for the +level 0 sstables. We multiply the number of bytes exceeded by the +level ratio and add that number to the compaction debt estimate. +We repeat this process until the final level, which gives us a final +compaction debt estimate for the entire LSM tree. + +Like with flush pacing, we want to keep the compaction debt at a +constant level. This ensures that compactions occur only as fast as +needed and no faster. If the compaction debt estimate falls below our +target level, we slow down compactions. We maintain a minimum +compaction rate so that compactions proceed even if flushes have +stopped. If the compaction debt goes above our target level, we let +compactions proceed as fast as possible without any rate limiting. 
+Just like with flush pacing, this would indicate that writes are +occurring faster than the background compactions can keep up with, +which is an unsustainable rate. The LSM's read amplification would +increase and the L0 file count stall condition would be hit. + +With the combined flush and compaction pacing mechanisms, flushes and +compactions only occur as fast as needed and no faster, which reduces +latency spikes for user read and write operations. + +## Write throttling + +RocksDB adds artificial delays to user writes when certain thresholds +are met, such as `l0_slowdown_writes_threshold`. These artificial +delays occur when the system is close to stalling to lessen the write +pressure so that flushing and compactions can catch up. On the surface +this seems good, since write stalls would seemingly be eliminated and +replaced with gradual slowdowns. Closed loop write latency benchmarks +would show the elimination of abrupt write stalls, which seems +desirable. + +However, this doesn't do anything to improve latencies in an open loop +model, which is the model more likely to resemble real world use +cases. Artificial delays increase write latencies without a clear +benefit. Writes stalls in an open loop system would indicate that +writes are generated faster than the system could possibly handle, +which adding artificial delays won't solve. + +For this reason, Pebble doesn't add artificial delays to user writes +and writes are served as quickly as possible. 
+ +### Other Differences + +* `internalIterator` API which minimizes indirect (virtual) function + calls +* Previous pointers in the memtable and indexed batch skiplists +* Elision of per-key lower/upper bound checks in long range scans +* Improved `Iterator` API + + `SeekPrefixGE` for prefix iteration + + `SetBounds` for adjusting the bounds on an existing `Iterator` +* Simpler `Get` implementation diff --git a/pebble/error_iter.go b/pebble/error_iter.go new file mode 100644 index 0000000..10bc9cc --- /dev/null +++ b/pebble/error_iter.go @@ -0,0 +1,86 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import ( + "context" + + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/keyspan" +) + +type errorIter struct { + err error +} + +// errorIter implements the base.InternalIterator interface. +var _ internalIterator = (*errorIter)(nil) + +func (c *errorIter) SeekGE(key []byte, flags base.SeekGEFlags) (*InternalKey, base.LazyValue) { + return nil, base.LazyValue{} +} + +func (c *errorIter) SeekPrefixGE( + prefix, key []byte, flags base.SeekGEFlags, +) (*base.InternalKey, base.LazyValue) { + return nil, base.LazyValue{} +} + +func (c *errorIter) SeekLT(key []byte, flags base.SeekLTFlags) (*InternalKey, base.LazyValue) { + return nil, base.LazyValue{} +} + +func (c *errorIter) First() (*InternalKey, base.LazyValue) { + return nil, base.LazyValue{} +} + +func (c *errorIter) Last() (*InternalKey, base.LazyValue) { + return nil, base.LazyValue{} +} + +func (c *errorIter) Next() (*InternalKey, base.LazyValue) { + return nil, base.LazyValue{} +} + +func (c *errorIter) Prev() (*InternalKey, base.LazyValue) { + return nil, base.LazyValue{} +} + +func (c *errorIter) NextPrefix([]byte) (*InternalKey, base.LazyValue) { + return nil, base.LazyValue{} +} + +func (c *errorIter) Error() error { 
+ return c.err +} + +func (c *errorIter) Close() error { + return c.err +} + +func (c *errorIter) String() string { + return "error" +} + +func (c *errorIter) SetBounds(lower, upper []byte) {} + +func (c *errorIter) SetContext(_ context.Context) {} + +type errorKeyspanIter struct { + err error +} + +// errorKeyspanIter implements the keyspan.FragmentIterator interface. +var _ keyspan.FragmentIterator = (*errorKeyspanIter)(nil) + +func (*errorKeyspanIter) SeekGE(key []byte) *keyspan.Span { return nil } +func (*errorKeyspanIter) SeekLT(key []byte) *keyspan.Span { return nil } +func (*errorKeyspanIter) First() *keyspan.Span { return nil } +func (*errorKeyspanIter) Last() *keyspan.Span { return nil } +func (*errorKeyspanIter) Next() *keyspan.Span { return nil } +func (*errorKeyspanIter) Prev() *keyspan.Span { return nil } +func (i *errorKeyspanIter) Error() error { return i.err } +func (i *errorKeyspanIter) Close() error { return i.err } +func (*errorKeyspanIter) String() string { return "error" } diff --git a/pebble/error_test.go b/pebble/error_test.go new file mode 100644 index 0000000..82af4a4 --- /dev/null +++ b/pebble/error_test.go @@ -0,0 +1,429 @@ +// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import ( + "bytes" + "fmt" + "math" + "strings" + "sync/atomic" + "testing" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/vfs" + "github.com/cockroachdb/pebble/vfs/errorfs" + "github.com/stretchr/testify/require" +) + +type panicLogger struct{} + +func (l panicLogger) Infof(format string, args ...interface{}) {} +func (l panicLogger) Errorf(format string, args ...interface{}) {} + +func (l panicLogger) Fatalf(format string, args ...interface{}) { + panic(errors.Errorf("fatal: "+format, args...)) +} + +// corruptFS injects a corruption in the `index`th byte read. 
+type corruptFS struct { + vfs.FS + // index is the index of the byte which we will corrupt. + index atomic.Int32 + bytesRead atomic.Int32 +} + +func (fs *corruptFS) maybeCorrupt(n int32, p []byte) { + newBytesRead := fs.bytesRead.Add(n) + pIdx := newBytesRead - 1 - fs.index.Load() + if pIdx >= 0 && pIdx < n { + p[pIdx]++ + } +} + +func (fs *corruptFS) maybeCorruptAt(n int32, p []byte, offset int64) { + pIdx := fs.index.Load() - int32(offset) + if pIdx >= 0 && pIdx < n { + p[pIdx]++ + } +} + +func (fs *corruptFS) Open(name string, opts ...vfs.OpenOption) (vfs.File, error) { + f, err := fs.FS.Open(name) + if err != nil { + return nil, err + } + cf := corruptFile{f, fs} + for _, opt := range opts { + opt.Apply(cf) + } + return cf, nil +} + +type corruptFile struct { + vfs.File + fs *corruptFS +} + +func (f corruptFile) Read(p []byte) (int, error) { + n, err := f.File.Read(p) + f.fs.maybeCorrupt(int32(n), p) + return n, err +} + +func (f corruptFile) ReadAt(p []byte, off int64) (int, error) { + n, err := f.File.ReadAt(p, off) + f.fs.maybeCorruptAt(int32(n), p, off) + return n, err +} + +func expectLSM(expected string, d *DB, t *testing.T) { + t.Helper() + expected = strings.TrimSpace(expected) + d.mu.Lock() + actual := d.mu.versions.currentVersion().String() + d.mu.Unlock() + actual = strings.TrimSpace(actual) + if expected != actual { + t.Fatalf("expected\n%s\nbut found\n%s", expected, actual) + } +} + +// TestErrors repeatedly runs a short sequence of operations, injecting FS +// errors at different points, until success is achieved. 
+func TestErrors(t *testing.T) { + run := func(fs *errorfs.FS) (err error) { + defer func() { + if r := recover(); r != nil { + if e, ok := r.(error); ok { + err = e + } else { + t.Fatal(r) + } + } + }() + + d, err := Open("", &Options{ + FS: fs, + Logger: panicLogger{}, + }) + if err != nil { + return err + } + + key := []byte("a") + value := []byte("b") + if err := d.Set(key, value, nil); err != nil { + return err + } + if err := d.Flush(); err != nil { + return err + } + if err := d.Compact(nil, []byte("\xff"), false); err != nil { + return err + } + + iter, _ := d.NewIter(nil) + for valid := iter.First(); valid; valid = iter.Next() { + } + if err := iter.Close(); err != nil { + return err + } + return d.Close() + } + + errorCounts := make(map[string]int) + for i := int32(0); ; i++ { + fs := errorfs.Wrap(vfs.NewMem(), errorfs.ErrInjected.If(errorfs.OnIndex(i))) + err := run(fs) + if err == nil { + t.Logf("success %d\n", i) + break + } + errorCounts[err.Error()]++ + } + + expectedErrors := []string{ + "fatal: MANIFEST flush failed: injected error", + "fatal: MANIFEST sync failed: injected error", + "fatal: MANIFEST set current failed: injected error", + "fatal: MANIFEST dirsync failed: injected error", + } + for _, expected := range expectedErrors { + if errorCounts[expected] == 0 { + t.Errorf("expected error %q did not occur", expected) + } + } +} + +// TestRequireReadError injects FS errors into read operations at successively later +// points until all operations can complete. It requires an operation fails any time +// an error was injected. This differs from the TestErrors case above as that one +// cannot require operations fail since it involves flush/compaction, which retry +// internally and succeed following an injected error. +func TestRequireReadError(t *testing.T) { + run := func(formatVersion FormatMajorVersion, index int32) (err error) { + // Perform setup with error injection disabled as it involves writes/background ops. 
+ ii := errorfs.OnIndex(-1) + fs := errorfs.Wrap(vfs.NewMem(), errorfs.ErrInjected.If(ii)) + opts := &Options{ + FS: fs, + Logger: panicLogger{}, + FormatMajorVersion: formatVersion, + } + opts.private.disableTableStats = true + d, err := Open("", opts) + require.NoError(t, err) + + defer func() { + if d != nil { + require.NoError(t, d.Close()) + } + }() + + key1 := []byte("a1") + key2 := []byte("a2") + value := []byte("b") + require.NoError(t, d.Set(key1, value, nil)) + require.NoError(t, d.Set(key2, value, nil)) + require.NoError(t, d.Flush()) + require.NoError(t, d.Compact(key1, key2, false)) + require.NoError(t, d.DeleteRange(key1, key2, nil)) + require.NoError(t, d.Set(key1, value, nil)) + require.NoError(t, d.Flush()) + if formatVersion < FormatSetWithDelete { + expectLSM(` +0.0: + 000007:[a1#13,SET-a2#inf,RANGEDEL] +6: + 000005:[a1#10,SET-a2#11,SET] +`, d, t) + } else { + expectLSM(` +0.0: + 000007:[a1#13,SETWITHDEL-a2#inf,RANGEDEL] +6: + 000005:[a1#10,SET-a2#11,SET] +`, d, t) + } + + // Now perform foreground ops with error injection enabled. + ii.Store(index) + iter, _ := d.NewIter(nil) + if err := iter.Error(); err != nil { + return err + } + numFound := 0 + expectedKeys := [][]byte{key1, key2} + for valid := iter.First(); valid; valid = iter.Next() { + if !bytes.Equal(iter.Key(), expectedKeys[numFound]) { + t.Fatalf("expected key %v; found %v", expectedKeys[numFound], iter.Key()) + } + if !bytes.Equal(iter.Value(), value) { + t.Fatalf("expected value %v; found %v", value, iter.Value()) + } + numFound++ + } + if err := iter.Close(); err != nil { + return err + } + if err := d.Close(); err != nil { + d = nil + return err + } + d = nil + // Reaching here implies all read operations succeeded. This + // should only happen when we reached a large enough index at + // which `errorfs.FS` did not return any error. 
+ if i := ii.Load(); i < 0 { + t.Errorf("FS error injected %d ops ago went unreported", -i) + } + if numFound != 2 { + t.Fatalf("expected 2 values; found %d", numFound) + } + return nil + } + + versions := []FormatMajorVersion{FormatMostCompatible, FormatSetWithDelete} + for _, version := range versions { + t.Run(fmt.Sprintf("version-%s", version), func(t *testing.T) { + for i := int32(0); ; i++ { + err := run(version, i) + if err == nil { + t.Logf("no failures reported at index %d", i) + break + } + } + }) + } +} + +// TestCorruptReadError verifies that reads to a corrupted file detect the +// corruption and return an error. In this case the filesystem reads return +// successful status but the data they return is corrupt. +func TestCorruptReadError(t *testing.T) { + run := func(formatVersion FormatMajorVersion, index int32) (err error) { + // Perform setup with corruption injection disabled as it involves writes/background ops. + fs := &corruptFS{ + FS: vfs.NewMem(), + } + fs.index.Store(-1) + opts := &Options{ + FS: fs, + Logger: panicLogger{}, + FormatMajorVersion: formatVersion, + } + opts.private.disableTableStats = true + d, err := Open("", opts) + if err != nil { + t.Fatalf("%v", err) + } + defer func() { + if d != nil { + require.NoError(t, d.Close()) + } + }() + + key1 := []byte("a1") + key2 := []byte("a2") + value := []byte("b") + require.NoError(t, d.Set(key1, value, nil)) + require.NoError(t, d.Set(key2, value, nil)) + require.NoError(t, d.Flush()) + require.NoError(t, d.Compact(key1, key2, false)) + require.NoError(t, d.DeleteRange(key1, key2, nil)) + require.NoError(t, d.Set(key1, value, nil)) + require.NoError(t, d.Flush()) + if formatVersion < FormatSetWithDelete { + expectLSM(` +0.0: + 000007:[a1#13,SET-a2#inf,RANGEDEL] +6: + 000005:[a1#10,SET-a2#11,SET] +`, d, t) + + } else { + expectLSM(` +0.0: + 000007:[a1#13,SETWITHDEL-a2#inf,RANGEDEL] +6: + 000005:[a1#10,SET-a2#11,SET] +`, d, t) + } + + // Now perform foreground ops with corruption injection 
enabled. + fs.index.Store(index) + iter, _ := d.NewIter(nil) + if err := iter.Error(); err != nil { + return err + } + + numFound := 0 + expectedKeys := [][]byte{key1, key2} + for valid := iter.First(); valid; valid = iter.Next() { + if !bytes.Equal(iter.Key(), expectedKeys[numFound]) { + t.Fatalf("expected key %v; found %v", expectedKeys[numFound], iter.Key()) + } + if !bytes.Equal(iter.Value(), value) { + t.Fatalf("expected value %v; found %v", value, iter.Value()) + } + numFound++ + } + if err := iter.Close(); err != nil { + return err + } + if err := d.Close(); err != nil { + return err + } + d = nil + // Reaching here implies all read operations succeeded. This + // should only happen when we reached a large enough index at + // which `corruptFS` did not inject any corruption. + if bytesRead := fs.bytesRead.Load(); bytesRead > index { + t.Errorf("corruption error injected at index %d went unreported", index) + } + if numFound != 2 { + t.Fatalf("expected 2 values; found %d", numFound) + } + return nil + } + versions := []FormatMajorVersion{FormatMostCompatible, FormatSetWithDelete} + for _, version := range versions { + t.Run(fmt.Sprintf("version-%s", version), func(t *testing.T) { + for i := int32(0); ; i++ { + err := run(version, i) + if err == nil { + t.Logf("no failures reported at index %d", i) + break + } + } + }) + } +} + +func TestDBWALRotationCrash(t *testing.T) { + memfs := vfs.NewStrictMem() + + var index atomic.Int32 + inj := errorfs.InjectorFunc(func(op errorfs.Op) error { + if op.Kind.ReadOrWrite() == errorfs.OpIsWrite && index.Add(-1) == -1 { + memfs.SetIgnoreSyncs(true) + } + return nil + }) + triggered := func() bool { return index.Load() < 0 } + + run := func(fs *errorfs.FS, k int32) (err error) { + opts := &Options{ + FS: fs, + Logger: panicLogger{}, + MemTableSize: 2048, + } + opts.private.disableTableStats = true + d, err := Open("", opts) + if err != nil || triggered() { + return err + } + + // Write keys with the FS set up to simulate a 
crash by ignoring + // syncs on the k-th write operation. + index.Store(k) + key := []byte("test") + for i := 0; i < 10; i++ { + v := []byte(strings.Repeat("b", i)) + err = d.Set(key, v, nil) + if err != nil || triggered() { + break + } + } + err = firstError(err, d.Close()) + return err + } + + fs := errorfs.Wrap(memfs, inj) + for k := int32(0); ; k++ { + // Run, simulating a crash by ignoring syncs after the k-th write + // operation after Open. + index.Store(math.MaxInt32) + err := run(fs, k) + if !triggered() { + // Stop when we reach a value of k greater than the number of + // write operations performed during `run`. + t.Logf("No crash at write operation %d\n", k) + if err != nil { + t.Fatalf("Filesystem did not 'crash', but error returned: %s", err) + } + break + } + t.Logf("Crashed at write operation % 2d, error: %v\n", k, err) + + // Reset the filesystem to its state right before the simulated + // "crash", restore syncs, and run again without crashing. + memfs.ResetToSyncedState() + memfs.SetIgnoreSyncs(false) + index.Store(math.MaxInt32) + require.NoError(t, run(fs, k)) + } +} diff --git a/pebble/event.go b/pebble/event.go new file mode 100644 index 0000000..ea527ef --- /dev/null +++ b/pebble/event.go @@ -0,0 +1,767 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import ( + "fmt" + "strings" + "time" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/humanize" + "github.com/cockroachdb/pebble/internal/invariants" + "github.com/cockroachdb/pebble/internal/manifest" + "github.com/cockroachdb/pebble/vfs" + "github.com/cockroachdb/redact" +) + +// TableInfo exports the manifest.TableInfo type. 
+type TableInfo = manifest.TableInfo + +func tablesTotalSize(tables []TableInfo) uint64 { + var size uint64 + for i := range tables { + size += tables[i].Size + } + return size +} + +func formatFileNums(tables []TableInfo) string { + var buf strings.Builder + for i := range tables { + if i > 0 { + buf.WriteString(" ") + } + buf.WriteString(tables[i].FileNum.String()) + } + return buf.String() +} + +// LevelInfo contains info pertaining to a particular level. +type LevelInfo struct { + Level int + Tables []TableInfo + Score float64 +} + +func (i LevelInfo) String() string { + return redact.StringWithoutMarkers(i) +} + +// SafeFormat implements redact.SafeFormatter. +func (i LevelInfo) SafeFormat(w redact.SafePrinter, _ rune) { + w.Printf("L%d [%s] (%s) Score=%.2f", + redact.Safe(i.Level), + redact.Safe(formatFileNums(i.Tables)), + redact.Safe(humanize.Bytes.Uint64(tablesTotalSize(i.Tables))), + redact.Safe(i.Score)) +} + +// CompactionInfo contains the info for a compaction event. +type CompactionInfo struct { + // JobID is the ID of the compaction job. + JobID int + // Reason is the reason for the compaction. + Reason string + // Input contains the input tables for the compaction organized by level. + Input []LevelInfo + // Output contains the output tables generated by the compaction. The output + // tables are empty for the compaction begin event. + Output LevelInfo + // Duration is the time spent compacting, including reading and writing + // sstables. + Duration time.Duration + // TotalDuration is the total wall-time duration of the compaction, + // including applying the compaction to the database. TotalDuration is + // always ≥ Duration. 
+ TotalDuration time.Duration + Done bool + Err error + + SingleLevelOverlappingRatio float64 + MultiLevelOverlappingRatio float64 + + // Annotations specifies additional info to appear in a compaction's event log line + Annotations compactionAnnotations +} + +type compactionAnnotations []string + +// SafeFormat implements redact.SafeFormatter. +func (ca compactionAnnotations) SafeFormat(w redact.SafePrinter, _ rune) { + if len(ca) == 0 { + return + } + for i := range ca { + if i != 0 { + w.Print(" ") + } + w.Printf("%s", redact.SafeString(ca[i])) + } +} + +func (i CompactionInfo) String() string { + return redact.StringWithoutMarkers(i) +} + +// SafeFormat implements redact.SafeFormatter. +func (i CompactionInfo) SafeFormat(w redact.SafePrinter, _ rune) { + if i.Err != nil { + w.Printf("[JOB %d] compaction(%s) to L%d error: %s", + redact.Safe(i.JobID), redact.SafeString(i.Reason), redact.Safe(i.Output.Level), i.Err) + return + } + + if !i.Done { + w.Printf("[JOB %d] compacting(%s) ", + redact.Safe(i.JobID), + redact.SafeString(i.Reason)) + w.Printf("%s", i.Annotations) + w.Printf("%s; ", levelInfos(i.Input)) + w.Printf("OverlappingRatio: Single %.2f, Multi %.2f", i.SingleLevelOverlappingRatio, i.MultiLevelOverlappingRatio) + return + } + outputSize := tablesTotalSize(i.Output.Tables) + w.Printf("[JOB %d] compacted(%s) ", redact.Safe(i.JobID), redact.SafeString(i.Reason)) + w.Printf("%s", i.Annotations) + w.Print(levelInfos(i.Input)) + w.Printf(" -> L%d [%s] (%s), in %.1fs (%.1fs total), output rate %s/s", + redact.Safe(i.Output.Level), + redact.Safe(formatFileNums(i.Output.Tables)), + redact.Safe(humanize.Bytes.Uint64(outputSize)), + redact.Safe(i.Duration.Seconds()), + redact.Safe(i.TotalDuration.Seconds()), + redact.Safe(humanize.Bytes.Uint64(uint64(float64(outputSize)/i.Duration.Seconds())))) +} + +type levelInfos []LevelInfo + +func (i levelInfos) SafeFormat(w redact.SafePrinter, _ rune) { + for j, levelInfo := range i { + if j > 0 { + w.Printf(" + ") + } + 
w.Print(levelInfo) + } +} + +// DiskSlowInfo contains the info for a disk slowness event when writing to a +// file. +type DiskSlowInfo = vfs.DiskSlowInfo + +// FlushInfo contains the info for a flush event. +type FlushInfo struct { + // JobID is the ID of the flush job. + JobID int + // Reason is the reason for the flush. + Reason string + // Input contains the count of input memtables that were flushed. + Input int + // InputBytes contains the total in-memory size of the memtable(s) that were + // flushed. This size includes skiplist indexing data structures. + InputBytes uint64 + // Output contains the ouptut table generated by the flush. The output info + // is empty for the flush begin event. + Output []TableInfo + // Duration is the time spent flushing. This duration includes writing and + // syncing all of the flushed keys to sstables. + Duration time.Duration + // TotalDuration is the total wall-time duration of the flush, including + // applying the flush to the database. TotalDuration is always ≥ Duration. + TotalDuration time.Duration + // Ingest is set to true if the flush is handling tables that were added to + // the flushable queue via an ingestion operation. + Ingest bool + // IngestLevels are the output levels for each ingested table in the flush. + // This field is only populated when Ingest is true. + IngestLevels []int + Done bool + Err error +} + +func (i FlushInfo) String() string { + return redact.StringWithoutMarkers(i) +} + +// SafeFormat implements redact.SafeFormatter. 
+func (i FlushInfo) SafeFormat(w redact.SafePrinter, _ rune) { + if i.Err != nil { + w.Printf("[JOB %d] flush error: %s", redact.Safe(i.JobID), i.Err) + return + } + + plural := redact.SafeString("s") + if i.Input == 1 { + plural = "" + } + if !i.Done { + w.Printf("[JOB %d] ", redact.Safe(i.JobID)) + if !i.Ingest { + w.Printf("flushing %d memtable", redact.Safe(i.Input)) + w.SafeString(plural) + w.Printf(" (%s) to L0", redact.Safe(humanize.Bytes.Uint64(i.InputBytes))) + } else { + w.Printf("flushing %d ingested table%s", redact.Safe(i.Input), plural) + } + return + } + + outputSize := tablesTotalSize(i.Output) + if !i.Ingest { + if invariants.Enabled && len(i.IngestLevels) > 0 { + panic(errors.AssertionFailedf("pebble: expected len(IngestedLevels) == 0")) + } + w.Printf("[JOB %d] flushed %d memtable%s (%s) to L0 [%s] (%s), in %.1fs (%.1fs total), output rate %s/s", + redact.Safe(i.JobID), redact.Safe(i.Input), plural, + redact.Safe(humanize.Bytes.Uint64(i.InputBytes)), + redact.Safe(formatFileNums(i.Output)), + redact.Safe(humanize.Bytes.Uint64(outputSize)), + redact.Safe(i.Duration.Seconds()), + redact.Safe(i.TotalDuration.Seconds()), + redact.Safe(humanize.Bytes.Uint64(uint64(float64(outputSize)/i.Duration.Seconds())))) + } else { + if invariants.Enabled && len(i.IngestLevels) == 0 { + panic(errors.AssertionFailedf("pebble: expected len(IngestedLevels) > 0")) + } + w.Printf("[JOB %d] flushed %d ingested flushable%s", + redact.Safe(i.JobID), redact.Safe(len(i.Output)), plural) + for j, level := range i.IngestLevels { + file := i.Output[j] + if j > 0 { + w.Printf(" +") + } + w.Printf(" L%d:%s (%s)", level, file.FileNum, humanize.Bytes.Uint64(file.Size)) + } + w.Printf(" in %.1fs (%.1fs total), output rate %s/s", + redact.Safe(i.Duration.Seconds()), + redact.Safe(i.TotalDuration.Seconds()), + redact.Safe(humanize.Bytes.Uint64(uint64(float64(outputSize)/i.Duration.Seconds())))) + } +} + +// ManifestCreateInfo contains info about a manifest creation event. 
+type ManifestCreateInfo struct { + // JobID is the ID of the job the caused the manifest to be created. + JobID int + Path string + // The file number of the new Manifest. + FileNum base.DiskFileNum + Err error +} + +func (i ManifestCreateInfo) String() string { + return redact.StringWithoutMarkers(i) +} + +// SafeFormat implements redact.SafeFormatter. +func (i ManifestCreateInfo) SafeFormat(w redact.SafePrinter, _ rune) { + if i.Err != nil { + w.Printf("[JOB %d] MANIFEST create error: %s", redact.Safe(i.JobID), i.Err) + return + } + w.Printf("[JOB %d] MANIFEST created %s", redact.Safe(i.JobID), i.FileNum) +} + +// ManifestDeleteInfo contains the info for a Manifest deletion event. +type ManifestDeleteInfo struct { + // JobID is the ID of the job the caused the Manifest to be deleted. + JobID int + Path string + FileNum FileNum + Err error +} + +func (i ManifestDeleteInfo) String() string { + return redact.StringWithoutMarkers(i) +} + +// SafeFormat implements redact.SafeFormatter. +func (i ManifestDeleteInfo) SafeFormat(w redact.SafePrinter, _ rune) { + if i.Err != nil { + w.Printf("[JOB %d] MANIFEST delete error: %s", redact.Safe(i.JobID), i.Err) + return + } + w.Printf("[JOB %d] MANIFEST deleted %s", redact.Safe(i.JobID), i.FileNum) +} + +// TableCreateInfo contains the info for a table creation event. +type TableCreateInfo struct { + JobID int + // Reason is the reason for the table creation: "compacting", "flushing", or + // "ingesting". + Reason string + Path string + FileNum FileNum +} + +func (i TableCreateInfo) String() string { + return redact.StringWithoutMarkers(i) +} + +// SafeFormat implements redact.SafeFormatter. +func (i TableCreateInfo) SafeFormat(w redact.SafePrinter, _ rune) { + w.Printf("[JOB %d] %s: sstable created %s", + redact.Safe(i.JobID), redact.Safe(i.Reason), i.FileNum) +} + +// TableDeleteInfo contains the info for a table deletion event. 
+type TableDeleteInfo struct { + JobID int + Path string + FileNum FileNum + Err error +} + +func (i TableDeleteInfo) String() string { + return redact.StringWithoutMarkers(i) +} + +// SafeFormat implements redact.SafeFormatter. +func (i TableDeleteInfo) SafeFormat(w redact.SafePrinter, _ rune) { + if i.Err != nil { + w.Printf("[JOB %d] sstable delete error %s: %s", + redact.Safe(i.JobID), i.FileNum, i.Err) + return + } + w.Printf("[JOB %d] sstable deleted %s", redact.Safe(i.JobID), i.FileNum) +} + +// TableIngestInfo contains the info for a table ingestion event. +type TableIngestInfo struct { + // JobID is the ID of the job that caused the table to be ingested. + JobID int + Tables []struct { + TableInfo + Level int + } + // GlobalSeqNum is the sequence number that was assigned to all entries in + // the ingested table. + GlobalSeqNum uint64 + // flushable indicates whether the ingested sstable was treated as a + // flushable. + flushable bool + Err error +} + +func (i TableIngestInfo) String() string { + return redact.StringWithoutMarkers(i) +} + +// SafeFormat implements redact.SafeFormatter. +func (i TableIngestInfo) SafeFormat(w redact.SafePrinter, _ rune) { + if i.Err != nil { + w.Printf("[JOB %d] ingest error: %s", redact.Safe(i.JobID), i.Err) + return + } + + if i.flushable { + w.Printf("[JOB %d] ingested as flushable", redact.Safe(i.JobID)) + } else { + w.Printf("[JOB %d] ingested", redact.Safe(i.JobID)) + } + + for j := range i.Tables { + t := &i.Tables[j] + if j > 0 { + w.Printf(",") + } + levelStr := "" + if !i.flushable { + levelStr = fmt.Sprintf("L%d:", t.Level) + } + w.Printf(" %s%s (%s)", redact.Safe(levelStr), t.FileNum, + redact.Safe(humanize.Bytes.Uint64(t.Size))) + } +} + +// TableStatsInfo contains the info for a table stats loaded event. +type TableStatsInfo struct { + // JobID is the ID of the job that finished loading the initial tables' + // stats. 
+ JobID int +} + +func (i TableStatsInfo) String() string { + return redact.StringWithoutMarkers(i) +} + +// SafeFormat implements redact.SafeFormatter. +func (i TableStatsInfo) SafeFormat(w redact.SafePrinter, _ rune) { + w.Printf("[JOB %d] all initial table stats loaded", redact.Safe(i.JobID)) +} + +// TableValidatedInfo contains information on the result of a validation run +// on an sstable. +type TableValidatedInfo struct { + JobID int + Meta *fileMetadata +} + +func (i TableValidatedInfo) String() string { + return redact.StringWithoutMarkers(i) +} + +// SafeFormat implements redact.SafeFormatter. +func (i TableValidatedInfo) SafeFormat(w redact.SafePrinter, _ rune) { + w.Printf("[JOB %d] validated table: %s", redact.Safe(i.JobID), i.Meta) +} + +// WALCreateInfo contains info about a WAL creation event. +type WALCreateInfo struct { + // JobID is the ID of the job that caused the WAL to be created. + JobID int + Path string + // The file number of the new WAL. + FileNum base.DiskFileNum + // The file number of a previous WAL which was recycled to create this + // one. Zero if recycling did not take place. + RecycledFileNum FileNum + Err error +} + +func (i WALCreateInfo) String() string { + return redact.StringWithoutMarkers(i) +} + +// SafeFormat implements redact.SafeFormatter. +func (i WALCreateInfo) SafeFormat(w redact.SafePrinter, _ rune) { + if i.Err != nil { + w.Printf("[JOB %d] WAL create error: %s", redact.Safe(i.JobID), i.Err) + return + } + + if i.RecycledFileNum == 0 { + w.Printf("[JOB %d] WAL created %s", redact.Safe(i.JobID), i.FileNum) + return + } + + w.Printf("[JOB %d] WAL created %s (recycled %s)", + redact.Safe(i.JobID), i.FileNum, i.RecycledFileNum) +} + +// WALDeleteInfo contains the info for a WAL deletion event. +type WALDeleteInfo struct { + // JobID is the ID of the job that caused the WAL to be deleted. 
+ JobID int + Path string + FileNum FileNum + Err error +} + +func (i WALDeleteInfo) String() string { + return redact.StringWithoutMarkers(i) +} + +// SafeFormat implements redact.SafeFormatter. +func (i WALDeleteInfo) SafeFormat(w redact.SafePrinter, _ rune) { + if i.Err != nil { + w.Printf("[JOB %d] WAL delete error: %s", redact.Safe(i.JobID), i.Err) + return + } + w.Printf("[JOB %d] WAL deleted %s", redact.Safe(i.JobID), i.FileNum) +} + +// WriteStallBeginInfo contains the info for a write stall begin event. +type WriteStallBeginInfo struct { + Reason string +} + +func (i WriteStallBeginInfo) String() string { + return redact.StringWithoutMarkers(i) +} + +// SafeFormat implements redact.SafeFormatter. +func (i WriteStallBeginInfo) SafeFormat(w redact.SafePrinter, _ rune) { + w.Printf("write stall beginning: %s", redact.Safe(i.Reason)) +} + +// EventListener contains a set of functions that will be invoked when various +// significant DB events occur. Note that the functions should not run for an +// excessive amount of time as they are invoked synchronously by the DB and may +// block continued DB work. For a similar reason it is advisable to not perform +// any synchronous calls back into the DB. +type EventListener struct { + // BackgroundError is invoked whenever an error occurs during a background + // operation such as flush or compaction. + BackgroundError func(error) + + // CompactionBegin is invoked after the inputs to a compaction have been + // determined, but before the compaction has produced any output. + CompactionBegin func(CompactionInfo) + + // CompactionEnd is invoked after a compaction has completed and the result + // has been installed. + CompactionEnd func(CompactionInfo) + + // DiskSlow is invoked after a disk write operation on a file created with a + // disk health checking vfs.FS (see vfs.DefaultWithDiskHealthChecks) is + // observed to exceed the specified disk slowness threshold duration. 
DiskSlow + // is called on a goroutine that is monitoring slowness/stuckness. The callee + // MUST return without doing any IO, or blocking on anything (like a mutex) + // that is waiting on IO. This is imperative in order to reliably monitor for + // slowness, since if this goroutine gets stuck, the monitoring will stop + // working. + DiskSlow func(DiskSlowInfo) + + // FlushBegin is invoked after the inputs to a flush have been determined, + // but before the flush has produced any output. + FlushBegin func(FlushInfo) + + // FlushEnd is invoked after a flush has completed and the result has been + // installed. + FlushEnd func(FlushInfo) + + // FormatUpgrade is invoked after the database's FormatMajorVersion + // is upgraded. + FormatUpgrade func(FormatMajorVersion) + + // ManifestCreated is invoked after a manifest has been created. + ManifestCreated func(ManifestCreateInfo) + + // ManifestDeleted is invoked after a manifest has been deleted. + ManifestDeleted func(ManifestDeleteInfo) + + // TableCreated is invoked when a table has been created. + TableCreated func(TableCreateInfo) + + // TableDeleted is invoked after a table has been deleted. + TableDeleted func(TableDeleteInfo) + + // TableIngested is invoked after an externally created table has been + // ingested via a call to DB.Ingest(). + TableIngested func(TableIngestInfo) + + // TableStatsLoaded is invoked at most once, when the table stats + // collector has loaded statistics for all tables that existed at Open. + TableStatsLoaded func(TableStatsInfo) + + // TableValidated is invoked after validation runs on an sstable. + TableValidated func(TableValidatedInfo) + + // WALCreated is invoked after a WAL has been created. + WALCreated func(WALCreateInfo) + + // WALDeleted is invoked after a WAL has been deleted. + WALDeleted func(WALDeleteInfo) + + // WriteStallBegin is invoked when writes are intentionally delayed. 
+ WriteStallBegin func(WriteStallBeginInfo) + + // WriteStallEnd is invoked when delayed writes are released. + WriteStallEnd func() +} + +// EnsureDefaults ensures that background error events are logged to the +// specified logger if a handler for those events hasn't been otherwise +// specified. Ensure all handlers are non-nil so that we don't have to check +// for nil-ness before invoking. +func (l *EventListener) EnsureDefaults(logger Logger) { + if l.BackgroundError == nil { + if logger != nil { + l.BackgroundError = func(err error) { + logger.Errorf("background error: %s", err) + } + } else { + l.BackgroundError = func(error) {} + } + } + if l.CompactionBegin == nil { + l.CompactionBegin = func(info CompactionInfo) {} + } + if l.CompactionEnd == nil { + l.CompactionEnd = func(info CompactionInfo) {} + } + if l.DiskSlow == nil { + l.DiskSlow = func(info DiskSlowInfo) {} + } + if l.FlushBegin == nil { + l.FlushBegin = func(info FlushInfo) {} + } + if l.FlushEnd == nil { + l.FlushEnd = func(info FlushInfo) {} + } + if l.FormatUpgrade == nil { + l.FormatUpgrade = func(v FormatMajorVersion) {} + } + if l.ManifestCreated == nil { + l.ManifestCreated = func(info ManifestCreateInfo) {} + } + if l.ManifestDeleted == nil { + l.ManifestDeleted = func(info ManifestDeleteInfo) {} + } + if l.TableCreated == nil { + l.TableCreated = func(info TableCreateInfo) {} + } + if l.TableDeleted == nil { + l.TableDeleted = func(info TableDeleteInfo) {} + } + if l.TableIngested == nil { + l.TableIngested = func(info TableIngestInfo) {} + } + if l.TableStatsLoaded == nil { + l.TableStatsLoaded = func(info TableStatsInfo) {} + } + if l.TableValidated == nil { + l.TableValidated = func(validated TableValidatedInfo) {} + } + if l.WALCreated == nil { + l.WALCreated = func(info WALCreateInfo) {} + } + if l.WALDeleted == nil { + l.WALDeleted = func(info WALDeleteInfo) {} + } + if l.WriteStallBegin == nil { + l.WriteStallBegin = func(info WriteStallBeginInfo) {} + } + if l.WriteStallEnd == 
nil { + l.WriteStallEnd = func() {} + } +} + +// MakeLoggingEventListener creates an EventListener that logs all events to the +// specified logger. +func MakeLoggingEventListener(logger Logger) EventListener { + if logger == nil { + logger = DefaultLogger + } + + return EventListener{ + BackgroundError: func(err error) { + logger.Errorf("background error: %s", err) + }, + CompactionBegin: func(info CompactionInfo) { + logger.Infof("%s", info) + }, + CompactionEnd: func(info CompactionInfo) { + logger.Infof("%s", info) + }, + DiskSlow: func(info DiskSlowInfo) { + logger.Infof("%s", info) + }, + FlushBegin: func(info FlushInfo) { + logger.Infof("%s", info) + }, + FlushEnd: func(info FlushInfo) { + logger.Infof("%s", info) + }, + FormatUpgrade: func(v FormatMajorVersion) { + logger.Infof("upgraded to format version: %s", v) + }, + ManifestCreated: func(info ManifestCreateInfo) { + logger.Infof("%s", info) + }, + ManifestDeleted: func(info ManifestDeleteInfo) { + logger.Infof("%s", info) + }, + TableCreated: func(info TableCreateInfo) { + logger.Infof("%s", info) + }, + TableDeleted: func(info TableDeleteInfo) { + logger.Infof("%s", info) + }, + TableIngested: func(info TableIngestInfo) { + logger.Infof("%s", info) + }, + TableStatsLoaded: func(info TableStatsInfo) { + logger.Infof("%s", info) + }, + TableValidated: func(info TableValidatedInfo) { + logger.Infof("%s", info) + }, + WALCreated: func(info WALCreateInfo) { + logger.Infof("%s", info) + }, + WALDeleted: func(info WALDeleteInfo) { + logger.Infof("%s", info) + }, + WriteStallBegin: func(info WriteStallBeginInfo) { + logger.Infof("%s", info) + }, + WriteStallEnd: func() { + logger.Infof("write stall ending") + }, + } +} + +// TeeEventListener wraps two EventListeners, forwarding all events to both. 
+func TeeEventListener(a, b EventListener) EventListener { + a.EnsureDefaults(nil) + b.EnsureDefaults(nil) + return EventListener{ + BackgroundError: func(err error) { + a.BackgroundError(err) + b.BackgroundError(err) + }, + CompactionBegin: func(info CompactionInfo) { + a.CompactionBegin(info) + b.CompactionBegin(info) + }, + CompactionEnd: func(info CompactionInfo) { + a.CompactionEnd(info) + b.CompactionEnd(info) + }, + DiskSlow: func(info DiskSlowInfo) { + a.DiskSlow(info) + b.DiskSlow(info) + }, + FlushBegin: func(info FlushInfo) { + a.FlushBegin(info) + b.FlushBegin(info) + }, + FlushEnd: func(info FlushInfo) { + a.FlushEnd(info) + b.FlushEnd(info) + }, + FormatUpgrade: func(v FormatMajorVersion) { + a.FormatUpgrade(v) + b.FormatUpgrade(v) + }, + ManifestCreated: func(info ManifestCreateInfo) { + a.ManifestCreated(info) + b.ManifestCreated(info) + }, + ManifestDeleted: func(info ManifestDeleteInfo) { + a.ManifestDeleted(info) + b.ManifestDeleted(info) + }, + TableCreated: func(info TableCreateInfo) { + a.TableCreated(info) + b.TableCreated(info) + }, + TableDeleted: func(info TableDeleteInfo) { + a.TableDeleted(info) + b.TableDeleted(info) + }, + TableIngested: func(info TableIngestInfo) { + a.TableIngested(info) + b.TableIngested(info) + }, + TableStatsLoaded: func(info TableStatsInfo) { + a.TableStatsLoaded(info) + b.TableStatsLoaded(info) + }, + TableValidated: func(info TableValidatedInfo) { + a.TableValidated(info) + b.TableValidated(info) + }, + WALCreated: func(info WALCreateInfo) { + a.WALCreated(info) + b.WALCreated(info) + }, + WALDeleted: func(info WALDeleteInfo) { + a.WALDeleted(info) + b.WALDeleted(info) + }, + WriteStallBegin: func(info WriteStallBeginInfo) { + a.WriteStallBegin(info) + b.WriteStallBegin(info) + }, + WriteStallEnd: func() { + a.WriteStallEnd() + b.WriteStallEnd() + }, + } +} diff --git a/pebble/event_listener_test.go b/pebble/event_listener_test.go new file mode 100644 index 0000000..69325d9 --- /dev/null +++ 
b/pebble/event_listener_test.go @@ -0,0 +1,376 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import ( + "bytes" + "fmt" + "reflect" + "strings" + "sync" + "testing" + "time" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/objstorage/objstorageprovider" + "github.com/cockroachdb/pebble/sstable" + "github.com/cockroachdb/pebble/vfs" + "github.com/cockroachdb/redact" + "github.com/stretchr/testify/require" +) + +// Verify event listener actions, as well as expected filesystem operations. +func TestEventListener(t *testing.T) { + var d *DB + var memLog base.InMemLogger + mem := vfs.NewMem() + require.NoError(t, mem.MkdirAll("ext", 0755)) + + datadriven.RunTest(t, "testdata/event_listener", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "open": + memLog.Reset() + lel := MakeLoggingEventListener(&memLog) + flushBegin, flushEnd := lel.FlushBegin, lel.FlushEnd + lel.FlushBegin = func(info FlushInfo) { + // Make deterministic. + info.InputBytes = 100 + flushBegin(info) + } + lel.FlushEnd = func(info FlushInfo) { + // Make deterministic. + info.InputBytes = 100 + flushEnd(info) + } + opts := &Options{ + FS: vfs.WithLogging(mem, memLog.Infof), + FormatMajorVersion: internalFormatNewest, + EventListener: &lel, + MaxManifestFileSize: 1, + L0CompactionThreshold: 10, + WALDir: "wal", + } + // The table stats collector runs asynchronously and its + // timing is less predictable. It increments nextJobID, which + // can make these tests flaky. The TableStatsLoaded event is + // tested separately in TestTableStats. 
+ opts.private.disableTableStats = true + var err error + d, err = Open("db", opts) + if err != nil { + return err.Error() + } + t := time.Now() + d.timeNow = func() time.Time { + t = t.Add(time.Second) + return t + } + d.opts.private.testingAlwaysWaitForCleanup = true + return memLog.String() + + case "close": + memLog.Reset() + if err := d.Close(); err != nil { + return err.Error() + } + return memLog.String() + + case "flush": + memLog.Reset() + if err := d.Set([]byte("a"), nil, nil); err != nil { + return err.Error() + } + if err := d.Flush(); err != nil { + return err.Error() + } + return memLog.String() + + case "compact": + memLog.Reset() + if err := d.Set([]byte("a"), nil, nil); err != nil { + return err.Error() + } + if err := d.Compact([]byte("a"), []byte("b"), false); err != nil { + return err.Error() + } + return memLog.String() + + case "checkpoint": + memLog.Reset() + if err := d.Checkpoint("checkpoint"); err != nil { + return err.Error() + } + return memLog.String() + + case "disable-file-deletions": + memLog.Reset() + d.mu.Lock() + d.disableFileDeletions() + d.mu.Unlock() + return memLog.String() + + case "enable-file-deletions": + memLog.Reset() + func() { + defer func() { + if r := recover(); r != nil { + memLog.Infof("%v", r) + } + }() + d.mu.Lock() + defer d.mu.Unlock() + d.enableFileDeletions() + }() + d.TestOnlyWaitForCleaning() + return memLog.String() + + case "ingest": + memLog.Reset() + f, err := mem.Create("ext/0") + if err != nil { + return err.Error() + } + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ + TableFormat: d.FormatMajorVersion().MaxTableFormat(), + }) + if err := w.Add(base.MakeInternalKey([]byte("a"), 0, InternalKeyKindSet), nil); err != nil { + return err.Error() + } + if err := w.Close(); err != nil { + return err.Error() + } + if err := d.Ingest([]string{"ext/0"}); err != nil { + return err.Error() + } + return memLog.String() + + case "ingest-flushable": + memLog.Reset() + + // 
Prevent flushes during this test to ensure determinism. + d.mu.Lock() + d.mu.compact.flushing = true + d.mu.Unlock() + + b := d.NewBatch() + if err := b.Set([]byte("a"), nil, nil); err != nil { + return err.Error() + } + if err := d.Apply(b, nil); err != nil { + return err.Error() + } + writeTable := func(name string, key byte) error { + f, err := mem.Create(name) + if err != nil { + return err + } + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ + TableFormat: d.FormatMajorVersion().MaxTableFormat(), + }) + if err := w.Add(base.MakeInternalKey([]byte{key}, 0, InternalKeyKindSet), nil); err != nil { + return err + } + if err := w.Close(); err != nil { + return err + } + return nil + } + tableA, tableB := "ext/a", "ext/b" + if err := writeTable(tableA, 'a'); err != nil { + return err.Error() + } + if err := writeTable(tableB, 'b'); err != nil { + return err.Error() + } + if err := d.Ingest([]string{tableA, tableB}); err != nil { + return err.Error() + } + + // Re-enable flushes, to allow the subsequent flush to proceed. + d.mu.Lock() + d.mu.compact.flushing = false + d.mu.Unlock() + if err := d.Flush(); err != nil { + return err.Error() + } + return memLog.String() + + case "metrics": + // The asynchronous loading of table stats can change metrics, so + // wait for all the tables' stats to be loaded. 
+ d.mu.Lock() + d.waitTableStats() + d.mu.Unlock() + + return d.Metrics().StringForTests() + + case "sstables": + var buf bytes.Buffer + tableInfos, _ := d.SSTables() + for i, level := range tableInfos { + if len(level) == 0 { + continue + } + fmt.Fprintf(&buf, "%d:\n", i) + for _, m := range level { + fmt.Fprintf(&buf, " %d:[%s-%s]\n", + m.FileNum, m.Smallest.UserKey, m.Largest.UserKey) + } + } + return buf.String() + + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +func TestWriteStallEvents(t *testing.T) { + const flushCount = 10 + const writeStallEnd = "write stall ending" + + testCases := []struct { + delayFlush bool + expected string + }{ + {true, "memtable count limit reached"}, + {false, "L0 file count limit exceeded"}, + } + + for _, c := range testCases { + t.Run("", func(t *testing.T) { + stallEnded := make(chan struct{}, 1) + createReleased := make(chan struct{}, flushCount) + var log base.InMemLogger + var delayOnce sync.Once + listener := &EventListener{ + TableCreated: func(info TableCreateInfo) { + if c.delayFlush == (info.Reason == "flushing") { + delayOnce.Do(func() { + <-createReleased + }) + } + }, + WriteStallBegin: func(info WriteStallBeginInfo) { + log.Infof("%s", info.String()) + createReleased <- struct{}{} + }, + WriteStallEnd: func() { + log.Infof("%s", writeStallEnd) + select { + case stallEnded <- struct{}{}: + default: + } + }, + } + d, err := Open("db", &Options{ + EventListener: listener, + FS: vfs.NewMem(), + MemTableSize: initialMemTableSize, + MemTableStopWritesThreshold: 2, + L0CompactionThreshold: 2, + L0StopWritesThreshold: 2, + }) + require.NoError(t, err) + defer d.Close() + + for i := 0; i < flushCount; i++ { + require.NoError(t, d.Set([]byte("a"), nil, NoSync)) + + ch, err := d.AsyncFlush() + require.NoError(t, err) + + // If we're delaying the flush (because we're testing for memtable + // write stalls), we can't wait for the flush to finish as doing so + // would deadlock. 
If we're not delaying the flush (because we're + // testing for L0 write stalls), we wait for the flush to finish so we + // don't create too many memtables which would trigger a memtable write + // stall. + if !c.delayFlush { + <-ch + } + if strings.Contains(log.String(), c.expected) { + break + } + } + <-stallEnded + + events := log.String() + require.Contains(t, events, c.expected) + require.Contains(t, events, writeStallEnd) + if testing.Verbose() { + t.Logf("\n%s", events) + } + }) + } +} + +type redactLogger struct { + logger Logger +} + +// Infof implements the Logger.Infof interface. +func (l redactLogger) Infof(format string, args ...interface{}) { + l.logger.Infof("%s", redact.Sprintf(format, args...).Redact()) +} + +// Errorf implements the Logger.Errorf interface. +func (l redactLogger) Errorf(format string, args ...interface{}) { + l.logger.Errorf("%s", redact.Sprintf(format, args...).Redact()) +} + +// Fatalf implements the Logger.Fatalf interface. +func (l redactLogger) Fatalf(format string, args ...interface{}) { + l.logger.Fatalf("%s", redact.Sprintf(format, args...).Redact()) +} + +func TestEventListenerRedact(t *testing.T) { + // The vast majority of event listener fields logged are safe and do not + // need to be redacted. Verify that the rare, unsafe error does appear in + // the log redacted. 
+ var log base.InMemLogger + l := MakeLoggingEventListener(redactLogger{logger: &log}) + l.WALDeleted(WALDeleteInfo{ + JobID: 5, + FileNum: FileNum(20), + Err: errors.Errorf("unredacted error: %s", "unredacted string"), + }) + require.Equal(t, "[JOB 5] WAL delete error: unredacted error: ‹×›\n", log.String()) +} + +func TestEventListenerEnsureDefaultsBackgroundError(t *testing.T) { + e := EventListener{} + e.EnsureDefaults(nil) + e.BackgroundError(errors.New("an example error")) +} + +func TestEventListenerEnsureDefaultsSetsAllCallbacks(t *testing.T) { + e := EventListener{} + e.EnsureDefaults(nil) + testAllCallbacksSetInEventListener(t, e) +} + +func TestMakeLoggingEventListenerSetsAllCallbacks(t *testing.T) { + e := MakeLoggingEventListener(nil) + testAllCallbacksSetInEventListener(t, e) +} + +func TestTeeEventListenerSetsAllCallbacks(t *testing.T) { + e := TeeEventListener(EventListener{}, EventListener{}) + testAllCallbacksSetInEventListener(t, e) +} + +func testAllCallbacksSetInEventListener(t *testing.T, e EventListener) { + t.Helper() + v := reflect.ValueOf(e) + for i := 0; i < v.NumField(); i++ { + fType := v.Type().Field(i) + fVal := v.Field(i) + require.Equal(t, reflect.Func, fType.Type.Kind(), "unexpected non-func field: %s", fType.Name) + require.False(t, fVal.IsNil(), "unexpected nil field: %s", fType.Name) + } +} diff --git a/pebble/example_test.go b/pebble/example_test.go new file mode 100644 index 0000000..5c13df1 --- /dev/null +++ b/pebble/example_test.go @@ -0,0 +1,37 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package pebble_test + +import ( + "fmt" + "log" + + "github.com/cockroachdb/pebble" + "github.com/cockroachdb/pebble/vfs" +) + +func Example() { + db, err := pebble.Open("", &pebble.Options{FS: vfs.NewMem()}) + if err != nil { + log.Fatal(err) + } + key := []byte("hello") + if err := db.Set(key, []byte("world"), pebble.Sync); err != nil { + log.Fatal(err) + } + value, closer, err := db.Get(key) + if err != nil { + log.Fatal(err) + } + fmt.Printf("%s %s\n", key, value) + if err := closer.Close(); err != nil { + log.Fatal(err) + } + if err := db.Close(); err != nil { + log.Fatal(err) + } + // Output: + // hello world +} diff --git a/pebble/external_iterator.go b/pebble/external_iterator.go new file mode 100644 index 0000000..078d016 --- /dev/null +++ b/pebble/external_iterator.go @@ -0,0 +1,561 @@ +// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import ( + "context" + "fmt" + "sort" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/keyspan" + "github.com/cockroachdb/pebble/internal/manifest" + "github.com/cockroachdb/pebble/sstable" +) + +// ExternalIterOption provide an interface to specify open-time options to +// NewExternalIter. +type ExternalIterOption interface { + // iterApply is called on the iterator during opening in order to set internal + // parameters. + iterApply(*Iterator) + // readerOptions returns any reader options added by this iter option. + readerOptions() []sstable.ReaderOption +} + +type externalIterReaderOptions struct { + opts []sstable.ReaderOption +} + +func (e *externalIterReaderOptions) iterApply(iterator *Iterator) { + // Do nothing. 
+} + +func (e *externalIterReaderOptions) readerOptions() []sstable.ReaderOption { + return e.opts +} + +// ExternalIterReaderOptions returns an ExternalIterOption that specifies +// sstable.ReaderOptions to be applied on sstable readers in NewExternalIter. +func ExternalIterReaderOptions(opts ...sstable.ReaderOption) ExternalIterOption { + return &externalIterReaderOptions{opts: opts} +} + +// ExternalIterForwardOnly is an ExternalIterOption that specifies this iterator +// will only be used for forward positioning operations (First, SeekGE, Next). +// This could enable optimizations that take advantage of this invariant. +// Behaviour when a reverse positioning operation is done on an iterator +// opened with this option is unpredictable, though in most cases it should error out. +type ExternalIterForwardOnly struct{} + +func (e ExternalIterForwardOnly) iterApply(iter *Iterator) { + iter.forwardOnly = true +} + +func (e ExternalIterForwardOnly) readerOptions() []sstable.ReaderOption { + return nil +} + +// NewExternalIter takes an input 2d array of sstable files which may overlap +// across subarrays but not within a subarray (at least as far as points are +// concerned; range keys are allowed to overlap arbitrarily even within a +// subarray), and returns an Iterator over the merged contents of the sstables. +// Input sstables may contain point keys, range keys, range deletions, etc. The +// input files slice must be sorted in reverse chronological ordering. A key in a +// file at a lower index subarray will shadow a key with an identical user key +// contained within a file at a higher index subarray. Each subarray must be +// sorted in internal key order, where lower index files contain keys that sort +// left of files with higher indexes. +// +// Input sstables must only contain keys with the zero sequence number. +// +// Iterators constructed through NewExternalIter do not support all iterator +// options, including block-property and table filters. 
NewExternalIter errors +// if an incompatible option is set. +func NewExternalIter( + o *Options, + iterOpts *IterOptions, + files [][]sstable.ReadableFile, + extraOpts ...ExternalIterOption, +) (it *Iterator, err error) { + return NewExternalIterWithContext(context.Background(), o, iterOpts, files, extraOpts...) +} + +// NewExternalIterWithContext is like NewExternalIter, and additionally +// accepts a context for tracing. +func NewExternalIterWithContext( + ctx context.Context, + o *Options, + iterOpts *IterOptions, + files [][]sstable.ReadableFile, + extraOpts ...ExternalIterOption, +) (it *Iterator, err error) { + if iterOpts != nil { + if err := validateExternalIterOpts(iterOpts); err != nil { + return nil, err + } + } + + var readers [][]*sstable.Reader + + // Ensure we close all the opened readers if we error out. + defer func() { + if err != nil { + for i := range readers { + for j := range readers[i] { + _ = readers[i][j].Close() + } + } + } + }() + seqNumOffset := 0 + var extraReaderOpts []sstable.ReaderOption + for i := range extraOpts { + extraReaderOpts = append(extraReaderOpts, extraOpts[i].readerOptions()...) + } + for _, levelFiles := range files { + seqNumOffset += len(levelFiles) + } + for _, levelFiles := range files { + var subReaders []*sstable.Reader + seqNumOffset -= len(levelFiles) + subReaders, err = openExternalTables(o, levelFiles, seqNumOffset, o.MakeReaderOptions(), extraReaderOpts...) + readers = append(readers, subReaders) + } + if err != nil { + return nil, err + } + + buf := iterAllocPool.Get().(*iterAlloc) + dbi := &buf.dbi + *dbi = Iterator{ + ctx: ctx, + alloc: buf, + merge: o.Merger.Merge, + comparer: *o.Comparer, + readState: nil, + keyBuf: buf.keyBuf, + prefixOrFullSeekKey: buf.prefixOrFullSeekKey, + boundsBuf: buf.boundsBuf, + batch: nil, + // Add the readers to the Iterator so that Close closes them, and + // SetOptions can re-construct iterators from them. 
+ externalReaders: readers, + newIters: func( + ctx context.Context, f *manifest.FileMetadata, opts *IterOptions, + internalOpts internalIterOpts) (internalIterator, keyspan.FragmentIterator, error) { + // NB: External iterators are currently constructed without any + // `levelIters`. newIters should never be called. When we support + // organizing multiple non-overlapping files into a single level + // (see TODO below), we'll need to adjust this tableNewIters + // implementation to open iterators by looking up f in a map + // of readers indexed by *fileMetadata. + panic("unreachable") + }, + seqNum: base.InternalKeySeqNumMax, + } + if iterOpts != nil { + dbi.opts = *iterOpts + dbi.processBounds(iterOpts.LowerBound, iterOpts.UpperBound) + } + for i := range extraOpts { + extraOpts[i].iterApply(dbi) + } + if err := finishInitializingExternal(ctx, dbi); err != nil { + dbi.Close() + return nil, err + } + return dbi, nil +} + +func validateExternalIterOpts(iterOpts *IterOptions) error { + switch { + case iterOpts.TableFilter != nil: + return errors.Errorf("pebble: external iterator: TableFilter unsupported") + case iterOpts.PointKeyFilters != nil: + return errors.Errorf("pebble: external iterator: PointKeyFilters unsupported") + case iterOpts.RangeKeyFilters != nil: + return errors.Errorf("pebble: external iterator: RangeKeyFilters unsupported") + case iterOpts.OnlyReadGuaranteedDurable: + return errors.Errorf("pebble: external iterator: OnlyReadGuaranteedDurable unsupported") + case iterOpts.UseL6Filters: + return errors.Errorf("pebble: external iterator: UseL6Filters unsupported") + } + return nil +} + +func createExternalPointIter(ctx context.Context, it *Iterator) (internalIterator, error) { + // TODO(jackson): In some instances we could generate fewer levels by using + // L0Sublevels code to organize nonoverlapping files into the same level. + // This would allow us to use levelIters and keep a smaller set of data and + // files in-memory. 
However, it would also require us to identify the bounds + // of all the files upfront. + + if !it.opts.pointKeys() { + return emptyIter, nil + } else if it.pointIter != nil { + return it.pointIter, nil + } + mlevels := it.alloc.mlevels[:0] + + if len(it.externalReaders) > cap(mlevels) { + mlevels = make([]mergingIterLevel, 0, len(it.externalReaders)) + } + for _, readers := range it.externalReaders { + var combinedIters []internalIterator + for _, r := range readers { + var ( + rangeDelIter keyspan.FragmentIterator + pointIter internalIterator + err error + ) + // We could set hideObsoletePoints=true, since we are reading at + // InternalKeySeqNumMax, but we don't bother since these sstables should + // not have obsolete points (so the performance optimization is + // unnecessary), and we don't want to bother constructing a + // BlockPropertiesFilterer that includes obsoleteKeyBlockPropertyFilter. + pointIter, err = r.NewIterWithBlockPropertyFiltersAndContextEtc( + ctx, it.opts.LowerBound, it.opts.UpperBound, nil, /* BlockPropertiesFilterer */ + false /* hideObsoletePoints */, false, /* useFilterBlock */ + &it.stats.InternalStats, it.opts.CategoryAndQoS, nil, + sstable.TrivialReaderProvider{Reader: r}) + if err != nil { + return nil, err + } + rangeDelIter, err = r.NewRawRangeDelIter() + if err != nil { + return nil, err + } + if rangeDelIter == nil && pointIter != nil && it.forwardOnly { + // TODO(bilal): Consider implementing range key pausing in + // simpleLevelIter so we can reduce mergingIterLevels even more by + // sending all sstable iterators to combinedIters, not just those + // corresponding to sstables without range deletes. 
+ combinedIters = append(combinedIters, pointIter) + continue + } + mlevels = append(mlevels, mergingIterLevel{ + iter: pointIter, + rangeDelIter: rangeDelIter, + }) + } + if len(combinedIters) == 1 { + mlevels = append(mlevels, mergingIterLevel{ + iter: combinedIters[0], + }) + } else if len(combinedIters) > 1 { + sli := &simpleLevelIter{ + cmp: it.cmp, + iters: combinedIters, + } + sli.init(it.opts) + mlevels = append(mlevels, mergingIterLevel{ + iter: sli, + rangeDelIter: nil, + }) + } + } + if len(mlevels) == 1 && mlevels[0].rangeDelIter == nil { + // Set closePointIterOnce to true. This is because we're bypassing the + // merging iter, which turns Close()s on it idempotent for any child + // iterators. The outer Iterator could call Close() on a point iter twice, + // which sstable iterators do not support (as they release themselves to + // a pool). + it.closePointIterOnce = true + return mlevels[0].iter, nil + } + + it.alloc.merging.init(&it.opts, &it.stats.InternalStats, it.comparer.Compare, it.comparer.Split, mlevels...) + it.alloc.merging.snapshot = base.InternalKeySeqNumMax + if len(mlevels) <= cap(it.alloc.levelsPositioned) { + it.alloc.merging.levelsPositioned = it.alloc.levelsPositioned[:len(mlevels)] + } + return &it.alloc.merging, nil +} + +func finishInitializingExternal(ctx context.Context, it *Iterator) error { + pointIter, err := createExternalPointIter(ctx, it) + if err != nil { + return err + } + it.pointIter = pointIter + it.iter = it.pointIter + + if it.opts.rangeKeys() { + it.rangeKeyMasking.init(it, it.comparer.Compare, it.comparer.Split) + var rangeKeyIters []keyspan.FragmentIterator + if it.rangeKey == nil { + // We could take advantage of the lack of overlaps in range keys within + // each slice in it.externalReaders, and generate keyspan.LevelIters + // out of those. However, since range keys are expected to be sparse to + // begin with, the performance gain might not be significant enough to + // warrant it. 
+ // + // TODO(bilal): Explore adding a simpleRangeKeyLevelIter that does not + // operate on FileMetadatas (similar to simpleLevelIter), and implements + // this optimization. + for _, readers := range it.externalReaders { + for _, r := range readers { + if rki, err := r.NewRawRangeKeyIter(); err != nil { + return err + } else if rki != nil { + rangeKeyIters = append(rangeKeyIters, rki) + } + } + } + if len(rangeKeyIters) > 0 { + it.rangeKey = iterRangeKeyStateAllocPool.Get().(*iteratorRangeKeyState) + it.rangeKey.init(it.comparer.Compare, it.comparer.Split, &it.opts) + it.rangeKey.rangeKeyIter = it.rangeKey.iterConfig.Init( + &it.comparer, + base.InternalKeySeqNumMax, + it.opts.LowerBound, it.opts.UpperBound, + &it.hasPrefix, &it.prefixOrFullSeekKey, + false /* internalKeys */, &it.rangeKey.internal, + ) + for i := range rangeKeyIters { + it.rangeKey.iterConfig.AddLevel(rangeKeyIters[i]) + } + } + } + if it.rangeKey != nil { + it.rangeKey.iiter.Init(&it.comparer, it.iter, it.rangeKey.rangeKeyIter, + keyspan.InterleavingIterOpts{ + Mask: &it.rangeKeyMasking, + LowerBound: it.opts.LowerBound, + UpperBound: it.opts.UpperBound, + }) + it.iter = &it.rangeKey.iiter + } + } + return nil +} + +func openExternalTables( + o *Options, + files []sstable.ReadableFile, + seqNumOffset int, + readerOpts sstable.ReaderOptions, + extraReaderOpts ...sstable.ReaderOption, +) (readers []*sstable.Reader, err error) { + readers = make([]*sstable.Reader, 0, len(files)) + for i := range files { + readable, err := sstable.NewSimpleReadable(files[i]) + if err != nil { + return readers, err + } + r, err := sstable.NewReader(readable, readerOpts, extraReaderOpts...) + if err != nil { + return readers, err + } + // Use the index of the file in files as the sequence number for all of + // its keys. 
+ r.Properties.GlobalSeqNum = uint64(len(files) - i + seqNumOffset) + readers = append(readers, r) + } + return readers, err +} + +// simpleLevelIter is similar to a levelIter in that it merges the points +// from multiple point iterators that are non-overlapping in the key ranges +// they return. It is only expected to support forward iteration and forward +// regular seeking; reverse iteration and prefix seeking is not supported. +// Intended to be a low-overhead, non-FileMetadata dependent option for +// NewExternalIter. To optimize seeking and forward iteration, it maintains +// two slices of child iterators; one of all iterators, and a subset of it that +// contains just the iterators that contain point keys within the current +// bounds. +// +// Note that this levelIter does not support pausing at file boundaries +// in case of range tombstones in this file that could apply to points outside +// of this file (and outside of this level). This is sufficient for optimizing +// the main use cases of NewExternalIter, however for completeness it would make +// sense to build this pausing functionality in. +type simpleLevelIter struct { + cmp Compare + err error + lowerBound []byte + iters []internalIterator + filtered []internalIterator + firstKeys [][]byte + firstKeysBuf []byte + currentIdx int +} + +var _ internalIterator = &simpleLevelIter{} + +// init initializes this simpleLevelIter. 
+func (s *simpleLevelIter) init(opts IterOptions) { + s.currentIdx = 0 + s.lowerBound = opts.LowerBound + s.resetFilteredIters() +} + +func (s *simpleLevelIter) resetFilteredIters() { + s.filtered = s.filtered[:0] + s.firstKeys = s.firstKeys[:0] + s.firstKeysBuf = s.firstKeysBuf[:0] + s.err = nil + for i := range s.iters { + var iterKey *base.InternalKey + if s.lowerBound != nil { + iterKey, _ = s.iters[i].SeekGE(s.lowerBound, base.SeekGEFlagsNone) + } else { + iterKey, _ = s.iters[i].First() + } + if iterKey != nil { + s.filtered = append(s.filtered, s.iters[i]) + bufStart := len(s.firstKeysBuf) + s.firstKeysBuf = append(s.firstKeysBuf, iterKey.UserKey...) + s.firstKeys = append(s.firstKeys, s.firstKeysBuf[bufStart:bufStart+len(iterKey.UserKey)]) + } else if err := s.iters[i].Error(); err != nil { + s.err = err + } + } +} + +func (s *simpleLevelIter) SeekGE( + key []byte, flags base.SeekGEFlags, +) (*base.InternalKey, base.LazyValue) { + if s.err != nil { + return nil, base.LazyValue{} + } + // Find the first file that is entirely >= key. The file before that could + // contain the key we're looking for. 
+ n := sort.Search(len(s.firstKeys), func(i int) bool { + return s.cmp(key, s.firstKeys[i]) <= 0 + }) + if n > 0 { + s.currentIdx = n - 1 + } else { + s.currentIdx = n + } + if s.currentIdx < len(s.filtered) { + if iterKey, val := s.filtered[s.currentIdx].SeekGE(key, flags); iterKey != nil { + return iterKey, val + } + if err := s.filtered[s.currentIdx].Error(); err != nil { + s.err = err + } + s.currentIdx++ + } + return s.skipEmptyFileForward(key, flags) +} + +func (s *simpleLevelIter) skipEmptyFileForward( + seekKey []byte, flags base.SeekGEFlags, +) (*base.InternalKey, base.LazyValue) { + var iterKey *base.InternalKey + var val base.LazyValue + for s.currentIdx >= 0 && s.currentIdx < len(s.filtered) && s.err == nil { + if seekKey != nil { + iterKey, val = s.filtered[s.currentIdx].SeekGE(seekKey, flags) + } else if s.lowerBound != nil { + iterKey, val = s.filtered[s.currentIdx].SeekGE(s.lowerBound, flags) + } else { + iterKey, val = s.filtered[s.currentIdx].First() + } + if iterKey != nil { + return iterKey, val + } + if err := s.filtered[s.currentIdx].Error(); err != nil { + s.err = err + } + s.currentIdx++ + } + return nil, base.LazyValue{} +} + +func (s *simpleLevelIter) SeekPrefixGE( + prefix, key []byte, flags base.SeekGEFlags, +) (*base.InternalKey, base.LazyValue) { + panic("unimplemented") +} + +func (s *simpleLevelIter) SeekLT( + key []byte, flags base.SeekLTFlags, +) (*base.InternalKey, base.LazyValue) { + panic("unimplemented") +} + +func (s *simpleLevelIter) First() (*base.InternalKey, base.LazyValue) { + if s.err != nil { + return nil, base.LazyValue{} + } + s.currentIdx = 0 + return s.skipEmptyFileForward(nil /* seekKey */, base.SeekGEFlagsNone) +} + +func (s *simpleLevelIter) Last() (*base.InternalKey, base.LazyValue) { + panic("unimplemented") +} + +func (s *simpleLevelIter) Next() (*base.InternalKey, base.LazyValue) { + if s.err != nil { + return nil, base.LazyValue{} + } + if s.currentIdx < 0 || s.currentIdx >= len(s.filtered) { + return nil, 
base.LazyValue{} + } + if iterKey, val := s.filtered[s.currentIdx].Next(); iterKey != nil { + return iterKey, val + } + s.currentIdx++ + return s.skipEmptyFileForward(nil /* seekKey */, base.SeekGEFlagsNone) +} + +func (s *simpleLevelIter) NextPrefix(succKey []byte) (*base.InternalKey, base.LazyValue) { + if s.err != nil { + return nil, base.LazyValue{} + } + if s.currentIdx < 0 || s.currentIdx >= len(s.filtered) { + return nil, base.LazyValue{} + } + if iterKey, val := s.filtered[s.currentIdx].NextPrefix(succKey); iterKey != nil { + return iterKey, val + } + s.currentIdx++ + return s.skipEmptyFileForward(succKey /* seekKey */, base.SeekGEFlagsNone) +} + +func (s *simpleLevelIter) Prev() (*base.InternalKey, base.LazyValue) { + panic("unimplemented") +} + +func (s *simpleLevelIter) Error() error { + if s.currentIdx >= 0 && s.currentIdx < len(s.filtered) { + s.err = firstError(s.err, s.filtered[s.currentIdx].Error()) + } + return s.err +} + +func (s *simpleLevelIter) Close() error { + var err error + for i := range s.iters { + err = firstError(err, s.iters[i].Close()) + } + return err +} + +func (s *simpleLevelIter) SetBounds(lower, upper []byte) { + s.currentIdx = -1 + s.lowerBound = lower + for i := range s.iters { + s.iters[i].SetBounds(lower, upper) + } + s.resetFilteredIters() +} + +func (s *simpleLevelIter) SetContext(_ context.Context) {} + +func (s *simpleLevelIter) String() string { + if s.currentIdx < 0 || s.currentIdx >= len(s.filtered) { + return "simpleLevelIter: current=" + } + return fmt.Sprintf("simpleLevelIter: current=%s", s.filtered[s.currentIdx]) +} + +var _ internalIterator = &simpleLevelIter{} diff --git a/pebble/external_iterator_test.go b/pebble/external_iterator_test.go new file mode 100644 index 0000000..77afd4d --- /dev/null +++ b/pebble/external_iterator_test.go @@ -0,0 +1,380 @@ +// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. 
Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import ( + "bytes" + "fmt" + "math" + "testing" + "time" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/cache" + "github.com/cockroachdb/pebble/internal/itertest" + "github.com/cockroachdb/pebble/internal/testkeys" + "github.com/cockroachdb/pebble/objstorage/objstorageprovider" + "github.com/cockroachdb/pebble/sstable" + "github.com/cockroachdb/pebble/vfs" + "github.com/stretchr/testify/require" + "golang.org/x/exp/rand" +) + +func TestExternalIterator(t *testing.T) { + mem := vfs.NewMem() + o := &Options{ + FS: mem, + Comparer: testkeys.Comparer, + FormatMajorVersion: FormatRangeKeys, + } + o.EnsureDefaults() + d, err := Open("", o) + require.NoError(t, err) + defer func() { require.NoError(t, d.Close()) }() + + datadriven.RunTest(t, "testdata/external_iterator", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "reset": + mem = vfs.NewMem() + return "" + case "build": + if err := runBuildCmd(td, d, mem); err != nil { + return err.Error() + } + return "" + case "iter": + opts := IterOptions{KeyTypes: IterKeyTypePointsAndRanges} + var externalIterOpts []ExternalIterOption + var files [][]sstable.ReadableFile + for _, arg := range td.CmdArgs { + switch arg.Key { + case "fwd-only": + externalIterOpts = append(externalIterOpts, ExternalIterForwardOnly{}) + case "mask-suffix": + opts.RangeKeyMasking.Suffix = []byte(arg.Vals[0]) + case "lower": + opts.LowerBound = []byte(arg.Vals[0]) + case "upper": + opts.UpperBound = []byte(arg.Vals[0]) + case "files": + for _, v := range arg.Vals { + f, err := mem.Open(v) + require.NoError(t, err) + files = append(files, []sstable.ReadableFile{f}) + } + } + } + it, err := NewExternalIter(o, &opts, files, externalIterOpts...) 
+ require.NoError(t, err) + return runIterCmd(td, it, true /* close iter */) + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +func TestSimpleLevelIter(t *testing.T) { + mem := vfs.NewMem() + o := &Options{ + FS: mem, + Comparer: testkeys.Comparer, + FormatMajorVersion: FormatRangeKeys, + } + o.EnsureDefaults() + d, err := Open("", o) + require.NoError(t, err) + defer func() { require.NoError(t, d.Close()) }() + + datadriven.RunTest(t, "testdata/simple_level_iter", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "reset": + mem = vfs.NewMem() + return "" + case "build": + if err := runBuildCmd(td, d, mem); err != nil { + return err.Error() + } + return "" + case "iter": + var files []sstable.ReadableFile + var filenames []string + td.ScanArgs(t, "files", &filenames) + for _, name := range filenames { + f, err := mem.Open(name) + require.NoError(t, err) + files = append(files, f) + } + readers, err := openExternalTables(o, files, 0, o.MakeReaderOptions()) + require.NoError(t, err) + defer func() { + for i := range readers { + _ = readers[i].Close() + } + }() + var internalIters []internalIterator + for i := range readers { + iter, err := readers[i].NewIter(nil, nil) + require.NoError(t, err) + internalIters = append(internalIters, iter) + } + it := &simpleLevelIter{cmp: o.Comparer.Compare, iters: internalIters} + it.init(IterOptions{}) + + response := itertest.RunInternalIterCmd(t, td, it) + require.NoError(t, it.Close()) + return response + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +func TestSimpleIterError(t *testing.T) { + s := simpleLevelIter{cmp: DefaultComparer.Compare, iters: []internalIterator{&errorIter{err: errors.New("injected")}}} + s.init(IterOptions{}) + defer s.Close() + + iterKey, _ := s.First() + require.Nil(t, iterKey) + require.Error(t, s.Error()) +} + +func TestIterRandomizedMaybeFilteredKeys(t *testing.T) { + mem := vfs.NewMem() + + seed := *seed + if seed 
== 0 { + seed = uint64(time.Now().UnixNano()) + t.Logf("seed: %d", seed) + } + rng := rand.New(rand.NewSource(seed)) + numKeys := 100 + rng.Intn(5000) + // The block property filter will exclude keys with suffixes [0, tsSeparator-1]. + // We use the first "part" of the keyspace below to write keys >= tsSeparator, + // and the second part to write keys < tsSeparator. Successive parts (if any) + // will contain keys at random before or after the separator. + tsSeparator := 10 + rng.Int63n(5000) + const keyLen = 5 + + // We split the keyspace into logical "parts" which are disjoint slices of the + // keyspace. That is, the keyspace a-z could be comprised of parts {a-k, l-z}. + // We rely on this partitioning when generating timestamps to give us some + // predictable clustering of timestamps in sstable blocks, however it is not + // strictly necessary for this test. + alpha := testkeys.Alpha(keyLen) + numParts := rng.Intn(3) + 2 + blockSize := 16 + rng.Intn(64) + + c := cache.New(128 << 20) + defer c.Unref() + + for fileIdx, twoLevelIndex := range []bool{false, true} { + t.Run(fmt.Sprintf("twoLevelIndex=%v", twoLevelIndex), func(t *testing.T) { + keys := make([][]byte, 0, numKeys) + + filename := fmt.Sprintf("test-%d", fileIdx) + f0, err := mem.Create(filename) + require.NoError(t, err) + + indexBlockSize := 4096 + if twoLevelIndex { + indexBlockSize = 1 + } + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f0), sstable.WriterOptions{ + BlockSize: blockSize, + Comparer: testkeys.Comparer, + IndexBlockSize: indexBlockSize, + TableFormat: sstable.TableFormatPebblev2, + BlockPropertyCollectors: []func() BlockPropertyCollector{ + func() BlockPropertyCollector { + return sstable.NewTestKeysBlockPropertyCollector() + }, + }, + }) + buf := make([]byte, alpha.MaxLen()+testkeys.MaxSuffixLen) + valBuf := make([]byte, 20) + keyIdx := int64(0) + for i := 0; i < numParts; i++ { + // The first two parts of the keyspace are special. 
The first one has + // all keys with timestamps greater than tsSeparator, while the second + // one has all keys with timestamps less than tsSeparator. Any additional + // keys could have timestamps at random before or after the tsSeparator. + maxKeysPerPart := numKeys / numParts + for j := 0; j < maxKeysPerPart; j++ { + var ts int64 + if i == 0 { + ts = rng.Int63n(5000) + tsSeparator + } else if i == 1 { + ts = rng.Int63n(tsSeparator) + } else { + ts = rng.Int63n(tsSeparator + 5000) + } + n := testkeys.WriteKeyAt(buf, alpha, keyIdx*alpha.Count()/int64(numKeys), ts) + keys = append(keys, append([]byte(nil), buf[:n]...)) + randStr(valBuf, rng) + require.NoError(t, w.Set(buf[:n], valBuf)) + keyIdx++ + } + } + require.NoError(t, w.Close()) + + // Re-open that filename for reading. + f1, err := mem.Open(filename) + require.NoError(t, err) + + readable, err := sstable.NewSimpleReadable(f1) + require.NoError(t, err) + + r, err := sstable.NewReader(readable, sstable.ReaderOptions{ + Cache: c, + Comparer: testkeys.Comparer, + }) + require.NoError(t, err) + defer r.Close() + + filter := sstable.NewTestKeysBlockPropertyFilter(uint64(tsSeparator), math.MaxUint64) + filterer, err := sstable.IntersectsTable([]BlockPropertyFilter{filter}, nil, r.Properties.UserProperties) + require.NoError(t, err) + require.NotNil(t, filterer) + + var iter sstable.Iterator + iter, err = r.NewIterWithBlockPropertyFilters( + nil, nil, filterer, false /* useFilterBlock */, nil, /* stats */ + sstable.CategoryAndQoS{}, nil, sstable.TrivialReaderProvider{Reader: r}) + require.NoError(t, err) + defer iter.Close() + var lastSeekKey, lowerBound, upperBound []byte + narrowBoundsMode := false + + for i := 0; i < 10000; i++ { + if rng.Intn(8) == 0 { + // Toggle narrow bounds mode. + if narrowBoundsMode { + // Reset bounds. 
+ lowerBound, upperBound = nil, nil + iter.SetBounds(nil /* lower */, nil /* upper */) + } + narrowBoundsMode = !narrowBoundsMode + } + keyIdx := rng.Intn(len(keys)) + seekKey := keys[keyIdx] + if narrowBoundsMode { + // Case 1: We just entered narrow bounds mode, and both bounds + // are nil. Set a lower/upper bound. + // + // Case 2: The seek key is outside our last bounds. + // + // In either case, pick a narrow range of keys to set bounds on, + // let's say keys[keyIdx-5] and keys[keyIdx+5], before doing our + // seek operation. Picking narrow bounds increases the chance of + // monotonic bound changes. + cmp := testkeys.Comparer.Compare + case1 := lowerBound == nil && upperBound == nil + case2 := (lowerBound != nil && cmp(lowerBound, seekKey) > 0) || (upperBound != nil && cmp(upperBound, seekKey) <= 0) + if case1 || case2 { + lowerBound = nil + if keyIdx-5 >= 0 { + lowerBound = keys[keyIdx-5] + } + upperBound = nil + if keyIdx+5 < len(keys) { + upperBound = keys[keyIdx+5] + } + iter.SetBounds(lowerBound, upperBound) + } + // Case 3: The current seek key is within the previously-set bounds. + // No need to change bounds. + } + flags := base.SeekGEFlagsNone + if lastSeekKey != nil && bytes.Compare(seekKey, lastSeekKey) > 0 { + flags = flags.EnableTrySeekUsingNext() + } + lastSeekKey = append(lastSeekKey[:0], seekKey...) + + newKey, _ := iter.SeekGE(seekKey, flags) + if newKey == nil || !bytes.Equal(newKey.UserKey, seekKey) { + // We skipped some keys. Check if maybeFilteredKeys is true. 
+ formattedNewKey := "" + if newKey != nil { + formattedNewKey = fmt.Sprintf("%s", testkeys.Comparer.FormatKey(newKey.UserKey)) + } + require.True(t, iter.MaybeFilteredKeys(), "seeked for key = %s, got key = %s indicating block property filtering but MaybeFilteredKeys = false", testkeys.Comparer.FormatKey(seekKey), formattedNewKey) + } + } + }) + } +} + +func BenchmarkExternalIter_NonOverlapping_SeekNextScan(b *testing.B) { + ks := testkeys.Alpha(6) + opts := (&Options{}).EnsureDefaults() + iterOpts := &IterOptions{ + KeyTypes: IterKeyTypePointsAndRanges, + } + writeOpts := opts.MakeWriterOptions(6, sstable.TableFormatPebblev2) + + for _, keyCount := range []int{100, 10_000, 100_000} { + b.Run(fmt.Sprintf("keys=%d", keyCount), func(b *testing.B) { + for _, fileCount := range []int{1, 10, 100} { + b.Run(fmt.Sprintf("files=%d", fileCount), func(b *testing.B) { + var fs vfs.FS = vfs.NewMem() + filenames := make([]string, fileCount) + var keys [][]byte + for i := 0; i < fileCount; i++ { + filename := fmt.Sprintf("%03d.sst", i) + wf, err := fs.Create(filename) + require.NoError(b, err) + w := sstable.NewWriter(objstorageprovider.NewFileWritable(wf), writeOpts) + for j := 0; j < keyCount/fileCount; j++ { + key := testkeys.Key(ks, int64(len(keys))) + keys = append(keys, key) + require.NoError(b, w.Set(key, key)) + } + require.NoError(b, w.Close()) + filenames[i] = filename + } + + for _, forwardOnly := range []bool{false, true} { + b.Run(fmt.Sprintf("forward-only=%t", forwardOnly), func(b *testing.B) { + var externalIterOpts []ExternalIterOption + if forwardOnly { + externalIterOpts = append(externalIterOpts, ExternalIterForwardOnly{}) + } + + for i := 0; i < b.N; i++ { + func() { + files := make([][]sstable.ReadableFile, fileCount) + for i := 0; i < fileCount; i++ { + f, err := fs.Open(filenames[i]) + require.NoError(b, err) + files[i] = []sstable.ReadableFile{f} + } + + it, err := NewExternalIter(opts, iterOpts, files, externalIterOpts...) 
+ require.NoError(b, err) + defer it.Close() + + for k := 0; k+1 < len(keys); k += 2 { + if !it.SeekGE(keys[k]) { + b.Fatalf("key %q not found", keys[k]) + } + if !it.Next() { + b.Fatalf("key %q not found", keys[k+1]) + } + if !bytes.Equal(it.Key(), keys[k+1]) { + b.Fatalf("expected key %q, found %q", keys[k+1], it.Key()) + } + } + }() + } + }) + } + }) + } + }) + } +} diff --git a/pebble/filenames.go b/pebble/filenames.go new file mode 100644 index 0000000..07d74c8 --- /dev/null +++ b/pebble/filenames.go @@ -0,0 +1,54 @@ +// Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import ( + "fmt" + + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/vfs" +) + +type fileType = base.FileType + +// FileNum is an identifier for a file within a database. +type FileNum = base.FileNum + +const ( + fileTypeLog = base.FileTypeLog + fileTypeLock = base.FileTypeLock + fileTypeTable = base.FileTypeTable + fileTypeManifest = base.FileTypeManifest + fileTypeCurrent = base.FileTypeCurrent + fileTypeOptions = base.FileTypeOptions + fileTypeTemp = base.FileTypeTemp + fileTypeOldTemp = base.FileTypeOldTemp +) + +// setCurrentFile sets the CURRENT file to point to the manifest with +// provided file number. +// +// NB: This is a low-level routine and typically not what you want to +// use. Newer versions of Pebble running newer format major versions do +// not use the CURRENT file. See setCurrentFunc in version_set.go. 
+func setCurrentFile(dirname string, fs vfs.FS, fileNum base.DiskFileNum) error { + newFilename := base.MakeFilepath(fs, dirname, fileTypeCurrent, fileNum) + oldFilename := base.MakeFilepath(fs, dirname, fileTypeTemp, fileNum) + fs.Remove(oldFilename) + f, err := fs.Create(oldFilename) + if err != nil { + return err + } + if _, err := fmt.Fprintf(f, "MANIFEST-%s\n", fileNum); err != nil { + return err + } + if err := f.Sync(); err != nil { + return err + } + if err := f.Close(); err != nil { + return err + } + return fs.Rename(oldFilename, newFilename) +} diff --git a/pebble/filenames_test.go b/pebble/filenames_test.go new file mode 100644 index 0000000..287352e --- /dev/null +++ b/pebble/filenames_test.go @@ -0,0 +1,110 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import ( + "testing" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/vfs" + "github.com/stretchr/testify/require" +) + +// TestSetCurrentFileCrash tests a crash that occurs during +// a MANIFEST roll, leaving the temporary CURRENT file on +// the filesystem. These temporary files should be cleaned +// up on Open. +func TestSetCurrentFileCrash(t *testing.T) { + mem := vfs.NewMem() + + // Initialize a fresh database to write the initial MANIFEST. + { + d, err := Open("", &Options{FS: mem}) + require.NoError(t, err) + require.NoError(t, d.Close()) + } + + // Open the database again, this time with a FS that + // errors on Rename and a tiny max manifest file size + // to force manifest rolls. 
+ { + wantErr := errors.New("rename error") + _, err := Open("", &Options{ + FS: renameErrorFS{FS: mem, err: wantErr}, + Logger: noFatalLogger{t: t}, + MaxManifestFileSize: 1, + L0CompactionThreshold: 10, + }) + // Open should fail during a manifest roll, + // leaving a temp dir on the filesystem. + if !errors.Is(err, wantErr) { + t.Fatal(err) + } + } + + // A temp file should be left on the filesystem + // from the failed Rename of the CURRENT file. + if temps := allTempFiles(t, mem); len(temps) == 0 { + t.Fatal("no temp files on the filesystem") + } + + // Open the database a third time with a normal + // filesystem again. It should clean up any temp + // files on Open. + { + d, err := Open("", &Options{ + FS: mem, + MaxManifestFileSize: 1, + L0CompactionThreshold: 10, + }) + require.NoError(t, err) + require.NoError(t, d.Close()) + if temps := allTempFiles(t, mem); len(temps) > 0 { + t.Fatalf("temporary files still on disk: %#v\n", temps) + } + } +} + +func allTempFiles(t *testing.T, fs vfs.FS) []string { + var files []string + ls, err := fs.List("") + require.NoError(t, err) + for _, f := range ls { + ft, _, ok := base.ParseFilename(fs, f) + if ok && ft == fileTypeTemp { + files = append(files, f) + } + } + return files +} + +type renameErrorFS struct { + vfs.FS + err error +} + +func (fs renameErrorFS) Rename(oldname string, newname string) error { + return fs.err +} + +// noFatalLogger implements Logger, logging to the contained +// *testing.T. Notably it does not panic on calls to Fatalf +// to enable unit tests of fatal logic. +type noFatalLogger struct { + t *testing.T +} + +func (l noFatalLogger) Infof(format string, args ...interface{}) { + l.t.Logf(format, args...) +} + +func (l noFatalLogger) Errorf(format string, args ...interface{}) { + l.t.Logf(format, args...) +} + +func (l noFatalLogger) Fatalf(format string, args ...interface{}) { + l.t.Logf(format, args...) 
+} diff --git a/pebble/flush_test.go b/pebble/flush_test.go new file mode 100644 index 0000000..0031420 --- /dev/null +++ b/pebble/flush_test.go @@ -0,0 +1,117 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import ( + "fmt" + "testing" + "time" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/vfs" + "github.com/stretchr/testify/require" +) + +func TestManualFlush(t *testing.T) { + getOptions := func() *Options { + opts := &Options{ + FS: vfs.NewMem(), + L0CompactionThreshold: 10, + } + opts.DisableAutomaticCompactions = true + return opts + } + d, err := Open("", getOptions()) + require.NoError(t, err) + defer func() { + require.NoError(t, d.Close()) + }() + + datadriven.RunTest(t, "testdata/manual_flush", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "batch": + b := d.NewBatch() + if err := runBatchDefineCmd(td, b); err != nil { + return err.Error() + } + b.Commit(nil) + return "" + + case "flush": + if err := d.Flush(); err != nil { + return err.Error() + } + + d.mu.Lock() + s := d.mu.versions.currentVersion().String() + d.mu.Unlock() + return s + + case "async-flush": + d.mu.Lock() + cur := d.mu.versions.currentVersion() + d.mu.Unlock() + + if _, err := d.AsyncFlush(); err != nil { + return err.Error() + } + + err := try(100*time.Microsecond, 20*time.Second, func() error { + d.mu.Lock() + defer d.mu.Unlock() + if cur == d.mu.versions.currentVersion() { + return errors.New("flush has not occurred") + } + return nil + }) + if err != nil { + return err.Error() + } + + d.mu.Lock() + s := d.mu.versions.currentVersion().String() + d.mu.Unlock() + return s + + case "reset": + if err := d.Close(); err != nil { + return err.Error() + } + d, err = Open("", getOptions()) + if err != nil { + return err.Error() + } + return "" + + 
default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +// TestFlushDelRangeEmptyKey tests flushing a range tombstone that begins with +// an empty key. The empty key is a valid key but can be confused with nil. +func TestFlushDelRangeEmptyKey(t *testing.T) { + d, err := Open("", &Options{FS: vfs.NewMem()}) + require.NoError(t, err) + require.NoError(t, d.DeleteRange([]byte{}, []byte("z"), nil)) + require.NoError(t, d.Flush()) + require.NoError(t, d.Close()) +} + +// TestFlushEmptyKey tests that flushing an empty key does not trigger that key +// order invariant assertions. +func TestFlushEmptyKey(t *testing.T) { + d, err := Open("", &Options{FS: vfs.NewMem()}) + require.NoError(t, err) + require.NoError(t, d.Set(nil, []byte("hello"), nil)) + require.NoError(t, d.Flush()) + val, closer, err := d.Get(nil) + require.NoError(t, err) + require.Equal(t, val, []byte("hello")) + require.NoError(t, closer.Close()) + require.NoError(t, d.Close()) +} diff --git a/pebble/flushable.go b/pebble/flushable.go new file mode 100644 index 0000000..473dc6a --- /dev/null +++ b/pebble/flushable.go @@ -0,0 +1,254 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import ( + "context" + "fmt" + "sync/atomic" + "time" + + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/keyspan" + "github.com/cockroachdb/pebble/internal/manifest" +) + +// flushable defines the interface for immutable memtables. +type flushable interface { + newIter(o *IterOptions) internalIterator + newFlushIter(o *IterOptions, bytesFlushed *uint64) internalIterator + newRangeDelIter(o *IterOptions) keyspan.FragmentIterator + newRangeKeyIter(o *IterOptions) keyspan.FragmentIterator + containsRangeKeys() bool + // inuseBytes returns the number of inuse bytes by the flushable. 
+ inuseBytes() uint64 + // totalBytes returns the total number of bytes allocated by the flushable. + totalBytes() uint64 + // readyForFlush returns true when the flushable is ready for flushing. See + // memTable.readyForFlush for one implementation which needs to check whether + // there are any outstanding write references. + readyForFlush() bool +} + +// flushableEntry wraps a flushable and adds additional metadata and +// functionality that is common to all flushables. +type flushableEntry struct { + flushable + // Channel which is closed when the flushable has been flushed. + flushed chan struct{} + // flushForced indicates whether a flush was forced on this memtable (either + // manual, or due to ingestion). Protected by DB.mu. + flushForced bool + // delayedFlushForcedAt indicates whether a timer has been set to force a + // flush on this memtable at some point in the future. Protected by DB.mu. + // Holds the timestamp of when the flush will be issued. + delayedFlushForcedAt time.Time + // logNum corresponds to the WAL that contains the records present in the + // receiver. + logNum base.DiskFileNum + // logSize is the size in bytes of the associated WAL. Protected by DB.mu. + logSize uint64 + // The current logSeqNum at the time the memtable was created. This is + // guaranteed to be less than or equal to any seqnum stored in the memtable. + logSeqNum uint64 + // readerRefs tracks the read references on the flushable. The two sources of + // reader references are DB.mu.mem.queue and readState.memtables. The memory + // reserved by the flushable in the cache is released when the reader refs + // drop to zero. If the flushable is referencing sstables, then the file + // refcount is also decreased once the reader refs drops to 0. If the + // flushable is a memTable, when the reader refs drops to zero, the writer + // refs will already be zero because the memtable will have been flushed and + // that only occurs once the writer refs drops to zero. 
+ readerRefs atomic.Int32 + // Closure to invoke to release memory accounting. + releaseMemAccounting func() + // unrefFiles, if not nil, should be invoked to decrease the ref count of + // files which are backing the flushable. + unrefFiles func() []*fileBacking + // deleteFnLocked should be called if the caller is holding DB.mu. + deleteFnLocked func(obsolete []*fileBacking) + // deleteFn should be called if the caller is not holding DB.mu. + deleteFn func(obsolete []*fileBacking) +} + +func (e *flushableEntry) readerRef() { + switch v := e.readerRefs.Add(1); { + case v <= 1: + panic(fmt.Sprintf("pebble: inconsistent reference count: %d", v)) + } +} + +// db.mu must not be held when this is called. +func (e *flushableEntry) readerUnref(deleteFiles bool) { + e.readerUnrefHelper(deleteFiles, e.deleteFn) +} + +// db.mu must be held when this is called. +func (e *flushableEntry) readerUnrefLocked(deleteFiles bool) { + e.readerUnrefHelper(deleteFiles, e.deleteFnLocked) +} + +func (e *flushableEntry) readerUnrefHelper( + deleteFiles bool, deleteFn func(obsolete []*fileBacking), +) { + switch v := e.readerRefs.Add(-1); { + case v < 0: + panic(fmt.Sprintf("pebble: inconsistent reference count: %d", v)) + case v == 0: + if e.releaseMemAccounting == nil { + panic("pebble: memtable reservation already released") + } + e.releaseMemAccounting() + e.releaseMemAccounting = nil + if e.unrefFiles != nil { + obsolete := e.unrefFiles() + e.unrefFiles = nil + if deleteFiles { + deleteFn(obsolete) + } + } + } +} + +type flushableList []*flushableEntry + +// ingestedFlushable is the implementation of the flushable interface for the +// ingesting sstables which are added to the flushable list. +type ingestedFlushable struct { + files []physicalMeta + comparer *Comparer + newIters tableNewIters + newRangeKeyIters keyspan.TableNewSpanIter + + // Since the level slice is immutable, we construct and set it once. It + // should be safe to read from slice in future reads. 
+ slice manifest.LevelSlice + // hasRangeKeys is set on ingestedFlushable construction. + hasRangeKeys bool +} + +func newIngestedFlushable( + files []*fileMetadata, + comparer *Comparer, + newIters tableNewIters, + newRangeKeyIters keyspan.TableNewSpanIter, +) *ingestedFlushable { + var physicalFiles []physicalMeta + var hasRangeKeys bool + for _, f := range files { + if f.HasRangeKeys { + hasRangeKeys = true + } + physicalFiles = append(physicalFiles, f.PhysicalMeta()) + } + + ret := &ingestedFlushable{ + files: physicalFiles, + comparer: comparer, + newIters: newIters, + newRangeKeyIters: newRangeKeyIters, + // slice is immutable and can be set once and used many times. + slice: manifest.NewLevelSliceKeySorted(comparer.Compare, files), + hasRangeKeys: hasRangeKeys, + } + + return ret +} + +// TODO(sumeer): ingestedFlushable iters also need to plumb context for +// tracing. + +// newIter is part of the flushable interface. +func (s *ingestedFlushable) newIter(o *IterOptions) internalIterator { + var opts IterOptions + if o != nil { + opts = *o + } + // TODO(bananabrick): The manifest.Level in newLevelIter is only used for + // logging. Update the manifest.Level encoding to account for levels which + // aren't truly levels in the lsm. Right now, the encoding only supports + // L0 sublevels, and the rest of the levels in the lsm. + return newLevelIter( + context.Background(), opts, s.comparer, s.newIters, s.slice.Iter(), manifest.Level(0), + internalIterOpts{}, + ) +} + +// newFlushIter is part of the flushable interface. +func (s *ingestedFlushable) newFlushIter(o *IterOptions, bytesFlushed *uint64) internalIterator { + // newFlushIter is only used for writing memtables to disk as sstables. + // Since ingested sstables are already present on disk, they don't need to + // make use of a flush iter. 
+ panic("pebble: not implemented") +} + +func (s *ingestedFlushable) constructRangeDelIter( + file *manifest.FileMetadata, _ keyspan.SpanIterOptions, +) (keyspan.FragmentIterator, error) { + // Note that the keyspan level iter expects a non-nil iterator to be + // returned even if there is an error. So, we return the emptyKeyspanIter. + iter, rangeDelIter, err := s.newIters(context.Background(), file, nil, internalIterOpts{}) + if err != nil { + return emptyKeyspanIter, err + } + iter.Close() + if rangeDelIter == nil { + return emptyKeyspanIter, nil + } + return rangeDelIter, nil +} + +// newRangeDelIter is part of the flushable interface. +// TODO(bananabrick): Using a level iter instead of a keyspan level iter to +// surface range deletes is more efficient. +// +// TODO(sumeer): *IterOptions are being ignored, so the index block load for +// the point iterator in constructRangeDeIter is not tracked. +func (s *ingestedFlushable) newRangeDelIter(_ *IterOptions) keyspan.FragmentIterator { + return keyspan.NewLevelIter( + keyspan.SpanIterOptions{}, s.comparer.Compare, + s.constructRangeDelIter, s.slice.Iter(), manifest.Level(0), + manifest.KeyTypePoint, + ) +} + +// newRangeKeyIter is part of the flushable interface. +func (s *ingestedFlushable) newRangeKeyIter(o *IterOptions) keyspan.FragmentIterator { + if !s.containsRangeKeys() { + return nil + } + + return keyspan.NewLevelIter( + keyspan.SpanIterOptions{}, s.comparer.Compare, s.newRangeKeyIters, + s.slice.Iter(), manifest.Level(0), manifest.KeyTypeRange, + ) +} + +// containsRangeKeys is part of the flushable interface. +func (s *ingestedFlushable) containsRangeKeys() bool { + return s.hasRangeKeys +} + +// inuseBytes is part of the flushable interface. +func (s *ingestedFlushable) inuseBytes() uint64 { + // inuseBytes is only used when memtables are flushed to disk as sstables. + panic("pebble: not implemented") +} + +// totalBytes is part of the flushable interface. 
+func (s *ingestedFlushable) totalBytes() uint64 { + // We don't allocate additional bytes for the ingestedFlushable. + return 0 +} + +// readyForFlush is part of the flushable interface. +func (s *ingestedFlushable) readyForFlush() bool { + // ingestedFlushable should always be ready to flush. However, note that + // memtables before the ingested sstables in the memtable queue must be + // flushed before an ingestedFlushable can be flushed. This is because the + // ingested sstables need an updated view of the Version to + // determine where to place the files in the lsm. + return true +} diff --git a/pebble/flushable_test.go b/pebble/flushable_test.go new file mode 100644 index 0000000..c5d1d9c --- /dev/null +++ b/pebble/flushable_test.go @@ -0,0 +1,168 @@ +package pebble + +import ( + "bytes" + "fmt" + "testing" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/vfs" + "github.com/stretchr/testify/require" +) + +// Simple sanity tests for the flushable interface implementation for ingested +// sstables. +func TestIngestedSSTFlushableAPI(t *testing.T) { + var mem vfs.FS + var d *DB + defer func() { + require.NoError(t, d.Close()) + }() + var flushable flushable + + reset := func() { + if d != nil { + require.NoError(t, d.Close()) + } + + mem = vfs.NewMem() + require.NoError(t, mem.MkdirAll("ext", 0755)) + opts := &Options{ + FS: mem, + L0CompactionThreshold: 100, + L0StopWritesThreshold: 100, + DebugCheck: DebugCheckLevels, + FormatMajorVersion: internalFormatNewest, + } + // Disable automatic compactions because otherwise we'll race with + // delete-only compactions triggered by ingesting range tombstones. 
+ opts.DisableAutomaticCompactions = true + + var err error + d, err = Open("", opts) + require.NoError(t, err) + flushable = nil + } + reset() + + loadFileMeta := func(paths []string) []*fileMetadata { + d.mu.Lock() + pendingOutputs := make([]base.DiskFileNum, len(paths)) + for i := range paths { + pendingOutputs[i] = d.mu.versions.getNextDiskFileNum() + } + jobID := d.mu.nextJobID + d.mu.nextJobID++ + d.mu.Unlock() + + // We can reuse the ingestLoad function for this test even if we're + // not actually ingesting a file. + lr, err := ingestLoad(d.opts, d.FormatMajorVersion(), paths, nil, nil, d.cacheID, pendingOutputs, d.objProvider, jobID) + if err != nil { + panic(err) + } + meta := lr.localMeta + if len(meta) == 0 { + // All of the sstables to be ingested were empty. Nothing to do. + panic("empty sstable") + } + // The table cache requires the *fileMetadata to have a positive + // reference count. Fake a reference before we try to load the file. + for _, f := range meta { + f.Ref() + } + + // Verify the sstables do not overlap. + if err := ingestSortAndVerify(d.cmp, lr, KeyRange{}); err != nil { + panic("unsorted sstables") + } + + // Hard link the sstables into the DB directory. Since the sstables aren't + // referenced by a version, they won't be used. If the hard linking fails + // (e.g. because the files reside on a different filesystem), ingestLink will + // fall back to copying, and if that fails we undo our work and return an + // error. + if err := ingestLink(jobID, d.opts, d.objProvider, lr, nil /* shared */); err != nil { + panic("couldn't hard link sstables") + } + + // Fsync the directory we added the tables to. We need to do this at some + // point before we update the MANIFEST (via logAndApply), otherwise a crash + // can have the tables referenced in the MANIFEST, but not present in the + // directory. 
+ if err := d.dataDir.Sync(); err != nil { + panic("Couldn't sync data directory") + } + + return meta + } + + datadriven.RunTest(t, "testdata/ingested_flushable_api", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "reset": + reset() + return "" + case "build": + if err := runBuildCmd(td, d, mem); err != nil { + return err.Error() + } + return "" + case "flushable": + // Creates an ingestedFlushable over the input files. + paths := make([]string, 0, len(td.CmdArgs)) + for _, arg := range td.CmdArgs { + paths = append(paths, arg.String()) + } + + meta := loadFileMeta(paths) + flushable = newIngestedFlushable( + meta, d.opts.Comparer, d.newIters, d.tableNewRangeKeyIter, + ) + return "" + case "iter": + iter := flushable.newIter(nil) + var buf bytes.Buffer + for x, _ := iter.First(); x != nil; x, _ = iter.Next() { + buf.WriteString(x.String()) + buf.WriteString("\n") + } + iter.Close() + return buf.String() + case "rangekeyIter": + iter := flushable.newRangeKeyIter(nil) + var buf bytes.Buffer + if iter != nil { + for span := iter.First(); span != nil; span = iter.Next() { + buf.WriteString(span.String()) + buf.WriteString("\n") + } + iter.Close() + } + return buf.String() + case "rangedelIter": + iter := flushable.newRangeDelIter(nil) + var buf bytes.Buffer + if iter != nil { + for span := iter.First(); span != nil; span = iter.Next() { + buf.WriteString(span.String()) + buf.WriteString("\n") + } + iter.Close() + } + return buf.String() + case "readyForFlush": + if flushable.readyForFlush() { + return "true" + } + return "false" + case "containsRangeKey": + if flushable.containsRangeKeys() { + return "true" + } + return "false" + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} diff --git a/pebble/format_major_version.go b/pebble/format_major_version.go new file mode 100644 index 0000000..89be161 --- /dev/null +++ b/pebble/format_major_version.go @@ -0,0 +1,678 @@ +// Copyright 2021 The LevelDB-Go and Pebble 
Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import ( + "fmt" + "strconv" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/manifest" + "github.com/cockroachdb/pebble/sstable" + "github.com/cockroachdb/pebble/vfs" + "github.com/cockroachdb/pebble/vfs/atomicfs" +) + +// FormatMajorVersion is a constant controlling the format of persisted +// data. Backwards incompatible changes to durable formats are gated +// behind new format major versions. +// +// At any point, a database's format major version may be bumped. +// However, once a database's format major version is increased, +// previous versions of Pebble will refuse to open the database. +// +// The zero value format is the FormatDefault constant. The exact +// FormatVersion that the default corresponds to may change with time. +type FormatMajorVersion uint64 + +// SafeValue implements redact.SafeValue. +func (v FormatMajorVersion) SafeValue() {} + +// String implements fmt.Stringer. +func (v FormatMajorVersion) String() string { + // NB: This must not change. It's used as the value for the on-disk + // version marker file. + // + // Specifically, this value must always parse as a base 10 integer + // that fits in a uint64. We format it as zero-padded, 3-digit + // number today, but the padding may change. + return fmt.Sprintf("%03d", v) +} + +const ( + // 21.2 versions. + + // FormatDefault leaves the format version unspecified. The + // FormatDefault constant may be ratcheted upwards over time. + FormatDefault FormatMajorVersion = iota + // FormatMostCompatible maintains the most backwards compatibility, + // maintaining bi-directional compatibility with RocksDB 6.2.1 in + // the particular configuration described in the Pebble README. 
+ FormatMostCompatible + // formatVersionedManifestMarker is the first + // backwards-incompatible change made to Pebble, introducing the + // format-version marker file for handling backwards-incompatible + // changes more broadly, and replacing the `CURRENT` file with a + // marker file. + // + // This format version is intended as an intermediary version state. + // It is deliberately unexported to discourage direct use of this + // format major version. Clients should use FormatVersioned which + // also ensures earlier versions of Pebble fail to open a database + // written in a future format major version. + formatVersionedManifestMarker + // FormatVersioned is a new format major version that replaces the + // old `CURRENT` file with a new 'marker' file scheme. Previous + // Pebble versions will be unable to open the database unless + // they're aware of format versions. + FormatVersioned + // FormatSetWithDelete is a format major version that introduces a new key + // kind, base.InternalKeyKindSetWithDelete. Previous Pebble versions will be + // unable to open this database. + FormatSetWithDelete + + // 22.1 versions. + + // FormatBlockPropertyCollector is a format major version that introduces + // BlockPropertyCollectors. + FormatBlockPropertyCollector + // FormatSplitUserKeysMarked is a format major version that guarantees that + // all files that share user keys with neighbors are marked for compaction + // in the manifest. Ratcheting to FormatSplitUserKeysMarked will block + // (without holding mutexes) until the scan of the LSM is complete and the + // manifest has been rotated. + FormatSplitUserKeysMarked + + // 22.2 versions. + + // FormatSplitUserKeysMarkedCompacted is a format major version that + // guarantees that all files explicitly marked for compaction in the manifest + // have been compacted. 
Combined with the FormatSplitUserKeysMarked format + // major version, this version guarantees that there are no user keys split + // across multiple files within a level L1+. Ratcheting to this format version + // will block (without holding mutexes) until all necessary compactions for + // files marked for compaction are complete. + FormatSplitUserKeysMarkedCompacted + // FormatRangeKeys is a format major version that introduces range keys. + FormatRangeKeys + // FormatMinTableFormatPebblev1 is a format major version that guarantees that + // tables created by or ingested into the DB at or above this format major + // version will have a table format version of at least Pebblev1 (Block + // Properties). + FormatMinTableFormatPebblev1 + // FormatPrePebblev1Marked is a format major version that guarantees that all + // sstables with a table format version pre-Pebblev1 (i.e. those that are + // guaranteed to not contain block properties) are marked for compaction in + // the manifest. Ratcheting to FormatPrePebblev1Marked will block (without + // holding mutexes) until the scan of the LSM is complete and the manifest has + // been rotated. + FormatPrePebblev1Marked + + // 23.1 versions. + + // formatUnusedPrePebblev1MarkedCompacted is an unused format major version. + // This format major version was originally intended to ship in the 23.1 + // release. It was later decided that this should be deferred until a + // subsequent release. The original ordering is preserved so as not to + // introduce breaking changes in Cockroach. + formatUnusedPrePebblev1MarkedCompacted + + // FormatSSTableValueBlocks is a format major version that adds support for + // storing values in value blocks in the sstable. Value block support is not + // necessarily enabled when writing sstables, when running with this format + // major version. 
+ // + // WARNING: In development, so no production code should upgrade to this + // format, since a DB with this format major version will not actually + // interoperate correctly with another DB with the same format major + // version. This format major version is introduced so that tests can start + // being executed up to this version. Note that these tests succeed despite + // the incomplete support since they do not enable value blocks and use + // TableFormatPebblev2. + FormatSSTableValueBlocks + + // FormatFlushableIngest is a format major version that enables lazy + // addition of ingested sstables into the LSM structure. When an ingest + // overlaps with a memtable, a record of the ingest is written to the WAL + // without waiting for a flush. Subsequent reads treat the ingested files as + // a level above the overlapping memtable. Once the memtable is flushed, the + // ingested files are moved into the lowest possible levels. + // + // This feature is behind a format major version because it required + // breaking changes to the WAL format. + FormatFlushableIngest + + // 23.2 versions. + + // FormatPrePebblev1MarkedCompacted is a format major version that guarantees + // that all sstables explicitly marked for compaction in the manifest (see + // FormatPrePebblev1Marked) have been compacted. Ratcheting to this format + // version will block (without holding mutexes) until all necessary + // compactions for files marked for compaction are complete. + FormatPrePebblev1MarkedCompacted + + // FormatDeleteSizedAndObsolete is a format major version that adds support + // for deletion tombstones that encode the size of the value they're + // expected to delete. This format major version is required before the + // associated key kind may be committed through batch applications or + // ingests. It also adds support for keys that are marked obsolete (see + // sstable/format.go for details). 
+ FormatDeleteSizedAndObsolete + + // FormatVirtualSSTables is a format major version that adds support for + // virtual sstables that can reference a sub-range of keys in an underlying + // physical sstable. This information is persisted through new, + // backward-incompatible fields in the Manifest, and therefore requires + // a format major version. + FormatVirtualSSTables + + // internalFormatNewest holds the newest format major version, including + // experimental ones excluded from the exported FormatNewest constant until + // they've stabilized. Used in tests. + internalFormatNewest FormatMajorVersion = iota - 1 + + // FormatNewest always contains the most recent format major version. + FormatNewest FormatMajorVersion = internalFormatNewest +) + +// MaxTableFormat returns the maximum sstable.TableFormat that can be used at +// this FormatMajorVersion. +func (v FormatMajorVersion) MaxTableFormat() sstable.TableFormat { + switch v { + case FormatDefault, FormatMostCompatible, formatVersionedManifestMarker, + FormatVersioned, FormatSetWithDelete: + return sstable.TableFormatRocksDBv2 + case FormatBlockPropertyCollector, FormatSplitUserKeysMarked, + FormatSplitUserKeysMarkedCompacted: + return sstable.TableFormatPebblev1 + case FormatRangeKeys, FormatMinTableFormatPebblev1, FormatPrePebblev1Marked, + formatUnusedPrePebblev1MarkedCompacted: + return sstable.TableFormatPebblev2 + case FormatSSTableValueBlocks, FormatFlushableIngest, FormatPrePebblev1MarkedCompacted: + return sstable.TableFormatPebblev3 + case FormatDeleteSizedAndObsolete, FormatVirtualSSTables: + return sstable.TableFormatPebblev4 + default: + panic(fmt.Sprintf("pebble: unsupported format major version: %s", v)) + } +} + +// MinTableFormat returns the minimum sstable.TableFormat that can be used at +// this FormatMajorVersion. 
+func (v FormatMajorVersion) MinTableFormat() sstable.TableFormat { + switch v { + case FormatDefault, FormatMostCompatible, formatVersionedManifestMarker, + FormatVersioned, FormatSetWithDelete, FormatBlockPropertyCollector, + FormatSplitUserKeysMarked, FormatSplitUserKeysMarkedCompacted, + FormatRangeKeys: + return sstable.TableFormatLevelDB + case FormatMinTableFormatPebblev1, FormatPrePebblev1Marked, + formatUnusedPrePebblev1MarkedCompacted, FormatSSTableValueBlocks, + FormatFlushableIngest, FormatPrePebblev1MarkedCompacted, + FormatDeleteSizedAndObsolete, FormatVirtualSSTables: + return sstable.TableFormatPebblev1 + default: + panic(fmt.Sprintf("pebble: unsupported format major version: %s", v)) + } +} + +// orderingInvariants returns an enum encoding the set of invariants that must +// hold within the receiver format major version. Invariants only get stricter +// as the format major version advances, so it is okay to retrieve the +// invariants from the current format major version and by the time the +// invariants are enforced, the format major version has advanced. +func (v FormatMajorVersion) orderingInvariants() manifest.OrderingInvariants { + if v < FormatSplitUserKeysMarkedCompacted { + return manifest.AllowSplitUserKeys + } + return manifest.ProhibitSplitUserKeys +} + +// formatMajorVersionMigrations defines the migrations from one format +// major version to the next. Each migration is defined as a closure +// which will be invoked on the database before the new format major +// version is committed. Migrations must be idempotent. Migrations are +// invoked with d.mu locked. +// +// Each migration is responsible for invoking finalizeFormatVersUpgrade +// to set the new format major version. RatchetFormatMajorVersion will +// panic if a migration returns a nil error but fails to finalize the +// new format major version. 
+var formatMajorVersionMigrations = map[FormatMajorVersion]func(*DB) error{ + FormatMostCompatible: func(d *DB) error { return nil }, + formatVersionedManifestMarker: func(d *DB) error { + // formatVersionedManifestMarker introduces the use of a marker + // file for pointing to the current MANIFEST file. + + // Lock the manifest. + d.mu.versions.logLock() + defer d.mu.versions.logUnlock() + + // Construct the filename of the currently active manifest and + // move the manifest marker to that filename. The marker is + // guaranteed to exist, because we unconditionally locate it + // during Open. + manifestFileNum := d.mu.versions.manifestFileNum + filename := base.MakeFilename(fileTypeManifest, manifestFileNum) + if err := d.mu.versions.manifestMarker.Move(filename); err != nil { + return errors.Wrap(err, "moving manifest marker") + } + + // Now that we have a manifest marker file in place and pointing + // to the current MANIFEST, finalize the upgrade. If we fail for + // some reason, a retry of this migration is guaranteed to again + // move the manifest marker file to the latest manifest. If + // we're unable to finalize the upgrade, a subsequent call to + // Open will ignore the manifest marker. + if err := d.finalizeFormatVersUpgrade(formatVersionedManifestMarker); err != nil { + return err + } + + // We've finalized the upgrade. All subsequent Open calls will + // ignore the CURRENT file and instead read the manifest marker. + // Before we unlock the manifest, we need to update versionSet + // to use the manifest marker on future rotations. + d.mu.versions.setCurrent = setCurrentFuncMarker( + d.mu.versions.manifestMarker, + d.mu.versions.fs, + d.mu.versions.dirname) + return nil + }, + // The FormatVersioned version is split into two, each with their + // own migration to ensure the post-migration cleanup happens even + // if there's a crash immediately after finalizing the version. 
Once + // a new format major version is finalized, its migration will never + // run again. Post-migration cleanup like the one in the migration + // below must be performed in a separate migration or every time the + // database opens. + FormatVersioned: func(d *DB) error { + // Replace the `CURRENT` file with one that points to the + // nonexistent `MANIFEST-000000` file. If an earlier Pebble + // version that does not know about format major versions + // attempts to open the database, it will error avoiding + // accidental corruption. + if err := setCurrentFile(d.mu.versions.dirname, d.mu.versions.fs, base.FileNum(0).DiskFileNum()); err != nil { + return err + } + return d.finalizeFormatVersUpgrade(FormatVersioned) + }, + // As SetWithDelete is a new key kind, there is nothing to migrate. We can + // simply finalize the format version and we're done. + FormatSetWithDelete: func(d *DB) error { + return d.finalizeFormatVersUpgrade(FormatSetWithDelete) + }, + FormatBlockPropertyCollector: func(d *DB) error { + return d.finalizeFormatVersUpgrade(FormatBlockPropertyCollector) + }, + FormatSplitUserKeysMarked: func(d *DB) error { + // Mark any unmarked files with split-user keys. Note all format major + // versions migrations are invoked with DB.mu locked. + if err := d.markFilesLocked(markFilesWithSplitUserKeys(d.opts.Comparer.Equal)); err != nil { + return err + } + return d.finalizeFormatVersUpgrade(FormatSplitUserKeysMarked) + }, + FormatSplitUserKeysMarkedCompacted: func(d *DB) error { + // Before finalizing the format major version, rewrite any sstables + // still marked for compaction. Note all format major versions + // migrations are invoked with DB.mu locked. 
+ if err := d.compactMarkedFilesLocked(); err != nil { + return err + } + return d.finalizeFormatVersUpgrade(FormatSplitUserKeysMarkedCompacted) + }, + FormatRangeKeys: func(d *DB) error { + return d.finalizeFormatVersUpgrade(FormatRangeKeys) + }, + FormatMinTableFormatPebblev1: func(d *DB) error { + return d.finalizeFormatVersUpgrade(FormatMinTableFormatPebblev1) + }, + FormatPrePebblev1Marked: func(d *DB) error { + // Mark any unmarked files that contain only table properties. Note all + // format major versions migrations are invoked with DB.mu locked. + if err := d.markFilesLocked(markFilesPrePebblev1(d.tableCache)); err != nil { + return err + } + return d.finalizeFormatVersUpgrade(FormatPrePebblev1Marked) + }, + formatUnusedPrePebblev1MarkedCompacted: func(d *DB) error { + // Intentional no-op. + return d.finalizeFormatVersUpgrade(formatUnusedPrePebblev1MarkedCompacted) + }, + FormatSSTableValueBlocks: func(d *DB) error { + return d.finalizeFormatVersUpgrade(FormatSSTableValueBlocks) + }, + FormatFlushableIngest: func(d *DB) error { + return d.finalizeFormatVersUpgrade(FormatFlushableIngest) + }, + FormatPrePebblev1MarkedCompacted: func(d *DB) error { + // Before finalizing the format major version, rewrite any sstables + // still marked for compaction. Note all format major versions + // migrations are invoked with DB.mu locked. 
+ if err := d.compactMarkedFilesLocked(); err != nil { + return err + } + return d.finalizeFormatVersUpgrade(FormatPrePebblev1MarkedCompacted) + }, + FormatDeleteSizedAndObsolete: func(d *DB) error { + return d.finalizeFormatVersUpgrade(FormatDeleteSizedAndObsolete) + }, + FormatVirtualSSTables: func(d *DB) error { + return d.finalizeFormatVersUpgrade(FormatVirtualSSTables) + }, +} + +const formatVersionMarkerName = `format-version` + +func lookupFormatMajorVersion( + fs vfs.FS, dirname string, +) (FormatMajorVersion, *atomicfs.Marker, error) { + m, versString, err := atomicfs.LocateMarker(fs, dirname, formatVersionMarkerName) + if err != nil { + return 0, nil, err + } + if versString == "" { + return FormatMostCompatible, m, nil + } + v, err := strconv.ParseUint(versString, 10, 64) + if err != nil { + return 0, nil, errors.Wrap(err, "parsing format major version") + } + vers := FormatMajorVersion(v) + if vers == FormatDefault { + return 0, nil, errors.Newf("pebble: default format major version should not persisted", vers) + } + if vers > internalFormatNewest { + return 0, nil, errors.Newf("pebble: database %q written in format major version %d", dirname, vers) + } + return vers, m, nil +} + +// FormatMajorVersion returns the database's active format major +// version. The format major version may be higher than the one +// provided in Options when the database was opened if the existing +// database was written with a higher format version. +func (d *DB) FormatMajorVersion() FormatMajorVersion { + return FormatMajorVersion(d.mu.formatVers.vers.Load()) +} + +// RatchetFormatMajorVersion ratchets the opened database's format major +// version to the provided version. It errors if the provided format +// major version is below the database's current version. Once a +// database's format major version is upgraded, previous Pebble versions +// that do not know of the format version will be unable to open the +// database. 
+func (d *DB) RatchetFormatMajorVersion(fmv FormatMajorVersion) error { + if err := d.closed.Load(); err != nil { + panic(err) + } + + d.mu.Lock() + defer d.mu.Unlock() + return d.ratchetFormatMajorVersionLocked(fmv) +} + +func (d *DB) ratchetFormatMajorVersionLocked(formatVers FormatMajorVersion) error { + if d.opts.ReadOnly { + return ErrReadOnly + } + if formatVers > internalFormatNewest { + // Guard against accidentally forgetting to update internalFormatNewest. + return errors.Errorf("pebble: unknown format version %d", formatVers) + } + if currentVers := d.FormatMajorVersion(); currentVers > formatVers { + return errors.Newf("pebble: database already at format major version %d; cannot reduce to %d", + currentVers, formatVers) + } + if d.mu.formatVers.ratcheting { + return errors.Newf("pebble: database format major version upgrade is in-progress") + } + d.mu.formatVers.ratcheting = true + defer func() { d.mu.formatVers.ratcheting = false }() + + for nextVers := d.FormatMajorVersion() + 1; nextVers <= formatVers; nextVers++ { + if err := formatMajorVersionMigrations[nextVers](d); err != nil { + return errors.Wrapf(err, "migrating to version %d", nextVers) + } + + // NB: The migration is responsible for calling + // finalizeFormatVersUpgrade to finalize the upgrade. This + // structure is necessary because some migrations may need to + // update in-memory state (without ever dropping locks) after + // the upgrade is finalized. Here we assert that the upgrade + // did occur. + if d.FormatMajorVersion() != nextVers { + d.opts.Logger.Fatalf("pebble: successful migration to format version %d never finalized the upgrade", nextVers) + } + } + return nil +} + +// finalizeFormatVersUpgrade is typically only be called from within a +// format major version migration. +// +// See formatMajorVersionMigrations. 
+func (d *DB) finalizeFormatVersUpgrade(formatVers FormatMajorVersion) error { + // We use the marker to encode the active format version in the + // marker filename. Unlike other uses of the atomic marker, there is + // no file with the filename `formatVers.String()` on the + // filesystem. + if err := d.mu.formatVers.marker.Move(formatVers.String()); err != nil { + return err + } + d.mu.formatVers.vers.Store(uint64(formatVers)) + d.opts.EventListener.FormatUpgrade(formatVers) + return nil +} + +// compactMarkedFilesLocked performs a migration that schedules rewrite +// compactions to compact away any sstables marked for compaction. +// compactMarkedFilesLocked is run while ratcheting the database's format major +// version to FormatSplitUserKeysMarkedCompacted. +// +// Note that while this method is called with the DB.mu held, and will not +// return until all marked files have been compacted, the mutex is dropped while +// waiting for compactions to complete (or for slots to free up). +func (d *DB) compactMarkedFilesLocked() error { + curr := d.mu.versions.currentVersion() + for curr.Stats.MarkedForCompaction > 0 { + // Attempt to schedule a compaction to rewrite a file marked for + // compaction. + d.maybeScheduleCompactionPicker(func(picker compactionPicker, env compactionEnv) *pickedCompaction { + return picker.pickRewriteCompaction(env) + }) + + // The above attempt might succeed and schedule a rewrite compaction. Or + // there might not be available compaction concurrency to schedule the + // compaction. Or compaction of the file might have already been in + // progress. In any scenario, wait until there's some change in the + // state of active compactions. + + // Before waiting, check that the database hasn't been closed. Trying to + // schedule the compaction may have dropped d.mu while waiting for a + // manifest write to complete. In that dropped interim, the database may + // have been closed. 
+ if err := d.closed.Load(); err != nil { + return err.(error) + } + + // Some flush or compaction may have scheduled or completed while we waited + // for the manifest lock in maybeScheduleCompactionPicker. Get the latest + // Version before waiting on a compaction. + curr = d.mu.versions.currentVersion() + + // Only wait on compactions if there are files still marked for compaction. + // NB: Waiting on this condition variable drops d.mu while blocked. + if curr.Stats.MarkedForCompaction > 0 { + if d.mu.compact.compactingCount == 0 { + panic("expected a compaction of marked files in progress") + } + d.mu.compact.cond.Wait() + // Refresh the current version again. + curr = d.mu.versions.currentVersion() + } + } + return nil +} + +// findFilesFunc scans the LSM for files, returning true if at least one +// file was found. The returned array contains the matched files, if any, per +// level. +type findFilesFunc func(v *version) (found bool, files [numLevels][]*fileMetadata, _ error) + +// markFilesWithSplitUserKeys scans the LSM's levels 1 through 6 for adjacent +// files that contain the same user key. Such arrangements of files were +// permitted in RocksDB and in Pebble up to SHA a860bbad. +var markFilesWithSplitUserKeys = func(equal Equal) findFilesFunc { + return func(v *version) (found bool, files [numLevels][]*fileMetadata, _ error) { + // Files with split user keys are expected to be rare and performing key + // comparisons for every file within the LSM is expensive, so drop the + // database lock while scanning the file metadata. + for l := numLevels - 1; l > 0; l-- { + iter := v.Levels[l].Iter() + var prevFile *fileMetadata + var prevUserKey []byte + for f := iter.First(); f != nil; f = iter.Next() { + if prevUserKey != nil && equal(prevUserKey, f.Smallest.UserKey) { + // NB: We may append a file twice, once as prevFile and once + // as f. That's okay, and handled below. 
+ files[l] = append(files[l], prevFile, f) + found = true + } + if f.Largest.IsExclusiveSentinel() { + prevUserKey = nil + prevFile = nil + } else { + prevUserKey = f.Largest.UserKey + prevFile = f + } + } + } + return + } +} + +// markFilesPrePebblev1 scans the LSM for files that do not support block +// properties (i.e. a table format version pre-Pebblev1). +var markFilesPrePebblev1 = func(tc *tableCacheContainer) findFilesFunc { + return func(v *version) (found bool, files [numLevels][]*fileMetadata, err error) { + for l := numLevels - 1; l > 0; l-- { + iter := v.Levels[l].Iter() + for f := iter.First(); f != nil; f = iter.Next() { + if f.Virtual { + // Any physical sstable which has been virtualized must + // have already undergone this migration, and we don't + // need to worry about the virtual sstables themselves. + panic("pebble: unexpected virtual sstable during migration") + } + err = tc.withReader( + f.PhysicalMeta(), func(r *sstable.Reader) error { + tf, err := r.TableFormat() + if err != nil { + return err + } + if tf < sstable.TableFormatPebblev1 { + found = true + files[l] = append(files[l], f) + } + return nil + }) + if err != nil { + return + } + } + } + return + } +} + +// markFilesLocked durably marks the files that match the given findFilesFunc for +// compaction. +func (d *DB) markFilesLocked(findFn findFilesFunc) error { + jobID := d.mu.nextJobID + d.mu.nextJobID++ + + // Acquire a read state to have a view of the LSM and a guarantee that none + // of the referenced files will be deleted until we've unreferenced the read + // state. Some findFilesFuncs may read the files, requiring they not be + // deleted. + rs := d.loadReadState() + var ( + found bool + files [numLevels][]*fileMetadata + err error + ) + func() { + defer rs.unrefLocked() + // Note the unusual locking: unlock, defer Lock(). The scan of the files in + // the version does not need to block other operations that require the + // DB.mu. Drop it for the scan, before re-acquiring it. 
+ d.mu.Unlock() + defer d.mu.Lock() + found, files, err = findFn(rs.current) + }() + if err != nil { + return err + } + + // The database lock has been acquired again by the defer within the above + // anonymous function. + if !found { + // Nothing to do. + return nil + } + + // After scanning, if we found files to mark, we fetch the current state of + // the LSM (which may have changed) and set MarkedForCompaction on the files, + // and update the version's Stats.MarkedForCompaction count, which are both + // protected by d.mu. + + // Lock the manifest for a coherent view of the LSM. The database lock has + // been re-acquired by the defer within the above anonymous function. + d.mu.versions.logLock() + vers := d.mu.versions.currentVersion() + for l, filesToMark := range files { + if len(filesToMark) == 0 { + continue + } + for _, f := range filesToMark { + // Ignore files to be marked that have already been compacted or marked. + if f.CompactionState == manifest.CompactionStateCompacted || + f.MarkedForCompaction { + continue + } + // Else, mark the file for compaction in this version. + vers.Stats.MarkedForCompaction++ + f.MarkedForCompaction = true + } + // The compaction picker uses the markedForCompactionAnnotator to + // quickly find files marked for compaction, or to quickly determine + // that there are no such files marked for compaction within a level. + // A b-tree node may be annotated with an annotation recording that + // there are no files marked for compaction within the node's subtree, + // based on the assumption that it's static. + // + // Since we're marking files for compaction, these b-tree nodes' + // annotations will be out of date. Clear the compaction-picking + // annotation, so that it's recomputed the next time the compaction + // picker looks for a file marked for compaction. + vers.Levels[l].InvalidateAnnotation(markedForCompactionAnnotator{}) + } + + // The 'marked-for-compaction' bit is persisted in the MANIFEST file + // metadata. 
We've already modified the in-memory file metadata, but the + manifest hasn't been updated. Force rotation to a new MANIFEST file, + which will write every file metadata to the new manifest file and ensure + that the now marked-for-compaction file metadata are persisted as marked. + NB: This call to logAndApply will unlock the MANIFEST, which we locked up + above before obtaining `vers`. + return d.mu.versions.logAndApply( + jobID, + &manifest.VersionEdit{}, + map[int]*LevelMetrics{}, + true, /* forceRotation */ + func() []compactionInfo { return d.getInProgressCompactionInfoLocked(nil) }) +} diff --git a/pebble/format_major_version_test.go b/pebble/format_major_version_test.go new file mode 100644 index 0000000..bbca42b --- /dev/null +++ b/pebble/format_major_version_test.go @@ -0,0 +1,580 @@ +// Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import ( + "bytes" + "fmt" + "strconv" + "sync" + "testing" + "time" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/pebble/bloom" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/testkeys" + "github.com/cockroachdb/pebble/sstable" + "github.com/cockroachdb/pebble/vfs" + "github.com/cockroachdb/pebble/vfs/atomicfs" + "github.com/stretchr/testify/require" +) + +func TestFormatMajorVersion_MigrationDefined(t *testing.T) { + for v := FormatMostCompatible; v <= FormatNewest; v++ { + if _, ok := formatMajorVersionMigrations[v]; !ok { + t.Errorf("format major version %d has no migration defined", v) + } + } +} + +func TestRatchetFormat(t *testing.T) { + fs := vfs.NewMem() + d, err := Open("", (&Options{FS: fs}).WithFSDefaults()) + require.NoError(t, err) + require.NoError(t, d.Set([]byte("foo"), []byte("bar"), Sync)) + require.Equal(t, FormatMostCompatible, d.FormatMajorVersion()) + require.NoError(t, 
d.RatchetFormatMajorVersion(FormatVersioned)) + require.Equal(t, FormatVersioned, d.FormatMajorVersion()) + require.NoError(t, d.RatchetFormatMajorVersion(FormatVersioned)) + require.Equal(t, FormatVersioned, d.FormatMajorVersion()) + require.NoError(t, d.RatchetFormatMajorVersion(FormatSetWithDelete)) + require.Equal(t, FormatSetWithDelete, d.FormatMajorVersion()) + require.NoError(t, d.RatchetFormatMajorVersion(FormatBlockPropertyCollector)) + require.Equal(t, FormatBlockPropertyCollector, d.FormatMajorVersion()) + require.NoError(t, d.RatchetFormatMajorVersion(FormatSplitUserKeysMarked)) + require.Equal(t, FormatSplitUserKeysMarked, d.FormatMajorVersion()) + require.NoError(t, d.RatchetFormatMajorVersion(FormatSplitUserKeysMarkedCompacted)) + require.Equal(t, FormatSplitUserKeysMarkedCompacted, d.FormatMajorVersion()) + require.NoError(t, d.RatchetFormatMajorVersion(FormatRangeKeys)) + require.Equal(t, FormatRangeKeys, d.FormatMajorVersion()) + require.NoError(t, d.RatchetFormatMajorVersion(FormatMinTableFormatPebblev1)) + require.Equal(t, FormatMinTableFormatPebblev1, d.FormatMajorVersion()) + require.NoError(t, d.RatchetFormatMajorVersion(FormatPrePebblev1Marked)) + require.Equal(t, FormatPrePebblev1Marked, d.FormatMajorVersion()) + require.NoError(t, d.RatchetFormatMajorVersion(formatUnusedPrePebblev1MarkedCompacted)) + require.Equal(t, formatUnusedPrePebblev1MarkedCompacted, d.FormatMajorVersion()) + require.NoError(t, d.RatchetFormatMajorVersion(FormatSSTableValueBlocks)) + require.Equal(t, FormatSSTableValueBlocks, d.FormatMajorVersion()) + require.NoError(t, d.RatchetFormatMajorVersion(FormatFlushableIngest)) + require.Equal(t, FormatFlushableIngest, d.FormatMajorVersion()) + require.NoError(t, d.RatchetFormatMajorVersion(FormatPrePebblev1MarkedCompacted)) + require.Equal(t, FormatPrePebblev1MarkedCompacted, d.FormatMajorVersion()) + require.NoError(t, d.RatchetFormatMajorVersion(FormatDeleteSizedAndObsolete)) + require.Equal(t, 
FormatDeleteSizedAndObsolete, d.FormatMajorVersion()) + require.NoError(t, d.RatchetFormatMajorVersion(FormatVirtualSSTables)) + require.Equal(t, FormatVirtualSSTables, d.FormatMajorVersion()) + + require.NoError(t, d.Close()) + + // If we Open the database again, leaving the default format, the + // database should Open using the persisted FormatNewest. + d, err = Open("", (&Options{FS: fs}).WithFSDefaults()) + require.NoError(t, err) + require.Equal(t, internalFormatNewest, d.FormatMajorVersion()) + require.NoError(t, d.Close()) + + // Move the marker to a version that does not exist. + m, _, err := atomicfs.LocateMarker(fs, "", formatVersionMarkerName) + require.NoError(t, err) + require.NoError(t, m.Move("999999")) + require.NoError(t, m.Close()) + + _, err = Open("", (&Options{ + FS: fs, + FormatMajorVersion: FormatVersioned, + }).WithFSDefaults()) + require.Error(t, err) + require.EqualError(t, err, `pebble: database "" written in format major version 999999`) +} + +func testBasicDB(d *DB) error { + key := []byte("a") + value := []byte("b") + if err := d.Set(key, value, nil); err != nil { + return err + } + if err := d.Flush(); err != nil { + return err + } + if err := d.Compact(nil, []byte("\xff"), false); err != nil { + return err + } + + iter, _ := d.NewIter(nil) + for valid := iter.First(); valid; valid = iter.Next() { + } + if err := iter.Close(); err != nil { + return err + } + return nil +} + +func TestFormatMajorVersions(t *testing.T) { + for vers := FormatMostCompatible; vers <= FormatNewest; vers++ { + t.Run(fmt.Sprintf("vers=%03d", vers), func(t *testing.T) { + fs := vfs.NewStrictMem() + opts := (&Options{ + FS: fs, + FormatMajorVersion: vers, + }).WithFSDefaults() + + // Create a database at this format major version and perform + // some very basic operations. 
+ d, err := Open("", opts) + require.NoError(t, err) + require.NoError(t, testBasicDB(d)) + require.NoError(t, d.Close()) + + // Re-open the database at this format major version, and again + // perform some basic operations. + d, err = Open("", opts) + require.NoError(t, err) + require.NoError(t, testBasicDB(d)) + require.NoError(t, d.Close()) + + t.Run("upgrade-at-open", func(t *testing.T) { + for upgradeVers := vers + 1; upgradeVers <= FormatNewest; upgradeVers++ { + t.Run(fmt.Sprintf("upgrade-vers=%03d", upgradeVers), func(t *testing.T) { + // We use vfs.MemFS's option to ignore syncs so + // that we can perform an upgrade on the current + // database state in fs, and revert it when this + // subtest is complete. + fs.SetIgnoreSyncs(true) + defer fs.ResetToSyncedState() + + // Re-open the database, passing a higher format + // major version in the Options to automatically + // ratchet the format major version. Ensure some + // basic operations pass. + opts := opts.Clone() + opts.FormatMajorVersion = upgradeVers + d, err = Open("", opts) + require.NoError(t, err) + require.Equal(t, upgradeVers, d.FormatMajorVersion()) + require.NoError(t, testBasicDB(d)) + require.NoError(t, d.Close()) + + // Re-open to ensure the upgrade persisted. + d, err = Open("", opts) + require.NoError(t, err) + require.Equal(t, upgradeVers, d.FormatMajorVersion()) + require.NoError(t, testBasicDB(d)) + require.NoError(t, d.Close()) + }) + } + }) + + t.Run("upgrade-while-open", func(t *testing.T) { + for upgradeVers := vers + 1; upgradeVers <= FormatNewest; upgradeVers++ { + t.Run(fmt.Sprintf("upgrade-vers=%03d", upgradeVers), func(t *testing.T) { + // Ensure the previous tests don't overwrite our + // options. + require.Equal(t, vers, opts.FormatMajorVersion) + + // We use vfs.MemFS's option to ignore syncs so + // that we can perform an upgrade on the current + // database state in fs, and revert it when this + // subtest is complete. 
+ fs.SetIgnoreSyncs(true) + defer fs.ResetToSyncedState() + + // Re-open the database, still at the current format + // major version. Perform some basic operations, + // ratchet the format version up, and perform + // additional basic operations. + d, err = Open("", opts) + require.NoError(t, err) + require.NoError(t, testBasicDB(d)) + require.Equal(t, vers, d.FormatMajorVersion()) + require.NoError(t, d.RatchetFormatMajorVersion(upgradeVers)) + require.Equal(t, upgradeVers, d.FormatMajorVersion()) + require.NoError(t, testBasicDB(d)) + require.NoError(t, d.Close()) + + // Re-open to ensure the upgrade persisted. + d, err = Open("", opts) + require.NoError(t, err) + require.Equal(t, upgradeVers, d.FormatMajorVersion()) + require.NoError(t, testBasicDB(d)) + require.NoError(t, d.Close()) + }) + } + }) + }) + } +} + +func TestFormatMajorVersions_TableFormat(t *testing.T) { + // NB: This test is intended to validate the mapping between every + // FormatMajorVersion and sstable.TableFormat exhaustively. This serves as a + // sanity check that new versions have a corresponding mapping. The test + // fixture is intentionally verbose. 
+ + m := map[FormatMajorVersion][2]sstable.TableFormat{ + FormatDefault: {sstable.TableFormatLevelDB, sstable.TableFormatRocksDBv2}, + FormatMostCompatible: {sstable.TableFormatLevelDB, sstable.TableFormatRocksDBv2}, + formatVersionedManifestMarker: {sstable.TableFormatLevelDB, sstable.TableFormatRocksDBv2}, + FormatVersioned: {sstable.TableFormatLevelDB, sstable.TableFormatRocksDBv2}, + FormatSetWithDelete: {sstable.TableFormatLevelDB, sstable.TableFormatRocksDBv2}, + FormatBlockPropertyCollector: {sstable.TableFormatLevelDB, sstable.TableFormatPebblev1}, + FormatSplitUserKeysMarked: {sstable.TableFormatLevelDB, sstable.TableFormatPebblev1}, + FormatSplitUserKeysMarkedCompacted: {sstable.TableFormatLevelDB, sstable.TableFormatPebblev1}, + FormatRangeKeys: {sstable.TableFormatLevelDB, sstable.TableFormatPebblev2}, + FormatMinTableFormatPebblev1: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev2}, + FormatPrePebblev1Marked: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev2}, + formatUnusedPrePebblev1MarkedCompacted: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev2}, + FormatSSTableValueBlocks: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev3}, + FormatFlushableIngest: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev3}, + FormatPrePebblev1MarkedCompacted: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev3}, + FormatDeleteSizedAndObsolete: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev4}, + FormatVirtualSSTables: {sstable.TableFormatPebblev1, sstable.TableFormatPebblev4}, + } + + // Valid versions. + for fmv := FormatMostCompatible; fmv <= internalFormatNewest; fmv++ { + got := [2]sstable.TableFormat{fmv.MinTableFormat(), fmv.MaxTableFormat()} + require.Equalf(t, m[fmv], got, "got %s; want %s", got, m[fmv]) + require.True(t, got[0] <= got[1] /* min <= max */) + } + + // Invalid versions. 
+ fmv := internalFormatNewest + 1 + require.Panics(t, func() { _ = fmv.MaxTableFormat() }) + require.Panics(t, func() { _ = fmv.MinTableFormat() }) +} + +func TestSplitUserKeyMigration(t *testing.T) { + var d *DB + var opts *Options + var fs vfs.FS + var buf bytes.Buffer + defer func() { + if d != nil { + require.NoError(t, d.Close()) + } + }() + + datadriven.RunTest(t, "testdata/format_major_version_split_user_key_migration", + func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "define": + if d != nil { + if err := d.Close(); err != nil { + return err.Error() + } + buf.Reset() + } + opts = (&Options{ + FormatMajorVersion: FormatBlockPropertyCollector, + EventListener: &EventListener{ + CompactionEnd: func(info CompactionInfo) { + // Fix the job ID and durations for determinism. + info.JobID = 100 + info.Duration = time.Second + info.TotalDuration = 2 * time.Second + fmt.Fprintln(&buf, info) + }, + }, + DisableAutomaticCompactions: true, + }).WithFSDefaults() + var err error + if d, err = runDBDefineCmd(td, opts); err != nil { + return err.Error() + } + + fs = d.opts.FS + d.mu.Lock() + defer d.mu.Unlock() + return d.mu.versions.currentVersion().DebugString(base.DefaultFormatter) + case "reopen": + if d != nil { + if err := d.Close(); err != nil { + return err.Error() + } + buf.Reset() + } + opts.FS = fs + opts.DisableAutomaticCompactions = true + var err error + d, err = Open("", opts) + if err != nil { + return err.Error() + } + return "OK" + case "build": + if err := runBuildCmd(td, d, fs); err != nil { + return err.Error() + } + return "" + case "force-ingest": + if err := runForceIngestCmd(td, d); err != nil { + return err.Error() + } + d.mu.Lock() + defer d.mu.Unlock() + return d.mu.versions.currentVersion().DebugString(base.DefaultFormatter) + case "format-major-version": + return d.FormatMajorVersion().String() + case "ratchet-format-major-version": + v, err := strconv.Atoi(td.CmdArgs[0].String()) + if err != nil { + return 
err.Error() + } + if err := d.RatchetFormatMajorVersion(FormatMajorVersion(v)); err != nil { + return err.Error() + } + return buf.String() + case "lsm": + return runLSMCmd(td, d) + case "marked-file-count": + m := d.Metrics() + return fmt.Sprintf("%d files marked for compaction", m.Compact.MarkedFiles) + case "disable-automatic-compactions": + d.mu.Lock() + defer d.mu.Unlock() + switch v := td.CmdArgs[0].String(); v { + case "true": + d.opts.DisableAutomaticCompactions = true + case "false": + d.opts.DisableAutomaticCompactions = false + default: + return fmt.Sprintf("unknown value %q", v) + } + return "" + default: + return fmt.Sprintf("unrecognized command %q", td.Cmd) + } + }) +} + +func TestPebblev1Migration(t *testing.T) { + var d *DB + defer func() { + if d != nil { + require.NoError(t, d.Close()) + } + }() + + datadriven.RunTest(t, "testdata/format_major_version_pebblev1_migration", + func(t *testing.T, td *datadriven.TestData) string { + switch cmd := td.Cmd; cmd { + case "open": + var version int + var err error + for _, cmdArg := range td.CmdArgs { + switch cmd := cmdArg.Key; cmd { + case "version": + version, err = strconv.Atoi(cmdArg.Vals[0]) + if err != nil { + return err.Error() + } + default: + return fmt.Sprintf("unknown argument: %s", cmd) + } + } + opts := (&Options{ + FS: vfs.NewMem(), + FormatMajorVersion: FormatMajorVersion(version), + }).WithFSDefaults() + d, err = Open("", opts) + if err != nil { + return err.Error() + } + return "" + + case "format-major-version": + return d.FormatMajorVersion().String() + + case "min-table-format": + return d.FormatMajorVersion().MinTableFormat().String() + + case "max-table-format": + return d.FormatMajorVersion().MaxTableFormat().String() + + case "disable-automatic-compactions": + d.mu.Lock() + defer d.mu.Unlock() + switch v := td.CmdArgs[0].String(); v { + case "true": + d.opts.DisableAutomaticCompactions = true + case "false": + d.opts.DisableAutomaticCompactions = false + default: + return 
fmt.Sprintf("unknown value %q", v) + } + return "" + + case "batch": + b := d.NewIndexedBatch() + if err := runBatchDefineCmd(td, b); err != nil { + return err.Error() + } + if err := b.Commit(nil); err != nil { + return err.Error() + } + return "" + + case "flush": + if err := d.Flush(); err != nil { + return err.Error() + } + return "" + + case "ingest": + if err := runBuildCmd(td, d, d.opts.FS); err != nil { + return err.Error() + } + // Only the first arg is a filename. + td.CmdArgs = td.CmdArgs[:1] + if err := runIngestCmd(td, d, d.opts.FS); err != nil { + return err.Error() + } + return "" + + case "lsm": + return runLSMCmd(td, d) + + case "tally-table-formats": + d.mu.Lock() + defer d.mu.Unlock() + v := d.mu.versions.currentVersion() + tally := make([]int, sstable.TableFormatMax+1) + for _, l := range v.Levels { + iter := l.Iter() + for m := iter.First(); m != nil; m = iter.Next() { + err := d.tableCache.withReader(m.PhysicalMeta(), + func(r *sstable.Reader) error { + f, err := r.TableFormat() + if err != nil { + return err + } + tally[f]++ + return nil + }) + if err != nil { + return err.Error() + } + } + } + var b bytes.Buffer + for i := 1; i <= int(sstable.TableFormatMax); i++ { + _, _ = fmt.Fprintf(&b, "%s: %d\n", sstable.TableFormat(i), tally[i]) + } + return b.String() + + case "ratchet-format-major-version": + v, err := strconv.Atoi(td.CmdArgs[0].String()) + if err != nil { + return err.Error() + } + if err = d.RatchetFormatMajorVersion(FormatMajorVersion(v)); err != nil { + return err.Error() + } + return "" + + case "marked-file-count": + m := d.Metrics() + return fmt.Sprintf("%d files marked for compaction", m.Compact.MarkedFiles) + + default: + return fmt.Sprintf("unknown command: %s", cmd) + } + }, + ) +} + +// TestPebblev1MigrationRace exercises the race between a PrePebbleV1Marked +// format major version upgrade that needs to open sstables to read their table +// format, and concurrent compactions that may delete the same files from the +// 
LSM. +// +// Regression test for #2019. +func TestPebblev1MigrationRace(t *testing.T) { + // Use a smaller table cache size to slow down the PrePebbleV1Marked + // migration, ensuring each table read needs to re-open the file. + cache := NewCache(4 << 20) + defer cache.Unref() + tableCache := NewTableCache(cache, 1, 5) + defer tableCache.Unref() + d, err := Open("", (&Options{ + Cache: cache, + FS: vfs.NewMem(), + FormatMajorVersion: FormatMajorVersion(FormatPrePebblev1Marked - 1), + TableCache: tableCache, + Levels: []LevelOptions{{TargetFileSize: 1}}, + }).WithFSDefaults()) + require.NoError(t, err) + defer d.Close() + + ks := testkeys.Alpha(3).EveryN(10) + var key [3]byte + for i := int64(0); i < ks.Count(); i++ { + n := testkeys.WriteKey(key[:], ks, i) + require.NoError(t, d.Set(key[:n], key[:n], nil)) + require.NoError(t, d.Flush()) + } + + // Asynchronously write and flush range deletes that will cause compactions + // to delete the existing sstables. These deletes will race with the format + // major version upgrade's migration will attempt to delete the files. + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + for i := ks.Count() - 1; i > 0; i -= 50 { + endKey := testkeys.Key(ks, i) + startIndex := i - 50 + if startIndex < 0 { + startIndex = 0 + } + startKey := testkeys.Key(ks, startIndex) + + require.NoError(t, d.DeleteRange(startKey, endKey, nil)) + _, err := d.AsyncFlush() + require.NoError(t, err) + } + }() + require.NoError(t, d.RatchetFormatMajorVersion(FormatPrePebblev1Marked)) + wg.Wait() +} + +// Regression test for #2044, where multiple concurrent compactions can lead +// to an indefinite wait on the compaction goroutine in compactMarkedFilesLocked. 
+func TestPebblev1MigrationConcurrencyRace(t *testing.T) { + opts := (&Options{ + Comparer: testkeys.Comparer, + FS: vfs.NewMem(), + FormatMajorVersion: FormatSplitUserKeysMarked, + Levels: []LevelOptions{{FilterPolicy: bloom.FilterPolicy(10)}}, + MaxConcurrentCompactions: func() int { + return 4 + }, + }).WithFSDefaults() + func() { + d, err := Open("", opts) + require.NoError(t, err) + defer func() { + require.NoError(t, d.Close()) + }() + + ks := testkeys.Alpha(3).EveryN(10) + var key [3]byte + for i := int64(0); i < ks.Count(); i++ { + n := testkeys.WriteKey(key[:], ks, i) + require.NoError(t, d.Set(key[:n], key[:n], nil)) + if i%100 == 0 { + require.NoError(t, d.Flush()) + } + } + require.NoError(t, d.Flush()) + }() + + opts.FormatMajorVersion = formatUnusedPrePebblev1MarkedCompacted + d, err := Open("", opts) + require.NoError(t, err) + require.NoError(t, d.RatchetFormatMajorVersion(formatUnusedPrePebblev1MarkedCompacted)) + require.NoError(t, d.Close()) +} diff --git a/pebble/get_iter.go b/pebble/get_iter.go new file mode 100644 index 0000000..6ebdd59 --- /dev/null +++ b/pebble/get_iter.go @@ -0,0 +1,258 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import ( + "context" + "fmt" + + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/keyspan" + "github.com/cockroachdb/pebble/internal/manifest" + "github.com/cockroachdb/pebble/sstable" +) + +// getIter is an internal iterator used to perform gets. It iterates through +// the values for a particular key, level by level. It is not a general purpose +// internalIterator, but specialized for Get operations so that it loads data +// lazily. 
+type getIter struct { + logger Logger + comparer *Comparer + newIters tableNewIters + snapshot uint64 + key []byte + iter internalIterator + rangeDelIter keyspan.FragmentIterator + tombstone *keyspan.Span + levelIter levelIter + level int + batch *Batch + mem flushableList + l0 []manifest.LevelSlice + version *version + iterKey *InternalKey + iterValue base.LazyValue + err error +} + +// TODO(sumeer): CockroachDB code doesn't use getIter, but, for completeness, +// make this implement InternalIteratorWithStats. + +// getIter implements the base.InternalIterator interface. +var _ base.InternalIterator = (*getIter)(nil) + +func (g *getIter) String() string { + return fmt.Sprintf("len(l0)=%d, len(mem)=%d, level=%d", len(g.l0), len(g.mem), g.level) +} + +func (g *getIter) SeekGE(key []byte, flags base.SeekGEFlags) (*InternalKey, base.LazyValue) { + panic("pebble: SeekGE unimplemented") +} + +func (g *getIter) SeekPrefixGE( + prefix, key []byte, flags base.SeekGEFlags, +) (*base.InternalKey, base.LazyValue) { + panic("pebble: SeekPrefixGE unimplemented") +} + +func (g *getIter) SeekLT(key []byte, flags base.SeekLTFlags) (*InternalKey, base.LazyValue) { + panic("pebble: SeekLT unimplemented") +} + +func (g *getIter) First() (*InternalKey, base.LazyValue) { + return g.Next() +} + +func (g *getIter) Last() (*InternalKey, base.LazyValue) { + panic("pebble: Last unimplemented") +} + +func (g *getIter) Next() (*InternalKey, base.LazyValue) { + if g.iter != nil { + g.iterKey, g.iterValue = g.iter.Next() + } + + for { + if g.iter != nil { + // We have to check rangeDelIter on each iteration because a single + // user-key can be spread across multiple tables in a level. A range + // tombstone will appear in the table corresponding to its start + // key. Every call to levelIter.Next() potentially switches to a new + // table and thus reinitializes rangeDelIter. 
+ if g.rangeDelIter != nil { + g.tombstone = keyspan.Get(g.comparer.Compare, g.rangeDelIter, g.key) + if g.err = g.rangeDelIter.Close(); g.err != nil { + return nil, base.LazyValue{} + } + g.rangeDelIter = nil + } + + if g.iterKey != nil { + key := g.iterKey + if g.tombstone != nil && g.tombstone.CoversAt(g.snapshot, key.SeqNum()) { + // We have a range tombstone covering this key. Rather than return a + // point or range deletion here, we return false and close our + // internal iterator which will make Valid() return false, + // effectively stopping iteration. + g.err = g.iter.Close() + g.iter = nil + return nil, base.LazyValue{} + } + if g.comparer.Equal(g.key, key.UserKey) { + if !key.Visible(g.snapshot, base.InternalKeySeqNumMax) { + g.iterKey, g.iterValue = g.iter.Next() + continue + } + return g.iterKey, g.iterValue + } + } + // We've advanced the iterator past the desired key. Move on to the + // next memtable / level. + g.err = g.iter.Close() + g.iter = nil + if g.err != nil { + return nil, base.LazyValue{} + } + } + + // Create an iterator from the batch. + if g.batch != nil { + if g.batch.index == nil { + g.err = ErrNotIndexed + g.iterKey, g.iterValue = nil, base.LazyValue{} + return nil, base.LazyValue{} + } + g.iter = g.batch.newInternalIter(nil) + g.rangeDelIter = g.batch.newRangeDelIter( + nil, + // Get always reads the entirety of the batch's history, so no + // batch keys should be filtered. + base.InternalKeySeqNumMax, + ) + g.iterKey, g.iterValue = g.iter.SeekGE(g.key, base.SeekGEFlagsNone) + g.batch = nil + continue + } + + // If we have a tombstone from a previous level it is guaranteed to delete + // keys in lower levels. + if g.tombstone != nil && g.tombstone.VisibleAt(g.snapshot) { + return nil, base.LazyValue{} + } + + // Create iterators from memtables from newest to oldest. 
+ if n := len(g.mem); n > 0 { + m := g.mem[n-1] + g.iter = m.newIter(nil) + g.rangeDelIter = m.newRangeDelIter(nil) + g.mem = g.mem[:n-1] + g.iterKey, g.iterValue = g.iter.SeekGE(g.key, base.SeekGEFlagsNone) + continue + } + + if g.level == 0 { + // Create iterators from L0 from newest to oldest. + if n := len(g.l0); n > 0 { + files := g.l0[n-1].Iter() + g.l0 = g.l0[:n-1] + iterOpts := IterOptions{ + // TODO(sumeer): replace with a parameter provided by the caller. + CategoryAndQoS: sstable.CategoryAndQoS{ + Category: "pebble-get", + QoSLevel: sstable.LatencySensitiveQoSLevel, + }, + logger: g.logger, + snapshotForHideObsoletePoints: g.snapshot} + g.levelIter.init(context.Background(), iterOpts, g.comparer, g.newIters, + files, manifest.L0Sublevel(n), internalIterOpts{}) + g.levelIter.initRangeDel(&g.rangeDelIter) + bc := levelIterBoundaryContext{} + g.levelIter.initBoundaryContext(&bc) + g.iter = &g.levelIter + + // Compute the key prefix for bloom filtering if split function is + // specified, or use the user key as default. + prefix := g.key + if g.comparer.Split != nil { + prefix = g.key[:g.comparer.Split(g.key)] + } + g.iterKey, g.iterValue = g.iter.SeekPrefixGE(prefix, g.key, base.SeekGEFlagsNone) + if bc.isSyntheticIterBoundsKey || bc.isIgnorableBoundaryKey { + g.iterKey = nil + g.iterValue = base.LazyValue{} + } + continue + } + g.level++ + } + + if g.level >= numLevels { + return nil, base.LazyValue{} + } + if g.version.Levels[g.level].Empty() { + g.level++ + continue + } + + iterOpts := IterOptions{ + // TODO(sumeer): replace with a parameter provided by the caller. 
+ CategoryAndQoS: sstable.CategoryAndQoS{ + Category: "pebble-get", + QoSLevel: sstable.LatencySensitiveQoSLevel, + }, logger: g.logger, snapshotForHideObsoletePoints: g.snapshot} + g.levelIter.init(context.Background(), iterOpts, g.comparer, g.newIters, + g.version.Levels[g.level].Iter(), manifest.Level(g.level), internalIterOpts{}) + g.levelIter.initRangeDel(&g.rangeDelIter) + bc := levelIterBoundaryContext{} + g.levelIter.initBoundaryContext(&bc) + g.level++ + g.iter = &g.levelIter + + // Compute the key prefix for bloom filtering if split function is + // specified, or use the user key as default. + prefix := g.key + if g.comparer.Split != nil { + prefix = g.key[:g.comparer.Split(g.key)] + } + g.iterKey, g.iterValue = g.iter.SeekPrefixGE(prefix, g.key, base.SeekGEFlagsNone) + if bc.isSyntheticIterBoundsKey || bc.isIgnorableBoundaryKey { + g.iterKey = nil + g.iterValue = base.LazyValue{} + } + } +} + +func (g *getIter) Prev() (*InternalKey, base.LazyValue) { + panic("pebble: Prev unimplemented") +} + +func (g *getIter) NextPrefix([]byte) (*InternalKey, base.LazyValue) { + panic("pebble: NextPrefix unimplemented") +} + +func (g *getIter) Valid() bool { + return g.iterKey != nil && g.err == nil +} + +func (g *getIter) Error() error { + return g.err +} + +func (g *getIter) Close() error { + if g.iter != nil { + if err := g.iter.Close(); err != nil && g.err == nil { + g.err = err + } + g.iter = nil + } + return g.err +} + +func (g *getIter) SetBounds(lower, upper []byte) { + panic("pebble: SetBounds unimplemented") +} + +func (g *getIter) SetContext(_ context.Context) {} diff --git a/pebble/get_iter_test.go b/pebble/get_iter_test.go new file mode 100644 index 0000000..ab6e67e --- /dev/null +++ b/pebble/get_iter_test.go @@ -0,0 +1,576 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package pebble + +import ( + "context" + "strings" + "testing" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/keyspan" + "github.com/cockroachdb/pebble/internal/manifest" + "github.com/cockroachdb/pebble/internal/testkeys" +) + +func TestGetIter(t *testing.T) { + // testTable is a table to insert into a version. + // Each element of data is a string of the form "internalKey value". + type testTable struct { + level int + fileNum FileNum + data []string + } + + testCases := []struct { + description string + // badOrdering is whether this test case has a table ordering violation. + badOrdering bool + // tables are the tables to populate the version with. + tables []testTable + // queries are the queries to run against the version. Each element has + // the form "internalKey wantedValue". The internalKey is passed to the + // version.get method, wantedValue may be "ErrNotFound" if the query + // should return that error. 
+ queries []string + }{ + { + description: "empty: an empty version", + queries: []string{ + "abc.SEPARATOR.101 ErrNotFound", + }, + }, + + { + description: "single-0: one level-0 table", + tables: []testTable{ + { + level: 0, + fileNum: 10, + data: []string{ + "the.SET.101 a", + "cat.SET.102 b", + "on_.SET.103 c", + "the.SET.104 d", + "mat.SET.105 e", + "the.DEL.106 ", + "the.MERGE.107 g", + }, + }, + }, + queries: []string{ + "aaa.SEPARATOR.105 ErrNotFound", + "cat.SEPARATOR.105 b", + "hat.SEPARATOR.105 ErrNotFound", + "mat.SEPARATOR.105 e", + "the.SEPARATOR.108 g", + "the.SEPARATOR.107 g", + "the.SEPARATOR.106 ErrNotFound", + "the.SEPARATOR.105 d", + "the.SEPARATOR.104 d", + "the.SEPARATOR.104 d", + "the.SEPARATOR.103 a", + "the.SEPARATOR.102 a", + "the.SEPARATOR.101 a", + "the.SEPARATOR.100 ErrNotFound", + "zzz.SEPARATOR.105 ErrNotFound", + }, + }, + + { + description: "triple-0: three level-0 tables", + tables: []testTable{ + { + level: 0, + fileNum: 10, + data: []string{ + "the.SET.101 a", + "cat.SET.102 b", + "on_.SET.103 c", + "the.SET.104 d", + "mat.SET.105 e", + "the.DEL.106 ", + "the.SET.107 g", + }, + }, + { + level: 0, + fileNum: 11, + data: []string{ + "awk.SET.111 w", + "cat.SET.112 x", + "man.SET.113 y", + "sed.SET.114 z", + }, + }, + { + level: 0, + fileNum: 12, + data: []string{ + "the.DEL.121 ", + "cat.DEL.122 ", + "man.DEL.123 ", + "was.SET.124 D", + "not.SET.125 E", + "the.SET.126 F", + "man.SET.127 G", + }, + }, + }, + queries: []string{ + "aaa.SEPARATOR.105 ErrNotFound", + "awk.SEPARATOR.135 w", + "awk.SEPARATOR.125 w", + "awk.SEPARATOR.115 w", + "awk.SEPARATOR.105 ErrNotFound", + "cat.SEPARATOR.135 ErrNotFound", + "cat.SEPARATOR.125 ErrNotFound", + "cat.SEPARATOR.115 x", + "cat.SEPARATOR.105 b", + "man.SEPARATOR.135 G", + "man.SEPARATOR.125 ErrNotFound", + "man.SEPARATOR.115 y", + "man.SEPARATOR.105 ErrNotFound", + "on_.SEPARATOR.135 c", + "on_.SEPARATOR.125 c", + "on_.SEPARATOR.115 c", + "on_.SEPARATOR.105 c", + "the.SEPARATOR.135 F", + 
"the.SEPARATOR.127 F", + "the.SEPARATOR.126 F", + "the.SEPARATOR.125 ErrNotFound", + "the.SEPARATOR.122 ErrNotFound", + "the.SEPARATOR.121 ErrNotFound", + "the.SEPARATOR.120 g", + "the.SEPARATOR.115 g", + "the.SEPARATOR.114 g", + "the.SEPARATOR.111 g", + "the.SEPARATOR.110 g", + "the.SEPARATOR.108 g", + "the.SEPARATOR.107 g", + "the.SEPARATOR.106 ErrNotFound", + "the.SEPARATOR.105 d", + "the.SEPARATOR.104 d", + "the.SEPARATOR.104 d", + "the.SEPARATOR.103 a", + "the.SEPARATOR.102 a", + "the.SEPARATOR.101 a", + "the.SEPARATOR.100 ErrNotFound", + "zzz.SEPARATOR.105 ErrNotFound", + }, + }, + + { + description: "quad-4: four level-4 tables", + tables: []testTable{ + { + level: 4, + fileNum: 11, + data: []string{ + "aardvark.SET.101 a1", + "alpaca__.SET.201 a2", + "anteater.SET.301 a3", + }, + }, + { + level: 4, + fileNum: 22, + data: []string{ + "baboon__.SET.102 b1", + "baboon__.DEL.202 ", + "baboon__.SET.302 b3", + "bear____.SET.402 b4", + "bear____.DEL.502 ", + "buffalo_.SET.602 b6", + }, + }, + { + level: 4, + fileNum: 33, + data: []string{ + "buffalo_.SET.103 B1", + }, + }, + { + level: 4, + fileNum: 44, + data: []string{ + "chipmunk.SET.104 c1", + "chipmunk.SET.204 c2", + }, + }, + }, + queries: []string{ + "a_______.SEPARATOR.999 ErrNotFound", + "aardvark.SEPARATOR.999 a1", + "aardvark.SEPARATOR.102 a1", + "aardvark.SEPARATOR.101 a1", + "aardvark.SEPARATOR.100 ErrNotFound", + "alpaca__.SEPARATOR.999 a2", + "alpaca__.SEPARATOR.200 ErrNotFound", + "anteater.SEPARATOR.999 a3", + "anteater.SEPARATOR.302 a3", + "anteater.SEPARATOR.301 a3", + "anteater.SEPARATOR.300 ErrNotFound", + "anteater.SEPARATOR.000 ErrNotFound", + "b_______.SEPARATOR.999 ErrNotFound", + "baboon__.SEPARATOR.999 b3", + "baboon__.SEPARATOR.302 b3", + "baboon__.SEPARATOR.301 ErrNotFound", + "baboon__.SEPARATOR.202 ErrNotFound", + "baboon__.SEPARATOR.201 b1", + "baboon__.SEPARATOR.102 b1", + "baboon__.SEPARATOR.101 ErrNotFound", + "bear____.SEPARATOR.999 ErrNotFound", + "bear____.SEPARATOR.500 b4", + 
"bear____.SEPARATOR.000 ErrNotFound", + "buffalo_.SEPARATOR.999 b6", + "buffalo_.SEPARATOR.603 b6", + "buffalo_.SEPARATOR.602 b6", + "buffalo_.SEPARATOR.601 B1", + "buffalo_.SEPARATOR.104 B1", + "buffalo_.SEPARATOR.103 B1", + "buffalo_.SEPARATOR.102 ErrNotFound", + "buffalo_.SEPARATOR.000 ErrNotFound", + "c_______.SEPARATOR.999 ErrNotFound", + "chipmunk.SEPARATOR.999 c2", + "chipmunk.SEPARATOR.205 c2", + "chipmunk.SEPARATOR.204 c2", + "chipmunk.SEPARATOR.203 c1", + "chipmunk.SEPARATOR.105 c1", + "chipmunk.SEPARATOR.104 c1", + "chipmunk.SEPARATOR.103 ErrNotFound", + "chipmunk.SEPARATOR.000 ErrNotFound", + "d_______.SEPARATOR.999 ErrNotFound", + }, + }, + + { + description: "complex: many tables at many levels", + tables: []testTable{ + { + level: 0, + fileNum: 50, + data: []string{ + "alfalfa__.SET.501 p1", + "asparagus.SET.502 p2", + "cabbage__.DEL.503 ", + "spinach__.MERGE.504 p3", + }, + }, + { + level: 0, + fileNum: 51, + data: []string{ + "asparagus.SET.511 q1", + "asparagus.SET.512 q2", + "asparagus.SET.513 q3", + "beans____.SET.514 q4", + "broccoli_.SET.515 q5", + "cabbage__.SET.516 q6", + "celery___.SET.517 q7", + "spinach__.MERGE.518 q8", + }, + }, + { + level: 1, + fileNum: 40, + data: []string{ + "alfalfa__.SET.410 r1", + "asparagus.SET.420 r2", + "arugula__.SET.430 r3", + }, + }, + { + level: 1, + fileNum: 41, + data: []string{ + "beans____.SET.411 s1", + "beans____.SET.421 s2", + "bokchoy__.DEL.431 ", + "broccoli_.SET.441 s4", + }, + }, + { + level: 1, + fileNum: 42, + data: []string{ + "cabbage__.SET.412 t1", + "corn_____.DEL.422 ", + "spinach__.MERGE.432 t2", + }, + }, + { + level: 2, + fileNum: 30, + data: []string{ + "alfalfa__.SET.310 u1", + "bokchoy__.SET.320 u2", + "celery___.SET.330 u3", + "corn_____.SET.340 u4", + "spinach__.MERGE.350 u5", + }, + }, + }, + queries: []string{ + "a________.SEPARATOR.999 ErrNotFound", + "alfalfa__.SEPARATOR.520 p1", + "alfalfa__.SEPARATOR.510 p1", + "alfalfa__.SEPARATOR.500 r1", + "alfalfa__.SEPARATOR.400 u1", + 
"alfalfa__.SEPARATOR.300 ErrNotFound", + "asparagus.SEPARATOR.520 q3", + "asparagus.SEPARATOR.510 p2", + "asparagus.SEPARATOR.500 r2", + "asparagus.SEPARATOR.400 ErrNotFound", + "asparagus.SEPARATOR.300 ErrNotFound", + "arugula__.SEPARATOR.520 r3", + "arugula__.SEPARATOR.510 r3", + "arugula__.SEPARATOR.500 r3", + "arugula__.SEPARATOR.400 ErrNotFound", + "arugula__.SEPARATOR.300 ErrNotFound", + "beans____.SEPARATOR.520 q4", + "beans____.SEPARATOR.510 s2", + "beans____.SEPARATOR.500 s2", + "beans____.SEPARATOR.400 ErrNotFound", + "beans____.SEPARATOR.300 ErrNotFound", + "bokchoy__.SEPARATOR.520 ErrNotFound", + "bokchoy__.SEPARATOR.510 ErrNotFound", + "bokchoy__.SEPARATOR.500 ErrNotFound", + "bokchoy__.SEPARATOR.400 u2", + "bokchoy__.SEPARATOR.300 ErrNotFound", + "broccoli_.SEPARATOR.520 q5", + "broccoli_.SEPARATOR.510 s4", + "broccoli_.SEPARATOR.500 s4", + "broccoli_.SEPARATOR.400 ErrNotFound", + "broccoli_.SEPARATOR.300 ErrNotFound", + "cabbage__.SEPARATOR.520 q6", + "cabbage__.SEPARATOR.510 ErrNotFound", + "cabbage__.SEPARATOR.500 t1", + "cabbage__.SEPARATOR.400 ErrNotFound", + "cabbage__.SEPARATOR.300 ErrNotFound", + "celery___.SEPARATOR.520 q7", + "celery___.SEPARATOR.510 u3", + "celery___.SEPARATOR.500 u3", + "celery___.SEPARATOR.400 u3", + "celery___.SEPARATOR.300 ErrNotFound", + "corn_____.SEPARATOR.520 ErrNotFound", + "corn_____.SEPARATOR.510 ErrNotFound", + "corn_____.SEPARATOR.500 ErrNotFound", + "corn_____.SEPARATOR.400 u4", + "corn_____.SEPARATOR.300 ErrNotFound", + "d________.SEPARATOR.999 ErrNotFound", + "spinach__.SEPARATOR.999 u5t2p3q8", + "spinach__.SEPARATOR.518 u5t2p3q8", + "spinach__.SEPARATOR.517 u5t2p3", + "spinach__.SEPARATOR.504 u5t2p3", + "spinach__.SEPARATOR.503 u5t2", + "spinach__.SEPARATOR.432 u5t2", + "spinach__.SEPARATOR.431 u5", + "spinach__.SEPARATOR.350 u5", + "spinach__.SEPARATOR.349 ErrNotFound", + }, + }, + + { + description: "broken invariants 0: non-increasing level 0 sequence numbers", + badOrdering: true, + tables: []testTable{ 
+ { + level: 0, + fileNum: 19, + data: []string{ + "a.SET.101 a", + "b.SET.102 b", + }, + }, + { + level: 0, + fileNum: 20, + data: []string{ + "c.SET.101 c", + }, + }, + }, + }, + + { + description: "broken invariants 1: non-increasing level 0 sequence numbers", + badOrdering: true, + tables: []testTable{ + { + level: 0, + fileNum: 19, + data: []string{ + "a.SET.101 a", + "b.SET.102 b", + }, + }, + { + level: 0, + fileNum: 20, + data: []string{ + "c.SET.100 c", + "d.SET.101 d", + }, + }, + }, + }, + + { + description: "broken invariants 2: matching level 0 sequence numbers, considered acceptable", + badOrdering: false, + tables: []testTable{ + { + level: 0, + fileNum: 19, + data: []string{ + "a.SET.101 a", + }, + }, + { + level: 0, + fileNum: 20, + data: []string{ + "a.SET.101 a", + }, + }, + }, + }, + + { + description: "broken invariants 3: level non-0 overlapping internal key ranges", + badOrdering: true, + tables: []testTable{ + { + level: 5, + fileNum: 11, + data: []string{ + "bat.SET.101 xxx", + "dog.SET.102 xxx", + }, + }, + { + level: 5, + fileNum: 12, + data: []string{ + "cow.SET.103 xxx", + "pig.SET.104 xxx", + }, + }, + }, + }, + } + + cmp := testkeys.Comparer.Compare + for _, tc := range testCases { + desc := tc.description[:strings.Index(tc.description, ":")] + + // m is a map from file numbers to DBs. 
+ m := map[FileNum]*memTable{} + newIter := func( + _ context.Context, file *manifest.FileMetadata, _ *IterOptions, _ internalIterOpts, + ) (internalIterator, keyspan.FragmentIterator, error) { + d, ok := m[file.FileNum] + if !ok { + return nil, nil, errors.New("no such file") + } + return d.newIter(nil), nil, nil + } + + var files [numLevels][]*fileMetadata + for _, tt := range tc.tables { + d := newMemTable(memTableOptions{}) + m[tt.fileNum] = d + + meta := &fileMetadata{ + FileNum: tt.fileNum, + } + meta.InitPhysicalBacking() + for i, datum := range tt.data { + s := strings.Split(datum, " ") + ikey := base.ParseInternalKey(s[0]) + err := d.set(ikey, []byte(s[1])) + if err != nil { + t.Fatalf("desc=%q: memtable Set: %v", desc, err) + } + + meta.ExtendPointKeyBounds(cmp, ikey, ikey) + if i == 0 { + meta.SmallestSeqNum = ikey.SeqNum() + meta.LargestSeqNum = ikey.SeqNum() + } else { + if meta.SmallestSeqNum > ikey.SeqNum() { + meta.SmallestSeqNum = ikey.SeqNum() + } + if meta.LargestSeqNum < ikey.SeqNum() { + meta.LargestSeqNum = ikey.SeqNum() + } + } + } + + files[tt.level] = append(files[tt.level], meta) + } + v := manifest.NewVersion(cmp, base.DefaultFormatter, 10<<20, files) + err := v.CheckOrdering(cmp, base.DefaultFormatter, manifest.AllowSplitUserKeys) + if tc.badOrdering && err == nil { + t.Errorf("desc=%q: want bad ordering, got nil error", desc) + continue + } else if !tc.badOrdering && err != nil { + t.Errorf("desc=%q: bad ordering: %v", desc, err) + continue + } + + get := func(v *version, ikey InternalKey) ([]byte, error) { + var buf struct { + dbi Iterator + get getIter + } + + get := &buf.get + get.comparer = testkeys.Comparer + get.newIters = newIter + get.key = ikey.UserKey + get.l0 = v.L0SublevelFiles + get.version = v + get.snapshot = ikey.SeqNum() + 1 + + i := &buf.dbi + i.comparer = *testkeys.Comparer + i.merge = DefaultMerger.Merge + i.iter = get + + defer i.Close() + if !i.First() { + err := i.Error() + if err != nil { + return nil, err + } + 
return nil, ErrNotFound + } + return i.Value(), nil + } + + for _, query := range tc.queries { + s := strings.Split(query, " ") + ikey := base.ParseInternalKey(s[0]) + value, err := get(v, ikey) + got, want := "", s[1] + if err != nil { + if err != ErrNotFound { + t.Errorf("desc=%q: query=%q: %v", desc, s[0], err) + continue + } + got = "ErrNotFound" + } else { + got = string(value) + } + if got != want { + t.Errorf("desc=%q: query=%q: got %q, want %q", desc, s[0], got, want) + } + } + } +} diff --git a/pebble/go.mod b/pebble/go.mod new file mode 100644 index 0000000..d882642 --- /dev/null +++ b/pebble/go.mod @@ -0,0 +1,49 @@ +module github.com/cockroachdb/pebble + +require ( + github.com/DataDog/zstd v1.4.5 + github.com/HdrHistogram/hdrhistogram-go v1.1.2 + github.com/cespare/xxhash/v2 v2.2.0 + github.com/cockroachdb/datadriven v1.0.3-0.20230413201302-be42291fc80f + github.com/cockroachdb/errors v1.11.1 + github.com/cockroachdb/metamorphic v0.0.0-20231108215700-4ba948b56895 + github.com/cockroachdb/redact v1.1.5 + github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 + github.com/ghemawat/stream v0.0.0-20171120220530-696b145b53b9 + github.com/golang/snappy v0.0.4 + github.com/guptarohit/asciigraph v0.5.5 + github.com/klauspost/compress v1.15.15 + github.com/kr/pretty v0.3.1 + github.com/pkg/errors v0.9.1 + github.com/pmezard/go-difflib v1.0.0 + github.com/prometheus/client_golang v1.12.0 + github.com/prometheus/client_model v0.2.1-0.20210607210712-147c58e9608a + github.com/spf13/cobra v1.0.0 + github.com/stretchr/testify v1.8.4 + golang.org/x/exp v0.0.0-20230626212559-97b1e661b5df + golang.org/x/perf v0.0.0-20230113213139-801c7ef9e5c5 + golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 + golang.org/x/sys v0.11.0 +) + +require ( + github.com/aclements/go-moremath v0.0.0-20210112150236-f10218a38794 // indirect + github.com/beorn7/perks v1.0.1 // indirect + github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b // indirect + 
github.com/davecgh/go-spew v1.1.1 // indirect + github.com/getsentry/sentry-go v0.18.0 // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/protobuf v1.5.2 // indirect + github.com/inconshreveable/mousetrap v1.0.0 // indirect + github.com/kr/text v0.2.0 // indirect + github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect + github.com/prometheus/common v0.32.1 // indirect + github.com/prometheus/procfs v0.7.3 // indirect + github.com/rogpeppe/go-internal v1.9.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect + golang.org/x/text v0.7.0 // indirect + google.golang.org/protobuf v1.28.1 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) + +go 1.20 diff --git a/pebble/go.sum b/pebble/go.sum new file mode 100644 index 0000000..89e7f53 --- /dev/null +++ b/pebble/go.sum @@ -0,0 +1,666 @@ +cloud.google.com/go v0.0.0-20170206221025-ce650573d812/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= +cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= +cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU= +cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY= +cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc= +cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0= +cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To= +cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4= +cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M= +cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bPc= +cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk= 
+cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs= +cloud.google.com/go v0.62.0/go.mod h1:jmCYTdRCQuc1PHIIJ/maLInMho30T/Y0M4hTdTShOYc= +cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY= +cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o= +cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE= +cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc= +cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg= +cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc= +cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ= +cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE= +cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk= +cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I= +cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw= +cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA= +cloud.google.com/go/pubsub v1.3.1/go.mod h1:i+ucay31+CNRpDW4Lu78I4xXG+O1r/MAHgjpRVR+TSU= +cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw= +cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos= +cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk= +cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs= +cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0= +dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= +gioui.org v0.0.0-20210308172011-57750fc8a0a6/go.mod 
h1:RSH6KIUZ0p2xy5zHDxgAM4zumjgTw83q2ge/PI+yyw8= +github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/DataDog/zstd v1.4.5 h1:EndNeuB0l9syBZhut0wns3gV1hL8zX8LIu6ZiVHWLIQ= +github.com/DataDog/zstd v1.4.5/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo= +github.com/GoogleCloudPlatform/cloudsql-proxy v0.0.0-20190129172621-c8b1d7a94ddf/go.mod h1:aJ4qN3TfrelA6NZ6AXsXRfmEVaYin3EDbSPJrKS8OXo= +github.com/HdrHistogram/hdrhistogram-go v1.1.2 h1:5IcZpTvzydCQeHzK4Ef/D5rrSqwxob0t8PQPMybUNFM= +github.com/HdrHistogram/hdrhistogram-go v1.1.2/go.mod h1:yDgFjdqOqDEKOvasDdhWNXYg9BVp4O+o5f6V/ehm6Oo= +github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= +github.com/aclements/go-gg v0.0.0-20170118225347-6dbb4e4fefb0/go.mod h1:55qNq4vcpkIuHowELi5C8e+1yUHtoLoOUR9QU5j7Tes= +github.com/aclements/go-moremath v0.0.0-20210112150236-f10218a38794 h1:xlwdaKcTNVW4PtpQb8aKA4Pjy0CdJHEqvFbAnvR5m2g= +github.com/aclements/go-moremath v0.0.0-20210112150236-f10218a38794/go.mod h1:7e+I0LQFUI9AXWxOfsQROs9xPhoJtbsyWcjJqDd4KPY= +github.com/ajstarks/svgo v0.0.0-20180226025133-644b8db467af/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= +github.com/ajstarks/svgo v0.0.0-20210923152817-c3b6e2f0c527/go.mod h1:K08gAheRH3/J6wwsYMMT4xOr94bZjxIelGM0+d/wbFw= +github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= +github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= +github.com/alecthomas/units v0.0.0-20190924025748-f65c72e2690d/go.mod 
h1:rBZYJk541a8SKzHPHnH3zbiI+7dagKZ0cgpgrD7Fyho= +github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= +github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= +github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/boombuler/barcode v1.0.0/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= +github.com/boombuler/barcode v1.0.1/go.mod h1:paBWMcWSl3LHKBqUq+rly7CNSldXjb2rDl3JlRe0mD8= +github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= +github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cespare/xxhash/v2 v2.1.2/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44= +github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= +github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= +github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= +github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/cockroachdb/datadriven v1.0.3-0.20230413201302-be42291fc80f h1:otljaYPt5hWxV3MUfO5dFPFiOXg9CyG5/kCfayTqsJ4= +github.com/cockroachdb/datadriven 
v1.0.3-0.20230413201302-be42291fc80f/go.mod h1:a9RdTaap04u637JoCzcUoIcDmvwSUtcUFtT/C3kJlTU= +github.com/cockroachdb/errors v1.11.1 h1:xSEW75zKaKCWzR3OfxXUxgrk/NtT4G1MiOv5lWZazG8= +github.com/cockroachdb/errors v1.11.1/go.mod h1:8MUxA3Gi6b25tYlFEBGLf+D8aISL+M4MIpiWMSNRfxw= +github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b h1:r6VH0faHjZeQy818SGhaone5OnYfxFR/+AzdY3sf5aE= +github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b/go.mod h1:Vz9DsVWQQhf3vs21MhPMZpMGSht7O/2vFW2xusFUVOs= +github.com/cockroachdb/metamorphic v0.0.0-20231108215700-4ba948b56895 h1:XANOgPYtvELQ/h4IrmPAohXqe2pWA8Bwhejr3VQoZsA= +github.com/cockroachdb/metamorphic v0.0.0-20231108215700-4ba948b56895/go.mod h1:aPd7gM9ov9M8v32Yy5NJrDyOcD8z642dqs+F0CeNXfA= +github.com/cockroachdb/redact v1.1.5 h1:u1PMllDkdFfPWaNGMyLD1+so+aq3uUItthCFqzwPJ30= +github.com/cockroachdb/redact v1.1.5/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg= +github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 h1:zuQyyAKVxetITBuuhv3BI9cMrmStnpT18zmgmTxunpo= +github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06/go.mod h1:7nc4anLGjupUW/PeY5qiNYsdNXj7zopG+eqsS7To5IQ= +github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= +github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= +github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= +github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= +github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= +github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= +github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 
h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= +github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= +github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= +github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= +github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/fogleman/gg v1.2.1-0.20190220221249-0403632d5b90/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= +github.com/fogleman/gg v1.3.0/go.mod h1:R/bRT+9gY/C5z7JzPU0zXsXHKM4/ayA+zqcVNZzPa1k= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/getsentry/sentry-go v0.18.0 h1:MtBW5H9QgdcJabtZcuJG80BMOwaBpkRDZkxRkNC1sN0= +github.com/getsentry/sentry-go v0.18.0/go.mod h1:Kgon4Mby+FJ7ZWHFUAZgVaIa8sxHtnRJRLTXZr51aKQ= +github.com/ghemawat/stream v0.0.0-20171120220530-696b145b53b9 h1:r5GgOLGbza2wVHRzK7aAj6lWZjfbAwiu/RDCVOKjRyM= +github.com/ghemawat/stream v0.0.0-20171120220530-696b145b53b9/go.mod h1:106OIgooyS7OzLDOpUGgm9fA3bQENb/cFSyyBmMoJDs= +github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= +github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA= +github.com/go-errors/errors v1.4.2/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og= +github.com/go-fonts/dejavu v0.1.0/go.mod h1:4Wt4I4OU2Nq9asgDCteaAaWZOV24E+0/Pwo0gppep4g= +github.com/go-fonts/latin-modern v0.2.0/go.mod h1:rQVLdDMK+mK1xscDwsqM5J8U2jrRa3T0ecnM9pNujks= +github.com/go-fonts/liberation 
v0.1.1/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY= +github.com/go-fonts/liberation v0.2.0/go.mod h1:K6qoJYypsmfVjWg8KOVDQhLc8UDgIK2HYqyqAO9z7GY= +github.com/go-fonts/stix v0.1.0/go.mod h1:w/c1f0ldAUlJmLBvlbkvVXLAD+tAMqobIIQpmnUIzUY= +github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= +github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= +github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= +github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= +github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= +github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY= +github.com/go-latex/latex v0.0.0-20210118124228-b3d85cf34e07/go.mod h1:CO1AlKB2CSIqUrmQPqA0gdRIlnLEY0gK5JGjh37zN5U= +github.com/go-latex/latex v0.0.0-20210823091927-c0d11ff05a81/go.mod h1:SX0U8uGpxhq9o2S/CELCSUxEWWAuoCUcVCQWv7G2OCk= +github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= +github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk= +github.com/go-logfmt/logfmt v0.5.0/go.mod h1:wCYkCAKZfumFQihp8CzCvQ3paCTfi41vtzG1KdI/P7A= +github.com/go-pdf/fpdf v0.5.0/go.mod h1:HzcnA+A23uwogo0tp9yU+l3V+KXhiESpt1PMayhOh5M= +github.com/go-sql-driver/mysql v1.4.1/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= +github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= +github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= +github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= 
+github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= +github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= +github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y= +github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= +github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= +github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw= +github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4= +github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= +github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= +github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk= +github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= +github.com/golang/protobuf 
v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= +github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= +github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= +github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= +github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2 h1:ROPKBNFfQgOUMifHyP+KYbvpjbdoFNs+aK7DXlji0Tw= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/gonum/blas v0.0.0-20181208220705-f22b278b28ac/go.mod h1:P32wAyui1PQ58Oce/KYkOqQv8cVw1zAapXOl+dRFGbc= +github.com/gonum/floats v0.0.0-20181209220543-c233463c7e82/go.mod h1:PxC8OnwL11+aosOB5+iEPoV3picfs8tUpkVd0pDo+Kg= +github.com/gonum/internal v0.0.0-20181124074243-f884aa714029/go.mod h1:Pu4dmpkhSyOzRwuXkOgAvijx4o+4YMUJJo9OvPYMkks= +github.com/gonum/lapack v0.0.0-20181123203213-e4cdc5a0bff9/go.mod h1:XA3DeT6rxh2EAE789SSiSJNqxPaC0aE9J8NTOI0Jo/A= +github.com/gonum/matrix v0.0.0-20181209220409-c518dec07be9/go.mod h1:0EXg4mc1CNP0HCqCz+K4ts155PXIlUywf0wqN+GfPZw= +github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= +github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= +github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= +github.com/google/go-cmp 
v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs= +github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0= +github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= +github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc= +github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM= +github.com/google/renameio v0.1.0/go.mod 
h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= +github.com/google/safehtml v0.0.2/go.mod h1:L4KWwDsUJdECRAEpZoBn3O64bQaywRscowZjJAzjHnU= +github.com/googleapis/gax-go v0.0.0-20161107002406-da06d194a00e/go.mod h1:SFVmujtThgffbyetf+mdk2eWhX2bMyUtNHzFKcPA9HY= +github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg= +github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk= +github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= +github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= +github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= +github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= +github.com/guptarohit/asciigraph v0.5.5 h1:ccFnUF8xYIOUPPY3tmdvRyHqmn1MYI9iv1pLKX+/ZkQ= +github.com/guptarohit/asciigraph v0.5.5/go.mod h1:dYl5wwK4gNsnFf9Zp+l06rFiDZ5YtXM6x7SRWZ3KGag= +github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= +github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= +github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= +github.com/inconshreveable/mousetrap v1.0.0 h1:Z8tu5sraLXCXIcARxBp/8cbvlwVa7Z1NHg9XEKhtSvM= +github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8= +github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= +github.com/jpillora/backoff v1.0.0/go.mod h1:J/6gKK9jxlEcS3zixgDgUAsiuZ7yrSoa/FX5e0EB2j4= +github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= +github.com/json-iterator/go v1.1.10/go.mod 
h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.11/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= +github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= +github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= +github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= +github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= +github.com/jung-kurt/gofpdf v1.0.3-0.20190309125859-24315acbbda5/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= +github.com/kisielk/errcheck v1.1.0/go.mod h1:EZBBE59ingxPouuu3KfxchcWSUPOHkagtvWXihfKN4Q= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.15.15 h1:EF27CXIuDsYJ6mmvtBRlEuB2UVOqHG1tAXgZ7yIO+lw= +github.com/klauspost/compress v1.15.15/go.mod h1:ZcK2JAFqKOpnBlxcLsJzYfrS9X1akm9fHZNnD9+Vo/4= +github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/konsorten/go-windows-terminal-sequences v1.0.3/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= +github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 
+github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ= +github.com/mattn/go-sqlite3 v1.14.5/go.mod h1:WVKg1VTActs4Qso6iwGbiFih2UIHo0ENGwNd0Lj+XmI= +github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= +github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 h1:I0XW9+e1XWDxdcEniV4rQAIOPUGDq67JSCiRCgGCZLI= +github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369/go.mod h1:BSXmuO+STAnVfrANrmjBb36TMTDstsz7MSK+HVaYKv4= +github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= +github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= +github.com/oklog/ulid v1.3.1/go.mod 
h1:CirwcVhetQ6Lv90oh/F+FBtV6XMibvdAFo93nm5qn4U= +github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= +github.com/phpdave11/gofpdf v1.4.2/go.mod h1:zpO6xFn9yxo3YLyMvW8HcKWVdbNqgIfOOp2dXMnm1mY= +github.com/phpdave11/gofpdi v1.0.12/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI= +github.com/phpdave11/gofpdi v1.0.13/go.mod h1:vBmVV0Do6hSBHC8uKUQ71JGW+ZGQq74llk/7bXwjDoI= +github.com/pingcap/errors v0.11.4 h1:lFuQV/oaUMGcD2tqt+01ROSmJs75VG1ToEOkZIZ4nE4= +github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= +github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= +github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= +github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDft0ttaMvbicHlPoso= +github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= +github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M= +github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0= +github.com/prometheus/client_golang v1.12.0 h1:C+UIj/QWtmqY13Arb8kwMt5j34/0Z2iKamrJ+ryC0Gg= +github.com/prometheus/client_golang v1.12.0/go.mod h1:3Z9XVyYiZYEO+YQWt3RD2R3jrbd179Rt297l4aS6nDY= +github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod 
h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= +github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= +github.com/prometheus/client_model v0.2.1-0.20210607210712-147c58e9608a h1:CmF68hwI0XsOQ5UwlBopMi2Ow4Pbg32akc4KIVCOm+Y= +github.com/prometheus/client_model v0.2.1-0.20210607210712-147c58e9608a/go.mod h1:LDGWKZIo7rky3hgvBe+caln+Dr3dPggB5dvjtD7w9+w= +github.com/prometheus/common v0.0.0-20181113130724-41aa239b4cce/go.mod h1:daVV7qP5qjZbuso7PdcryaAu0sAZbrN9i7WWcTMWvro= +github.com/prometheus/common v0.4.0/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= +github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= +github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= +github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= +github.com/prometheus/common v0.32.1 h1:hWIdL3N2HoUx3B8j3YN9mWor0qhY/NlEKZEaXxuIRh4= +github.com/prometheus/common v0.32.1/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls= +github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= +github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= +github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= +github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= +github.com/prometheus/procfs v0.6.0/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= +github.com/prometheus/procfs v0.7.3 h1:4jVXhlkAyzOScmCkXBTOLRLTz8EeU+eyjrwB/EPq0VU= +github.com/prometheus/procfs v0.7.3/go.mod 
h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1xBZuNvfVA= +github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU= +github.com/rogpeppe/fastuuid v0.0.0-20150106093220-6724a57986af/go.mod h1:XWv6SoW27p1b0cqNHllgS5HIMJraePCO15w5zCzIWYg= +github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= +github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= +github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= +github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/ruudk/golang-pdf417 v0.0.0-20181029194003-1af4ab5afa58/go.mod h1:6lfFZQK844Gfx8o5WFuvpxWRwnSoipWe/p622j1v06w= +github.com/ruudk/golang-pdf417 v0.0.0-20201230142125-a7e3863a1245/go.mod h1:pQAZKsJ8yyVxGRWYNEm9oFB8ieLgKFnamEyDmSA0BRk= +github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= +github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= +github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= +github.com/sirupsen/logrus v1.6.0/go.mod h1:7uNnSEd1DgxDLC74fIahvMZmmYsHGZGEOFrfsX/uA88= +github.com/soheilhy/cmux v0.1.4/go.mod h1:IM3LyeVVIOuxMH7sFAkER9+bJ4dT7Ms6E4xg4kGIyLM= +github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= +github.com/spf13/afero v1.1.2/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ= +github.com/spf13/cast v1.3.0/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE= +github.com/spf13/cobra v1.0.0 h1:6m/oheQuQ13N9ks4hubMG6BnvwOeaJrqSPLahSnczz8= +github.com/spf13/cobra v1.0.0/go.mod h1:/6GTrnGXV9HjY+aR4k0oJ5tcvakLuG6EuKReYlHNrgE= +github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo= +github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4= 
+github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/viper v1.4.0/go.mod h1:PTJ7Z/lr49W6bUbkmS1V3by4uWynFiR9p7+dSq/yZzE= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= +github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= +github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU= +github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:aYKd//L2LvnjZzWKhF00oedf4jCCReLcmhLdhm1A27Q= +github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= +go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= +go.opencensus.io v0.22.0/go.mod 
h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8= +go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= +go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= +go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw= +go.uber.org/atomic v1.4.0/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE= +go.uber.org/multierr v1.1.0/go.mod h1:wR5kodmAFQ0UK8QlbwjlSNy0Z68gJhDJUG5sjR94q/0= +go.uber.org/zap v1.10.0/go.mod h1:vwi/ZaCAaUcBkycHslxD9B2zi4UTXhF60s6SWpuDF0Q= +golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20190125153040-c74c464bbbf2/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= +golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= +golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek= +golang.org/x/exp 
v0.0.0-20191002040644-a1355ae1e2c3/go.mod h1:NOZ3BPKG0ec/BKJQgnvsSFpcKLM5xXVWnvZS97DWHgE= +golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY= +golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= +golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= +golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= +golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= +golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= +golang.org/x/exp v0.0.0-20230626212559-97b1e661b5df h1:UA2aFVmmsIlefxMk29Dp2juaUSth8Pyn3Tq5Y5mJGME= +golang.org/x/exp v0.0.0-20230626212559-97b1e661b5df/go.mod h1:FXUEEKJgO7OQYeo8N01OfiKP8RXMtf6e8aTskBGqWdc= +golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= +golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= +golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.0.0-20190910094157-69e4b8554b2a/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.0.0-20200119044424-58c23975cae1/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.0.0-20200430140353-33d19683fad8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.0.0-20200618115811-c13761719519/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.0.0-20201208152932-35266b937fa6/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.0.0-20210216034530-4410531fe030/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/image v0.0.0-20210607152325-775e3b0c77b9/go.mod 
h1:023OzeP/+EPmXeapQh35lcL3II3LrY8Ic+EFFKVhULM= +golang.org/x/image v0.0.0-20210628002857-a66eb6448b8d/go.mod h1:023OzeP/+EPmXeapQh35lcL3II3LrY8Ic+EFFKVhULM= +golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= +golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= +golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs= +golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY= +golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE= +golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o= +golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= +golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY= +golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= +golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod 
h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20181220203305-927f97764cc3/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190522155817-f3200d17e092/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 
+golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20210525063256-abc453219eb5/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/oauth2 v0.0.0-20170207211851-4464e7848382/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= +golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 
v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= +golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A= +golang.org/x/perf v0.0.0-20230113213139-801c7ef9e5c5 h1:ObuXPmIgI4ZMyQLIz48cJYgSyWdjUXc2SZAdyJMwEAU= +golang.org/x/perf v0.0.0-20230113213139-801c7ef9e5c5/go.mod h1:UBKtEnL8aqnd+0JHqZ+2qoMDwtuy6cYhhKNoHLBiTQc= +golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 h1:uVc8UZUe6tr40fFVnUP5Oj+veunVezqYl9z7DYw9xzw= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod 
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181107165924-66b7b1311ac8/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 
+golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200625212154-ddb9806d33ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210304124612-50617c2ba197/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210603081109-ebe580a85c40/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys 
v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM= +golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.7.0 h1:4BRB4x83lYWy72KwLD/qYDuTu7q9PjSagHvijDw7cLo= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/tools v0.0.0-20180221164845-07fd8470d635/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190206041539-40960b6deb8e/go.mod 
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= +golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= +golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= +golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20190927191325-030b2cf1153e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod 
h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200117161641-43d50277825c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200122220014-bf1340f18c4a/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200227222343-706bc42d1f0d/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= +golang.org/x/tools v0.0.0-20200312045724-11d5b4c81c7d/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw= +golang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8= +golang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200512131952-2bc93b1c0c88/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod 
h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= +golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= +golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +gonum.org/v1/gonum v0.0.0-20180816165407-929014505bf4/go.mod h1:Y+Yx5eoAFn32cQvJDxZx5Dpnq+c3wtXuadVZAcxbbBo= +gonum.org/v1/gonum v0.8.2/go.mod h1:oe/vMfY3deqTw+1EZJhuvEW2iwGF1bW9wwu7XCu0+v0= +gonum.org/v1/gonum v0.9.3 h1:DnoIG+QAMaF5NvxnGe/oKsgKcAc6PcUyl8q0VetfQ8s= +gonum.org/v1/gonum v0.9.3/go.mod h1:TZumC3NeyVQskjXqmyWt4S3bINhy7B4eYwW69EbyX+0= +gonum.org/v1/netlib v0.0.0-20190313105609-8cb42192e0e0/go.mod h1:wa6Ws7BG/ESfp6dHfk7C6KdzKA7wR7u/rKwOGE66zvw= +gonum.org/v1/plot v0.0.0-20190515093506-e2840ee46a6b/go.mod h1:Wt8AAjI+ypCyYX3nZBvf6cAIx93T+c/OS2HFAYskSZc= +gonum.org/v1/plot v0.9.0/go.mod h1:3Pcqqmp6RHvJI72kgb8fThyUnav364FOsdDo2aGW5lY= +gonum.org/v1/plot v0.10.0/go.mod h1:JWIHJ7U20drSQb/aDpTetJzfC1KlAPldJLpkSy88dvQ= +google.golang.org/api v0.0.0-20170206182103-3d017632ea10/go.mod h1:4mhQ8q/RsB7i+udVvVy5NUi08OU8ZlA0gRVgrF7VFY0= +google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE= +google.golang.org/api 
v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M= +google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= +google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg= +google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= +google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= +google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI= +google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= +google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= +google.golang.org/api v0.19.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= +google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= +google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE= +google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= +google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= +google.golang.org/api v0.29.0/go.mod h1:Lcubydp8VUV7KeIHD9z2Bys/sm/vGKnG1UHuDBSrHWM= +google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz5138Fc= +google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= +google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= +google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0= +google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= +google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod 
h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE= +google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= +google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8= +google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20200115191322-ca5a22157cba/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20200122232147-0452cf42e150/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc= +google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4W5gMy59cAlVYjN9JhxgbQH6Gn+gFDQe2lzA= +google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200228133532-8c2c7df3a383/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto 
v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= +google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1myG/8QRHRsmBRE1LrgQY60beZKjly0O1fX9U= +google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= +google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA= +google.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no= +google.golang.org/grpc v0.0.0-20170208002647-2a6bf6142e96/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= +google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= +google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38= +google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= +google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM= +google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= +google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= +google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= 
+google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= +google.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKal+60= +google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= +google.golang.org/grpc v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= +google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak= +google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= +google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= +google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= +google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= +google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= +google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= +google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= +google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.28.1 h1:d0NfwRgPtno5B1Wa6L2DAG+KivqkdutMf1UhdNx175w= +google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod 
h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20200227125254-8fa46927fb4f/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= +gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= +gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= +gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= +gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.5/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod 
h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= +honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= +honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= +honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= +rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= +rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= +rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= +rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= diff --git a/pebble/ingest.go b/pebble/ingest.go new file mode 100644 index 0000000..149340d --- /dev/null +++ b/pebble/ingest.go @@ -0,0 +1,2410 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import ( + "context" + "sort" + "time" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/invariants" + "github.com/cockroachdb/pebble/internal/keyspan" + "github.com/cockroachdb/pebble/internal/manifest" + "github.com/cockroachdb/pebble/internal/private" + "github.com/cockroachdb/pebble/objstorage" + "github.com/cockroachdb/pebble/objstorage/remote" + "github.com/cockroachdb/pebble/shims/slices" + "github.com/cockroachdb/pebble/sstable" +) + +func sstableKeyCompare(userCmp Compare, a, b InternalKey) int { + c := userCmp(a.UserKey, b.UserKey) + if c != 0 { + return c + } + if a.IsExclusiveSentinel() { + if !b.IsExclusiveSentinel() { + return -1 + } + } else if b.IsExclusiveSentinel() { + return +1 + } + return 0 +} + +// KeyRange encodes a key range in user key space. 
A KeyRange's Start is +// inclusive while its End is exclusive. +type KeyRange struct { + Start, End []byte +} + +// Valid returns true if the KeyRange is defined. +func (k *KeyRange) Valid() bool { + return k.Start != nil && k.End != nil +} + +// Contains returns whether the specified key exists in the KeyRange. +func (k *KeyRange) Contains(cmp base.Compare, key InternalKey) bool { + v := cmp(key.UserKey, k.End) + return (v < 0 || (v == 0 && key.IsExclusiveSentinel())) && cmp(k.Start, key.UserKey) <= 0 +} + +// OverlapsInternalKeyRange checks if the specified internal key range has an +// overlap with the KeyRange. Note that we aren't checking for full containment +// of smallest-largest within k, rather just that there's some intersection +// between the two ranges. +func (k *KeyRange) OverlapsInternalKeyRange(cmp base.Compare, smallest, largest InternalKey) bool { + v := cmp(k.Start, largest.UserKey) + return v <= 0 && !(largest.IsExclusiveSentinel() && v == 0) && + cmp(k.End, smallest.UserKey) > 0 +} + +// Overlaps checks if the specified file has an overlap with the KeyRange. +// Note that we aren't checking for full containment of m within k, rather just +// that there's some intersection between m and k's bounds. +func (k *KeyRange) Overlaps(cmp base.Compare, m *fileMetadata) bool { + return k.OverlapsInternalKeyRange(cmp, m.Smallest, m.Largest) +} + +// OverlapsKeyRange checks if this span overlaps with the provided KeyRange. +// Note that we aren't checking for full containment of either span in the other, +// just that there's a key x that is in both key ranges. 
+func (k *KeyRange) OverlapsKeyRange(cmp Compare, span KeyRange) bool { + return cmp(k.Start, span.End) < 0 && cmp(k.End, span.Start) > 0 +} + +func ingestValidateKey(opts *Options, key *InternalKey) error { + if key.Kind() == InternalKeyKindInvalid { + return base.CorruptionErrorf("pebble: external sstable has corrupted key: %s", + key.Pretty(opts.Comparer.FormatKey)) + } + if key.SeqNum() != 0 { + return base.CorruptionErrorf("pebble: external sstable has non-zero seqnum: %s", + key.Pretty(opts.Comparer.FormatKey)) + } + return nil +} + +// ingestSynthesizeShared constructs a fileMetadata for one shared sstable owned +// or shared by another node. +func ingestSynthesizeShared( + opts *Options, sm SharedSSTMeta, fileNum base.DiskFileNum, +) (*fileMetadata, error) { + if sm.Size == 0 { + // Disallow 0 file sizes + return nil, errors.New("pebble: cannot ingest shared file with size 0") + } + // Don't load table stats. Doing a round trip to shared storage, one SST + // at a time is not worth it as it slows down ingestion. + meta := &fileMetadata{ + FileNum: fileNum.FileNum(), + CreationTime: time.Now().Unix(), + Virtual: true, + Size: sm.Size, + } + meta.InitProviderBacking(fileNum) + // Set the underlying FileBacking's size to the same size as the virtualized + // view of the sstable. This ensures that we don't over-prioritize this + // sstable for compaction just yet, as we do not have a clear sense of what + // parts of this sstable are referenced by other nodes. + meta.FileBacking.Size = sm.Size + if sm.LargestRangeKey.Valid() && sm.LargestRangeKey.UserKey != nil { + // Initialize meta.{HasRangeKeys,Smallest,Largest}, etc. + // + // NB: We create new internal keys and pass them into ExternalRangeKeyBounds + // so that we can sub a zero sequence number into the bounds. We can set + // the sequence number to anything here; it'll be reset in ingestUpdateSeqNum + // anyway. 
However we do need to use the same sequence number across all + // bound keys at this step so that we end up with bounds that are consistent + // across point/range keys. + smallestRangeKey := base.MakeInternalKey(sm.SmallestRangeKey.UserKey, 0, sm.SmallestRangeKey.Kind()) + largestRangeKey := base.MakeExclusiveSentinelKey(sm.LargestRangeKey.Kind(), sm.LargestRangeKey.UserKey) + meta.ExtendRangeKeyBounds(opts.Comparer.Compare, smallestRangeKey, largestRangeKey) + } + if sm.LargestPointKey.Valid() && sm.LargestPointKey.UserKey != nil { + // Initialize meta.{HasPointKeys,Smallest,Largest}, etc. + // + // See point above in the ExtendRangeKeyBounds call on why we use a zero + // sequence number here. + smallestPointKey := base.MakeInternalKey(sm.SmallestPointKey.UserKey, 0, sm.SmallestPointKey.Kind()) + largestPointKey := base.MakeInternalKey(sm.LargestPointKey.UserKey, 0, sm.LargestPointKey.Kind()) + if sm.LargestPointKey.IsExclusiveSentinel() { + largestPointKey = base.MakeRangeDeleteSentinelKey(sm.LargestPointKey.UserKey) + } + meta.ExtendPointKeyBounds(opts.Comparer.Compare, smallestPointKey, largestPointKey) + } + if err := meta.Validate(opts.Comparer.Compare, opts.Comparer.FormatKey); err != nil { + return nil, err + } + return meta, nil +} + +// ingestLoad1External loads the fileMetadata for one external sstable. +// Sequence number and target level calculation happens during prepare/apply. +func ingestLoad1External( + opts *Options, + e ExternalFile, + fileNum base.DiskFileNum, + objprovider objstorage.Provider, + jobID int, +) (*fileMetadata, error) { + if e.Size == 0 { + // Disallow 0 file sizes + return nil, errors.New("pebble: cannot ingest external file with size 0") + } + if !e.HasRangeKey && !e.HasPointKey { + return nil, errors.New("pebble: cannot ingest external file with no point or range keys") + } + // Don't load table stats. Doing a round trip to shared storage, one SST + // at a time is not worth it as it slows down ingestion. 
+ meta := &fileMetadata{} + meta.FileNum = fileNum.FileNum() + meta.CreationTime = time.Now().Unix() + meta.Virtual = true + meta.Size = e.Size + meta.InitProviderBacking(fileNum) + + // Try to resolve a reference to the external file. + backing, err := objprovider.CreateExternalObjectBacking(e.Locator, e.ObjName) + if err != nil { + return nil, err + } + metas, err := objprovider.AttachRemoteObjects([]objstorage.RemoteObjectToAttach{{ + FileNum: fileNum, + FileType: fileTypeTable, + Backing: backing, + }}) + if err != nil { + return nil, err + } + if opts.EventListener.TableCreated != nil { + opts.EventListener.TableCreated(TableCreateInfo{ + JobID: jobID, + Reason: "ingesting", + Path: objprovider.Path(metas[0]), + FileNum: fileNum.FileNum(), + }) + } + // In the name of keeping this ingestion as fast as possible, we avoid + // *all* existence checks and synthesize a file metadata with smallest/largest + // keys that overlap whatever the passed-in span was. + smallestCopy := make([]byte, len(e.SmallestUserKey)) + copy(smallestCopy, e.SmallestUserKey) + largestCopy := make([]byte, len(e.LargestUserKey)) + copy(largestCopy, e.LargestUserKey) + if e.HasPointKey { + meta.ExtendPointKeyBounds(opts.Comparer.Compare, base.MakeInternalKey(smallestCopy, 0, InternalKeyKindMax), + base.MakeRangeDeleteSentinelKey(largestCopy)) + } + if e.HasRangeKey { + meta.ExtendRangeKeyBounds(opts.Comparer.Compare, base.MakeInternalKey(smallestCopy, 0, InternalKeyKindRangeKeySet), + base.MakeExclusiveSentinelKey(InternalKeyKindRangeKeyDelete, largestCopy)) + } + + // Set the underlying FileBacking's size to the same size as the virtualized + // view of the sstable. This ensures that we don't over-prioritize this + // sstable for compaction just yet, as we do not have a clear sense of + // what parts of this sstable are referenced by other nodes. 
+ meta.FileBacking.Size = e.Size + + if err := meta.Validate(opts.Comparer.Compare, opts.Comparer.FormatKey); err != nil { + return nil, err + } + return meta, nil +} + +// ingestLoad1 creates the FileMetadata for one file. This file will be owned +// by this store. +func ingestLoad1( + opts *Options, + fmv FormatMajorVersion, + readable objstorage.Readable, + cacheID uint64, + fileNum base.DiskFileNum, +) (*fileMetadata, error) { + cacheOpts := private.SSTableCacheOpts(cacheID, fileNum).(sstable.ReaderOption) + r, err := sstable.NewReader(readable, opts.MakeReaderOptions(), cacheOpts) + if err != nil { + return nil, err + } + defer r.Close() + + // Avoid ingesting tables with format versions this DB doesn't support. + tf, err := r.TableFormat() + if err != nil { + return nil, err + } + if tf < fmv.MinTableFormat() || tf > fmv.MaxTableFormat() { + return nil, errors.Newf( + "pebble: table format %s is not within range supported at DB format major version %d, (%s,%s)", + tf, fmv, fmv.MinTableFormat(), fmv.MaxTableFormat(), + ) + } + + meta := &fileMetadata{} + meta.FileNum = fileNum.FileNum() + meta.Size = uint64(readable.Size()) + meta.CreationTime = time.Now().Unix() + meta.InitPhysicalBacking() + + // Avoid loading into the table cache for collecting stats if we + // don't need to. If there are no range deletions, we have all the + // information to compute the stats here. + // + // This is helpful in tests for avoiding awkwardness around deletion of + // ingested files from MemFS. MemFS implements the Windows semantics of + // disallowing removal of an open file. Under MemFS, if we don't populate + // meta.Stats here, the file will be loaded into the table cache for + // calculating stats before we can remove the original link. 
+ maybeSetStatsFromProperties(meta.PhysicalMeta(), &r.Properties) + + { + iter, err := r.NewIter(nil /* lower */, nil /* upper */) + if err != nil { + return nil, err + } + defer iter.Close() + var smallest InternalKey + if key, _ := iter.First(); key != nil { + if err := ingestValidateKey(opts, key); err != nil { + return nil, err + } + smallest = (*key).Clone() + } + if err := iter.Error(); err != nil { + return nil, err + } + if key, _ := iter.Last(); key != nil { + if err := ingestValidateKey(opts, key); err != nil { + return nil, err + } + meta.ExtendPointKeyBounds(opts.Comparer.Compare, smallest, key.Clone()) + } + if err := iter.Error(); err != nil { + return nil, err + } + } + + iter, err := r.NewRawRangeDelIter() + if err != nil { + return nil, err + } + if iter != nil { + defer iter.Close() + var smallest InternalKey + if s := iter.First(); s != nil { + key := s.SmallestKey() + if err := ingestValidateKey(opts, &key); err != nil { + return nil, err + } + smallest = key.Clone() + } + if err := iter.Error(); err != nil { + return nil, err + } + if s := iter.Last(); s != nil { + k := s.SmallestKey() + if err := ingestValidateKey(opts, &k); err != nil { + return nil, err + } + largest := s.LargestKey().Clone() + meta.ExtendPointKeyBounds(opts.Comparer.Compare, smallest, largest) + } + } + + // Update the range-key bounds for the table. 
+ { + iter, err := r.NewRawRangeKeyIter() + if err != nil { + return nil, err + } + if iter != nil { + defer iter.Close() + var smallest InternalKey + if s := iter.First(); s != nil { + key := s.SmallestKey() + if err := ingestValidateKey(opts, &key); err != nil { + return nil, err + } + smallest = key.Clone() + } + if err := iter.Error(); err != nil { + return nil, err + } + if s := iter.Last(); s != nil { + k := s.SmallestKey() + if err := ingestValidateKey(opts, &k); err != nil { + return nil, err + } + // As range keys are fragmented, the end key of the last range key in + // the table provides the upper bound for the table. + largest := s.LargestKey().Clone() + meta.ExtendRangeKeyBounds(opts.Comparer.Compare, smallest, largest) + } + if err := iter.Error(); err != nil { + return nil, err + } + } + } + + if !meta.HasPointKeys && !meta.HasRangeKeys { + return nil, nil + } + + // Sanity check that the various bounds on the file were set consistently. + if err := meta.Validate(opts.Comparer.Compare, opts.Comparer.FormatKey); err != nil { + return nil, err + } + + return meta, nil +} + +type ingestLoadResult struct { + localMeta, sharedMeta []*fileMetadata + externalMeta []*fileMetadata + localPaths []string + sharedLevels []uint8 + fileCount int +} + +func ingestLoad( + opts *Options, + fmv FormatMajorVersion, + paths []string, + shared []SharedSSTMeta, + external []ExternalFile, + cacheID uint64, + pending []base.DiskFileNum, + objProvider objstorage.Provider, + jobID int, +) (ingestLoadResult, error) { + meta := make([]*fileMetadata, 0, len(paths)) + newPaths := make([]string, 0, len(paths)) + for i := range paths { + f, err := opts.FS.Open(paths[i]) + if err != nil { + return ingestLoadResult{}, err + } + + readable, err := sstable.NewSimpleReadable(f) + if err != nil { + return ingestLoadResult{}, err + } + m, err := ingestLoad1(opts, fmv, readable, cacheID, pending[i]) + if err != nil { + return ingestLoadResult{}, err + } + if m != nil { + meta = 
append(meta, m) + newPaths = append(newPaths, paths[i]) + } + } + if len(shared) == 0 && len(external) == 0 { + return ingestLoadResult{localMeta: meta, localPaths: newPaths, fileCount: len(meta)}, nil + } + + // Sort the shared files according to level. + sort.Sort(sharedByLevel(shared)) + + sharedMeta := make([]*fileMetadata, 0, len(shared)) + levels := make([]uint8, 0, len(shared)) + for i := range shared { + m, err := ingestSynthesizeShared(opts, shared[i], pending[len(paths)+i]) + if err != nil { + return ingestLoadResult{}, err + } + if shared[i].Level < sharedLevelsStart { + return ingestLoadResult{}, errors.New("cannot ingest shared file in level below sharedLevelsStart") + } + sharedMeta = append(sharedMeta, m) + levels = append(levels, shared[i].Level) + } + externalMeta := make([]*fileMetadata, 0, len(external)) + for i := range external { + m, err := ingestLoad1External(opts, external[i], pending[len(paths)+len(shared)+i], objProvider, jobID) + if err != nil { + return ingestLoadResult{}, err + } + externalMeta = append(externalMeta, m) + } + result := ingestLoadResult{ + localMeta: meta, + sharedMeta: sharedMeta, + externalMeta: externalMeta, + localPaths: newPaths, + sharedLevels: levels, + fileCount: len(meta) + len(sharedMeta) + len(externalMeta), + } + return result, nil +} + +// Struct for sorting metadatas by smallest user keys, while ensuring the +// matching path also gets swapped to the same index. For use in +// ingestSortAndVerify. 
+type metaAndPaths struct { + meta []*fileMetadata + paths []string + cmp Compare +} + +func (m metaAndPaths) Len() int { + return len(m.meta) +} + +func (m metaAndPaths) Less(i, j int) bool { + return m.cmp(m.meta[i].Smallest.UserKey, m.meta[j].Smallest.UserKey) < 0 +} + +func (m metaAndPaths) Swap(i, j int) { + m.meta[i], m.meta[j] = m.meta[j], m.meta[i] + if m.paths != nil { + m.paths[i], m.paths[j] = m.paths[j], m.paths[i] + } +} + +func ingestSortAndVerify(cmp Compare, lr ingestLoadResult, exciseSpan KeyRange) error { + // Verify that all the shared files (i.e. files in sharedMeta) + // fit within the exciseSpan. + for i := range lr.sharedMeta { + f := lr.sharedMeta[i] + if !exciseSpan.Contains(cmp, f.Smallest) || !exciseSpan.Contains(cmp, f.Largest) { + return errors.AssertionFailedf("pebble: shared file outside of excise span, span [%s-%s), file = %s", exciseSpan.Start, exciseSpan.End, f.String()) + } + } + if len(lr.externalMeta) > 0 { + if len(lr.localMeta) > 0 || len(lr.sharedMeta) > 0 { + // Currently we only support external ingests on their own. If external + // files are present alongside local/shared files, return an error. 
+ return errors.AssertionFailedf("pebble: external files cannot be ingested atomically alongside other types of files") + } + sort.Sort(&metaAndPaths{ + meta: lr.externalMeta, + cmp: cmp, + }) + for i := 1; i < len(lr.externalMeta); i++ { + if sstableKeyCompare(cmp, lr.externalMeta[i-1].Largest, lr.externalMeta[i].Smallest) >= 0 { + return errors.AssertionFailedf("pebble: external sstables have overlapping ranges") + } + } + return nil + } + if len(lr.localMeta) <= 1 || len(lr.localPaths) <= 1 { + return nil + } + + sort.Sort(&metaAndPaths{ + meta: lr.localMeta, + paths: lr.localPaths, + cmp: cmp, + }) + + for i := 1; i < len(lr.localPaths); i++ { + if sstableKeyCompare(cmp, lr.localMeta[i-1].Largest, lr.localMeta[i].Smallest) >= 0 { + return errors.AssertionFailedf("pebble: local ingestion sstables have overlapping ranges") + } + } + if len(lr.sharedMeta) == 0 { + return nil + } + filesInLevel := make([]*fileMetadata, 0, len(lr.sharedMeta)) + for l := sharedLevelsStart; l < numLevels; l++ { + filesInLevel = filesInLevel[:0] + for i := range lr.sharedMeta { + if lr.sharedLevels[i] == uint8(l) { + filesInLevel = append(filesInLevel, lr.sharedMeta[i]) + } + } + slices.SortFunc(filesInLevel, func(a, b *fileMetadata) int { + return cmp(a.Smallest.UserKey, b.Smallest.UserKey) + }) + for i := 1; i < len(filesInLevel); i++ { + if sstableKeyCompare(cmp, filesInLevel[i-1].Largest, filesInLevel[i].Smallest) >= 0 { + return errors.AssertionFailedf("pebble: external shared sstables have overlapping ranges") + } + } + } + return nil +} + +func ingestCleanup(objProvider objstorage.Provider, meta []*fileMetadata) error { + var firstErr error + for i := range meta { + if err := objProvider.Remove(fileTypeTable, meta[i].FileBacking.DiskFileNum); err != nil { + firstErr = firstError(firstErr, err) + } + } + return firstErr +} + +// ingestLink creates new objects which are backed by either hardlinks to or +// copies of the ingested files. 
// It also attaches shared objects to the provider.
func ingestLink(
	jobID int,
	opts *Options,
	objProvider objstorage.Provider,
	lr ingestLoadResult,
	shared []SharedSSTMeta,
) error {
	// Link (or, if linking fails, copy) each local file into the provider.
	// On failure, undo the objects created so far before returning.
	for i := range lr.localPaths {
		objMeta, err := objProvider.LinkOrCopyFromLocal(
			context.TODO(), opts.FS, lr.localPaths[i], fileTypeTable, lr.localMeta[i].FileBacking.DiskFileNum,
			objstorage.CreateOptions{PreferSharedStorage: true},
		)
		if err != nil {
			if err2 := ingestCleanup(objProvider, lr.localMeta[:i]); err2 != nil {
				opts.Logger.Errorf("ingest cleanup failed: %v", err2)
			}
			return err
		}
		if opts.EventListener.TableCreated != nil {
			opts.EventListener.TableCreated(TableCreateInfo{
				JobID:   jobID,
				Reason:  "ingesting",
				Path:    objProvider.Path(objMeta),
				FileNum: lr.localMeta[i].FileNum,
			})
		}
	}
	// Attach the shared objects to the provider. NB: shared and lr.sharedMeta
	// are parallel slices (both produced from the same input in ingestLoad).
	sharedObjs := make([]objstorage.RemoteObjectToAttach, 0, len(shared))
	for i := range shared {
		backing, err := shared[i].Backing.Get()
		if err != nil {
			return err
		}
		sharedObjs = append(sharedObjs, objstorage.RemoteObjectToAttach{
			FileNum:  lr.sharedMeta[i].FileBacking.DiskFileNum,
			FileType: fileTypeTable,
			Backing:  backing,
		})
	}
	sharedObjMetas, err := objProvider.AttachRemoteObjects(sharedObjs)
	if err != nil {
		return err
	}
	for i := range sharedObjMetas {
		// One corner case around file sizes we need to be mindful of, is that
		// if one of the shareObjs was initially created by us (and has boomeranged
		// back from another node), we'll need to update the FileBacking's size
		// to be the true underlying size. Otherwise, we could hit errors when we
		// open the db again after a crash/restart (see checkConsistency in open.go),
		// plus it more accurately allows us to prioritize compactions of files
		// that were originally created by us.
		if sharedObjMetas[i].IsShared() && !objProvider.IsSharedForeign(sharedObjMetas[i]) {
			size, err := objProvider.Size(sharedObjMetas[i])
			if err != nil {
				return err
			}
			lr.sharedMeta[i].FileBacking.Size = uint64(size)
		}
		if opts.EventListener.TableCreated != nil {
			opts.EventListener.TableCreated(TableCreateInfo{
				JobID:   jobID,
				Reason:  "ingesting",
				Path:    objProvider.Path(sharedObjMetas[i]),
				FileNum: lr.sharedMeta[i].FileNum,
			})
		}
	}
	// We do not need to do anything about lr.externalMetas. Those were already
	// linked in ingestLoad.

	return nil
}

// ingestMemtableOverlaps reports whether any of keyRanges overlaps the point
// keys, range deletions, or range keys of the given flushable. It is
// conservative: if any of the iterators errors out, overlap is assumed.
func ingestMemtableOverlaps(cmp Compare, mem flushable, keyRanges []internalKeyRange) bool {
	iter := mem.newIter(nil)
	rangeDelIter := mem.newRangeDelIter(nil)
	rkeyIter := mem.newRangeKeyIter(nil)

	closeIters := func() error {
		err := iter.Close()
		if rangeDelIter != nil {
			err = firstError(err, rangeDelIter.Close())
		}
		if rkeyIter != nil {
			err = firstError(err, rkeyIter.Close())
		}
		return err
	}

	for _, kr := range keyRanges {
		if overlapWithIterator(iter, &rangeDelIter, rkeyIter, kr, cmp) {
			// Overlap found; the result of closing the iterators doesn't
			// change the answer, so the error is deliberately ignored.
			closeIters()
			return true
		}
	}

	// Assume overlap if any iterator errored out.
	return closeIters() != nil
}

// ingestUpdateSeqNum assigns sequence numbers (starting at seqNum, one per
// file) to the bounds of every file in loadResult, in the order: shared files
// (reversed, i.e. deepest level first), then local files, then external
// files. The sstables themselves are not rewritten; the new bounds are
// persisted when the metadata is written to the manifest.
func ingestUpdateSeqNum(
	cmp Compare, format base.FormatKey, seqNum uint64, loadResult ingestLoadResult,
) error {
	setSeqFn := func(k base.InternalKey) base.InternalKey {
		return base.MakeInternalKey(k.UserKey, seqNum, k.Kind())
	}
	// updateMetadata stamps the current seqNum onto m's bounds, validates the
	// result, and then advances seqNum (captured by reference) for the next
	// file.
	updateMetadata := func(m *fileMetadata) error {
		// NB: we set the fields directly here, rather than via their Extend*
		// methods, as we are updating sequence numbers.
		if m.HasPointKeys {
			m.SmallestPointKey = setSeqFn(m.SmallestPointKey)
		}
		if m.HasRangeKeys {
			m.SmallestRangeKey = setSeqFn(m.SmallestRangeKey)
		}
		m.Smallest = setSeqFn(m.Smallest)
		// Only update the seqnum for the largest key if that key is not an
		// "exclusive sentinel" (i.e. a range deletion sentinel or a range key
		// boundary), as doing so effectively drops the exclusive sentinel (by
		// lowering the seqnum from the max value), and extends the bounds of the
		// table.
		// NB: as the largest range key is always an exclusive sentinel, it is never
		// updated.
		if m.HasPointKeys && !m.LargestPointKey.IsExclusiveSentinel() {
			m.LargestPointKey = setSeqFn(m.LargestPointKey)
		}
		if !m.Largest.IsExclusiveSentinel() {
			m.Largest = setSeqFn(m.Largest)
		}
		// Setting smallestSeqNum == largestSeqNum triggers the setting of
		// Properties.GlobalSeqNum when an sstable is loaded.
		m.SmallestSeqNum = seqNum
		m.LargestSeqNum = seqNum
		// Ensure the new bounds are consistent.
		if err := m.Validate(cmp, format); err != nil {
			return err
		}
		seqNum++
		return nil
	}

	// Shared sstables are required to be sorted by level ascending. We then
	// iterate the shared sstables in reverse, assigning the lower sequence
	// numbers to the shared sstables that will be ingested into the lower
	// (larger numbered) levels first. This ensures sequence number shadowing is
	// correct.
	for i := len(loadResult.sharedMeta) - 1; i >= 0; i-- {
		if i-1 >= 0 && loadResult.sharedLevels[i-1] > loadResult.sharedLevels[i] {
			panic(errors.AssertionFailedf("shared files %s, %s out of order", loadResult.sharedMeta[i-1], loadResult.sharedMeta[i]))
		}
		if err := updateMetadata(loadResult.sharedMeta[i]); err != nil {
			return err
		}
	}
	for i := range loadResult.localMeta {
		if err := updateMetadata(loadResult.localMeta[i]); err != nil {
			return err
		}
	}
	for i := range loadResult.externalMeta {
		if err := updateMetadata(loadResult.externalMeta[i]); err != nil {
			return err
		}
	}
	return nil
}

// Denotes an internal key range. Smallest and largest are both inclusive.
type internalKeyRange struct {
	smallest, largest InternalKey
}

// overlapWithIterator reports whether keyRange overlaps any point key, range
// deletion, or range key surfaced by the given iterators. It is conservative:
// if an iterator errors out, overlap is assumed.
func overlapWithIterator(
	iter internalIterator,
	rangeDelIter *keyspan.FragmentIterator,
	rkeyIter keyspan.FragmentIterator,
	keyRange internalKeyRange,
	cmp Compare,
) bool {
	// Check overlap with point operations.
	//
	// When using levelIter, it seeks to the SST whose boundaries
	// contain keyRange.smallest.UserKey(S).
	// It then tries to find a point in that SST that is >= S.
	// If there's no such point it means the SST ends in a tombstone in which case
	// levelIter.SeekGE generates a boundary range del sentinel.
	// The comparison of this boundary with keyRange.largest(L) below
	// is subtle but maintains correctness.
	// 1) boundary < L,
	//    since boundary is also > S (initial seek),
	//    whatever the boundary's start key may be, we're always overlapping.
	// 2) boundary > L,
	//    overlap with boundary cannot be determined since we don't know boundary's start key.
	//    We require checking for overlap with rangeDelIter.
	// 3) boundary == L and L is not sentinel,
	//    means boundary < L and hence is similar to 1).
	// 4) boundary == L and L is sentinel,
	//    we'll always overlap since for any values of i,j ranges [i, k) and [j, k) always overlap.
	key, _ := iter.SeekGE(keyRange.smallest.UserKey, base.SeekGEFlagsNone)
	if key != nil {
		c := sstableKeyCompare(cmp, *key, keyRange.largest)
		if c <= 0 {
			return true
		}
	}
	// Assume overlap if iterator errored.
	if err := iter.Error(); err != nil {
		return true
	}

	// computeOverlapWithSpans reports whether any non-empty span surfaced by
	// rIter overlaps keyRange. Span end keys are exclusive.
	computeOverlapWithSpans := func(rIter keyspan.FragmentIterator) bool {
		// NB: The spans surfaced by the fragment iterator are non-overlapping.
		span := rIter.SeekLT(keyRange.smallest.UserKey)
		if span == nil {
			span = rIter.Next()
		}
		for ; span != nil; span = rIter.Next() {
			if span.Empty() {
				continue
			}
			key := span.SmallestKey()
			c := sstableKeyCompare(cmp, key, keyRange.largest)
			if c > 0 {
				// The start of the span is after the largest key in the
				// ingested table.
				return false
			}
			if cmp(span.End, keyRange.smallest.UserKey) > 0 {
				// The end of the span is greater than the smallest in the
				// table. Note that the span end key is exclusive, thus ">0"
				// instead of ">=0".
				return true
			}
		}
		// Assume overlap if iterator errored.
		if err := rIter.Error(); err != nil {
			return true
		}
		return false
	}

	// rkeyIter is either a range key level iter, or a range key iterator
	// over a single file.
	if rkeyIter != nil {
		if computeOverlapWithSpans(rkeyIter) {
			return true
		}
	}

	// Check overlap with range deletions.
	if rangeDelIter == nil || *rangeDelIter == nil {
		return false
	}
	return computeOverlapWithSpans(*rangeDelIter)
}

// ingestTargetLevel returns the target level for a file being ingested.
// If suggestSplit is true, it accounts for ingest-time splitting as part of
// its target level calculation, and if a split candidate is found, that file
// is returned as the splitFile.
func ingestTargetLevel(
	newIters tableNewIters,
	newRangeKeyIter keyspan.TableNewSpanIter,
	iterOps IterOptions,
	comparer *Comparer,
	v *version,
	baseLevel int,
	compactions map[*compaction]struct{},
	meta *fileMetadata,
	suggestSplit bool,
) (targetLevel int, splitFile *fileMetadata, err error) {
	// Find the lowest level which does not have any files which overlap meta. We
	// search from L0 to L6 looking for whether there are any files in the level
	// which overlap meta. We want the "lowest" level (where lower means
	// increasing level number) in order to reduce write amplification.
	//
	// There are 2 kinds of overlap we need to check for: file boundary overlap
	// and data overlap. Data overlap implies file boundary overlap. Note that it
	// is always possible to ingest into L0.
	//
	// To place meta at level i where i > 0:
	// - there must not be any data overlap with levels <= i, since that will
	//   violate the sequence number invariant.
	// - no file boundary overlap with level i, since that will violate the
	//   invariant that files do not overlap in levels i > 0.
	// - if there is only a file overlap at a given level, and no data overlap,
	//   we can still slot a file at that level. We return the fileMetadata with
	//   which we have file boundary overlap (must be only one file, as sstable
	//   bounds are usually tight on user keys) and the caller is expected to split
	//   that sstable into two virtual sstables, allowing this file to go into that
	//   level. Note that if we have file boundary overlap with two files, which
	//   should only happen on rare occasions, we treat it as data overlap and
	//   don't use this optimization.
	//
	// The file boundary overlap check is simpler to conceptualize. Consider the
	// following example, in which the ingested file lies completely before or
	// after the file being considered.
	//
	//   |--|           |--|  ingested file: [a,b] or [f,g]
	//         |-----|        existing file: [c,e]
	//  _____________________
	//   a  b  c  d  e  f  g
	//
	// In both cases the ingested file can move to considering the next level.
	//
	// File boundary overlap does not necessarily imply data overlap. The check
	// for data overlap is a little more nuanced. Consider the following examples:
	//
	//  1. No data overlap:
	//
	//          |-|         |--|    ingested file: [cc-d] or [ee-ff]
	//  |*--*--*----*------*|       existing file: [a-g], points: [a, b, c, dd, g]
	//  _____________________
	//   a  b  c  d  e  f  g
	//
	// In this case the ingested files can "fall through" this level. The checks
	// continue at the next level.
	//
	//  2. Data overlap:
	//
	//            |--|        ingested file: [d-e]
	//  |*--*--*----*------*| existing file: [a-g], points: [a, b, c, dd, g]
	//  _____________________
	//   a  b  c  d  e  f  g
	//
	// In this case the file cannot be ingested into this level as the point 'dd'
	// is in the way.
	//
	// It is worth noting that the check for data overlap is only approximate. In
	// the previous example, the ingested table [d-e] could contain only the
	// points 'd' and 'e', in which case the table would be eligible for
	// considering lower levels. However, such a fine-grained check would need to
	// be exhaustive (comparing points and ranges in both the ingested existing
	// tables) and such a check is prohibitively expensive. Thus Pebble treats any
	// existing point that falls within the ingested table bounds as being "data
	// overlap".

	// This assertion implicitly checks that we have the current version of
	// the metadata.
	if v.L0Sublevels == nil {
		return 0, nil, errors.AssertionFailedf("could not read L0 sublevels")
	}
	iterOps.CategoryAndQoS = sstable.CategoryAndQoS{
		Category: "pebble-ingest",
		QoSLevel: sstable.LatencySensitiveQoSLevel,
	}
	// Check for overlap over the keys of L0 by iterating over the sublevels.
	// Any overlap in L0 (data or boundary) pins the ingest to L0.
	for subLevel := 0; subLevel < len(v.L0SublevelFiles); subLevel++ {
		iter := newLevelIter(context.Background(),
			iterOps, comparer, newIters, v.L0Sublevels.Levels[subLevel].Iter(), manifest.Level(0), internalIterOpts{})

		var rangeDelIter keyspan.FragmentIterator
		// Pass in a non-nil pointer to rangeDelIter so that levelIter.findFileGE
		// sets it up for the target file.
		iter.initRangeDel(&rangeDelIter)

		levelIter := keyspan.LevelIter{}
		levelIter.Init(
			keyspan.SpanIterOptions{}, comparer.Compare, newRangeKeyIter,
			v.L0Sublevels.Levels[subLevel].Iter(), manifest.Level(0), manifest.KeyTypeRange,
		)

		kr := internalKeyRange{
			smallest: meta.Smallest,
			largest:  meta.Largest,
		}
		overlap := overlapWithIterator(iter, &rangeDelIter, &levelIter, kr, comparer.Compare)
		err := iter.Close() // Closes range del iter as well.
		err = firstError(err, levelIter.Close())
		if err != nil {
			return 0, nil, err
		}
		if overlap {
			// NB: targetLevel is still its zero value (0) here, i.e. L0.
			return targetLevel, nil, nil
		}
	}

	level := baseLevel
	for ; level < numLevels; level++ {
		levelIter := newLevelIter(context.Background(),
			iterOps, comparer, newIters, v.Levels[level].Iter(), manifest.Level(level), internalIterOpts{})
		var rangeDelIter keyspan.FragmentIterator
		// Pass in a non-nil pointer to rangeDelIter so that levelIter.findFileGE
		// sets it up for the target file.
		levelIter.initRangeDel(&rangeDelIter)

		rkeyLevelIter := &keyspan.LevelIter{}
		rkeyLevelIter.Init(
			keyspan.SpanIterOptions{}, comparer.Compare, newRangeKeyIter,
			v.Levels[level].Iter(), manifest.Level(level), manifest.KeyTypeRange,
		)

		kr := internalKeyRange{
			smallest: meta.Smallest,
			largest:  meta.Largest,
		}
		overlap := overlapWithIterator(levelIter, &rangeDelIter, rkeyLevelIter, kr, comparer.Compare)
		err := levelIter.Close() // Closes range del iter as well.
		err = firstError(err, rkeyLevelIter.Close())
		if err != nil {
			return 0, nil, err
		}
		if overlap {
			// Data overlap at this level: the best target found so far stands.
			return targetLevel, splitFile, nil
		}

		// Check boundary overlap.
		var candidateSplitFile *fileMetadata
		boundaryOverlaps := v.Overlaps(level, comparer.Compare, meta.Smallest.UserKey,
			meta.Largest.UserKey, meta.Largest.IsExclusiveSentinel())
		if !boundaryOverlaps.Empty() {
			// We are already guaranteed to not have any data overlaps with files
			// in boundaryOverlaps, otherwise we'd have returned in the above if
			// statements. Use this, plus boundaryOverlaps.Len() == 1 to detect for
			// the case where we can slot this file into the current level despite
			// a boundary overlap, by splitting one existing file into two virtual
			// sstables.
			if suggestSplit && boundaryOverlaps.Len() == 1 {
				iter := boundaryOverlaps.Iter()
				candidateSplitFile = iter.First()
			} else {
				// We either don't want to suggest ingest-time splits (i.e.
				// !suggestSplit), or we boundary-overlapped with more than one file.
				continue
			}
		}

		// Check boundary overlap with any ongoing compactions. We consider an
		// overlapping compaction that's writing files to an output level as
		// equivalent to boundary overlap with files in that output level.
		//
		// We cannot check for data overlap with the new SSTs compaction will produce
		// since compaction hasn't been done yet. However, there's no need to check
		// since all keys in them will be from levels in [c.startLevel,
		// c.outputLevel], and all those levels have already had their data overlap
		// tested negative (else we'd have returned earlier).
		//
		// An alternative approach would be to cancel these compactions and proceed
		// with an ingest-time split on this level if necessary. However, compaction
		// cancellation can result in significant wasted effort and is best avoided
		// unless necessary.
		overlaps := false
		for c := range compactions {
			if c.outputLevel == nil || level != c.outputLevel.level {
				continue
			}
			if comparer.Compare(meta.Smallest.UserKey, c.largest.UserKey) <= 0 &&
				comparer.Compare(meta.Largest.UserKey, c.smallest.UserKey) >= 0 {
				overlaps = true
				break
			}
		}
		if !overlaps {
			targetLevel = level
			splitFile = candidateSplitFile
		}
	}
	return targetLevel, splitFile, nil
}

// Ingest ingests a set of sstables into the DB. Ingestion of the files is
// atomic and semantically equivalent to creating a single batch containing all
// of the mutations in the sstables. Ingestion may require the memtable to be
// flushed. The ingested sstable files are moved into the DB and must reside on
// the same filesystem as the DB. Sstables can be created for ingestion using
// sstable.Writer. On success, Ingest removes the input paths.
//
// Two types of sstables are accepted for ingestion(s): one is sstables present
// in the instance's vfs.FS and can be referenced locally. The other is sstables
// present in remote.Storage, referred to as shared or foreign sstables. These
// shared sstables can be linked through objstorageprovider.Provider, and do not
// need to already be present on the local vfs.FS. Foreign sstables must all fit
// in an excise span, and are destined for a level specified in SharedSSTMeta.
//
// All sstables *must* be Sync()'d by the caller after all bytes are written
// and before its file handle is closed; failure to do so could violate
// durability or lead to corrupted on-disk state. This method cannot, in a
// platform-and-FS-agnostic way, ensure that all sstables in the input are
// properly synced to disk.
// Opening new file handles and Sync()-ing them
// does not always guarantee durability; see the discussion here on that:
// https://github.com/cockroachdb/pebble/pull/835#issuecomment-663075379
//
// Ingestion loads each sstable into the lowest level of the LSM which it
// doesn't overlap (see ingestTargetLevel). If an sstable overlaps a memtable,
// ingestion forces the memtable to flush, and then waits for the flush to
// occur. In some cases, such as with no foreign sstables and no excise span,
// ingestion that gets blocked on a memtable can join the flushable queue and
// finish even before the memtable has been flushed.
//
// The steps for ingestion are:
//
//  1. Allocate file numbers for every sstable being ingested.
//  2. Load the metadata for all sstables being ingested.
//  3. Sort the sstables by smallest key, verifying non overlap (for local
//     sstables).
//  4. Hard link (or copy) the local sstables into the DB directory.
//  5. Allocate a sequence number to use for all of the entries in the
//     local sstables. This is the step where overlap with memtables is
//     determined. If there is overlap, we remember the most recent memtable
//     that overlaps.
//  6. Update the sequence number in the ingested local sstables. (Remote
//     sstables get fixed sequence numbers that were determined at load time.)
//  7. Wait for the most recent memtable that overlaps to flush (if any).
//  8. Add the ingested sstables to the version (DB.ingestApply).
//     8.1. If an excise span was specified, figure out what sstables in the
//     current version overlap with the excise span, and create new virtual
//     sstables out of those sstables that exclude the excised span (DB.excise).
//  9. Publish the ingestion sequence number.
//
// Note that if the mutable memtable overlaps with ingestion, a flush of the
// memtable is forced equivalent to DB.Flush. Additionally, subsequent
// mutations that get sequence numbers larger than the ingestion sequence
// number get queued up behind the ingestion waiting for it to complete. This
// can produce a noticeable hiccup in performance. See
// https://github.com/cockroachdb/pebble/issues/25 for an idea for how to fix
// this hiccup.
func (d *DB) Ingest(paths []string) error {
	if err := d.closed.Load(); err != nil {
		panic(err)
	}
	if d.opts.ReadOnly {
		return ErrReadOnly
	}
	_, err := d.ingest(paths, ingestTargetLevel, nil /* shared */, KeyRange{}, nil /* external */)
	return err
}

// IngestOperationStats provides some information about where in the LSM the
// bytes were ingested.
type IngestOperationStats struct {
	// Bytes is the total bytes in the ingested sstables.
	Bytes uint64
	// ApproxIngestedIntoL0Bytes is the approximate number of bytes ingested
	// into L0. This value is approximate when flushable ingests are active and
	// an ingest overlaps an entry in the flushable queue. Currently, this
	// approximation is very rough, only including tables that overlapped the
	// memtable. This estimate may be improved with #2112.
	ApproxIngestedIntoL0Bytes uint64
	// MemtableOverlappingFiles is the count of ingested sstables
	// that overlapped keys in the memtables.
	MemtableOverlappingFiles int
}

// ExternalFile are external sstables that can be referenced through
// objprovider and ingested as remote files that will not be refcounted or
// cleaned up. For use with online restore. Note that the underlying sstable
// could contain keys outside the [Smallest,Largest) bounds; however Pebble
// is expected to only read the keys within those bounds.
type ExternalFile struct {
	// Locator is the shared.Locator that can be used with objProvider to
	// resolve a reference to this external sstable.
	Locator remote.Locator
	// ObjName is the unique name of this sstable on Locator.
	ObjName string
	// Size of the referenced proportion of the virtualized sstable. An estimate
	// is acceptable in lieu of the backing file size.
	Size uint64
	// SmallestUserKey and LargestUserKey are the [smallest,largest) user key
	// bounds of the sstable. Both these bounds are loose i.e. it's possible for
	// the sstable to not span the entirety of this range. However, multiple
	// ExternalFiles in one ingestion must all have non-overlapping
	// [smallest, largest) spans. Note that this Largest bound is exclusive.
	SmallestUserKey, LargestUserKey []byte
	// HasPointKey and HasRangeKey denote whether this file contains point keys
	// or range keys. If both structs are false, an error is returned during
	// ingestion.
	HasPointKey, HasRangeKey bool
}

// IngestWithStats does the same as Ingest, and additionally returns
// IngestOperationStats.
func (d *DB) IngestWithStats(paths []string) (IngestOperationStats, error) {
	if err := d.closed.Load(); err != nil {
		panic(err)
	}
	if d.opts.ReadOnly {
		return IngestOperationStats{}, ErrReadOnly
	}
	return d.ingest(paths, ingestTargetLevel, nil /* shared */, KeyRange{}, nil /* external */)
}

// IngestExternalFiles does the same as IngestWithStats, and additionally
// accepts external files (with locator info that can be resolved using
// d.opts.SharedStorage). These files must also be non-overlapping with
// each other, and must be resolvable through d.objProvider.
+func (d *DB) IngestExternalFiles(external []ExternalFile) (IngestOperationStats, error) { + if err := d.closed.Load(); err != nil { + panic(err) + } + + if d.opts.ReadOnly { + return IngestOperationStats{}, ErrReadOnly + } + if d.opts.Experimental.RemoteStorage == nil { + return IngestOperationStats{}, errors.New("pebble: cannot ingest external files without shared storage configured") + } + return d.ingest(nil, ingestTargetLevel, nil /* shared */, KeyRange{}, external) +} + +// IngestAndExcise does the same as IngestWithStats, and additionally accepts a +// list of shared files to ingest that can be read from a remote.Storage through +// a Provider. All the shared files must live within exciseSpan, and any existing +// keys in exciseSpan are deleted by turning existing sstables into virtual +// sstables (if not virtual already) and shrinking their spans to exclude +// exciseSpan. See the comment at Ingest for a more complete picture of the +// ingestion process. +// +// Panics if this DB instance was not instantiated with a remote.Storage and +// shared sstables are present. +func (d *DB) IngestAndExcise( + paths []string, shared []SharedSSTMeta, exciseSpan KeyRange, +) (IngestOperationStats, error) { + if err := d.closed.Load(); err != nil { + panic(err) + } + if d.opts.ReadOnly { + return IngestOperationStats{}, ErrReadOnly + } + return d.ingest(paths, ingestTargetLevel, shared, exciseSpan, nil /* external */) +} + +// Both DB.mu and commitPipeline.mu must be held while this is called. +func (d *DB) newIngestedFlushableEntry( + meta []*fileMetadata, seqNum uint64, logNum base.DiskFileNum, +) (*flushableEntry, error) { + // Update the sequence number for all of the sstables in the + // metadata. Writing the metadata to the manifest when the + // version edit is applied is the mechanism that persists the + // sequence number. The sstables themselves are left unmodified. 
+ // In this case, a version edit will only be written to the manifest + // when the flushable is eventually flushed. If Pebble restarts in that + // time, then we'll lose the ingest sequence number information. But this + // information will also be reconstructed on node restart. + if err := ingestUpdateSeqNum( + d.cmp, d.opts.Comparer.FormatKey, seqNum, ingestLoadResult{localMeta: meta}, + ); err != nil { + return nil, err + } + + f := newIngestedFlushable(meta, d.opts.Comparer, d.newIters, d.tableNewRangeKeyIter) + + // NB: The logNum/seqNum are the WAL number which we're writing this entry + // to and the sequence number within the WAL which we'll write this entry + // to. + entry := d.newFlushableEntry(f, logNum, seqNum) + // The flushable entry starts off with a single reader ref, so increment + // the FileMetadata.Refs. + for _, file := range f.files { + file.Ref() + } + entry.unrefFiles = func() []*fileBacking { + var obsolete []*fileBacking + for _, file := range f.files { + if file.Unref() == 0 { + obsolete = append(obsolete, file.FileMetadata.FileBacking) + } + } + return obsolete + } + + entry.flushForced = true + entry.releaseMemAccounting = func() {} + return entry, nil +} + +// Both DB.mu and commitPipeline.mu must be held while this is called. Since +// we're holding both locks, the order in which we rotate the memtable or +// recycle the WAL in this function is irrelevant as long as the correct log +// numbers are assigned to the appropriate flushable. +func (d *DB) handleIngestAsFlushable(meta []*fileMetadata, seqNum uint64) error { + b := d.NewBatch() + for _, m := range meta { + b.ingestSST(m.FileNum) + } + b.setSeqNum(seqNum) + + // If the WAL is disabled, then the logNum used to create the flushable + // entry doesn't matter. We just use the logNum assigned to the current + // mutable memtable. 
If the WAL is enabled, then this logNum will be + // overwritten by the logNum of the log which will contain the log entry + // for the ingestedFlushable. + logNum := d.mu.mem.queue[len(d.mu.mem.queue)-1].logNum + if !d.opts.DisableWAL { + // We create a new WAL for the flushable instead of reusing the end of + // the previous WAL. This simplifies the increment of the minimum + // unflushed log number, and also simplifies WAL replay. + logNum, _ = d.recycleWAL() + d.mu.Unlock() + err := d.commit.directWrite(b) + if err != nil { + d.opts.Logger.Fatalf("%v", err) + } + d.mu.Lock() + } + + entry, err := d.newIngestedFlushableEntry(meta, seqNum, logNum) + if err != nil { + return err + } + nextSeqNum := seqNum + uint64(b.Count()) + + // Set newLogNum to the logNum of the previous flushable. This value is + // irrelevant if the WAL is disabled. If the WAL is enabled, then we set + // the appropriate value below. + newLogNum := d.mu.mem.queue[len(d.mu.mem.queue)-1].logNum + if !d.opts.DisableWAL { + // This is WAL num of the next mutable memtable which comes after the + // ingestedFlushable in the flushable queue. The mutable memtable + // will be created below. + newLogNum, _ = d.recycleWAL() + if err != nil { + return err + } + } + + currMem := d.mu.mem.mutable + // NB: Placing ingested sstables above the current memtables + // requires rotating of the existing memtables/WAL. There is + // some concern of churning through tiny memtables due to + // ingested sstables being placed on top of them, but those + // memtables would have to be flushed anyways. + d.mu.mem.queue = append(d.mu.mem.queue, entry) + d.rotateMemtable(newLogNum, nextSeqNum, currMem) + d.updateReadStateLocked(d.opts.DebugCheck) + d.maybeScheduleFlush() + return nil +} + +// See comment at Ingest() for details on how this works. 
+func (d *DB) ingest( + paths []string, + targetLevelFunc ingestTargetLevelFunc, + shared []SharedSSTMeta, + exciseSpan KeyRange, + external []ExternalFile, +) (IngestOperationStats, error) { + if len(shared) > 0 && d.opts.Experimental.RemoteStorage == nil { + panic("cannot ingest shared sstables with nil SharedStorage") + } + if (exciseSpan.Valid() || len(shared) > 0 || len(external) > 0) && d.FormatMajorVersion() < FormatVirtualSSTables { + return IngestOperationStats{}, errors.New("pebble: format major version too old for excise, shared or external sstable ingestion") + } + // Allocate file numbers for all of the files being ingested and mark them as + // pending in order to prevent them from being deleted. Note that this causes + // the file number ordering to be out of alignment with sequence number + // ordering. The sorting of L0 tables by sequence number avoids relying on + // that (busted) invariant. + d.mu.Lock() + pendingOutputs := make([]base.DiskFileNum, len(paths)+len(shared)+len(external)) + for i := 0; i < len(paths)+len(shared)+len(external); i++ { + pendingOutputs[i] = d.mu.versions.getNextDiskFileNum() + } + + jobID := d.mu.nextJobID + d.mu.nextJobID++ + d.mu.Unlock() + + // Load the metadata for all the files being ingested. This step detects + // and elides empty sstables. + loadResult, err := ingestLoad(d.opts, d.FormatMajorVersion(), paths, shared, external, d.cacheID, pendingOutputs, d.objProvider, jobID) + if err != nil { + return IngestOperationStats{}, err + } + + if loadResult.fileCount == 0 { + // All of the sstables to be ingested were empty. Nothing to do. + return IngestOperationStats{}, nil + } + + // Verify the sstables do not overlap. + if err := ingestSortAndVerify(d.cmp, loadResult, exciseSpan); err != nil { + return IngestOperationStats{}, err + } + + // Hard link the sstables into the DB directory. Since the sstables aren't + // referenced by a version, they won't be used. If the hard linking fails + // (e.g. 
because the files reside on a different filesystem), ingestLink will + // fall back to copying, and if that fails we undo our work and return an + // error. + if err := ingestLink(jobID, d.opts, d.objProvider, loadResult, shared); err != nil { + return IngestOperationStats{}, err + } + + // Make the new tables durable. We need to do this at some point before we + // update the MANIFEST (via logAndApply), otherwise a crash can have the + // tables referenced in the MANIFEST, but not present in the provider. + if err := d.objProvider.Sync(); err != nil { + return IngestOperationStats{}, err + } + + // metaFlushableOverlaps is a slice parallel to meta indicating which of the + // ingested sstables overlap some table in the flushable queue. It's used to + // approximate ingest-into-L0 stats when using flushable ingests. + metaFlushableOverlaps := make([]bool, loadResult.fileCount) + var mem *flushableEntry + var mut *memTable + // asFlushable indicates whether the sstable was ingested as a flushable. + var asFlushable bool + iterOps := IterOptions{ + CategoryAndQoS: sstable.CategoryAndQoS{ + Category: "pebble-ingest", + QoSLevel: sstable.LatencySensitiveQoSLevel, + }, + } + prepare := func(seqNum uint64) { + // Note that d.commit.mu is held by commitPipeline when calling prepare. + + d.mu.Lock() + defer d.mu.Unlock() + + // Check to see if any files overlap with any of the memtables. The queue + // is ordered from oldest to newest with the mutable memtable being the + // last element in the slice. We want to wait for the newest table that + // overlaps. + + for i := len(d.mu.mem.queue) - 1; i >= 0; i-- { + m := d.mu.mem.queue[i] + iter := m.newIter(&iterOps) + rangeDelIter := m.newRangeDelIter(&iterOps) + rkeyIter := m.newRangeKeyIter(&iterOps) + + checkForOverlap := func(i int, meta *fileMetadata) { + if metaFlushableOverlaps[i] { + // This table already overlapped a more recent flushable. 
+ return + } + kr := internalKeyRange{ + smallest: meta.Smallest, + largest: meta.Largest, + } + if overlapWithIterator(iter, &rangeDelIter, rkeyIter, kr, d.cmp) { + // If this is the first table to overlap a flushable, save + // the flushable. This ingest must be ingested or flushed + // after it. + if mem == nil { + mem = m + } + metaFlushableOverlaps[i] = true + } + } + for i := range loadResult.localMeta { + checkForOverlap(i, loadResult.localMeta[i]) + } + for i := range loadResult.sharedMeta { + checkForOverlap(len(loadResult.localMeta)+i, loadResult.sharedMeta[i]) + } + for i := range loadResult.externalMeta { + checkForOverlap(len(loadResult.localMeta)+len(loadResult.sharedMeta)+i, loadResult.externalMeta[i]) + } + if exciseSpan.Valid() { + kr := internalKeyRange{ + smallest: base.MakeInternalKey(exciseSpan.Start, InternalKeySeqNumMax, InternalKeyKindMax), + largest: base.MakeExclusiveSentinelKey(InternalKeyKindRangeDelete, exciseSpan.End), + } + if overlapWithIterator(iter, &rangeDelIter, rkeyIter, kr, d.cmp) { + if mem == nil { + mem = m + } + } + } + err := iter.Close() + if rangeDelIter != nil { + err = firstError(err, rangeDelIter.Close()) + } + if rkeyIter != nil { + err = firstError(err, rkeyIter.Close()) + } + if err != nil { + d.opts.Logger.Errorf("ingest error reading flushable for log %s: %s", m.logNum, err) + } + } + + if mem == nil { + // No overlap with any of the queued flushables, so no need to queue + // after them. + + // New writes with higher sequence numbers may be concurrently + // committed. We must ensure they don't flush before this ingest + // completes. To do that, we ref the mutable memtable as a writer, + // preventing its flushing (and the flushing of all subsequent + // flushables in the queue). Once we've acquired the manifest lock + // to add the ingested sstables to the LSM, we can unref as we're + // guaranteed that the flush won't edit the LSM before this ingest. 
+ mut = d.mu.mem.mutable + mut.writerRef() + return + } + // The ingestion overlaps with some entry in the flushable queue. + if d.FormatMajorVersion() < FormatFlushableIngest || + d.opts.Experimental.DisableIngestAsFlushable() || + len(shared) > 0 || exciseSpan.Valid() || len(external) > 0 || + (len(d.mu.mem.queue) > d.opts.MemTableStopWritesThreshold-1) { + // We're not able to ingest as a flushable, + // so we must synchronously flush. + // + // TODO(bilal): Currently, if any of the files being ingested are shared or + // there's an excise span present, we cannot use flushable ingests and need + // to wait synchronously. Either remove this caveat by fleshing out + // flushable ingest logic to also account for these cases, or remove this + // comment. Tracking issue: https://github.com/cockroachdb/pebble/issues/2676 + if mem.flushable == d.mu.mem.mutable { + err = d.makeRoomForWrite(nil) + } + // New writes with higher sequence numbers may be concurrently + // committed. We must ensure they don't flush before this ingest + // completes. To do that, we ref the mutable memtable as a writer, + // preventing its flushing (and the flushing of all subsequent + // flushables in the queue). Once we've acquired the manifest lock + // to add the ingested sstables to the LSM, we can unref as we're + // guaranteed that the flush won't edit the LSM before this ingest. + mut = d.mu.mem.mutable + mut.writerRef() + mem.flushForced = true + d.maybeScheduleFlush() + return + } + // Since there aren't too many memtables already queued up, we can + // slide the ingested sstables on top of the existing memtables. + asFlushable = true + err = d.handleIngestAsFlushable(loadResult.localMeta, seqNum) + } + + var ve *versionEdit + apply := func(seqNum uint64) { + if err != nil || asFlushable { + // An error occurred during prepare. 
+ if mut != nil { + if mut.writerUnref() { + d.mu.Lock() + d.maybeScheduleFlush() + d.mu.Unlock() + } + } + return + } + + // Update the sequence numbers for all ingested sstables' + // metadata. When the version edit is applied, the metadata is + // written to the manifest, persisting the sequence number. + // The sstables themselves are left unmodified. + if err = ingestUpdateSeqNum( + d.cmp, d.opts.Comparer.FormatKey, seqNum, loadResult, + ); err != nil { + if mut != nil { + if mut.writerUnref() { + d.mu.Lock() + d.maybeScheduleFlush() + d.mu.Unlock() + } + } + return + } + + // If we overlapped with a memtable in prepare wait for the flush to + // finish. + if mem != nil { + <-mem.flushed + } + + // Assign the sstables to the correct level in the LSM and apply the + // version edit. + ve, err = d.ingestApply(jobID, loadResult, targetLevelFunc, mut, exciseSpan) + } + + // Only one ingest can occur at a time because if not, one would block waiting + // for the other to finish applying. This blocking would happen while holding + // the commit mutex which would prevent unrelated batches from writing their + // changes to the WAL and memtable. This will cause a bigger commit hiccup + // during ingestion. + d.commit.ingestSem <- struct{}{} + d.commit.AllocateSeqNum(loadResult.fileCount, prepare, apply) + <-d.commit.ingestSem + + if err != nil { + if err2 := ingestCleanup(d.objProvider, loadResult.localMeta); err2 != nil { + d.opts.Logger.Errorf("ingest cleanup failed: %v", err2) + } + } else { + // Since we either created a hard link to the ingesting files, or copied + // them over, it is safe to remove the originals paths. 
+ for _, path := range loadResult.localPaths { + if err2 := d.opts.FS.Remove(path); err2 != nil { + d.opts.Logger.Errorf("ingest failed to remove original file: %s", err2) + } + } + } + + info := TableIngestInfo{ + JobID: jobID, + Err: err, + flushable: asFlushable, + } + if len(loadResult.localMeta) > 0 { + info.GlobalSeqNum = loadResult.localMeta[0].SmallestSeqNum + } else if len(loadResult.sharedMeta) > 0 { + info.GlobalSeqNum = loadResult.sharedMeta[0].SmallestSeqNum + } else { + info.GlobalSeqNum = loadResult.externalMeta[0].SmallestSeqNum + } + var stats IngestOperationStats + if ve != nil { + info.Tables = make([]struct { + TableInfo + Level int + }, len(ve.NewFiles)) + for i := range ve.NewFiles { + e := &ve.NewFiles[i] + info.Tables[i].Level = e.Level + info.Tables[i].TableInfo = e.Meta.TableInfo() + stats.Bytes += e.Meta.Size + if e.Level == 0 { + stats.ApproxIngestedIntoL0Bytes += e.Meta.Size + } + if i < len(metaFlushableOverlaps) && metaFlushableOverlaps[i] { + stats.MemtableOverlappingFiles++ + } + } + } else if asFlushable { + // NB: If asFlushable == true, there are no shared sstables. + info.Tables = make([]struct { + TableInfo + Level int + }, len(loadResult.localMeta)) + for i, f := range loadResult.localMeta { + info.Tables[i].Level = -1 + info.Tables[i].TableInfo = f.TableInfo() + stats.Bytes += f.Size + // We don't have exact stats on which files will be ingested into + // L0, because actual ingestion into the LSM has been deferred until + // flush time. Instead, we infer based on memtable overlap. + // + // TODO(jackson): If we optimistically compute data overlap (#2112) + // before entering the commit pipeline, we can use that overlap to + // improve our approximation by incorporating overlap with L0, not + // just memtables. 
+ if metaFlushableOverlaps[i] { + stats.ApproxIngestedIntoL0Bytes += f.Size + stats.MemtableOverlappingFiles++ + } + } + } + d.opts.EventListener.TableIngested(info) + + return stats, err +} + +// excise updates ve to include a replacement of the file m with new virtual +// sstables that exclude exciseSpan, returning a slice of newly-created files if +// any. If the entirety of m is deleted by exciseSpan, no new sstables are added +// and m is deleted. Note that ve is updated in-place. +// +// The manifest lock must be held when calling this method. +func (d *DB) excise( + exciseSpan KeyRange, m *fileMetadata, ve *versionEdit, level int, +) ([]manifest.NewFileEntry, error) { + numCreatedFiles := 0 + // Check if there's actually an overlap between m and exciseSpan. + if !exciseSpan.Overlaps(d.cmp, m) { + return nil, nil + } + ve.DeletedFiles[deletedFileEntry{ + Level: level, + FileNum: m.FileNum, + }] = m + // Fast path: m sits entirely within the exciseSpan, so just delete it. + if exciseSpan.Contains(d.cmp, m.Smallest) && exciseSpan.Contains(d.cmp, m.Largest) { + return nil, nil + } + var iter internalIterator + var rangeDelIter keyspan.FragmentIterator + var rangeKeyIter keyspan.FragmentIterator + needsBacking := false + // Create a file to the left of the excise span, if necessary. + // The bounds of this file will be [m.Smallest, lastKeyBefore(exciseSpan.Start)]. + // + // We create bounds that are tight on user keys, and we make the effort to find + // the last key in the original sstable that's smaller than exciseSpan.Start + // even though it requires some sstable reads. We could choose to create + // virtual sstables on loose userKey bounds, in which case we could just set + // leftFile.Largest to an exclusive sentinel at exciseSpan.Start. The biggest + // issue with that approach would be that it'd lead to lots of small virtual + // sstables in the LSM that have no guarantee on containing even a single user + // key within the file bounds. 
This has the potential to increase both read and + // write-amp as we will be opening up these sstables only to find no relevant + // keys in the read path, and compacting sstables on top of them instead of + // directly into the space occupied by them. We choose to incur the cost of + // calculating tight bounds at this time instead of creating more work in the + // future. + // + // TODO(bilal): Some of this work can happen without grabbing the manifest + // lock; we could grab one currentVersion, release the lock, calculate excised + // files, then grab the lock again and recalculate for just the files that + // have changed since our previous calculation. Do this optimiaztino as part of + // https://github.com/cockroachdb/pebble/issues/2112 . + if d.cmp(m.Smallest.UserKey, exciseSpan.Start) < 0 { + leftFile := &fileMetadata{ + Virtual: true, + FileBacking: m.FileBacking, + FileNum: d.mu.versions.getNextFileNum(), + // Note that these are loose bounds for smallest/largest seqnums, but they're + // sufficient for maintaining correctness. + SmallestSeqNum: m.SmallestSeqNum, + LargestSeqNum: m.LargestSeqNum, + } + if m.HasPointKeys && !exciseSpan.Contains(d.cmp, m.SmallestPointKey) { + // This file will contain point keys + smallestPointKey := m.SmallestPointKey + var err error + iter, rangeDelIter, err = d.newIters(context.TODO(), m, &IterOptions{ + CategoryAndQoS: sstable.CategoryAndQoS{ + Category: "pebble-ingest", + QoSLevel: sstable.LatencySensitiveQoSLevel, + }, + level: manifest.Level(level), + }, internalIterOpts{}) + if err != nil { + return nil, err + } + var key *InternalKey + if iter != nil { + defer iter.Close() + key, _ = iter.SeekLT(exciseSpan.Start, base.SeekLTFlagsNone) + } else { + iter = emptyIter + } + if key != nil { + leftFile.ExtendPointKeyBounds(d.cmp, smallestPointKey, key.Clone()) + } + // Store the min of (exciseSpan.Start, rdel.End) in lastRangeDel. This + // needs to be a copy if the key is owned by the range del iter. 
+ var lastRangeDel []byte + if rangeDelIter != nil { + defer rangeDelIter.Close() + rdel := rangeDelIter.SeekLT(exciseSpan.Start) + if rdel != nil { + lastRangeDel = append(lastRangeDel[:0], rdel.End...) + if d.cmp(lastRangeDel, exciseSpan.Start) > 0 { + lastRangeDel = exciseSpan.Start + } + } + } else { + rangeDelIter = emptyKeyspanIter + } + if lastRangeDel != nil { + leftFile.ExtendPointKeyBounds(d.cmp, smallestPointKey, base.MakeExclusiveSentinelKey(InternalKeyKindRangeDelete, lastRangeDel)) + } + } + if m.HasRangeKeys && !exciseSpan.Contains(d.cmp, m.SmallestRangeKey) { + // This file will contain range keys + var err error + smallestRangeKey := m.SmallestRangeKey + rangeKeyIter, err = d.tableNewRangeKeyIter(m, keyspan.SpanIterOptions{}) + if err != nil { + return nil, err + } + // Store the min of (exciseSpan.Start, rkey.End) in lastRangeKey. This + // needs to be a copy if the key is owned by the range key iter. + var lastRangeKey []byte + var lastRangeKeyKind InternalKeyKind + defer rangeKeyIter.Close() + rkey := rangeKeyIter.SeekLT(exciseSpan.Start) + if rkey != nil { + lastRangeKey = append(lastRangeKey[:0], rkey.End...) + if d.cmp(lastRangeKey, exciseSpan.Start) > 0 { + lastRangeKey = exciseSpan.Start + } + lastRangeKeyKind = rkey.Keys[0].Kind() + } + if lastRangeKey != nil { + leftFile.ExtendRangeKeyBounds(d.cmp, smallestRangeKey, base.MakeExclusiveSentinelKey(lastRangeKeyKind, lastRangeKey)) + } + } + if leftFile.HasRangeKeys || leftFile.HasPointKeys { + var err error + leftFile.Size, err = d.tableCache.estimateSize(m, leftFile.Smallest.UserKey, leftFile.Largest.UserKey) + if err != nil { + return nil, err + } + if leftFile.Size == 0 { + // On occasion, estimateSize gives us a low estimate, i.e. a 0 file size, + // such as if the excised file only has range keys/dels and no point + // keys. This can cause panics in places where we divide by file sizes. + // Correct for it here. 
+ leftFile.Size = 1 + } + if err := leftFile.Validate(d.cmp, d.opts.Comparer.FormatKey); err != nil { + return nil, err + } + leftFile.ValidateVirtual(m) + ve.NewFiles = append(ve.NewFiles, newFileEntry{Level: level, Meta: leftFile}) + needsBacking = true + numCreatedFiles++ + } + } + // Create a file to the right, if necessary. + if exciseSpan.Contains(d.cmp, m.Largest) { + // No key exists to the right of the excise span in this file. + if needsBacking && !m.Virtual { + // If m is virtual, then its file backing is already known to the manifest. + // We don't need to create another file backing. Note that there must be + // only one CreatedBackingTables entry per backing sstable. This is + // indicated by the VersionEdit.CreatedBackingTables invariant. + ve.CreatedBackingTables = append(ve.CreatedBackingTables, m.FileBacking) + } + return ve.NewFiles[len(ve.NewFiles)-numCreatedFiles:], nil + } + // Create a new file, rightFile, between [firstKeyAfter(exciseSpan.End), m.Largest]. + // + // See comment before the definition of leftFile for the motivation behind + // calculating tight user-key bounds. + rightFile := &fileMetadata{ + Virtual: true, + FileBacking: m.FileBacking, + FileNum: d.mu.versions.getNextFileNum(), + // Note that these are loose bounds for smallest/largest seqnums, but they're + // sufficient for maintaining correctness. 
+ SmallestSeqNum: m.SmallestSeqNum, + LargestSeqNum: m.LargestSeqNum, + } + if m.HasPointKeys && !exciseSpan.Contains(d.cmp, m.LargestPointKey) { + // This file will contain point keys + largestPointKey := m.LargestPointKey + var err error + if iter == nil && rangeDelIter == nil { + iter, rangeDelIter, err = d.newIters(context.TODO(), m, &IterOptions{ + CategoryAndQoS: sstable.CategoryAndQoS{ + Category: "pebble-ingest", + QoSLevel: sstable.LatencySensitiveQoSLevel, + }, + level: manifest.Level(level), + }, internalIterOpts{}) + if err != nil { + return nil, err + } + if iter != nil { + defer iter.Close() + } else { + iter = emptyIter + } + if rangeDelIter != nil { + defer rangeDelIter.Close() + } else { + rangeDelIter = emptyKeyspanIter + } + } + key, _ := iter.SeekGE(exciseSpan.End, base.SeekGEFlagsNone) + if key != nil { + rightFile.ExtendPointKeyBounds(d.cmp, key.Clone(), largestPointKey) + } + // Store the max of (exciseSpan.End, rdel.Start) in firstRangeDel. This + // needs to be a copy if the key is owned by the range del iter. + var firstRangeDel []byte + rdel := rangeDelIter.SeekGE(exciseSpan.End) + if rdel != nil { + firstRangeDel = append(firstRangeDel[:0], rdel.Start...) + if d.cmp(firstRangeDel, exciseSpan.End) < 0 { + firstRangeDel = exciseSpan.End + } + } + if firstRangeDel != nil { + smallestPointKey := rdel.SmallestKey() + smallestPointKey.UserKey = firstRangeDel + rightFile.ExtendPointKeyBounds(d.cmp, smallestPointKey, largestPointKey) + } + } + if m.HasRangeKeys && !exciseSpan.Contains(d.cmp, m.LargestRangeKey) { + // This file will contain range keys. + largestRangeKey := m.LargestRangeKey + if rangeKeyIter == nil { + var err error + rangeKeyIter, err = d.tableNewRangeKeyIter(m, keyspan.SpanIterOptions{}) + if err != nil { + return nil, err + } + defer rangeKeyIter.Close() + } + // Store the max of (exciseSpan.End, rkey.Start) in firstRangeKey. This + // needs to be a copy if the key is owned by the range key iter. 
+ var firstRangeKey []byte + rkey := rangeKeyIter.SeekGE(exciseSpan.End) + if rkey != nil { + firstRangeKey = append(firstRangeKey[:0], rkey.Start...) + if d.cmp(firstRangeKey, exciseSpan.End) < 0 { + firstRangeKey = exciseSpan.End + } + } + if firstRangeKey != nil { + smallestRangeKey := rkey.SmallestKey() + smallestRangeKey.UserKey = firstRangeKey + // We call ExtendRangeKeyBounds so any internal boundType fields are + // set correctly. Note that this is mildly wasteful as we'll be comparing + // rightFile.{Smallest,Largest}RangeKey with themselves, which can be + // avoided if we exported ExtendOverallKeyBounds or so. + rightFile.ExtendRangeKeyBounds(d.cmp, smallestRangeKey, largestRangeKey) + } + } + if rightFile.HasRangeKeys || rightFile.HasPointKeys { + var err error + rightFile.Size, err = d.tableCache.estimateSize(m, rightFile.Smallest.UserKey, rightFile.Largest.UserKey) + if err != nil { + return nil, err + } + if rightFile.Size == 0 { + // On occasion, estimateSize gives us a low estimate, i.e. a 0 file size, + // such as if the excised file only has range keys/dels and no point keys. + // This can cause panics in places where we divide by file sizes. Correct + // for it here. + rightFile.Size = 1 + } + rightFile.ValidateVirtual(m) + ve.NewFiles = append(ve.NewFiles, newFileEntry{Level: level, Meta: rightFile}) + needsBacking = true + numCreatedFiles++ + } + + if needsBacking && !m.Virtual { + // If m is virtual, then its file backing is already known to the manifest. + // We don't need to create another file backing. Note that there must be + // only one CreatedBackingTables entry per backing sstable. This is + // indicated by the VersionEdit.CreatedBackingTables invariant. 
+ ve.CreatedBackingTables = append(ve.CreatedBackingTables, m.FileBacking) + } + + if err := rightFile.Validate(d.cmp, d.opts.Comparer.FormatKey); err != nil { + return nil, err + } + return ve.NewFiles[len(ve.NewFiles)-numCreatedFiles:], nil +} + +type ingestTargetLevelFunc func( + newIters tableNewIters, + newRangeKeyIter keyspan.TableNewSpanIter, + iterOps IterOptions, + comparer *Comparer, + v *version, + baseLevel int, + compactions map[*compaction]struct{}, + meta *fileMetadata, + suggestSplit bool, +) (int, *fileMetadata, error) + +type ingestSplitFile struct { + // ingestFile is the file being ingested. + ingestFile *fileMetadata + // splitFile is the file that needs to be split to allow ingestFile to slot + // into `level` level. + splitFile *fileMetadata + // The level where ingestFile will go (and where splitFile already is). + level int +} + +// ingestSplit splits files specified in `files` and updates ve in-place to +// account for existing files getting split into two virtual sstables. The map +// `replacedFiles` contains an in-progress map of all files that have been +// replaced with new virtual sstables in this version edit so far, which is also +// updated in-place. +// +// d.mu as well as the manifest lock must be held when calling this method. +func (d *DB) ingestSplit( + ve *versionEdit, + updateMetrics func(*fileMetadata, int, []newFileEntry), + files []ingestSplitFile, + replacedFiles map[base.FileNum][]newFileEntry, +) error { + for _, s := range files { + // replacedFiles can be thought of as a tree, where we start iterating with + // s.splitFile and run its fileNum through replacedFiles, then find which of + // the replaced files overlaps with s.ingestFile, which becomes the new + // splitFile, then we check splitFile's replacements in replacedFiles again + // for overlap with s.ingestFile, and so on until we either can't find the + // current splitFile in replacedFiles (i.e. 
that's the file that now needs to + // be split), or we don't find a file that overlaps with s.ingestFile, which + // means a prior ingest split already produced enough room for s.ingestFile + // to go into this level without necessitating another ingest split. + splitFile := s.splitFile + for splitFile != nil { + replaced, ok := replacedFiles[splitFile.FileNum] + if !ok { + break + } + updatedSplitFile := false + for i := range replaced { + if replaced[i].Meta.Overlaps(d.cmp, s.ingestFile.Smallest.UserKey, s.ingestFile.Largest.UserKey, s.ingestFile.Largest.IsExclusiveSentinel()) { + if updatedSplitFile { + // This should never happen because the earlier ingestTargetLevel + // function only finds split file candidates that are guaranteed to + // have no data overlap, only boundary overlap. See the comments + // in that method to see the definitions of data vs boundary + // overlap. That, plus the fact that files in `replaced` are + // guaranteed to have file bounds that are tight on user keys + // (as that's what `d.excise` produces), means that the only case + // where we overlap with two or more files in `replaced` is if we + // actually had data overlap all along, or if the ingestion files + // were overlapping, either of which is an invariant violation. + panic("updated with two files in ingestSplit") + } + splitFile = replaced[i].Meta + updatedSplitFile = true + } + } + if !updatedSplitFile { + // None of the replaced files overlapped with the file being ingested. + // This can happen if we've already excised a span overlapping with + // this file, or if we have consecutive ingested files that can slide + // within the same gap between keys in an existing file. For instance, + // if an existing file has keys a and g and we're ingesting b-c, d-e, + // the first loop iteration will split the existing file into one that + // ends in a and another that starts at g, and the second iteration will + // fall into this case and require no splitting. 
+ // + // No splitting necessary. + splitFile = nil + } + } + if splitFile == nil { + continue + } + // NB: excise operates on [start, end). We're splitting at [start, end] + // (assuming !s.ingestFile.Largest.IsExclusiveSentinel()). The conflation + // of exclusive vs inclusive end bounds should not make a difference here + // as we're guaranteed to not have any data overlap between splitFile and + // s.ingestFile, so panic if we do see a newly added file with an endKey + // equalling s.ingestFile.Largest, and !s.ingestFile.Largest.IsExclusiveSentinel() + added, err := d.excise(KeyRange{Start: s.ingestFile.Smallest.UserKey, End: s.ingestFile.Largest.UserKey}, splitFile, ve, s.level) + if err != nil { + return err + } + if _, ok := ve.DeletedFiles[deletedFileEntry{ + Level: s.level, + FileNum: splitFile.FileNum, + }]; !ok { + panic("did not split file that was expected to be split") + } + replacedFiles[splitFile.FileNum] = added + for i := range added { + if s.ingestFile.Overlaps(d.cmp, added[i].Meta.Smallest.UserKey, added[i].Meta.Largest.UserKey, added[i].Meta.Largest.IsExclusiveSentinel()) { + panic("ingest-time split produced a file that overlaps with ingested file") + } + } + updateMetrics(splitFile, s.level, added) + } + // Flatten the version edit by removing any entries from ve.NewFiles that + // are also in ve.DeletedFiles. 
+ newNewFiles := ve.NewFiles[:0] + for i := range ve.NewFiles { + fn := ve.NewFiles[i].Meta.FileNum + deEntry := deletedFileEntry{Level: ve.NewFiles[i].Level, FileNum: fn} + if _, ok := ve.DeletedFiles[deEntry]; ok { + delete(ve.DeletedFiles, deEntry) + } else { + newNewFiles = append(newNewFiles, ve.NewFiles[i]) + } + } + ve.NewFiles = newNewFiles + return nil +} + +func (d *DB) ingestApply( + jobID int, + lr ingestLoadResult, + findTargetLevel ingestTargetLevelFunc, + mut *memTable, + exciseSpan KeyRange, +) (*versionEdit, error) { + d.mu.Lock() + defer d.mu.Unlock() + + ve := &versionEdit{ + NewFiles: make([]newFileEntry, lr.fileCount), + } + if exciseSpan.Valid() || (d.opts.Experimental.IngestSplit != nil && d.opts.Experimental.IngestSplit()) { + ve.DeletedFiles = map[manifest.DeletedFileEntry]*manifest.FileMetadata{} + } + metrics := make(map[int]*LevelMetrics) + + // Lock the manifest for writing before we use the current version to + // determine the target level. This prevents two concurrent ingestion jobs + // from using the same version to determine the target level, and also + // provides serialization with concurrent compaction and flush jobs. + // logAndApply unconditionally releases the manifest lock, but any earlier + // returns must unlock the manifest. + d.mu.versions.logLock() + + if mut != nil { + // Unref the mutable memtable to allows its flush to proceed. Now that we've + // acquired the manifest lock, we can be certain that if the mutable + // memtable has received more recent conflicting writes, the flush won't + // beat us to applying to the manifest resulting in sequence number + // inversion. Even though we call maybeScheduleFlush right now, this flush + // will apply after our ingestion. 
+ if mut.writerUnref() { + d.maybeScheduleFlush() + } + } + + shouldIngestSplit := d.opts.Experimental.IngestSplit != nil && + d.opts.Experimental.IngestSplit() && d.FormatMajorVersion() >= FormatVirtualSSTables + current := d.mu.versions.currentVersion() + baseLevel := d.mu.versions.picker.getBaseLevel() + iterOps := IterOptions{logger: d.opts.Logger} + // filesToSplit is a list where each element is a pair consisting of a file + // being ingested and a file being split to make room for an ingestion into + // that level. Each ingested file will appear at most once in this list. It + // is possible for split files to appear twice in this list. + filesToSplit := make([]ingestSplitFile, 0) + checkCompactions := false + for i := 0; i < lr.fileCount; i++ { + // Determine the lowest level in the LSM for which the sstable doesn't + // overlap any existing files in the level. + var m *fileMetadata + sharedIdx := -1 + sharedLevel := -1 + externalFile := false + if i < len(lr.localMeta) { + // local file. + m = lr.localMeta[i] + } else if (i - len(lr.localMeta)) < len(lr.sharedMeta) { + // shared file. + sharedIdx = i - len(lr.localMeta) + m = lr.sharedMeta[sharedIdx] + sharedLevel = int(lr.sharedLevels[sharedIdx]) + } else { + // external file. + externalFile = true + m = lr.externalMeta[i-(len(lr.localMeta)+len(lr.sharedMeta))] + } + f := &ve.NewFiles[i] + var err error + if sharedIdx >= 0 { + f.Level = sharedLevel + if f.Level < sharedLevelsStart { + panic("cannot slot a shared file higher than the highest shared level") + } + ve.CreatedBackingTables = append(ve.CreatedBackingTables, m.FileBacking) + } else { + if externalFile { + ve.CreatedBackingTables = append(ve.CreatedBackingTables, m.FileBacking) + } + var splitFile *fileMetadata + if exciseSpan.Valid() && exciseSpan.Contains(d.cmp, m.Smallest) && exciseSpan.Contains(d.cmp, m.Largest) { + // This file fits perfectly within the excise span. We can slot it at + // L6, or sharedLevelsStart - 1 if we have shared files. 
+ if len(lr.sharedMeta) > 0 { + f.Level = sharedLevelsStart - 1 + if baseLevel > f.Level { + f.Level = 0 + } + } else { + f.Level = 6 + } + } else { + // TODO(bilal): findTargetLevel does disk IO (reading files for data + // overlap) even though we're holding onto d.mu. Consider unlocking + // d.mu while we do this. We already hold versions.logLock so we should + // not see any version applications while we're at this. The one + // complication here would be pulling out the mu.compact.inProgress + // check from findTargetLevel, as that requires d.mu to be held. + f.Level, splitFile, err = findTargetLevel( + d.newIters, d.tableNewRangeKeyIter, iterOps, d.opts.Comparer, current, baseLevel, d.mu.compact.inProgress, m, shouldIngestSplit) + } + + if splitFile != nil { + if invariants.Enabled { + if lf := current.Levels[f.Level].Find(d.cmp, splitFile); lf == nil { + panic("splitFile returned is not in level it should be") + } + } + // We take advantage of the fact that we won't drop the db mutex + // between now and the call to logAndApply. So, no files should + // get added to a new in-progress compaction at this point. We can + // avoid having to iterate on in-progress compactions to cancel them + // if none of the files being split have a compacting state. + if splitFile.IsCompacting() { + checkCompactions = true + } + filesToSplit = append(filesToSplit, ingestSplitFile{ingestFile: m, splitFile: splitFile, level: f.Level}) + } + } + if err != nil { + d.mu.versions.logUnlock() + return nil, err + } + f.Meta = m + levelMetrics := metrics[f.Level] + if levelMetrics == nil { + levelMetrics = &LevelMetrics{} + metrics[f.Level] = levelMetrics + } + levelMetrics.NumFiles++ + levelMetrics.Size += int64(m.Size) + levelMetrics.BytesIngested += m.Size + levelMetrics.TablesIngested++ + } + // replacedFiles maps files excised due to exciseSpan (or splitFiles returned + // by ingestTargetLevel), to files that were created to replace it. 
This map + // is used to resolve references to split files in filesToSplit, as it is + // possible for a file that we want to split to no longer exist or have a + // newer fileMetadata due to a split induced by another ingestion file, or an + // excise. + replacedFiles := make(map[base.FileNum][]newFileEntry) + updateLevelMetricsOnExcise := func(m *fileMetadata, level int, added []newFileEntry) { + levelMetrics := metrics[level] + if levelMetrics == nil { + levelMetrics = &LevelMetrics{} + metrics[level] = levelMetrics + } + levelMetrics.NumFiles-- + levelMetrics.Size -= int64(m.Size) + for i := range added { + levelMetrics.NumFiles++ + levelMetrics.Size += int64(added[i].Meta.Size) + } + } + if exciseSpan.Valid() { + // Iterate through all levels and find files that intersect with exciseSpan. + // + // TODO(bilal): We could drop the DB mutex here as we don't need it for + // excises; we only need to hold the version lock which we already are + // holding. However releasing the DB mutex could mess with the + // ingestTargetLevel calculation that happened above, as it assumed that it + // had a complete view of in-progress compactions that wouldn't change + // until logAndApply is called. If we were to drop the mutex now, we could + // schedule another in-progress compaction that would go into the chosen target + // level and lead to file overlap within level (which would panic in + // logAndApply). We should drop the db mutex here, do the excise, then + // re-grab the DB mutex and rerun just the in-progress compaction check to + // see if any new compactions are conflicting with our chosen target levels + // for files, and if they are, we should signal those compactions to error + // out. 
+ for level := range current.Levels { + overlaps := current.Overlaps(level, d.cmp, exciseSpan.Start, exciseSpan.End, true /* exclusiveEnd */) + iter := overlaps.Iter() + + for m := iter.First(); m != nil; m = iter.Next() { + newFiles, err := d.excise(exciseSpan, m, ve, level) + if err != nil { + return nil, err + } + + if _, ok := ve.DeletedFiles[deletedFileEntry{ + Level: level, + FileNum: m.FileNum, + }]; !ok { + // We did not excise this file. + continue + } + replacedFiles[m.FileNum] = newFiles + updateLevelMetricsOnExcise(m, level, newFiles) + } + } + } + if len(filesToSplit) > 0 { + // For the same reasons as the above call to excise, we hold the db mutex + // while calling this method. + if err := d.ingestSplit(ve, updateLevelMetricsOnExcise, filesToSplit, replacedFiles); err != nil { + return nil, err + } + } + if len(filesToSplit) > 0 || exciseSpan.Valid() { + for c := range d.mu.compact.inProgress { + if c.versionEditApplied { + continue + } + // Check if this compaction overlaps with the excise span. Note that just + // checking if the inputs individually overlap with the excise span + // isn't sufficient; for instance, a compaction could have [a,b] and [e,f] + // as inputs and write it all out as [a,b,e,f] in one sstable. If we're + // doing a [c,d) excise at the same time as this compaction, we will have + // to error out the whole compaction as we can't guarantee it hasn't/won't + // write a file overlapping with the excise span. + if exciseSpan.OverlapsInternalKeyRange(d.cmp, c.smallest, c.largest) { + c.cancel.Store(true) + } + // Check if this compaction's inputs have been replaced due to an + // ingest-time split. In that case, cancel the compaction as a newly picked + // compaction would need to include any new files that slid in between + // previously-existing files. Note that we cancel any compaction that has a + // file that was ingest-split as an input, even if it started before this + // ingestion. 
+ if checkCompactions { + for i := range c.inputs { + iter := c.inputs[i].files.Iter() + for f := iter.First(); f != nil; f = iter.Next() { + if _, ok := replacedFiles[f.FileNum]; ok { + c.cancel.Store(true) + break + } + } + } + } + } + // Check for any EventuallyFileOnlySnapshots that could be watching for + // an excise on this span. + if exciseSpan.Valid() { + for s := d.mu.snapshots.root.next; s != &d.mu.snapshots.root; s = s.next { + if s.efos == nil { + continue + } + efos := s.efos + // TODO(bilal): We can make this faster by taking advantage of the sorted + // nature of protectedRanges to do a sort.Search, or even maintaining a + // global list of all protected ranges instead of having to peer into every + // snapshot. + for i := range efos.protectedRanges { + if efos.protectedRanges[i].OverlapsKeyRange(d.cmp, exciseSpan) { + efos.excised.Store(true) + break + } + } + } + } + } + if err := d.mu.versions.logAndApply(jobID, ve, metrics, false /* forceRotation */, func() []compactionInfo { + return d.getInProgressCompactionInfoLocked(nil) + }); err != nil { + return nil, err + } + + d.mu.versions.metrics.Ingest.Count++ + + d.updateReadStateLocked(d.opts.DebugCheck) + // updateReadStateLocked could have generated obsolete tables, schedule a + // cleanup job if necessary. + d.deleteObsoleteFiles(jobID) + d.updateTableStatsLocked(ve.NewFiles) + // The ingestion may have pushed a level over the threshold for compaction, + // so check to see if one is necessary and schedule it. 
+ d.maybeScheduleCompaction() + var toValidate []manifest.NewFileEntry + dedup := make(map[base.DiskFileNum]struct{}) + for _, entry := range ve.NewFiles { + if _, ok := dedup[entry.Meta.FileBacking.DiskFileNum]; !ok { + toValidate = append(toValidate, entry) + dedup[entry.Meta.FileBacking.DiskFileNum] = struct{}{} + } + } + d.maybeValidateSSTablesLocked(toValidate) + return ve, nil +} + +// maybeValidateSSTablesLocked adds the slice of newFileEntrys to the pending +// queue of files to be validated, when the feature is enabled. +// +// Note that if two entries with the same backing file are added twice, then the +// block checksums for the backing file will be validated twice. +// +// DB.mu must be locked when calling. +func (d *DB) maybeValidateSSTablesLocked(newFiles []newFileEntry) { + // Only add to the validation queue when the feature is enabled. + if !d.opts.Experimental.ValidateOnIngest { + return + } + + d.mu.tableValidation.pending = append(d.mu.tableValidation.pending, newFiles...) + if d.shouldValidateSSTablesLocked() { + go d.validateSSTables() + } +} + +// shouldValidateSSTablesLocked returns true if SSTable validation should run. +// DB.mu must be locked when calling. +func (d *DB) shouldValidateSSTablesLocked() bool { + return !d.mu.tableValidation.validating && + d.closed.Load() == nil && + d.opts.Experimental.ValidateOnIngest && + len(d.mu.tableValidation.pending) > 0 +} + +// validateSSTables runs a round of validation on the tables in the pending +// queue. +func (d *DB) validateSSTables() { + d.mu.Lock() + if !d.shouldValidateSSTablesLocked() { + d.mu.Unlock() + return + } + + pending := d.mu.tableValidation.pending + d.mu.tableValidation.pending = nil + d.mu.tableValidation.validating = true + jobID := d.mu.nextJobID + d.mu.nextJobID++ + rs := d.loadReadState() + + // Drop DB.mu before performing IO. + d.mu.Unlock() + + // Validate all tables in the pending queue. 
This could lead to a situation + // where we are starving IO from other tasks due to having to page through + // all the blocks in all the sstables in the queue. + // TODO(travers): Add some form of pacing to avoid IO starvation. + + // If we fail to validate any files due to reasons other than uncovered + // corruption, accumulate them and re-queue them for another attempt. + var retry []manifest.NewFileEntry + + for _, f := range pending { + // The file may have been moved or deleted since it was ingested, in + // which case we skip. + if !rs.current.Contains(f.Level, d.cmp, f.Meta) { + // Assume the file was moved to a lower level. It is rare enough + // that a table is moved or deleted between the time it was ingested + // and the time the validation routine runs that the overall cost of + // this inner loop is tolerably low, when amortized over all + // ingested tables. + found := false + for i := f.Level + 1; i < numLevels; i++ { + if rs.current.Contains(i, d.cmp, f.Meta) { + found = true + break + } + } + if !found { + continue + } + } + + var err error + if f.Meta.Virtual { + err = d.tableCache.withVirtualReader( + f.Meta.VirtualMeta(), func(v sstable.VirtualReader) error { + return v.ValidateBlockChecksumsOnBacking() + }) + } else { + err = d.tableCache.withReader( + f.Meta.PhysicalMeta(), func(r *sstable.Reader) error { + return r.ValidateBlockChecksums() + }) + } + + if err != nil { + if IsCorruptionError(err) { + // TODO(travers): Hook into the corruption reporting pipeline, once + // available. See pebble#1192. + d.opts.Logger.Fatalf("pebble: encountered corruption during ingestion: %s", err) + } else { + // If there was some other, possibly transient, error that + // caused table validation to fail inform the EventListener and + // move on. We remember the table so that we can retry it in a + // subsequent table validation job. + // + // TODO(jackson): If the error is not transient, this will retry + // validation indefinitely. 
While not great, it's the same + // behavior as erroring flushes and compactions. We should + // address this as a part of #270. + d.opts.EventListener.BackgroundError(err) + retry = append(retry, f) + continue + } + } + + d.opts.EventListener.TableValidated(TableValidatedInfo{ + JobID: jobID, + Meta: f.Meta, + }) + } + rs.unref() + d.mu.Lock() + defer d.mu.Unlock() + d.mu.tableValidation.pending = append(d.mu.tableValidation.pending, retry...) + d.mu.tableValidation.validating = false + d.mu.tableValidation.cond.Broadcast() + if d.shouldValidateSSTablesLocked() { + go d.validateSSTables() + } +} diff --git a/pebble/ingest_test.go b/pebble/ingest_test.go new file mode 100644 index 0000000..c4dcc2a --- /dev/null +++ b/pebble/ingest_test.go @@ -0,0 +1,3516 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import ( + "bytes" + "context" + "fmt" + "io" + "math" + "os" + "path/filepath" + "slices" + "sort" + "strconv" + "strings" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/errors" + "github.com/cockroachdb/errors/oserror" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/keyspan" + "github.com/cockroachdb/pebble/internal/manifest" + "github.com/cockroachdb/pebble/internal/rangekey" + "github.com/cockroachdb/pebble/internal/testkeys" + "github.com/cockroachdb/pebble/objstorage" + "github.com/cockroachdb/pebble/objstorage/objstorageprovider" + "github.com/cockroachdb/pebble/objstorage/remote" + "github.com/cockroachdb/pebble/record" + "github.com/cockroachdb/pebble/sstable" + "github.com/cockroachdb/pebble/vfs" + "github.com/cockroachdb/pebble/vfs/errorfs" + "github.com/kr/pretty" + "github.com/stretchr/testify/require" + "golang.org/x/exp/rand" +) + +func TestSSTableKeyCompare(t *testing.T) { + var buf 
bytes.Buffer + datadriven.RunTest(t, "testdata/sstable_key_compare", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "cmp": + buf.Reset() + for _, line := range strings.Split(td.Input, "\n") { + fields := strings.Fields(line) + a := base.ParseInternalKey(fields[0]) + b := base.ParseInternalKey(fields[1]) + got := sstableKeyCompare(testkeys.Comparer.Compare, a, b) + fmt.Fprintf(&buf, "%38s", fmt.Sprint(a.Pretty(base.DefaultFormatter))) + switch got { + case -1: + fmt.Fprint(&buf, " < ") + case +1: + fmt.Fprint(&buf, " > ") + case 0: + fmt.Fprint(&buf, " = ") + } + fmt.Fprintf(&buf, "%s\n", fmt.Sprint(b.Pretty(base.DefaultFormatter))) + } + return buf.String() + default: + return fmt.Sprintf("unrecognized command %q", td.Cmd) + } + }) +} + +func TestIngestLoad(t *testing.T) { + mem := vfs.NewMem() + + datadriven.RunTest(t, "testdata/ingest_load", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "load": + writerOpts := sstable.WriterOptions{} + var dbVersion FormatMajorVersion + for _, cmdArgs := range td.CmdArgs { + v, err := strconv.Atoi(cmdArgs.Vals[0]) + if err != nil { + return err.Error() + } + switch k := cmdArgs.Key; k { + case "writer-version": + fmv := FormatMajorVersion(v) + writerOpts.TableFormat = fmv.MaxTableFormat() + case "db-version": + dbVersion = FormatMajorVersion(v) + default: + return fmt.Sprintf("unknown cmd %s\n", k) + } + } + f, err := mem.Create("ext") + if err != nil { + return err.Error() + } + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), writerOpts) + for _, data := range strings.Split(td.Input, "\n") { + if strings.HasPrefix(data, "rangekey: ") { + data = strings.TrimPrefix(data, "rangekey: ") + s := keyspan.ParseSpan(data) + err := rangekey.Encode(&s, w.AddRangeKey) + if err != nil { + return err.Error() + } + continue + } + + j := strings.Index(data, ":") + if j < 0 { + return fmt.Sprintf("malformed input: %s\n", data) + } + key := 
base.ParseInternalKey(data[:j]) + value := []byte(data[j+1:]) + if err := w.Add(key, value); err != nil { + return err.Error() + } + } + if err := w.Close(); err != nil { + return err.Error() + } + + opts := (&Options{ + Comparer: DefaultComparer, + FS: mem, + }).WithFSDefaults() + lr, err := ingestLoad(opts, dbVersion, []string{"ext"}, nil, nil, 0, []base.DiskFileNum{base.FileNum(1).DiskFileNum()}, nil, 0) + if err != nil { + return err.Error() + } + var buf bytes.Buffer + for _, m := range lr.localMeta { + fmt.Fprintf(&buf, "%d: %s-%s\n", m.FileNum, m.Smallest, m.Largest) + fmt.Fprintf(&buf, " points: %s-%s\n", m.SmallestPointKey, m.LargestPointKey) + fmt.Fprintf(&buf, " ranges: %s-%s\n", m.SmallestRangeKey, m.LargestRangeKey) + } + return buf.String() + + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +func TestIngestLoadRand(t *testing.T) { + mem := vfs.NewMem() + rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) + cmp := DefaultComparer.Compare + version := internalFormatNewest + + randBytes := func(size int) []byte { + data := make([]byte, size) + for i := range data { + data[i] = byte(rng.Int() & 0xff) + } + return data + } + + paths := make([]string, 1+rng.Intn(10)) + pending := make([]base.DiskFileNum, len(paths)) + expected := make([]*fileMetadata, len(paths)) + for i := range paths { + paths[i] = fmt.Sprint(i) + pending[i] = base.FileNum(rng.Uint64()).DiskFileNum() + expected[i] = &fileMetadata{ + FileNum: pending[i].FileNum(), + } + expected[i].StatsMarkValid() + + func() { + f, err := mem.Create(paths[i]) + require.NoError(t, err) + + keys := make([]InternalKey, 1+rng.Intn(100)) + for i := range keys { + keys[i] = base.MakeInternalKey( + randBytes(1+rng.Intn(10)), + 0, + InternalKeyKindSet) + } + slices.SortFunc(keys, func(a, b base.InternalKey) int { + return base.InternalCompare(cmp, a, b) + }) + + expected[i].ExtendPointKeyBounds(cmp, keys[0], keys[len(keys)-1]) + + w := 
sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ + TableFormat: version.MaxTableFormat(), + }) + var count uint64 + for i := range keys { + if i > 0 && base.InternalCompare(cmp, keys[i-1], keys[i]) == 0 { + // Duplicate key, ignore. + continue + } + w.Add(keys[i], nil) + count++ + } + expected[i].Stats.NumEntries = count + require.NoError(t, w.Close()) + + meta, err := w.Metadata() + require.NoError(t, err) + + expected[i].Size = meta.Size + expected[i].InitPhysicalBacking() + }() + } + + opts := (&Options{ + Comparer: DefaultComparer, + FS: mem, + }).WithFSDefaults() + lr, err := ingestLoad(opts, version, paths, nil, nil, 0, pending, nil, 0) + require.NoError(t, err) + + for _, m := range lr.localMeta { + m.CreationTime = 0 + } + t.Log(strings.Join(pretty.Diff(expected, lr.localMeta), "\n")) + require.Equal(t, expected, lr.localMeta) +} + +func TestIngestLoadInvalid(t *testing.T) { + mem := vfs.NewMem() + f, err := mem.Create("invalid") + require.NoError(t, err) + require.NoError(t, f.Close()) + + opts := (&Options{ + Comparer: DefaultComparer, + FS: mem, + }).WithFSDefaults() + if _, err := ingestLoad(opts, internalFormatNewest, []string{"invalid"}, nil, nil, 0, []base.DiskFileNum{base.FileNum(1).DiskFileNum()}, nil, 0); err == nil { + t.Fatalf("expected error, but found success") + } +} + +func TestIngestSortAndVerify(t *testing.T) { + comparers := map[string]Compare{ + "default": DefaultComparer.Compare, + "reverse": func(a, b []byte) int { + return DefaultComparer.Compare(b, a) + }, + } + + t.Run("", func(t *testing.T) { + datadriven.RunTest(t, "testdata/ingest_sort_and_verify", func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "ingest": + var buf bytes.Buffer + var meta []*fileMetadata + var paths []string + var cmpName string + d.ScanArgs(t, "cmp", &cmpName) + cmp := comparers[cmpName] + if cmp == nil { + return fmt.Sprintf("%s unknown comparer: %s", d.Cmd, cmpName) + } + for i, data := range 
strings.Split(d.Input, "\n") { + parts := strings.Split(data, "-") + if len(parts) != 2 { + return fmt.Sprintf("malformed test case: %s", d.Input) + } + smallest := base.ParseInternalKey(parts[0]) + largest := base.ParseInternalKey(parts[1]) + if cmp(smallest.UserKey, largest.UserKey) > 0 { + return fmt.Sprintf("range %v-%v is not valid", smallest, largest) + } + m := (&fileMetadata{}).ExtendPointKeyBounds(cmp, smallest, largest) + m.InitPhysicalBacking() + meta = append(meta, m) + paths = append(paths, strconv.Itoa(i)) + } + lr := ingestLoadResult{localPaths: paths, localMeta: meta} + err := ingestSortAndVerify(cmp, lr, KeyRange{}) + if err != nil { + return fmt.Sprintf("%v\n", err) + } + for i := range meta { + fmt.Fprintf(&buf, "%s: %v-%v\n", paths[i], meta[i].Smallest, meta[i].Largest) + } + return buf.String() + + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) + }) +} + +func TestIngestLink(t *testing.T) { + // Test linking of tables into the DB directory. Test cleanup when one of the + // tables cannot be linked. + + const dir = "db" + const count = 10 + for i := 0; i <= count; i++ { + t.Run("", func(t *testing.T) { + opts := &Options{FS: vfs.NewMem()} + opts.EnsureDefaults().WithFSDefaults() + require.NoError(t, opts.FS.MkdirAll(dir, 0755)) + objProvider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(opts.FS, dir)) + require.NoError(t, err) + defer objProvider.Close() + + paths := make([]string, 10) + meta := make([]*fileMetadata, len(paths)) + contents := make([][]byte, len(paths)) + for j := range paths { + paths[j] = fmt.Sprintf("external%d", j) + meta[j] = &fileMetadata{} + meta[j].FileNum = FileNum(j) + meta[j].InitPhysicalBacking() + f, err := opts.FS.Create(paths[j]) + require.NoError(t, err) + + contents[j] = []byte(fmt.Sprintf("data%d", j)) + // memFile.Write will modify the supplied buffer when invariants are + // enabled, so provide a throw-away copy. 
+ _, err = f.Write(append([]byte(nil), contents[j]...)) + require.NoError(t, err) + require.NoError(t, f.Close()) + } + + if i < count { + opts.FS.Remove(paths[i]) + } + + lr := ingestLoadResult{localMeta: meta, localPaths: paths} + err = ingestLink(0 /* jobID */, opts, objProvider, lr, nil /* shared */) + if i < count { + if err == nil { + t.Fatalf("expected error, but found success") + } + } else { + require.NoError(t, err) + } + + files, err := opts.FS.List(dir) + require.NoError(t, err) + + sort.Strings(files) + + if i < count { + if len(files) > 0 { + t.Fatalf("expected all of the files to be cleaned up, but found:\n%s", + strings.Join(files, "\n")) + } + } else { + if len(files) != count { + t.Fatalf("expected %d files, but found:\n%s", count, strings.Join(files, "\n")) + } + for j := range files { + ftype, fileNum, ok := base.ParseFilename(opts.FS, files[j]) + if !ok { + t.Fatalf("unable to parse filename: %s", files[j]) + } + if fileTypeTable != ftype { + t.Fatalf("expected table, but found %d", ftype) + } + if j != int(fileNum.FileNum()) { + t.Fatalf("expected table %d, but found %d", j, fileNum) + } + f, err := opts.FS.Open(opts.FS.PathJoin(dir, files[j])) + require.NoError(t, err) + + data, err := io.ReadAll(f) + require.NoError(t, err) + require.NoError(t, f.Close()) + if !bytes.Equal(contents[j], data) { + t.Fatalf("expected %s, but found %s", contents[j], data) + } + } + } + }) + } +} + +func TestIngestLinkFallback(t *testing.T) { + // Verify that ingestLink succeeds if linking fails by falling back to + // copying. + mem := vfs.NewMem() + src, err := mem.Create("source") + require.NoError(t, err) + + opts := &Options{FS: errorfs.Wrap(mem, errorfs.ErrInjected.If(errorfs.OnIndex(1)))} + opts.EnsureDefaults().WithFSDefaults() + objSettings := objstorageprovider.DefaultSettings(opts.FS, "") + // Prevent the provider from listing the dir (where we may get an injected error). 
+ objSettings.FSDirInitialListing = []string{} + objProvider, err := objstorageprovider.Open(objSettings) + require.NoError(t, err) + defer objProvider.Close() + + meta := []*fileMetadata{{FileNum: 1}} + meta[0].InitPhysicalBacking() + lr := ingestLoadResult{localMeta: meta, localPaths: []string{"source"}} + err = ingestLink(0, opts, objProvider, lr, nil /* shared */) + require.NoError(t, err) + + dest, err := mem.Open("000001.sst") + require.NoError(t, err) + + // We should be able to write bytes to src, and not have them show up in + // dest. + _, _ = src.Write([]byte("test")) + data, err := io.ReadAll(dest) + require.NoError(t, err) + if len(data) != 0 { + t.Fatalf("expected copy, but files appear to be hard linked: [%s] unexpectedly found", data) + } +} + +func TestOverlappingIngestedSSTs(t *testing.T) { + dir := "" + var ( + mem vfs.FS + d *DB + opts *Options + closed = false + blockFlush = false + ) + defer func() { + if !closed { + require.NoError(t, d.Close()) + } + }() + + reset := func(strictMem bool) { + if d != nil && !closed { + require.NoError(t, d.Close()) + } + blockFlush = false + + if strictMem { + mem = vfs.NewStrictMem() + } else { + mem = vfs.NewMem() + } + + require.NoError(t, mem.MkdirAll("ext", 0755)) + opts = (&Options{ + FS: mem, + MemTableStopWritesThreshold: 4, + L0CompactionThreshold: 100, + L0StopWritesThreshold: 100, + DebugCheck: DebugCheckLevels, + FormatMajorVersion: internalFormatNewest, + }).WithFSDefaults() + // Disable automatic compactions because otherwise we'll race with + // delete-only compactions triggered by ingesting range tombstones. 
+ opts.DisableAutomaticCompactions = true + + var err error + d, err = Open(dir, opts) + require.NoError(t, err) + d.TestOnlyWaitForCleaning() + } + waitForFlush := func() { + if d == nil { + return + } + d.mu.Lock() + for d.mu.compact.flushing { + d.mu.compact.cond.Wait() + } + d.mu.Unlock() + } + reset(false) + + datadriven.RunTest(t, "testdata/flushable_ingest", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "reset": + reset(td.HasArg("strictMem")) + return "" + + case "ignoreSyncs": + var ignoreSyncs bool + if len(td.CmdArgs) == 1 && td.CmdArgs[0].String() == "true" { + ignoreSyncs = true + } + mem.(*vfs.MemFS).SetIgnoreSyncs(ignoreSyncs) + return "" + + case "resetToSynced": + mem.(*vfs.MemFS).ResetToSyncedState() + files, err := mem.List(dir) + sort.Strings(files) + require.NoError(t, err) + return strings.Join(files, "\n") + + case "batch": + b := d.NewIndexedBatch() + if err := runBatchDefineCmd(td, b); err != nil { + return err.Error() + } + if err := b.Commit(nil); err != nil { + return err.Error() + } + return "" + + case "build": + if err := runBuildCmd(td, d, mem); err != nil { + return err.Error() + } + return "" + + case "ingest": + if err := runIngestCmd(td, d, mem); err != nil { + return err.Error() + } + if !blockFlush { + waitForFlush() + } + return "" + + case "iter": + iter, _ := d.NewIter(nil) + return runIterCmd(td, iter, true) + + case "lsm": + return runLSMCmd(td, d) + + case "close": + if closed { + return "already closed" + } + require.NoError(t, d.Close()) + closed = true + return "" + + case "ls": + files, err := mem.List(dir) + sort.Strings(files) + require.NoError(t, err) + return strings.Join(files, "\n") + + case "open": + opts.ReadOnly = td.HasArg("readOnly") + var err error + d, err = Open(dir, opts) + closed = false + require.NoError(t, err) + waitForFlush() + d.TestOnlyWaitForCleaning() + return "" + + case "blockFlush": + blockFlush = true + d.mu.Lock() + d.mu.compact.flushing = true + 
d.mu.Unlock() + return "" + + case "allowFlush": + blockFlush = false + d.mu.Lock() + d.mu.compact.flushing = false + d.mu.Unlock() + return "" + + case "flush": + d.maybeScheduleFlush() + waitForFlush() + d.TestOnlyWaitForCleaning() + return "" + + case "get": + return runGetCmd(t, td, d) + + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +func TestExcise(t *testing.T) { + var mem vfs.FS + var d *DB + var flushed bool + defer func() { + require.NoError(t, d.Close()) + }() + + var opts *Options + reset := func() { + if d != nil { + require.NoError(t, d.Close()) + } + + mem = vfs.NewMem() + require.NoError(t, mem.MkdirAll("ext", 0755)) + opts = &Options{ + FS: mem, + L0CompactionThreshold: 100, + L0StopWritesThreshold: 100, + DebugCheck: DebugCheckLevels, + EventListener: &EventListener{FlushEnd: func(info FlushInfo) { + flushed = true + }}, + FormatMajorVersion: FormatVirtualSSTables, + Comparer: testkeys.Comparer, + } + // Disable automatic compactions because otherwise we'll race with + // delete-only compactions triggered by ingesting range tombstones. + opts.DisableAutomaticCompactions = true + // Set this to true to add some testing for the virtual sstable validation + // code paths. 
+ opts.Experimental.ValidateOnIngest = true + + var err error + d, err = Open("", opts) + require.NoError(t, err) + } + reset() + + datadriven.RunTest(t, "testdata/excise", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "reset": + reset() + return "" + case "reopen": + require.NoError(t, d.Close()) + var err error + d, err = Open("", opts) + require.NoError(t, err) + + return "" + case "batch": + b := d.NewIndexedBatch() + if err := runBatchDefineCmd(td, b); err != nil { + return err.Error() + } + if err := b.Commit(nil); err != nil { + return err.Error() + } + return "" + case "build": + if err := runBuildCmd(td, d, mem); err != nil { + return err.Error() + } + return "" + + case "flush": + if err := d.Flush(); err != nil { + return err.Error() + } + return "" + + case "ingest": + flushed = false + if err := runIngestCmd(td, d, mem); err != nil { + return err.Error() + } + // Wait for a possible flush. + d.mu.Lock() + for d.mu.compact.flushing { + d.mu.compact.cond.Wait() + } + d.mu.Unlock() + if flushed { + return "memtable flushed" + } + return "" + + case "ingest-and-excise": + flushed = false + if err := runIngestAndExciseCmd(td, d, mem); err != nil { + return err.Error() + } + // Wait for a possible flush. + d.mu.Lock() + for d.mu.compact.flushing { + d.mu.compact.cond.Wait() + } + d.mu.Unlock() + if flushed { + return "memtable flushed" + } + return "" + + case "get": + return runGetCmd(t, td, d) + + case "iter": + iter, _ := d.NewIter(&IterOptions{ + KeyTypes: IterKeyTypePointsAndRanges, + }) + return runIterCmd(td, iter, true) + + case "lsm": + return runLSMCmd(td, d) + + case "metrics": + // The asynchronous loading of table stats can change metrics, so + // wait for all the tables' stats to be loaded. 
+ d.mu.Lock() + d.waitTableStats() + d.mu.Unlock() + + return d.Metrics().StringForTests() + + case "wait-pending-table-stats": + return runTableStatsCmd(td, d) + + case "excise": + ve := &versionEdit{ + DeletedFiles: map[deletedFileEntry]*fileMetadata{}, + } + var exciseSpan KeyRange + if len(td.CmdArgs) != 2 { + panic("insufficient args for compact command") + } + exciseSpan.Start = []byte(td.CmdArgs[0].Key) + exciseSpan.End = []byte(td.CmdArgs[1].Key) + + d.mu.Lock() + d.mu.versions.logLock() + d.mu.Unlock() + current := d.mu.versions.currentVersion() + for level := range current.Levels { + iter := current.Levels[level].Iter() + for m := iter.SeekGE(d.cmp, exciseSpan.Start); m != nil && d.cmp(m.Smallest.UserKey, exciseSpan.End) < 0; m = iter.Next() { + _, err := d.excise(exciseSpan, m, ve, level) + if err != nil { + d.mu.Lock() + d.mu.versions.logUnlock() + d.mu.Unlock() + return fmt.Sprintf("error when excising %s: %s", m.FileNum, err.Error()) + } + } + } + d.mu.Lock() + d.mu.versions.logUnlock() + d.mu.Unlock() + return fmt.Sprintf("would excise %d files, use ingest-and-excise to excise.\n%s", len(ve.DeletedFiles), ve.DebugString(base.DefaultFormatter)) + + case "confirm-backing": + // Confirms that the files have the same FileBacking. 
+ fileNums := make(map[base.FileNum]struct{}) + for i := range td.CmdArgs { + fNum, err := strconv.Atoi(td.CmdArgs[i].Key) + if err != nil { + panic("invalid file number") + } + fileNums[base.FileNum(fNum)] = struct{}{} + } + d.mu.Lock() + currVersion := d.mu.versions.currentVersion() + var ptr *manifest.FileBacking + for _, level := range currVersion.Levels { + lIter := level.Iter() + for f := lIter.First(); f != nil; f = lIter.Next() { + if _, ok := fileNums[f.FileNum]; ok { + if ptr == nil { + ptr = f.FileBacking + continue + } + if f.FileBacking != ptr { + d.mu.Unlock() + return "file backings are not the same" + } + } + } + } + d.mu.Unlock() + return "file backings are the same" + case "compact": + if len(td.CmdArgs) != 2 { + panic("insufficient args for compact command") + } + l := td.CmdArgs[0].Key + r := td.CmdArgs[1].Key + err := d.Compact([]byte(l), []byte(r), false) + if err != nil { + return err.Error() + } + return "" + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +func testIngestSharedImpl( + t *testing.T, createOnShared remote.CreateOnSharedStrategy, fileName string, +) { + var d, d1, d2 *DB + var efos map[string]*EventuallyFileOnlySnapshot + defer func() { + for _, e := range efos { + require.NoError(t, e.Close()) + } + if d1 != nil { + require.NoError(t, d1.Close()) + } + if d2 != nil { + require.NoError(t, d2.Close()) + } + }() + creatorIDCounter := uint64(1) + replicateCounter := 1 + var opts1, opts2 *Options + + reset := func() { + for _, e := range efos { + require.NoError(t, e.Close()) + } + if d1 != nil { + require.NoError(t, d1.Close()) + } + if d2 != nil { + require.NoError(t, d2.Close()) + } + efos = make(map[string]*EventuallyFileOnlySnapshot) + + sstorage := remote.NewInMem() + mem1 := vfs.NewMem() + mem2 := vfs.NewMem() + require.NoError(t, mem1.MkdirAll("ext", 0755)) + require.NoError(t, mem2.MkdirAll("ext", 0755)) + opts1 = &Options{ + Comparer: testkeys.Comparer, + FS: mem1, + LBaseMaxBytes: 1, + 
L0CompactionThreshold: 100, + L0StopWritesThreshold: 100, + DebugCheck: DebugCheckLevels, + FormatMajorVersion: FormatVirtualSSTables, + } + // lel. + lel := MakeLoggingEventListener(DefaultLogger) + opts1.EventListener = &lel + opts1.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ + "": sstorage, + }) + opts1.Experimental.CreateOnShared = createOnShared + opts1.Experimental.CreateOnSharedLocator = "" + // Disable automatic compactions because otherwise we'll race with + // delete-only compactions triggered by ingesting range tombstones. + opts1.DisableAutomaticCompactions = true + + opts2 = &Options{} + *opts2 = *opts1 + opts2.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ + "": sstorage, + }) + opts2.Experimental.CreateOnShared = createOnShared + opts2.Experimental.CreateOnSharedLocator = "" + opts2.FS = mem2 + + var err error + d1, err = Open("", opts1) + require.NoError(t, err) + require.NoError(t, d1.SetCreatorID(creatorIDCounter)) + creatorIDCounter++ + d2, err = Open("", opts2) + require.NoError(t, err) + require.NoError(t, d2.SetCreatorID(creatorIDCounter)) + creatorIDCounter++ + d = d1 + } + reset() + + datadriven.RunTest(t, fmt.Sprintf("testdata/%s", fileName), func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "restart": + for _, e := range efos { + require.NoError(t, e.Close()) + } + if d1 != nil { + require.NoError(t, d1.Close()) + } + if d2 != nil { + require.NoError(t, d2.Close()) + } + + var err error + d1, err = Open("", opts1) + if err != nil { + return err.Error() + } + d2, err = Open("", opts2) + if err != nil { + return err.Error() + } + d = d1 + return "ok, note that the active db has been set to 1 (use 'switch' to change)" + case "reset": + reset() + return "" + case "switch": + if len(td.CmdArgs) != 1 { + return "usage: switch <1 or 2>" + } + switch td.CmdArgs[0].Key { + case "1": + d = d1 + case "2": + d = d2 + default: + return 
"usage: switch <1 or 2>" + } + return "ok" + case "batch": + b := d.NewIndexedBatch() + if err := runBatchDefineCmd(td, b); err != nil { + return err.Error() + } + if err := b.Commit(nil); err != nil { + return err.Error() + } + return "" + case "build": + if err := runBuildCmd(td, d, d.opts.FS); err != nil { + return err.Error() + } + return "" + + case "flush": + if err := d.Flush(); err != nil { + return err.Error() + } + return "" + + case "ingest": + if err := runIngestCmd(td, d, d.opts.FS); err != nil { + return err.Error() + } + // Wait for a possible flush. + d.mu.Lock() + for d.mu.compact.flushing { + d.mu.compact.cond.Wait() + } + d.mu.Unlock() + return "" + + case "ingest-and-excise": + if err := runIngestAndExciseCmd(td, d, d.opts.FS); err != nil { + return err.Error() + } + // Wait for a possible flush. + d.mu.Lock() + for d.mu.compact.flushing { + d.mu.compact.cond.Wait() + } + d.mu.Unlock() + return "" + + case "replicate": + if len(td.CmdArgs) != 4 { + return "usage: replicate " + } + var from, to *DB + switch td.CmdArgs[0].Key { + case "1": + from = d1 + case "2": + from = d2 + default: + return "usage: replicate " + } + switch td.CmdArgs[1].Key { + case "1": + to = d1 + case "2": + to = d2 + default: + return "usage: replicate " + } + startKey := []byte(td.CmdArgs[2].Key) + endKey := []byte(td.CmdArgs[3].Key) + + writeOpts := d.opts.MakeWriterOptions(0 /* level */, to.opts.FormatMajorVersion.MaxTableFormat()) + sstPath := fmt.Sprintf("ext/replicate%d.sst", replicateCounter) + f, err := to.opts.FS.Create(sstPath) + require.NoError(t, err) + replicateCounter++ + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), writeOpts) + + var sharedSSTs []SharedSSTMeta + err = from.ScanInternal(context.TODO(), sstable.CategoryAndQoS{}, startKey, endKey, + func(key *InternalKey, value LazyValue, _ IteratorLevel) error { + val, _, err := value.Value(nil) + require.NoError(t, err) + require.NoError(t, w.Add(base.MakeInternalKey(key.UserKey, 0, 
key.Kind()), val)) + return nil + }, + func(start, end []byte, seqNum uint64) error { + require.NoError(t, w.DeleteRange(start, end)) + return nil + }, + func(start, end []byte, keys []keyspan.Key) error { + s := keyspan.Span{ + Start: start, + End: end, + Keys: keys, + KeysOrder: 0, + } + require.NoError(t, rangekey.Encode(&s, func(k base.InternalKey, v []byte) error { + return w.AddRangeKey(base.MakeInternalKey(k.UserKey, 0, k.Kind()), v) + })) + return nil + }, + func(sst *SharedSSTMeta) error { + sharedSSTs = append(sharedSSTs, *sst) + return nil + }, + ) + require.NoError(t, err) + require.NoError(t, w.Close()) + + _, err = to.IngestAndExcise([]string{sstPath}, sharedSSTs, KeyRange{Start: startKey, End: endKey}) + require.NoError(t, err) + return fmt.Sprintf("replicated %d shared SSTs", len(sharedSSTs)) + + case "get": + return runGetCmd(t, td, d) + + case "iter": + o := &IterOptions{KeyTypes: IterKeyTypePointsAndRanges} + var reader Reader + reader = d + for _, arg := range td.CmdArgs { + switch arg.Key { + case "mask-suffix": + o.RangeKeyMasking.Suffix = []byte(arg.Vals[0]) + case "mask-filter": + o.RangeKeyMasking.Filter = func() BlockPropertyFilterMask { + return sstable.NewTestKeysMaskingFilter() + } + case "snapshot": + reader = efos[arg.Vals[0]] + } + } + iter, err := reader.NewIter(o) + if err != nil { + return err.Error() + } + return runIterCmd(td, iter, true) + + case "lsm": + return runLSMCmd(td, d) + + case "metrics": + // The asynchronous loading of table stats can change metrics, so + // wait for all the tables' stats to be loaded. 
+ d.mu.Lock() + d.waitTableStats() + d.mu.Unlock() + + return d.Metrics().StringForTests() + + case "wait-pending-table-stats": + return runTableStatsCmd(td, d) + + case "excise": + ve := &versionEdit{ + DeletedFiles: map[deletedFileEntry]*fileMetadata{}, + } + var exciseSpan KeyRange + if len(td.CmdArgs) != 2 { + panic("insufficient args for excise command") + } + exciseSpan.Start = []byte(td.CmdArgs[0].Key) + exciseSpan.End = []byte(td.CmdArgs[1].Key) + + d.mu.Lock() + d.mu.versions.logLock() + d.mu.Unlock() + current := d.mu.versions.currentVersion() + for level := range current.Levels { + iter := current.Levels[level].Iter() + for m := iter.SeekGE(d.cmp, exciseSpan.Start); m != nil && d.cmp(m.Smallest.UserKey, exciseSpan.End) < 0; m = iter.Next() { + _, err := d.excise(exciseSpan, m, ve, level) + if err != nil { + d.mu.Lock() + d.mu.versions.logUnlock() + d.mu.Unlock() + return fmt.Sprintf("error when excising %s: %s", m.FileNum, err.Error()) + } + } + } + d.mu.Lock() + d.mu.versions.logUnlock() + d.mu.Unlock() + return fmt.Sprintf("would excise %d files, use ingest-and-excise to excise.\n%s", len(ve.DeletedFiles), ve.String()) + + case "file-only-snapshot": + if len(td.CmdArgs) != 1 { + panic("insufficient args for file-only-snapshot command") + } + name := td.CmdArgs[0].Key + var keyRanges []KeyRange + for _, line := range strings.Split(td.Input, "\n") { + fields := strings.Fields(line) + if len(fields) != 2 { + return "expected two fields for file-only snapshot KeyRanges" + } + kr := KeyRange{Start: []byte(fields[0]), End: []byte(fields[1])} + keyRanges = append(keyRanges, kr) + } + + s := d.NewEventuallyFileOnlySnapshot(keyRanges) + efos[name] = s + return "ok" + + case "wait-for-file-only-snapshot": + if len(td.CmdArgs) != 1 { + panic("insufficient args for file-only-snapshot command") + } + name := td.CmdArgs[0].Key + err := efos[name].WaitForFileOnlySnapshot(context.TODO(), 1*time.Millisecond) + if err != nil { + return err.Error() + } + return "ok" + + 
case "compact": + err := runCompactCmd(td, d) + if err != nil { + return err.Error() + } + return "ok" + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +func TestIngestShared(t *testing.T) { + for _, strategy := range []remote.CreateOnSharedStrategy{remote.CreateOnSharedAll, remote.CreateOnSharedLower} { + strategyStr := "all" + if strategy == remote.CreateOnSharedLower { + strategyStr = "lower" + } + t.Run(fmt.Sprintf("createOnShared=%s", strategyStr), func(t *testing.T) { + fileName := "ingest_shared" + if strategy == remote.CreateOnSharedLower { + fileName = "ingest_shared_lower" + } + testIngestSharedImpl(t, strategy, fileName) + }) + } +} + +func TestSimpleIngestShared(t *testing.T) { + mem := vfs.NewMem() + var d *DB + var provider2 objstorage.Provider + opts2 := Options{FS: vfs.NewMem(), FormatMajorVersion: FormatVirtualSSTables} + opts2.EnsureDefaults() + + // Create an objProvider where we will fake-create some sstables that can + // then be shared back to the db instance. 
+ providerSettings := objstorageprovider.Settings{ + Logger: opts2.Logger, + FS: opts2.FS, + FSDirName: "", + FSDirInitialListing: nil, + FSCleaner: opts2.Cleaner, + NoSyncOnClose: opts2.NoSyncOnClose, + BytesPerSync: opts2.BytesPerSync, + } + providerSettings.Remote.StorageFactory = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ + "": remote.NewInMem(), + }) + providerSettings.Remote.CreateOnShared = remote.CreateOnSharedAll + providerSettings.Remote.CreateOnSharedLocator = "" + + provider2, err := objstorageprovider.Open(providerSettings) + require.NoError(t, err) + creatorIDCounter := uint64(1) + provider2.SetCreatorID(objstorage.CreatorID(creatorIDCounter)) + creatorIDCounter++ + + defer func() { + require.NoError(t, d.Close()) + }() + + reset := func() { + if d != nil { + require.NoError(t, d.Close()) + } + + mem = vfs.NewMem() + require.NoError(t, mem.MkdirAll("ext", 0755)) + opts := &Options{ + FormatMajorVersion: FormatVirtualSSTables, + FS: mem, + L0CompactionThreshold: 100, + L0StopWritesThreshold: 100, + } + opts.Experimental.RemoteStorage = providerSettings.Remote.StorageFactory + opts.Experimental.CreateOnShared = providerSettings.Remote.CreateOnShared + opts.Experimental.CreateOnSharedLocator = providerSettings.Remote.CreateOnSharedLocator + + var err error + d, err = Open("", opts) + require.NoError(t, err) + require.NoError(t, d.SetCreatorID(creatorIDCounter)) + creatorIDCounter++ + } + reset() + + metaMap := map[base.DiskFileNum]objstorage.ObjectMetadata{} + + require.NoError(t, d.Set([]byte("d"), []byte("unexpected"), nil)) + require.NoError(t, d.Set([]byte("e"), []byte("unexpected"), nil)) + require.NoError(t, d.Set([]byte("a"), []byte("unexpected"), nil)) + require.NoError(t, d.Set([]byte("f"), []byte("unexpected"), nil)) + d.Flush() + + { + // Create a shared file. 
+ fn := base.FileNum(2) + f, meta, err := provider2.Create(context.TODO(), fileTypeTable, fn.DiskFileNum(), objstorage.CreateOptions{PreferSharedStorage: true}) + require.NoError(t, err) + w := sstable.NewWriter(f, d.opts.MakeWriterOptions(0, d.opts.FormatMajorVersion.MaxTableFormat())) + w.Set([]byte("d"), []byte("shared")) + w.Set([]byte("e"), []byte("shared")) + w.Close() + metaMap[fn.DiskFileNum()] = meta + } + + m := metaMap[base.FileNum(2).DiskFileNum()] + handle, err := provider2.RemoteObjectBacking(&m) + require.NoError(t, err) + size, err := provider2.Size(m) + require.NoError(t, err) + + sharedSSTMeta := SharedSSTMeta{ + Backing: handle, + Smallest: base.MakeInternalKey([]byte("d"), 0, InternalKeyKindSet), + Largest: base.MakeInternalKey([]byte("e"), 0, InternalKeyKindSet), + SmallestPointKey: base.MakeInternalKey([]byte("d"), 0, InternalKeyKindSet), + LargestPointKey: base.MakeInternalKey([]byte("e"), 0, InternalKeyKindSet), + Level: 6, + Size: uint64(size + 5), + } + _, err = d.IngestAndExcise([]string{}, []SharedSSTMeta{sharedSSTMeta}, KeyRange{Start: []byte("d"), End: []byte("ee")}) + require.NoError(t, err) + + // TODO(bilal): Once reading of shared sstables is in, verify that the values + // of d and e have been updated. +} + +type blockedCompaction struct { + startBlock, unblock chan struct{} +} + +func TestConcurrentExcise(t *testing.T) { + var d, d1, d2 *DB + var efos map[string]*EventuallyFileOnlySnapshot + backgroundErrs := make(chan error, 5) + var compactions map[string]*blockedCompaction + defer func() { + for _, e := range efos { + require.NoError(t, e.Close()) + } + if d1 != nil { + require.NoError(t, d1.Close()) + } + if d2 != nil { + require.NoError(t, d2.Close()) + } + }() + creatorIDCounter := uint64(1) + replicateCounter := 1 + + var wg sync.WaitGroup + defer wg.Wait() + var blockNextCompaction bool + var blockedJobID int + var blockedCompactionName string + var blockedCompactionsMu sync.Mutex // protects the above three variables. 
+ + reset := func() { + wg.Wait() + for _, e := range efos { + require.NoError(t, e.Close()) + } + if d1 != nil { + require.NoError(t, d1.Close()) + } + if d2 != nil { + require.NoError(t, d2.Close()) + } + efos = make(map[string]*EventuallyFileOnlySnapshot) + compactions = make(map[string]*blockedCompaction) + backgroundErrs = make(chan error, 5) + + var el EventListener + el.EnsureDefaults(testLogger{t: t}) + el.FlushBegin = func(info FlushInfo) { + // Don't block flushes + } + el.BackgroundError = func(err error) { + backgroundErrs <- err + } + el.CompactionBegin = func(info CompactionInfo) { + if info.Reason == "move" { + return + } + blockedCompactionsMu.Lock() + defer blockedCompactionsMu.Unlock() + if blockNextCompaction { + blockNextCompaction = false + blockedJobID = info.JobID + } + } + el.TableCreated = func(info TableCreateInfo) { + blockedCompactionsMu.Lock() + if info.JobID != blockedJobID { + blockedCompactionsMu.Unlock() + return + } + blockedJobID = 0 + c := compactions[blockedCompactionName] + blockedCompactionName = "" + blockedCompactionsMu.Unlock() + c.startBlock <- struct{}{} + <-c.unblock + } + + sstorage := remote.NewInMem() + mem1 := vfs.NewMem() + mem2 := vfs.NewMem() + require.NoError(t, mem1.MkdirAll("ext", 0755)) + require.NoError(t, mem2.MkdirAll("ext", 0755)) + opts1 := &Options{ + Comparer: testkeys.Comparer, + LBaseMaxBytes: 1, + FS: mem1, + L0CompactionThreshold: 100, + L0StopWritesThreshold: 100, + DebugCheck: DebugCheckLevels, + FormatMajorVersion: FormatVirtualSSTables, + } + // lel. 
+ lel := MakeLoggingEventListener(DefaultLogger) + tel := TeeEventListener(lel, el) + opts1.EventListener = &tel + opts1.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ + "": sstorage, + }) + opts1.Experimental.CreateOnShared = remote.CreateOnSharedAll + opts1.Experimental.CreateOnSharedLocator = "" + // Disable automatic compactions because otherwise we'll race with + // delete-only compactions triggered by ingesting range tombstones. + opts1.DisableAutomaticCompactions = true + + opts2 := &Options{} + *opts2 = *opts1 + opts2.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ + "": sstorage, + }) + opts2.Experimental.CreateOnShared = remote.CreateOnSharedAll + opts2.Experimental.CreateOnSharedLocator = "" + opts2.FS = mem2 + + var err error + d1, err = Open("", opts1) + require.NoError(t, err) + require.NoError(t, d1.SetCreatorID(creatorIDCounter)) + creatorIDCounter++ + d2, err = Open("", opts2) + require.NoError(t, err) + require.NoError(t, d2.SetCreatorID(creatorIDCounter)) + creatorIDCounter++ + d = d1 + } + reset() + + datadriven.RunTest(t, "testdata/concurrent_excise", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "reset": + reset() + return "" + case "switch": + if len(td.CmdArgs) != 1 { + return "usage: switch <1 or 2>" + } + switch td.CmdArgs[0].Key { + case "1": + d = d1 + case "2": + d = d2 + default: + return "usage: switch <1 or 2>" + } + return "ok" + case "batch": + b := d.NewIndexedBatch() + if err := runBatchDefineCmd(td, b); err != nil { + return err.Error() + } + if err := b.Commit(nil); err != nil { + return err.Error() + } + return "" + case "build": + if err := runBuildCmd(td, d, d.opts.FS); err != nil { + return err.Error() + } + return "" + + case "flush": + if err := d.Flush(); err != nil { + return err.Error() + } + return "" + + case "ingest": + if err := runIngestCmd(td, d, d.opts.FS); err != nil { + return err.Error() + } + // 
Wait for a possible flush. + d.mu.Lock() + for d.mu.compact.flushing { + d.mu.compact.cond.Wait() + } + d.mu.Unlock() + return "" + + case "ingest-and-excise": + if err := runIngestAndExciseCmd(td, d, d.opts.FS); err != nil { + return err.Error() + } + // Wait for a possible flush. + d.mu.Lock() + for d.mu.compact.flushing { + d.mu.compact.cond.Wait() + } + d.mu.Unlock() + return "" + + case "replicate": + if len(td.CmdArgs) != 4 { + return "usage: replicate " + } + var from, to *DB + switch td.CmdArgs[0].Key { + case "1": + from = d1 + case "2": + from = d2 + default: + return "usage: replicate " + } + switch td.CmdArgs[1].Key { + case "1": + to = d1 + case "2": + to = d2 + default: + return "usage: replicate " + } + startKey := []byte(td.CmdArgs[2].Key) + endKey := []byte(td.CmdArgs[3].Key) + + writeOpts := d.opts.MakeWriterOptions(0 /* level */, to.opts.FormatMajorVersion.MaxTableFormat()) + sstPath := fmt.Sprintf("ext/replicate%d.sst", replicateCounter) + f, err := to.opts.FS.Create(sstPath) + require.NoError(t, err) + replicateCounter++ + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), writeOpts) + + var sharedSSTs []SharedSSTMeta + err = from.ScanInternal(context.TODO(), sstable.CategoryAndQoS{}, startKey, endKey, + func(key *InternalKey, value LazyValue, _ IteratorLevel) error { + val, _, err := value.Value(nil) + require.NoError(t, err) + require.NoError(t, w.Add(base.MakeInternalKey(key.UserKey, 0, key.Kind()), val)) + return nil + }, + func(start, end []byte, seqNum uint64) error { + require.NoError(t, w.DeleteRange(start, end)) + return nil + }, + func(start, end []byte, keys []keyspan.Key) error { + s := keyspan.Span{ + Start: start, + End: end, + Keys: keys, + KeysOrder: 0, + } + require.NoError(t, rangekey.Encode(&s, func(k base.InternalKey, v []byte) error { + return w.AddRangeKey(base.MakeInternalKey(k.UserKey, 0, k.Kind()), v) + })) + return nil + }, + func(sst *SharedSSTMeta) error { + sharedSSTs = append(sharedSSTs, *sst) + return 
nil + }, + ) + require.NoError(t, err) + require.NoError(t, w.Close()) + + _, err = to.IngestAndExcise([]string{sstPath}, sharedSSTs, KeyRange{Start: startKey, End: endKey}) + require.NoError(t, err) + return fmt.Sprintf("replicated %d shared SSTs", len(sharedSSTs)) + + case "get": + return runGetCmd(t, td, d) + + case "iter": + o := &IterOptions{KeyTypes: IterKeyTypePointsAndRanges} + var reader Reader + reader = d + for _, arg := range td.CmdArgs { + switch arg.Key { + case "mask-suffix": + o.RangeKeyMasking.Suffix = []byte(arg.Vals[0]) + case "mask-filter": + o.RangeKeyMasking.Filter = func() BlockPropertyFilterMask { + return sstable.NewTestKeysMaskingFilter() + } + case "snapshot": + reader = efos[arg.Vals[0]] + } + } + iter, err := reader.NewIter(o) + if err != nil { + return err.Error() + } + return runIterCmd(td, iter, true) + + case "lsm": + return runLSMCmd(td, d) + + case "metrics": + // The asynchronous loading of table stats can change metrics, so + // wait for all the tables' stats to be loaded. 
+ d.mu.Lock() + d.waitTableStats() + d.mu.Unlock() + + return d.Metrics().StringForTests() + + case "wait-pending-table-stats": + return runTableStatsCmd(td, d) + + case "excise": + ve := &versionEdit{ + DeletedFiles: map[deletedFileEntry]*fileMetadata{}, + } + var exciseSpan KeyRange + if len(td.CmdArgs) != 2 { + panic("insufficient args for excise command") + } + exciseSpan.Start = []byte(td.CmdArgs[0].Key) + exciseSpan.End = []byte(td.CmdArgs[1].Key) + + d.mu.Lock() + d.mu.versions.logLock() + d.mu.Unlock() + current := d.mu.versions.currentVersion() + for level := range current.Levels { + iter := current.Levels[level].Iter() + for m := iter.SeekGE(d.cmp, exciseSpan.Start); m != nil && d.cmp(m.Smallest.UserKey, exciseSpan.End) < 0; m = iter.Next() { + _, err := d.excise(exciseSpan, m, ve, level) + if err != nil { + d.mu.Lock() + d.mu.versions.logUnlock() + d.mu.Unlock() + return fmt.Sprintf("error when excising %s: %s", m.FileNum, err.Error()) + } + } + } + d.mu.Lock() + d.mu.versions.logUnlock() + d.mu.Unlock() + return fmt.Sprintf("would excise %d files, use ingest-and-excise to excise.\n%s", len(ve.DeletedFiles), ve.String()) + + case "file-only-snapshot": + if len(td.CmdArgs) != 1 { + panic("insufficient args for file-only-snapshot command") + } + name := td.CmdArgs[0].Key + var keyRanges []KeyRange + for _, line := range strings.Split(td.Input, "\n") { + fields := strings.Fields(line) + if len(fields) != 2 { + return "expected two fields for file-only snapshot KeyRanges" + } + kr := KeyRange{Start: []byte(fields[0]), End: []byte(fields[1])} + keyRanges = append(keyRanges, kr) + } + + s := d.NewEventuallyFileOnlySnapshot(keyRanges) + efos[name] = s + return "ok" + + case "wait-for-file-only-snapshot": + if len(td.CmdArgs) != 1 { + panic("insufficient args for file-only-snapshot command") + } + name := td.CmdArgs[0].Key + err := efos[name].WaitForFileOnlySnapshot(context.TODO(), 1*time.Millisecond) + if err != nil { + return err.Error() + } + return "ok" + + 
case "unblock": + name := td.CmdArgs[0].Key + blockedCompactionsMu.Lock() + c := compactions[name] + delete(compactions, name) + blockedCompactionsMu.Unlock() + c.unblock <- struct{}{} + return "ok" + + case "compact": + async := false + var otherArgs []datadriven.CmdArg + var bc *blockedCompaction + for i := range td.CmdArgs { + switch td.CmdArgs[i].Key { + case "block": + name := td.CmdArgs[i].Vals[0] + bc = &blockedCompaction{startBlock: make(chan struct{}), unblock: make(chan struct{})} + blockedCompactionsMu.Lock() + compactions[name] = bc + blockNextCompaction = true + blockedCompactionName = name + blockedCompactionsMu.Unlock() + async = true + default: + otherArgs = append(otherArgs, td.CmdArgs[i]) + } + } + var tdClone datadriven.TestData + tdClone = *td + tdClone.CmdArgs = otherArgs + if !async { + err := runCompactCmd(td, d) + if err != nil { + return err.Error() + } + } else { + wg.Add(1) + go func() { + defer wg.Done() + _ = runCompactCmd(&tdClone, d) + }() + <-bc.startBlock + return "spun off in separate goroutine" + } + return "ok" + case "wait-for-background-error": + err := <-backgroundErrs + return err.Error() + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +func TestIngestExternal(t *testing.T) { + var mem vfs.FS + var d *DB + var flushed bool + defer func() { + require.NoError(t, d.Close()) + }() + + var remoteStorage remote.Storage + + reset := func() { + if d != nil { + require.NoError(t, d.Close()) + } + + mem = vfs.NewMem() + require.NoError(t, mem.MkdirAll("ext", 0755)) + remoteStorage = remote.NewInMem() + opts := &Options{ + FS: mem, + L0CompactionThreshold: 100, + L0StopWritesThreshold: 100, + DebugCheck: DebugCheckLevels, + EventListener: &EventListener{FlushEnd: func(info FlushInfo) { + flushed = true + }}, + FormatMajorVersion: FormatVirtualSSTables, + } + opts.Experimental.RemoteStorage = remote.MakeSimpleFactory(map[remote.Locator]remote.Storage{ + "external-locator": remoteStorage, + }) + 
opts.Experimental.CreateOnShared = remote.CreateOnSharedNone + // Disable automatic compactions because otherwise we'll race with + // delete-only compactions triggered by ingesting range tombstones. + opts.DisableAutomaticCompactions = true + + var err error + d, err = Open("", opts) + require.NoError(t, err) + require.NoError(t, d.SetCreatorID(1)) + } + reset() + + datadriven.RunTest(t, "testdata/ingest_external", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "reset": + reset() + return "" + case "batch": + b := d.NewIndexedBatch() + if err := runBatchDefineCmd(td, b); err != nil { + return err.Error() + } + if err := b.Commit(nil); err != nil { + return err.Error() + } + return "" + case "build-remote": + if err := runBuildRemoteCmd(td, d, remoteStorage); err != nil { + return err.Error() + } + return "" + + case "flush": + if err := d.Flush(); err != nil { + return err.Error() + } + return "" + + case "ingest-external": + flushed = false + if err := runIngestExternalCmd(td, d, "external-locator"); err != nil { + return err.Error() + } + // Wait for a possible flush. + d.mu.Lock() + for d.mu.compact.flushing { + d.mu.compact.cond.Wait() + } + d.mu.Unlock() + if flushed { + return "memtable flushed" + } + return "" + + case "get": + return runGetCmd(t, td, d) + + case "iter": + iter, _ := d.NewIter(&IterOptions{ + KeyTypes: IterKeyTypePointsAndRanges, + }) + return runIterCmd(td, iter, true) + + case "lsm": + return runLSMCmd(td, d) + + case "metrics": + // The asynchronous loading of table stats can change metrics, so + // wait for all the tables' stats to be loaded. 
+ d.mu.Lock() + d.waitTableStats() + d.mu.Unlock() + + return d.Metrics().StringForTests() + + case "wait-pending-table-stats": + return runTableStatsCmd(td, d) + + case "compact": + if len(td.CmdArgs) != 2 { + panic("insufficient args for compact command") + } + l := td.CmdArgs[0].Key + r := td.CmdArgs[1].Key + err := d.Compact([]byte(l), []byte(r), false) + if err != nil { + return err.Error() + } + return "" + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +func TestIngestMemtableOverlaps(t *testing.T) { + comparers := []Comparer{ + {Name: "default", Compare: DefaultComparer.Compare, FormatKey: DefaultComparer.FormatKey}, + { + Name: "reverse", + Compare: func(a, b []byte) int { return DefaultComparer.Compare(b, a) }, + FormatKey: DefaultComparer.FormatKey, + }, + } + m := make(map[string]*Comparer) + for i := range comparers { + c := &comparers[i] + m[c.Name] = c + } + + for _, comparer := range comparers { + t.Run(comparer.Name, func(t *testing.T) { + var mem *memTable + + parseMeta := func(s string) *fileMetadata { + parts := strings.Split(s, "-") + meta := &fileMetadata{} + if len(parts) != 2 { + t.Fatalf("malformed table spec: %s", s) + } + var smallest, largest base.InternalKey + if strings.Contains(parts[0], ".") { + if !strings.Contains(parts[1], ".") { + t.Fatalf("malformed table spec: %s", s) + } + smallest = base.ParseInternalKey(parts[0]) + largest = base.ParseInternalKey(parts[1]) + } else { + smallest = InternalKey{UserKey: []byte(parts[0])} + largest = InternalKey{UserKey: []byte(parts[1])} + } + // If we're using a reverse comparer, flip the file bounds. 
+ if mem.cmp(smallest.UserKey, largest.UserKey) > 0 { + smallest, largest = largest, smallest + } + meta.ExtendPointKeyBounds(comparer.Compare, smallest, largest) + meta.InitPhysicalBacking() + return meta + } + + datadriven.RunTest(t, "testdata/ingest_memtable_overlaps", func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "define": + b := newBatch(nil) + if err := runBatchDefineCmd(d, b); err != nil { + return err.Error() + } + + opts := &Options{ + Comparer: &comparer, + } + opts.EnsureDefaults().WithFSDefaults() + if len(d.CmdArgs) > 1 { + return fmt.Sprintf("%s expects at most 1 argument", d.Cmd) + } + if len(d.CmdArgs) == 1 { + opts.Comparer = m[d.CmdArgs[0].String()] + if opts.Comparer == nil { + return fmt.Sprintf("%s unknown comparer: %s", d.Cmd, d.CmdArgs[0].String()) + } + } + + mem = newMemTable(memTableOptions{Options: opts}) + if err := mem.apply(b, 0); err != nil { + return err.Error() + } + return "" + + case "overlaps": + var buf bytes.Buffer + for _, data := range strings.Split(d.Input, "\n") { + var keyRanges []internalKeyRange + for _, part := range strings.Fields(data) { + meta := parseMeta(part) + keyRanges = append(keyRanges, internalKeyRange{smallest: meta.Smallest, largest: meta.Largest}) + } + fmt.Fprintf(&buf, "%t\n", ingestMemtableOverlaps(mem.cmp, mem, keyRanges)) + } + return buf.String() + + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) + }) + } +} + +func TestKeyRangeBasic(t *testing.T) { + cmp := base.DefaultComparer.Compare + k1 := KeyRange{Start: []byte("b"), End: []byte("c")} + + // Tests for Contains() + require.True(t, k1.Contains(cmp, base.MakeInternalKey([]byte("b"), 1, InternalKeyKindSet))) + require.False(t, k1.Contains(cmp, base.MakeInternalKey([]byte("c"), 1, InternalKeyKindSet))) + require.True(t, k1.Contains(cmp, base.MakeInternalKey([]byte("bb"), 1, InternalKeyKindSet))) + require.True(t, k1.Contains(cmp, base.MakeExclusiveSentinelKey(InternalKeyKindRangeDelete, 
[]byte("c")))) + + m1 := &fileMetadata{ + Smallest: base.MakeInternalKey([]byte("b"), 1, InternalKeyKindSet), + Largest: base.MakeInternalKey([]byte("c"), 1, InternalKeyKindSet), + } + require.True(t, k1.Overlaps(cmp, m1)) + m2 := &fileMetadata{ + Smallest: base.MakeInternalKey([]byte("c"), 1, InternalKeyKindSet), + Largest: base.MakeInternalKey([]byte("d"), 1, InternalKeyKindSet), + } + require.False(t, k1.Overlaps(cmp, m2)) + m3 := &fileMetadata{ + Smallest: base.MakeInternalKey([]byte("a"), 1, InternalKeyKindSet), + Largest: base.MakeExclusiveSentinelKey(InternalKeyKindRangeDelete, []byte("b")), + } + require.False(t, k1.Overlaps(cmp, m3)) + m4 := &fileMetadata{ + Smallest: base.MakeInternalKey([]byte("a"), 1, InternalKeyKindSet), + Largest: base.MakeInternalKey([]byte("b"), 1, InternalKeyKindSet), + } + require.True(t, k1.Overlaps(cmp, m4)) +} + +func BenchmarkIngestOverlappingMemtable(b *testing.B) { + assertNoError := func(err error) { + b.Helper() + if err != nil { + b.Fatal(err) + } + } + + for count := 1; count < 6; count++ { + b.Run(fmt.Sprintf("memtables=%d", count), func(b *testing.B) { + for i := 0; i < b.N; i++ { + b.StopTimer() + mem := vfs.NewMem() + d, err := Open("", &Options{ + FS: mem, + }) + assertNoError(err) + + // Create memtables. + for { + assertNoError(d.Set([]byte("a"), nil, nil)) + d.mu.Lock() + done := len(d.mu.mem.queue) == count + d.mu.Unlock() + if done { + break + } + } + + // Create the overlapping sstable that will force a flush when ingested. + f, err := mem.Create("ext") + assertNoError(err) + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) + assertNoError(w.Set([]byte("a"), nil)) + assertNoError(w.Close()) + + b.StartTimer() + assertNoError(d.Ingest([]string{"ext"})) + } + }) + } +} + +func TestIngestTargetLevel(t *testing.T) { + var d *DB + defer func() { + if d != nil { + // Ignore errors because this test defines fake in-progress transactions + // that prohibit clean shutdown. 
+ _ = d.Close() + } + }() + + parseMeta := func(s string) *fileMetadata { + var rkey bool + if len(s) >= 4 && s[0:4] == "rkey" { + rkey = true + s = s[5:] + } + parts := strings.Split(s, "-") + if len(parts) != 2 { + t.Fatalf("malformed table spec: %s", s) + } + var m *fileMetadata + if rkey { + m = (&fileMetadata{}).ExtendRangeKeyBounds( + d.cmp, + InternalKey{UserKey: []byte(parts[0])}, + InternalKey{UserKey: []byte(parts[1])}, + ) + } else { + m = (&fileMetadata{}).ExtendPointKeyBounds( + d.cmp, + InternalKey{UserKey: []byte(parts[0])}, + InternalKey{UserKey: []byte(parts[1])}, + ) + } + m.InitPhysicalBacking() + return m + } + + datadriven.RunTest(t, "testdata/ingest_target_level", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "define": + if d != nil { + // Ignore errors because this test defines fake in-progress + // transactions that prohibit clean shutdown. + _ = d.Close() + } + + var err error + opts := Options{ + FormatMajorVersion: internalFormatNewest, + } + opts.WithFSDefaults() + if d, err = runDBDefineCmd(td, &opts); err != nil { + return err.Error() + } + + readState := d.loadReadState() + c := &checkConfig{ + logger: d.opts.Logger, + comparer: d.opts.Comparer, + readState: readState, + newIters: d.newIters, + // TODO: runDBDefineCmd doesn't properly update the visible + // sequence number. So we have to explicitly configure level checker with a very large + // sequence number, otherwise the DB appears empty. 
+ seqNum: InternalKeySeqNumMax, + } + if err := checkLevelsInternal(c); err != nil { + return err.Error() + } + readState.unref() + + d.mu.Lock() + s := d.mu.versions.currentVersion().String() + d.mu.Unlock() + return s + + case "target": + var buf bytes.Buffer + suggestSplit := false + for _, cmd := range td.CmdArgs { + switch cmd.Key { + case "suggest-split": + suggestSplit = true + } + } + for _, target := range strings.Split(td.Input, "\n") { + meta := parseMeta(target) + level, overlapFile, err := ingestTargetLevel( + d.newIters, d.tableNewRangeKeyIter, IterOptions{logger: d.opts.Logger}, + d.opts.Comparer, d.mu.versions.currentVersion(), 1, d.mu.compact.inProgress, meta, + suggestSplit) + if err != nil { + return err.Error() + } + if overlapFile != nil { + fmt.Fprintf(&buf, "%d (split file: %s)\n", level, overlapFile.FileNum) + } else { + fmt.Fprintf(&buf, "%d\n", level) + } + } + return buf.String() + + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +func TestIngest(t *testing.T) { + var mem vfs.FS + var d *DB + var flushed bool + defer func() { + require.NoError(t, d.Close()) + }() + + reset := func(split bool) { + if d != nil { + require.NoError(t, d.Close()) + } + + mem = vfs.NewMem() + require.NoError(t, mem.MkdirAll("ext", 0755)) + opts := &Options{ + FS: mem, + L0CompactionThreshold: 100, + L0StopWritesThreshold: 100, + DebugCheck: DebugCheckLevels, + EventListener: &EventListener{FlushEnd: func(info FlushInfo) { + flushed = true + }}, + FormatMajorVersion: internalFormatNewest, + } + opts.Experimental.IngestSplit = func() bool { + return split + } + // Disable automatic compactions because otherwise we'll race with + // delete-only compactions triggered by ingesting range tombstones. 
+ opts.DisableAutomaticCompactions = true + + var err error + d, err = Open("", opts) + require.NoError(t, err) + } + reset(false /* split */) + + datadriven.RunTest(t, "testdata/ingest", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "reset": + split := false + for _, cmd := range td.CmdArgs { + switch cmd.Key { + case "enable-split": + split = true + default: + return fmt.Sprintf("unexpected key: %s", cmd.Key) + } + } + reset(split) + return "" + case "batch": + b := d.NewIndexedBatch() + if err := runBatchDefineCmd(td, b); err != nil { + return err.Error() + } + if err := b.Commit(nil); err != nil { + return err.Error() + } + return "" + + case "build": + if err := runBuildCmd(td, d, mem); err != nil { + return err.Error() + } + return "" + + case "ingest": + flushed = false + if err := runIngestCmd(td, d, mem); err != nil { + return err.Error() + } + // Wait for a possible flush. + d.mu.Lock() + for d.mu.compact.flushing { + d.mu.compact.cond.Wait() + } + d.mu.Unlock() + if flushed { + return "memtable flushed" + } + return "" + + case "get": + return runGetCmd(t, td, d) + + case "iter": + iter, _ := d.NewIter(&IterOptions{ + KeyTypes: IterKeyTypePointsAndRanges, + }) + return runIterCmd(td, iter, true) + + case "lsm": + return runLSMCmd(td, d) + + case "metrics": + // The asynchronous loading of table stats can change metrics, so + // wait for all the tables' stats to be loaded. 
+ d.mu.Lock() + d.waitTableStats() + d.mu.Unlock() + + return d.Metrics().StringForTests() + + case "wait-pending-table-stats": + return runTableStatsCmd(td, d) + + case "compact": + if len(td.CmdArgs) != 2 { + panic("insufficient args for compact command") + } + l := td.CmdArgs[0].Key + r := td.CmdArgs[1].Key + err := d.Compact([]byte(l), []byte(r), false) + if err != nil { + return err.Error() + } + return "" + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) +} + +func TestIngestError(t *testing.T) { + for i := int32(0); ; i++ { + mem := vfs.NewMem() + + f0, err := mem.Create("ext0") + require.NoError(t, err) + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f0), sstable.WriterOptions{}) + require.NoError(t, w.Set([]byte("d"), nil)) + require.NoError(t, w.Close()) + f1, err := mem.Create("ext1") + require.NoError(t, err) + w = sstable.NewWriter(objstorageprovider.NewFileWritable(f1), sstable.WriterOptions{}) + require.NoError(t, w.Set([]byte("d"), nil)) + require.NoError(t, w.Close()) + + ii := errorfs.OnIndex(-1) + d, err := Open("", &Options{ + FS: errorfs.Wrap(mem, errorfs.ErrInjected.If(ii)), + Logger: panicLogger{}, + L0CompactionThreshold: 8, + }) + require.NoError(t, err) + // Force the creation of an L0 sstable that overlaps with the tables + // we'll attempt to ingest. This ensures that we exercise filesystem + // codepaths when determining the ingest target level. + require.NoError(t, d.Set([]byte("a"), nil, nil)) + require.NoError(t, d.Set([]byte("d"), nil, nil)) + require.NoError(t, d.Flush()) + + t.Run(fmt.Sprintf("index-%d", i), func(t *testing.T) { + defer func() { + if r := recover(); r != nil { + if e, ok := r.(error); ok && errors.Is(e, errorfs.ErrInjected) { + return + } + // d.opts.Logger.Fatalf won't propagate ErrInjected + // itself, but should contain the error message. 
+ if strings.HasSuffix(fmt.Sprint(r), errorfs.ErrInjected.Error()) { + return + } + t.Fatal(r) + } + }() + + ii.Store(i) + err1 := d.Ingest([]string{"ext0"}) + err2 := d.Ingest([]string{"ext1"}) + err := firstError(err1, err2) + if err != nil && !errors.Is(err, errorfs.ErrInjected) { + t.Fatal(err) + } + }) + + // d.Close may error if we failed to flush the manifest. + _ = d.Close() + + // If the injector's index is non-negative, the i-th filesystem + // operation was never executed. + if ii.Load() >= 0 { + break + } + } +} + +func TestIngestIdempotence(t *testing.T) { + // Use an on-disk filesystem, because Ingest with a MemFS will copy, not + // link the ingested file. + dir, err := os.MkdirTemp("", "ingest-idempotence") + require.NoError(t, err) + defer os.RemoveAll(dir) + fs := vfs.Default + + path := fs.PathJoin(dir, "ext") + f, err := fs.Create(fs.PathJoin(dir, "ext")) + require.NoError(t, err) + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) + require.NoError(t, w.Set([]byte("d"), nil)) + require.NoError(t, w.Close()) + + d, err := Open(dir, &Options{ + FS: fs, + }) + require.NoError(t, err) + const count = 4 + for i := 0; i < count; i++ { + ingestPath := fs.PathJoin(dir, fmt.Sprintf("ext%d", i)) + require.NoError(t, fs.Link(path, ingestPath)) + require.NoError(t, d.Ingest([]string{ingestPath})) + } + require.NoError(t, d.Close()) +} + +func TestIngestCompact(t *testing.T) { + mem := vfs.NewMem() + lel := MakeLoggingEventListener(&base.InMemLogger{}) + d, err := Open("", &Options{ + EventListener: &lel, + FS: mem, + L0CompactionThreshold: 1, + L0StopWritesThreshold: 1, + }) + require.NoError(t, err) + + src := func(i int) string { + return fmt.Sprintf("ext%d", i) + } + f, err := mem.Create(src(0)) + require.NoError(t, err) + + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) + key := []byte("a") + require.NoError(t, w.Add(base.MakeInternalKey(key, 0, InternalKeyKindSet), nil)) + 
require.NoError(t, w.Close()) + + // Make N copies of the sstable. + const count = 20 + for i := 1; i < count; i++ { + require.NoError(t, vfs.Copy(d.opts.FS, src(0), src(i))) + } + + // Ingest the same sstable multiple times. Compaction should take place as + // ingestion happens, preventing an indefinite write stall from occurring. + for i := 0; i < count; i++ { + if i == 10 { + // Half-way through the ingestions, set a key in the memtable to force + // overlap with the memtable which will require the memtable to be + // flushed. + require.NoError(t, d.Set(key, nil, nil)) + } + require.NoError(t, d.Ingest([]string{src(i)})) + } + + require.NoError(t, d.Close()) +} + +func TestConcurrentIngest(t *testing.T) { + mem := vfs.NewMem() + d, err := Open("", &Options{ + FS: mem, + }) + require.NoError(t, err) + + // Create an sstable with 2 keys. This is necessary to trigger the overlap + // bug because an sstable with a single key will not have overlap in internal + // key space and the sequence number assignment had already guaranteed + // correct ordering. + src := func(i int) string { + return fmt.Sprintf("ext%d", i) + } + f, err := mem.Create(src(0)) + require.NoError(t, err) + + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) + require.NoError(t, w.Set([]byte("a"), nil)) + require.NoError(t, w.Set([]byte("b"), nil)) + require.NoError(t, w.Close()) + + // Make N copies of the sstable. + errCh := make(chan error, 5) + for i := 1; i < cap(errCh); i++ { + require.NoError(t, vfs.Copy(d.opts.FS, src(0), src(i))) + } + + // Perform N ingestions concurrently. 
+ for i := 0; i < cap(errCh); i++ { + go func(i int) { + err := d.Ingest([]string{src(i)}) + if err == nil { + if _, err = d.opts.FS.Stat(src(i)); oserror.IsNotExist(err) { + err = nil + } + } + errCh <- err + }(i) + } + for i := 0; i < cap(errCh); i++ { + require.NoError(t, <-errCh) + } + + require.NoError(t, d.Close()) +} + +func TestConcurrentIngestCompact(t *testing.T) { + for i := 0; i < 2; i++ { + t.Run("", func(t *testing.T) { + mem := vfs.NewMem() + compactionReady := make(chan struct{}) + compactionBegin := make(chan struct{}) + d, err := Open("", &Options{ + FS: mem, + EventListener: &EventListener{ + TableCreated: func(info TableCreateInfo) { + if info.Reason == "compacting" { + close(compactionReady) + <-compactionBegin + } + }, + }, + }) + require.NoError(t, err) + + ingest := func(keys ...string) { + t.Helper() + f, err := mem.Create("ext") + require.NoError(t, err) + + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) + for _, k := range keys { + require.NoError(t, w.Set([]byte(k), nil)) + } + require.NoError(t, w.Close()) + require.NoError(t, d.Ingest([]string{"ext"})) + } + + compact := func(start, end string) { + t.Helper() + require.NoError(t, d.Compact([]byte(start), []byte(end), false)) + } + + lsm := func() string { + d.mu.Lock() + s := d.mu.versions.currentVersion().String() + d.mu.Unlock() + return s + } + + expectLSM := func(expected string) { + t.Helper() + expected = strings.TrimSpace(expected) + actual := strings.TrimSpace(lsm()) + if expected != actual { + t.Fatalf("expected\n%s\nbut found\n%s", expected, actual) + } + } + + ingest("a") + ingest("a") + ingest("c") + ingest("c") + + expectLSM(` +0.0: + 000005:[a#11,SET-a#11,SET] + 000007:[c#13,SET-c#13,SET] +6: + 000004:[a#10,SET-a#10,SET] + 000006:[c#12,SET-c#12,SET] +`) + + // At this point ingestion of an sstable containing only key "b" will be + // targeted at L6. 
Yet a concurrent compaction of sstables 5 and 7 will + // create a new sstable in L6 spanning ["a"-"c"]. So the ingestion must + // actually target L5. + + switch i { + case 0: + // Compact, then ingest. + go func() { + <-compactionReady + + ingest("b") + + close(compactionBegin) + }() + + compact("a", "z") + + expectLSM(` +0.0: + 000009:[b#14,SET-b#14,SET] +6: + 000008:[a#0,SET-c#0,SET] +`) + + case 1: + // Ingest, then compact + var wg sync.WaitGroup + wg.Add(1) + go func() { + defer wg.Done() + close(compactionBegin) + compact("a", "z") + }() + + ingest("b") + wg.Wait() + + // Because we're performing the ingestion and compaction concurrently, + // we can't guarantee any particular LSM structure at this point. The + // test will fail with an assertion error due to overlapping sstables + // if there is insufficient synchronization between ingestion and + // compaction. + } + + require.NoError(t, d.Close()) + }) + } +} + +func TestIngestFlushQueuedMemTable(t *testing.T) { + // Verify that ingestion forces a flush of a queued memtable. + + // Test with a format major version prior to FormatFlushableIngest and one + // after. Both should result in the same statistic calculations. + for _, fmv := range []FormatMajorVersion{FormatFlushableIngest - 1, internalFormatNewest} { + func(fmv FormatMajorVersion) { + mem := vfs.NewMem() + d, err := Open("", &Options{ + FS: mem, + FormatMajorVersion: fmv, + }) + require.NoError(t, err) + + // Add the key "a" to the memtable, then fill up the memtable with the key + // "b". The ingested sstable will only overlap with the queued memtable. 
+ require.NoError(t, d.Set([]byte("a"), nil, nil)) + for { + require.NoError(t, d.Set([]byte("b"), nil, nil)) + d.mu.Lock() + done := len(d.mu.mem.queue) == 2 + d.mu.Unlock() + if done { + break + } + } + + ingest := func(keys ...string) { + t.Helper() + f, err := mem.Create("ext") + require.NoError(t, err) + + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ + TableFormat: fmv.MinTableFormat(), + }) + for _, k := range keys { + require.NoError(t, w.Set([]byte(k), nil)) + } + require.NoError(t, w.Close()) + stats, err := d.IngestWithStats([]string{"ext"}) + require.NoError(t, err) + require.Equal(t, stats.ApproxIngestedIntoL0Bytes, stats.Bytes) + require.Equal(t, stats.MemtableOverlappingFiles, 1) + require.Less(t, uint64(0), stats.Bytes) + } + + ingest("a") + + require.NoError(t, d.Close()) + }(fmv) + } +} + +func TestIngestStats(t *testing.T) { + mem := vfs.NewMem() + d, err := Open("", &Options{ + FS: mem, + }) + require.NoError(t, err) + + ingest := func(expectedLevel int, keys ...string) { + t.Helper() + f, err := mem.Create("ext") + require.NoError(t, err) + + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) + for _, k := range keys { + require.NoError(t, w.Set([]byte(k), nil)) + } + require.NoError(t, w.Close()) + stats, err := d.IngestWithStats([]string{"ext"}) + require.NoError(t, err) + if expectedLevel == 0 { + require.Equal(t, stats.ApproxIngestedIntoL0Bytes, stats.Bytes) + } else { + require.EqualValues(t, 0, stats.ApproxIngestedIntoL0Bytes) + } + require.Less(t, uint64(0), stats.Bytes) + } + ingest(6, "a") + ingest(0, "a") + ingest(6, "b", "g") + ingest(0, "c") + require.NoError(t, d.Close()) +} + +func TestIngestFlushQueuedLargeBatch(t *testing.T) { + // Verify that ingestion forces a flush of a queued large batch. 
+ + mem := vfs.NewMem() + d, err := Open("", &Options{ + FS: mem, + }) + require.NoError(t, err) + + // The default large batch threshold is slightly less than 1/2 of the + // memtable size which makes triggering a problem with flushing queued large + // batches irritating. Manually adjust the threshold to 1/8 of the memtable + // size in order to more easily create a situation where a large batch is + // queued but not automatically flushed. + d.mu.Lock() + d.largeBatchThreshold = d.opts.MemTableSize / 8 + d.mu.Unlock() + + // Set a record with a large value. This will be transformed into a large + // batch and placed in the flushable queue. + require.NoError(t, d.Set([]byte("a"), bytes.Repeat([]byte("v"), int(d.largeBatchThreshold)), nil)) + + ingest := func(keys ...string) { + t.Helper() + f, err := mem.Create("ext") + require.NoError(t, err) + + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) + for _, k := range keys { + require.NoError(t, w.Set([]byte(k), nil)) + } + require.NoError(t, w.Close()) + require.NoError(t, d.Ingest([]string{"ext"})) + } + + ingest("a") + + require.NoError(t, d.Close()) +} + +func TestIngestMemtablePendingOverlap(t *testing.T) { + mem := vfs.NewMem() + d, err := Open("", &Options{ + FS: mem, + }) + require.NoError(t, err) + + d.mu.Lock() + // Use a custom commit pipeline apply function to give us control over + // timing of events. 
+ assignedBatch := make(chan struct{}) + applyBatch := make(chan struct{}) + originalApply := d.commit.env.apply + d.commit.env.apply = func(b *Batch, mem *memTable) error { + assignedBatch <- struct{}{} + applyBatch <- struct{}{} + return originalApply(b, mem) + } + d.mu.Unlock() + + ingest := func(keys ...string) { + t.Helper() + f, err := mem.Create("ext") + require.NoError(t, err) + + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) + for _, k := range keys { + require.NoError(t, w.Set([]byte(k), nil)) + } + require.NoError(t, w.Close()) + require.NoError(t, d.Ingest([]string{"ext"})) + } + + var wg sync.WaitGroup + wg.Add(2) + + // First, Set('c') begins. This call will: + // + // * enqueue the batch to the pending queue. + // * allocate a sequence number `x`. + // * write the batch to the WAL. + // + // and then block until we read from the `applyBatch` channel down below. + go func() { + err := d.Set([]byte("c"), nil, nil) + if err != nil { + t.Error(err) + } + wg.Done() + }() + + // When the above Set('c') is ready to apply, it sends on the + // `assignedBatch` channel. Once that happens, we start Ingest('a', 'c'). + // The Ingest('a', 'c') allocates sequence number `x + 1`. + go func() { + // Wait until the Set has grabbed a sequence number before ingesting. + <-assignedBatch + ingest("a", "c") + wg.Done() + }() + + // The Set('c')#1 and Ingest('a', 'c')#2 are both pending. To maintain + // sequence number invariants, the Set needs to be applied and flushed + // before the Ingest determines its target level. + // + // Sleep a bit to ensure that the Ingest has time to call into + // AllocateSeqNum. Once it allocates its sequence number, it should see + // that there are unpublished sequence numbers below it and spin until the + // Set's sequence number is published. After sleeping, read from + // `applyBatch` to actually allow the Set to apply and publish its + // sequence number. 
+ time.Sleep(100 * time.Millisecond) + <-applyBatch + + // Wait for both calls to complete. + wg.Wait() + require.NoError(t, d.Flush()) + require.NoError(t, d.CheckLevels(nil)) + require.NoError(t, d.Close()) +} + +type testLogger struct { + t testing.TB +} + +func (l testLogger) Infof(format string, args ...interface{}) { + l.t.Logf(format, args...) +} + +func (l testLogger) Errorf(format string, args ...interface{}) { + l.t.Logf(format, args...) +} + +func (l testLogger) Fatalf(format string, args ...interface{}) { + l.t.Fatalf(format, args...) +} + +// TestIngestMemtableOverlapRace is a regression test for the race described in +// #2196. If an ingest that checks for overlap with the mutable memtable and +// finds no overlap, it must not allow overlapping keys with later sequence +// numbers to be applied to the memtable and the memtable to be flushed before +// the ingest completes. +// +// This test operates by committing the same key concurrently: +// - 1 goroutine repeatedly ingests the same sstable writing the key `foo` +// - n goroutines repeatedly apply batches writing the key `foo` and trigger +// flushes. +// +// After a while, the database is closed and the manifest is verified. Version +// edits should contain new files with monotonically increasing sequence +// numbers, since every flush and every ingest conflicts with one another. +func TestIngestMemtableOverlapRace(t *testing.T) { + mem := vfs.NewMem() + el := MakeLoggingEventListener(testLogger{t: t}) + d, err := Open("", &Options{ + FS: mem, + // Disable automatic compactions to keep the manifest clean; only + // flushes and ingests. + DisableAutomaticCompactions: true, + // Disable the WAL to speed up batch commits. + DisableWAL: true, + EventListener: &el, + // We're endlessly appending to L0 without clearing it, so set a maximal + // stop writes threshold. 
+ L0StopWritesThreshold: math.MaxInt, + // Accumulating more than 1 immutable memtable doesn't help us exercise + // the bug, since the committed keys need to be flushed promptly. + MemTableStopWritesThreshold: 2, + }) + require.NoError(t, err) + + // Prepare a sstable `ext` deleting foo. + f, err := mem.Create("ext") + require.NoError(t, err) + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) + require.NoError(t, w.Delete([]byte("foo"))) + require.NoError(t, w.Close()) + + var done atomic.Bool + const numSetters = 2 + var wg sync.WaitGroup + wg.Add(numSetters + 1) + + untilDone := func(fn func()) { + defer wg.Done() + for !done.Load() { + fn() + } + } + + // Ingest in the background. + totalIngests := 0 + go untilDone(func() { + filename := fmt.Sprintf("ext%d", totalIngests) + require.NoError(t, mem.Link("ext", filename)) + require.NoError(t, d.Ingest([]string{filename})) + totalIngests++ + }) + + // Apply batches and trigger flushes in the background. + wo := &WriteOptions{Sync: false} + var localCommits [numSetters]int + for i := 0; i < numSetters; i++ { + i := i + v := []byte(fmt.Sprintf("v%d", i+1)) + go untilDone(func() { + // Commit a batch setting foo=vN. + b := d.NewBatch() + require.NoError(t, b.Set([]byte("foo"), v, nil)) + require.NoError(t, b.Commit(wo)) + localCommits[i]++ + d.AsyncFlush() + }) + } + time.Sleep(100 * time.Millisecond) + done.Store(true) + wg.Wait() + + var totalCommits int + for i := 0; i < numSetters; i++ { + totalCommits += localCommits[i] + } + m := d.Metrics() + tot := m.Total() + t.Logf("Committed %d batches.", totalCommits) + t.Logf("Flushed %d times.", m.Flush.Count) + t.Logf("Ingested %d sstables.", tot.TablesIngested) + require.NoError(t, d.CheckLevels(nil)) + require.NoError(t, d.Close()) + + // Replay the manifest. Every flush and ingest is a separate version edit.
+ // Since they all write the same key and compactions are disabled, sequence + // numbers of new files should be monotonically increasing. + // + // This check is necessary because most of these sstables are ingested into + // L0. The L0 sublevels construction will order them by LargestSeqNum, even + // if they're added to L0 out-of-order. The CheckLevels call at the end of + // the test may find that the sublevels are all appropriately ordered, but + // the manifest may reveal they were added to the LSM out-of-order. + dbDesc, err := Peek("", mem) + require.NoError(t, err) + require.True(t, dbDesc.Exists) + f, err = mem.Open(dbDesc.ManifestFilename) + require.NoError(t, err) + defer f.Close() + rr := record.NewReader(f, 0 /* logNum */) + var largest *fileMetadata + for { + r, err := rr.Next() + if err == io.EOF || err == record.ErrInvalidChunk { + break + } + require.NoError(t, err) + var ve manifest.VersionEdit + require.NoError(t, ve.Decode(r)) + t.Log(ve.String()) + for _, f := range ve.NewFiles { + if largest != nil { + require.Equal(t, 0, f.Level) + if largest.LargestSeqNum > f.Meta.LargestSeqNum { + t.Fatalf("previous largest file %s has sequence number > next file %s", largest, f.Meta) + } + } + largest = f.Meta + } + } +} + +type ingestCrashFS struct { + vfs.FS +} + +func (fs ingestCrashFS) Link(oldname, newname string) error { + if err := fs.FS.Link(oldname, newname); err != nil { + return err + } + panic(errorfs.ErrInjected) +} + +type noRemoveFS struct { + vfs.FS +} + +func (fs noRemoveFS) Remove(string) error { + return errorfs.ErrInjected +} + +func TestIngestFileNumReuseCrash(t *testing.T) { + const count = 10 + // Use an on-disk filesystem, because Ingest with a MemFS will copy, not + // link the ingested file. 
+ dir, err := os.MkdirTemp("", "ingest-filenum-reuse") + require.NoError(t, err) + defer os.RemoveAll(dir) + fs := vfs.Default + + readFile := func(s string) []byte { + f, err := fs.Open(fs.PathJoin(dir, s)) + require.NoError(t, err) + b, err := io.ReadAll(f) + require.NoError(t, err) + require.NoError(t, f.Close()) + return b + } + + // Create sstables to ingest. + var files []string + var fileBytes [][]byte + for i := 0; i < count; i++ { + name := fmt.Sprintf("ext%d", i) + f, err := fs.Create(fs.PathJoin(dir, name)) + require.NoError(t, err) + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) + require.NoError(t, w.Set([]byte(fmt.Sprintf("foo%d", i)), nil)) + require.NoError(t, w.Close()) + files = append(files, name) + fileBytes = append(fileBytes, readFile(name)) + } + + // Open a database with a filesystem that will successfully link the + // ingested files but then panic. This is an approximation of what a crash + // after linking but before updating the manifest would look like. + d, err := Open(dir, &Options{ + FS: ingestCrashFS{FS: fs}, + }) + // A flush here ensures the file num bumps from creating OPTIONS files, + // etc get recorded in the manifest. We want the nextFileNum after the + // restart to be the same as one of our ingested sstables. + require.NoError(t, err) + require.NoError(t, d.Set([]byte("boop"), nil, nil)) + require.NoError(t, d.Flush()) + for _, f := range files { + func() { + defer func() { err = recover().(error) }() + err = d.Ingest([]string{fs.PathJoin(dir, f)}) + }() + if err == nil || !errors.Is(err, errorfs.ErrInjected) { + t.Fatalf("expected injected error, got %v", err) + } + } + // Leave something in the WAL so that Open will flush while replaying the + // WAL. + require.NoError(t, d.Set([]byte("wal"), nil, nil)) + require.NoError(t, d.Close()) + + // There are now two links to each external file: the original extX link + // and a numbered sstable link. 
The sstable files are still not a part of + // the manifest and so they may be overwritten. Open will detect the + // obsolete number sstables and try to remove them. The FS here is wrapped + // to induce errors on Remove calls. Even if we're unsuccessful in + // removing the obsolete files, the external files should not be + // overwritten. + d, err = Open(dir, &Options{FS: noRemoveFS{FS: fs}}) + require.NoError(t, err) + require.NoError(t, d.Set([]byte("bar"), nil, nil)) + require.NoError(t, d.Flush()) + require.NoError(t, d.Close()) + + // None of the external files should change despite modifying the linked + // versions. + for i, f := range files { + afterBytes := readFile(f) + require.Equal(t, fileBytes[i], afterBytes) + } +} + +func TestIngest_UpdateSequenceNumber(t *testing.T) { + mem := vfs.NewMem() + cmp := base.DefaultComparer.Compare + parse := func(input string) (*sstable.Writer, error) { + f, err := mem.Create("ext") + if err != nil { + return nil, err + } + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ + TableFormat: sstable.TableFormatMax, + }) + for _, data := range strings.Split(input, "\n") { + if strings.HasPrefix(data, "rangekey: ") { + data = strings.TrimPrefix(data, "rangekey: ") + s := keyspan.ParseSpan(data) + err := rangekey.Encode(&s, w.AddRangeKey) + if err != nil { + return nil, err + } + continue + } + j := strings.Index(data, ":") + if j < 0 { + return nil, errors.Newf("malformed input: %s\n", data) + } + key := base.ParseInternalKey(data[:j]) + value := []byte(data[j+1:]) + if err := w.Add(key, value); err != nil { + return nil, err + } + } + return w, nil + } + + var ( + seqnum uint64 + err error + metas []*fileMetadata + ) + datadriven.RunTest(t, "testdata/ingest_update_seqnums", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "starting-seqnum": + seqnum, err = strconv.ParseUint(td.Input, 10, 64) + if err != nil { + return err.Error() + } + return "" + + case 
"reset": + metas = metas[:0] + return "" + + case "load": + w, err := parse(td.Input) + if err != nil { + return err.Error() + } + if err = w.Close(); err != nil { + return err.Error() + } + defer w.Close() + + // Format the bounds of the table. + wm, err := w.Metadata() + if err != nil { + return err.Error() + } + + // Upper bounds for range dels and range keys are expected to be sentinel + // keys. + maybeUpdateUpperBound := func(key base.InternalKey) base.InternalKey { + switch k := key.Kind(); { + case k == base.InternalKeyKindRangeDelete: + key.Trailer = base.InternalKeyRangeDeleteSentinel + case rangekey.IsRangeKey(k): + return base.MakeExclusiveSentinelKey(k, key.UserKey) + } + return key + } + + // Construct the file metadata from the writer metadata. + m := &fileMetadata{ + SmallestSeqNum: 0, // Simulate an ingestion. + LargestSeqNum: 0, + } + if wm.HasPointKeys { + m.ExtendPointKeyBounds(cmp, wm.SmallestPoint, wm.LargestPoint) + } + if wm.HasRangeDelKeys { + m.ExtendPointKeyBounds( + cmp, + wm.SmallestRangeDel, + maybeUpdateUpperBound(wm.LargestRangeDel), + ) + } + if wm.HasRangeKeys { + m.ExtendRangeKeyBounds( + cmp, + wm.SmallestRangeKey, + maybeUpdateUpperBound(wm.LargestRangeKey), + ) + } + m.InitPhysicalBacking() + if err := m.Validate(cmp, base.DefaultFormatter); err != nil { + return err.Error() + } + + // Collect this file. + metas = append(metas, m) + + // Return an index number for the file. + return fmt.Sprintf("file %d\n", len(metas)-1) + + case "update-files": + // Update the bounds across all files. 
+ if err = ingestUpdateSeqNum(cmp, base.DefaultFormatter, seqnum, ingestLoadResult{localMeta: metas}); err != nil { + return err.Error() + } + + var buf bytes.Buffer + for i, m := range metas { + fmt.Fprintf(&buf, "file %d:\n", i) + fmt.Fprintf(&buf, " combined: %s-%s\n", m.Smallest, m.Largest) + fmt.Fprintf(&buf, " points: %s-%s\n", m.SmallestPointKey, m.LargestPointKey) + fmt.Fprintf(&buf, " ranges: %s-%s\n", m.SmallestRangeKey, m.LargestRangeKey) + } + + return buf.String() + + default: + return fmt.Sprintf("unknown command %s\n", td.Cmd) + } + }) +} + +func TestIngestCleanup(t *testing.T) { + fns := []base.FileNum{0, 1, 2} + + testCases := []struct { + closeFiles []base.FileNum + cleanupFiles []base.FileNum + wantErr string + }{ + // Close and remove all files. + { + closeFiles: fns, + cleanupFiles: fns, + }, + // Remove a non-existent file. + { + closeFiles: fns, + cleanupFiles: []base.FileNum{3}, + wantErr: "unknown to the objstorage provider", + }, + // Remove a file that has not been closed. + { + closeFiles: []base.FileNum{0, 2}, + cleanupFiles: fns, + wantErr: oserror.ErrInvalid.Error(), + }, + // Remove all files, one of which is still open, plus a file that does not exist. + { + closeFiles: []base.FileNum{0, 2}, + cleanupFiles: []base.FileNum{0, 1, 2, 3}, + wantErr: oserror.ErrInvalid.Error(), // The first error encountered is due to the open file. + }, + } + + for _, tc := range testCases { + t.Run("", func(t *testing.T) { + mem := vfs.NewMem() + mem.UseWindowsSemantics(true) + objProvider, err := objstorageprovider.Open(objstorageprovider.DefaultSettings(mem, "")) + require.NoError(t, err) + defer objProvider.Close() + + // Create the files in the VFS. + metaMap := make(map[base.FileNum]objstorage.Writable) + for _, fn := range fns { + w, _, err := objProvider.Create(context.Background(), base.FileTypeTable, fn.DiskFileNum(), objstorage.CreateOptions{}) + require.NoError(t, err) + + metaMap[fn] = w + } + + // Close a select number of files. 
+ for _, m := range tc.closeFiles { + w, ok := metaMap[m] + if !ok { + continue + } + require.NoError(t, w.Finish()) + } + + // Cleanup the set of files in the FS. + var toRemove []*fileMetadata + for _, fn := range tc.cleanupFiles { + m := &fileMetadata{FileNum: fn} + m.InitPhysicalBacking() + toRemove = append(toRemove, m) + } + + err = ingestCleanup(objProvider, toRemove) + if tc.wantErr != "" { + require.Error(t, err, "got no error, expected %s", tc.wantErr) + require.Contains(t, err.Error(), tc.wantErr) + } else { + require.NoError(t, err) + } + }) + } +} + +// fatalCapturingLogger captures a fatal error instead of panicking. +type fatalCapturingLogger struct { + t testing.TB + err error +} + +// Infof implements the Logger interface. +func (l *fatalCapturingLogger) Infof(fmt string, args ...interface{}) { + l.t.Logf(fmt, args...) +} + +// Errorf implements the Logger interface. +func (l *fatalCapturingLogger) Errorf(fmt string, args ...interface{}) { + l.t.Logf(fmt, args...) +} + +// Fatalf implements the Logger interface. +func (l *fatalCapturingLogger) Fatalf(_ string, args ...interface{}) { + l.err = args[0].(error) +} + +func TestIngestValidation(t *testing.T) { + type keyVal struct { + key, val []byte + } + // The corruptionLocation enum defines where to corrupt an sstable if + // anywhere. corruptionLocation{Start,End} describe the start and end + // data blocks. corruptionLocationInternal describes a random data block + // that's neither the start or end blocks. The Ingest operation does not + // read the entire sstable, only the start and end blocks, so corruption + // introduced using corruptionLocationInternal will not be discovered until + // the asynchronous validation job runs. 
+ type corruptionLocation int + const ( + corruptionLocationNone corruptionLocation = iota + corruptionLocationStart + corruptionLocationEnd + corruptionLocationInternal + ) + // The errReportLocation type defines an enum to allow tests to enforce + // expectations about how an error surfaced during ingestion or validation + // is reported. Asynchronous validation that uncovers corruption should call + // Fatalf on the Logger. Asynchronous validation that encounters + // non-corruption errors should surface it through the + // EventListener.BackgroundError func. + type errReportLocation int + const ( + errReportLocationNone errReportLocation = iota + errReportLocationIngest + errReportLocationFatal + errReportLocationBackgroundError + ) + const ( + nKeys = 1_000 + keySize = 16 + valSize = 100 + blockSize = 100 + + ingestTableName = "ext" + ) + + seed := uint64(time.Now().UnixNano()) + rng := rand.New(rand.NewSource(seed)) + t.Logf("rng seed = %d", seed) + + // errfsCounter is used by test cases that make use of an errorfs.Injector + // to inject errors into the ingest validation code path.
+ var errfsCounter atomic.Int32 + testCases := []struct { + description string + cLoc corruptionLocation + wantErrType errReportLocation + wantErr error + errorfsInjector errorfs.Injector + }{ + { + description: "no corruption", + cLoc: corruptionLocationNone, + wantErrType: errReportLocationNone, + }, + { + description: "start block", + cLoc: corruptionLocationStart, + wantErr: ErrCorruption, + wantErrType: errReportLocationIngest, + }, + { + description: "end block", + cLoc: corruptionLocationEnd, + wantErr: ErrCorruption, + wantErrType: errReportLocationIngest, + }, + { + description: "non-end block", + cLoc: corruptionLocationInternal, + wantErr: ErrCorruption, + wantErrType: errReportLocationFatal, + }, + { + description: "non-corruption error", + cLoc: corruptionLocationNone, + wantErr: errorfs.ErrInjected, + wantErrType: errReportLocationBackgroundError, + errorfsInjector: errorfs.InjectorFunc(func(op errorfs.Op) error { + // Inject an error on the first read-at operation on an sstable + // (excluding the read on the sstable before ingestion has + // linked it in). + if op.Path != "ext" && op.Kind != errorfs.OpFileReadAt || filepath.Ext(op.Path) != ".sst" { + return nil + } + if errfsCounter.Add(1) == 1 { + return errorfs.ErrInjected + } + return nil + }), + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + errfsCounter.Store(0) + var wg sync.WaitGroup + wg.Add(1) + + fs := vfs.NewMem() + var testFS vfs.FS = fs + if tc.errorfsInjector != nil { + testFS = errorfs.Wrap(fs, tc.errorfsInjector) + } + + // backgroundErr is populated by EventListener.BackgroundError. 
+ var backgroundErr error + logger := &fatalCapturingLogger{t: t} + opts := &Options{ + FS: testFS, + Logger: logger, + EventListener: &EventListener{ + TableValidated: func(i TableValidatedInfo) { + wg.Done() + }, + BackgroundError: func(err error) { + backgroundErr = err + }, + }, + } + // Disable table stats so that injected errors can't be accidentally + // injected into the table stats collector read, and so the table + // stats collector won't prime the table+block cache such that the + // error injection won't trigger at all during ingest validation. + opts.private.disableTableStats = true + opts.Experimental.ValidateOnIngest = true + d, err := Open("", opts) + require.NoError(t, err) + defer func() { require.NoError(t, d.Close()) }() + + corrupt := func(f vfs.File) { + readable, err := sstable.NewSimpleReadable(f) + require.NoError(t, err) + // Compute the layout of the sstable in order to find the + // appropriate block locations to corrupt. + r, err := sstable.NewReader(readable, sstable.ReaderOptions{}) + require.NoError(t, err) + l, err := r.Layout() + require.NoError(t, err) + + // Select an appropriate data block to corrupt. + var blockIdx int + switch tc.cLoc { + case corruptionLocationStart: + blockIdx = 0 + case corruptionLocationEnd: + blockIdx = len(l.Data) - 1 + case corruptionLocationInternal: + blockIdx = 1 + rng.Intn(len(l.Data)-2) + default: + t.Fatalf("unknown corruptionLocation: %T", tc.cLoc) + } + bh := l.Data[blockIdx] + + // Corrupting a key will cause the ingestion to fail due to a + // malformed key, rather than a block checksum mismatch. + // Instead, we corrupt the last byte in the selected block, + // before the trailer, which corresponds to a value. 
+ offset := bh.Offset + bh.Length - 1 + _, err = f.WriteAt([]byte("\xff"), int64(offset)) + require.NoError(t, err) + require.NoError(t, r.Close()) + } + + type errT struct { + errLoc errReportLocation + err error + } + runIngest := func(keyVals []keyVal) (et errT) { + f, err := fs.Create(ingestTableName) + require.NoError(t, err) + defer func() { _ = fs.Remove(ingestTableName) }() + + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{ + BlockSize: blockSize, // Create many smaller blocks. + Compression: NoCompression, // For simpler debugging. + }) + for _, kv := range keyVals { + require.NoError(t, w.Set(kv.key, kv.val)) + } + require.NoError(t, w.Close()) + + // Possibly corrupt the file. + if tc.cLoc != corruptionLocationNone { + f, err = fs.OpenReadWrite(ingestTableName) + require.NoError(t, err) + corrupt(f) + } + + // Ingest the external table. + err = d.Ingest([]string{ingestTableName}) + if err != nil { + et.errLoc = errReportLocationIngest + et.err = err + return + } + + // Wait for the validation on the sstable to complete. + wg.Wait() + + // Return any error encountered during validation. + if logger.err != nil { + et.errLoc = errReportLocationFatal + et.err = logger.err + } else if backgroundErr != nil { + et.errLoc = errReportLocationBackgroundError + et.err = backgroundErr + } + return + } + + // Construct a set of keys to ingest. + var keyVals []keyVal + for i := 0; i < nKeys; i++ { + key := make([]byte, keySize) + _, err = rng.Read(key) + require.NoError(t, err) + + val := make([]byte, valSize) + _, err = rng.Read(val) + require.NoError(t, err) + + keyVals = append(keyVals, keyVal{key, val}) + } + + // Keys must be sorted. + slices.SortFunc(keyVals, func(a, b keyVal) int { return d.cmp(a.key, b.key) }) + + // Run the ingestion. + et := runIngest(keyVals) + + // Assert we saw the errors we expect. 
+ switch tc.wantErrType { + case errReportLocationNone: + require.Equal(t, errReportLocationNone, et.errLoc) + require.NoError(t, et.err) + case errReportLocationIngest: + require.Equal(t, errReportLocationIngest, et.errLoc) + require.Error(t, et.err) + require.True(t, errors.Is(et.err, tc.wantErr)) + case errReportLocationFatal: + require.Equal(t, errReportLocationFatal, et.errLoc) + require.Error(t, et.err) + require.True(t, errors.Is(et.err, tc.wantErr)) + case errReportLocationBackgroundError: + require.Equal(t, errReportLocationBackgroundError, et.errLoc) + require.Error(t, et.err) + require.True(t, errors.Is(et.err, tc.wantErr)) + default: + t.Fatalf("unknown wantErrType %T", tc.wantErrType) + } + }) + } +} + +// BenchmarkManySSTables measures the cost of various operations with various +// counts of SSTables within the database. +func BenchmarkManySSTables(b *testing.B) { + counts := []int{10, 1_000, 10_000, 100_000, 1_000_000} + ops := []string{"ingest", "calculateInuseKeyRanges"} + for _, op := range ops { + b.Run(op, func(b *testing.B) { + for _, count := range counts { + b.Run(fmt.Sprintf("sstables=%d", count), func(b *testing.B) { + mem := vfs.NewMem() + d, err := Open("", &Options{ + FS: mem, + }) + require.NoError(b, err) + + var paths []string + for i := 0; i < count; i++ { + n := fmt.Sprintf("%07d", i) + f, err := mem.Create(n) + require.NoError(b, err) + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) + require.NoError(b, w.Set([]byte(n), nil)) + require.NoError(b, w.Close()) + paths = append(paths, n) + } + require.NoError(b, d.Ingest(paths)) + + { + const broadIngest = "broad.sst" + f, err := mem.Create(broadIngest) + require.NoError(b, err) + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) + require.NoError(b, w.Set([]byte("0"), nil)) + require.NoError(b, w.Set([]byte("Z"), nil)) + require.NoError(b, w.Close()) + require.NoError(b, d.Ingest([]string{broadIngest})) + } 
+ + switch op { + case "ingest": + runBenchmarkManySSTablesIngest(b, d, mem, count) + case "calculateInuseKeyRanges": + runBenchmarkManySSTablesInUseKeyRanges(b, d, count) + } + require.NoError(b, d.Close()) + }) + } + }) + } +} + +func runBenchmarkManySSTablesIngest(b *testing.B, d *DB, fs vfs.FS, count int) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + n := fmt.Sprintf("%07d", count+i) + f, err := fs.Create(n) + require.NoError(b, err) + w := sstable.NewWriter(objstorageprovider.NewFileWritable(f), sstable.WriterOptions{}) + require.NoError(b, w.Set([]byte(n), nil)) + require.NoError(b, w.Close()) + require.NoError(b, d.Ingest([]string{n})) + } +} + +func runBenchmarkManySSTablesInUseKeyRanges(b *testing.B, d *DB, count int) { + // This benchmark is pretty contrived, but it's not easy to write a + // microbenchmark for this in a more natural way. L6 has many files, and + // L5 has 1 file spanning the entire breadth of L5. + d.mu.Lock() + defer d.mu.Unlock() + v := d.mu.versions.currentVersion() + b.ResetTimer() + + smallest := []byte("0") + largest := []byte("z") + for i := 0; i < b.N; i++ { + _ = calculateInuseKeyRanges(v, d.cmp, 0, numLevels-1, smallest, largest) + } +} diff --git a/pebble/internal.go b/pebble/internal.go new file mode 100644 index 0000000..61a4284 --- /dev/null +++ b/pebble/internal.go @@ -0,0 +1,51 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package pebble + +import "github.com/cockroachdb/pebble/internal/base" + +// InternalKeyKind exports the base.InternalKeyKind type. +type InternalKeyKind = base.InternalKeyKind + +// These constants are part of the file format, and should not be changed. 
+const ( + InternalKeyKindDelete = base.InternalKeyKindDelete + InternalKeyKindSet = base.InternalKeyKindSet + InternalKeyKindMerge = base.InternalKeyKindMerge + InternalKeyKindLogData = base.InternalKeyKindLogData + InternalKeyKindSingleDelete = base.InternalKeyKindSingleDelete + InternalKeyKindRangeDelete = base.InternalKeyKindRangeDelete + InternalKeyKindMax = base.InternalKeyKindMax + InternalKeyKindSetWithDelete = base.InternalKeyKindSetWithDelete + InternalKeyKindRangeKeySet = base.InternalKeyKindRangeKeySet + InternalKeyKindRangeKeyUnset = base.InternalKeyKindRangeKeyUnset + InternalKeyKindRangeKeyDelete = base.InternalKeyKindRangeKeyDelete + InternalKeyKindIngestSST = base.InternalKeyKindIngestSST + InternalKeyKindDeleteSized = base.InternalKeyKindDeleteSized + InternalKeyKindInvalid = base.InternalKeyKindInvalid + InternalKeySeqNumBatch = base.InternalKeySeqNumBatch + InternalKeySeqNumMax = base.InternalKeySeqNumMax + InternalKeyRangeDeleteSentinel = base.InternalKeyRangeDeleteSentinel +) + +// InternalKey exports the base.InternalKey type. +type InternalKey = base.InternalKey + +type internalIterator = base.InternalIterator + +// ErrCorruption is a marker to indicate that data in a file (WAL, MANIFEST, +// sstable) isn't in the expected format. +var ErrCorruption = base.ErrCorruption + +// AttributeAndLen exports the base.AttributeAndLen type. +type AttributeAndLen = base.AttributeAndLen + +// ShortAttribute exports the base.ShortAttribute type. +type ShortAttribute = base.ShortAttribute + +// LazyFetcher exports the base.LazyFetcher type. This export is needed since +// LazyValue.Clone requires a pointer to a LazyFetcher struct to avoid +// allocations. No code outside Pebble needs to peer into a LazyFetcher. 
+type LazyFetcher = base.LazyFetcher diff --git a/pebble/internal/ackseq/ackseq.go b/pebble/internal/ackseq/ackseq.go new file mode 100644 index 0000000..f2c682f --- /dev/null +++ b/pebble/internal/ackseq/ackseq.go @@ -0,0 +1,83 @@ +// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package ackseq + +import ( + "sync" + "sync/atomic" + + "github.com/cockroachdb/errors" +) + +const ( + // The window size constants. These values specify a window that can hold ~1m + // pending unacknowledged sequence numbers using 128 KB of memory. + windowSize = 1 << 20 + windowMask = windowSize - 1 + windowBytes = (windowSize + 7) / 8 +) + +// S keeps track of the largest sequence number such that all sequence numbers +// in the range [0,v) have been acknowledged. +type S struct { + next atomic.Uint64 + mu struct { + sync.Mutex + base uint64 + window [windowBytes]uint8 + } +} + +// New creates a new acknowledged sequence tracker with the specified base +// sequence number. All of the sequence numbers in the range [0,base) are +// considered acknowledged. Next() will return base upon first call. +func New(base uint64) *S { + s := &S{} + s.next.Store(base) + s.mu.base = base + return s +} + +// Next returns the next sequence number to use. +func (s *S) Next() uint64 { + return s.next.Add(1) - 1 +} + +// Ack acknowledges the specified seqNum, adjusting base as necessary, +// returning the number of newly acknowledged sequence numbers. 
+func (s *S) Ack(seqNum uint64) (int, error) { + s.mu.Lock() + if s.getLocked(seqNum) { + defer s.mu.Unlock() + return 0, errors.Errorf( + "pending acks exceeds window size: %d has been acked, but %d has not", + errors.Safe(seqNum), errors.Safe(s.mu.base)) + } + + var count int + s.setLocked(seqNum) + for s.getLocked(s.mu.base) { + s.clearLocked(s.mu.base) + s.mu.base++ + count++ + } + s.mu.Unlock() + return count, nil +} + +func (s *S) getLocked(seqNum uint64) bool { + bit := seqNum & windowMask + return (s.mu.window[bit/8] & (1 << (bit % 8))) != 0 +} + +func (s *S) setLocked(seqNum uint64) { + bit := seqNum & windowMask + s.mu.window[bit/8] |= (1 << (bit % 8)) +} + +func (s *S) clearLocked(seqNum uint64) { + bit := seqNum & windowMask + s.mu.window[bit/8] &^= (1 << (bit % 8)) +} diff --git a/pebble/internal/arenaskl/LICENSE b/pebble/internal/arenaskl/LICENSE new file mode 100644 index 0000000..8dada3e --- /dev/null +++ b/pebble/internal/arenaskl/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. 
+ + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/pebble/internal/arenaskl/README.md b/pebble/internal/arenaskl/README.md new file mode 100644 index 0000000..93a7d32 --- /dev/null +++ b/pebble/internal/arenaskl/README.md @@ -0,0 +1,93 @@ +# arenaskl + +Fast, lock-free, arena-based Skiplist implementation in Go that supports iteration +in both directions. + +## Advantages + +Arenaskl offers several advantages over other skiplist implementations: + +* High performance that linearly scales with the number of cores. This is + achieved by allocating from a fixed-size arena and by avoiding locks. +* Iterators that can be allocated on the stack and easily cloned by value. +* Simple-to-use and low overhead model for detecting and handling race conditions + with other threads. +* Support for iterating in reverse (i.e. previous links). + +## Limitations + +The advantages come at a cost that prevents arenaskl from being a general-purpose +skiplist implementation: + +* The size of the arena sets a hard upper bound on the combined size of skiplist + nodes, keys, and values. This limit includes even the size of deleted nodes, + keys, and values. +* Deletion is not supported. Instead, higher-level code is expected to + add deletion tombstones and needs to process those tombstones + appropriately. 
+ +## Pedigree + +This code is based on Andy Kimball's arenaskl code: + +https://github.com/andy-kimball/arenaskl + +The arenaskl code is based on the skiplist found in Badger, a Go-based +KV store: + +https://github.com/dgraph-io/badger/tree/master/skl + +The skiplist in Badger is itself based on a C++ skiplist built for +Facebook's RocksDB: + +https://github.com/facebook/rocksdb/tree/master/memtable + +## Benchmarks + +The benchmarks consist of a mix of reads and writes executed in parallel. The +fraction of reads is indicated in the run name: "frac_X" indicates a run where +X percent of the operations are reads. + +The results are much better than `skiplist` and `slist`. + +``` +name time/op +ReadWrite/frac_0-8 470ns ±11% +ReadWrite/frac_10-8 462ns ± 3% +ReadWrite/frac_20-8 436ns ± 2% +ReadWrite/frac_30-8 410ns ± 2% +ReadWrite/frac_40-8 385ns ± 2% +ReadWrite/frac_50-8 360ns ± 4% +ReadWrite/frac_60-8 386ns ± 1% +ReadWrite/frac_70-8 352ns ± 2% +ReadWrite/frac_80-8 306ns ± 3% +ReadWrite/frac_90-8 253ns ± 4% +ReadWrite/frac_100-8 28.1ns ± 2% +``` + +Note that the above numbers are for concurrent operations using 8x +parallelism. The same benchmarks without concurrency (use these +numbers when comparing vs batchskl): + +``` +name time/op +ReadWrite/frac_0 1.53µs ± 1% +ReadWrite/frac_10 1.46µs ± 2% +ReadWrite/frac_20 1.39µs ± 3% +ReadWrite/frac_30 1.28µs ± 3% +ReadWrite/frac_40 1.21µs ± 2% +ReadWrite/frac_50 1.11µs ± 3% +ReadWrite/frac_60 1.23µs ±17% +ReadWrite/frac_70 1.16µs ± 4% +ReadWrite/frac_80 959ns ± 3% +ReadWrite/frac_90 738ns ± 5% +ReadWrite/frac_100 81.9ns ± 2% +``` + +Forward and backward iteration are also fast: + +``` +name time/op +IterNext 3.97ns ± 5% +IterPrev 3.88ns ± 3% +``` diff --git a/pebble/internal/arenaskl/arena.go b/pebble/internal/arenaskl/arena.go new file mode 100644 index 0000000..011c3b0 --- /dev/null +++ b/pebble/internal/arenaskl/arena.go @@ -0,0 +1,125 @@ +/* + * Copyright 2017 Dgraph Labs, Inc. 
and Contributors + * Modifications copyright (C) 2017 Andy Kimball and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package arenaskl + +import ( + "sync/atomic" + "unsafe" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/constants" + "github.com/cockroachdb/pebble/internal/invariants" +) + +// Arena is lock-free. +type Arena struct { + n atomic.Uint64 + buf []byte +} + +const nodeAlignment = 4 + +var ( + // ErrArenaFull indicates that the arena is full and cannot perform any more + // allocations. + ErrArenaFull = errors.New("allocation failed because arena is full") +) + +// NewArena allocates a new arena using the specified buffer as the backing +// store. +func NewArena(buf []byte) *Arena { + if len(buf) > constants.MaxUint32OrInt { + if invariants.Enabled { + panic(errors.AssertionFailedf("attempting to create arena of size %d", len(buf))) + } + buf = buf[:constants.MaxUint32OrInt] + } + a := &Arena{ + buf: buf, + } + // We don't store data at position 0 in order to reserve offset=0 as a kind of + // nil pointer. + a.n.Store(1) + return a +} + +// Size returns the number of bytes allocated by the arena. +func (a *Arena) Size() uint32 { + s := a.n.Load() + if s > constants.MaxUint32OrInt { + // The last failed allocation can push the size higher than len(a.buf). + // Saturate at the maximum representable offset. 
+ return constants.MaxUint32OrInt + } + return uint32(s) +} + +// Capacity returns the capacity of the arena. +func (a *Arena) Capacity() uint32 { + return uint32(len(a.buf)) +} + +// alloc allocates a buffer of the given size and with the given alignment +// (which must be a power of 2). +// +// If overflow is not 0, it also ensures that many bytes after the buffer are +// inside the arena (this is used for structures that are larger than the +// requested size but don't use those extra bytes). +func (a *Arena) alloc(size, alignment, overflow uint32) (uint32, uint32, error) { + if invariants.Enabled && (alignment&(alignment-1)) != 0 { + panic(errors.AssertionFailedf("invalid alignment %d", alignment)) + } + // Verify that the arena isn't already full. + origSize := a.n.Load() + if int(origSize) > len(a.buf) { + return 0, 0, ErrArenaFull + } + + // Pad the allocation with enough bytes to ensure the requested alignment. + padded := uint64(size) + uint64(alignment) - 1 + + newSize := a.n.Add(padded) + if newSize+uint64(overflow) > uint64(len(a.buf)) { + return 0, 0, ErrArenaFull + } + + // Return the aligned offset. + offset := (uint32(newSize) - size) & ^(alignment - 1) + return offset, uint32(padded), nil +} + +func (a *Arena) getBytes(offset uint32, size uint32) []byte { + if offset == 0 { + return nil + } + return a.buf[offset : offset+size : offset+size] +} + +func (a *Arena) getPointer(offset uint32) unsafe.Pointer { + if offset == 0 { + return nil + } + return unsafe.Pointer(&a.buf[offset]) +} + +func (a *Arena) getPointerOffset(ptr unsafe.Pointer) uint32 { + if ptr == nil { + return 0 + } + return uint32(uintptr(ptr) - uintptr(unsafe.Pointer(&a.buf[0]))) +} diff --git a/pebble/internal/arenaskl/arena_test.go b/pebble/internal/arenaskl/arena_test.go new file mode 100644 index 0000000..f264b8d --- /dev/null +++ b/pebble/internal/arenaskl/arena_test.go @@ -0,0 +1,53 @@ +/* + * Copyright 2017 Dgraph Labs, Inc. 
and Contributors + * Modifications copyright (C) 2017 Andy Kimball and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package arenaskl + +import ( + "math" + "testing" + + "github.com/cockroachdb/pebble/internal/constants" + "github.com/stretchr/testify/require" +) + +func newArena(n uint32) *Arena { + return NewArena(make([]byte, n)) +} + +// TestArenaSizeOverflow tests that large allocations do not cause Arena's +// internal size accounting to overflow and produce incorrect results. +func TestArenaSizeOverflow(t *testing.T) { + a := newArena(constants.MaxUint32OrInt) + + // Allocating under the limit throws no error. + offset, _, err := a.alloc(math.MaxUint16, 1, 0) + require.Nil(t, err) + require.Equal(t, uint32(1), offset) + require.Equal(t, uint32(math.MaxUint16)+1, a.Size()) + + // Allocating over the limit could cause an accounting + // overflow if 32-bit arithmetic was used. It shouldn't. + _, _, err = a.alloc(math.MaxUint32, 1, 0) + require.Equal(t, ErrArenaFull, err) + require.Equal(t, uint32(constants.MaxUint32OrInt), a.Size()) + + // Continuing to allocate continues to throw an error. 
+ _, _, err = a.alloc(math.MaxUint16, 1, 0) + require.Equal(t, ErrArenaFull, err) + require.Equal(t, uint32(constants.MaxUint32OrInt), a.Size()) +} diff --git a/pebble/internal/arenaskl/flush_iterator.go b/pebble/internal/arenaskl/flush_iterator.go new file mode 100644 index 0000000..2a7ea03 --- /dev/null +++ b/pebble/internal/arenaskl/flush_iterator.go @@ -0,0 +1,88 @@ +/* + * Copyright 2017 Dgraph Labs, Inc. and Contributors + * Modifications copyright (C) 2017 Andy Kimball and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package arenaskl + +import "github.com/cockroachdb/pebble/internal/base" + +// flushIterator is an iterator over the skiplist object. Use Skiplist.NewFlushIter +// to construct an iterator. The current state of the iterator can be cloned by +// simply value copying the struct. +type flushIterator struct { + Iterator + bytesIterated *uint64 +} + +// flushIterator implements the base.InternalIterator interface. 
+var _ base.InternalIterator = (*flushIterator)(nil) + +func (it *flushIterator) String() string { + return "memtable" +} + +func (it *flushIterator) SeekGE( + key []byte, flags base.SeekGEFlags, +) (*base.InternalKey, base.LazyValue) { + panic("pebble: SeekGE unimplemented") +} + +func (it *flushIterator) SeekPrefixGE( + prefix, key []byte, flags base.SeekGEFlags, +) (*base.InternalKey, base.LazyValue) { + panic("pebble: SeekPrefixGE unimplemented") +} + +func (it *flushIterator) SeekLT( + key []byte, flags base.SeekLTFlags, +) (*base.InternalKey, base.LazyValue) { + panic("pebble: SeekLT unimplemented") +} + +// First seeks position at the first entry in list. Returns the key and value +// if the iterator is pointing at a valid entry, and (nil, nil) otherwise. Note +// that First only checks the upper bound. It is up to the caller to ensure +// that key is greater than or equal to the lower bound. +func (it *flushIterator) First() (*base.InternalKey, base.LazyValue) { + key, val := it.Iterator.First() + if key == nil { + return nil, base.LazyValue{} + } + *it.bytesIterated += uint64(it.nd.allocSize) + return key, val +} + +// Next advances to the next position. Returns the key and value if the +// iterator is pointing at a valid entry, and (nil, nil) otherwise. +// Note: flushIterator.Next mirrors the implementation of Iterator.Next +// due to performance. Keep the two in sync. 
+func (it *flushIterator) Next() (*base.InternalKey, base.LazyValue) { + it.nd = it.list.getNext(it.nd, 0) + if it.nd == it.list.tail { + return nil, base.LazyValue{} + } + it.decodeKey() + *it.bytesIterated += uint64(it.nd.allocSize) + return &it.key, base.MakeInPlaceValue(it.value()) +} + +func (it *flushIterator) NextPrefix(succKey []byte) (*base.InternalKey, base.LazyValue) { + panic("pebble: NextPrefix unimplemented") +} + +func (it *flushIterator) Prev() (*base.InternalKey, base.LazyValue) { + panic("pebble: Prev unimplemented") +} diff --git a/pebble/internal/arenaskl/iterator.go b/pebble/internal/arenaskl/iterator.go new file mode 100644 index 0000000..a41dd7e --- /dev/null +++ b/pebble/internal/arenaskl/iterator.go @@ -0,0 +1,275 @@ +/* + * Copyright 2017 Dgraph Labs, Inc. and Contributors + * Modifications copyright (C) 2017 Andy Kimball and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package arenaskl + +import ( + "context" + "sync" + + "github.com/cockroachdb/pebble/internal/base" +) + +type splice struct { + prev *node + next *node +} + +func (s *splice) init(prev, next *node) { + s.prev = prev + s.next = next +} + +// Iterator is an iterator over the skiplist object. Use Skiplist.NewIter +// to construct an iterator. The current state of the iterator can be cloned by +// simply value copying the struct. All iterator methods are thread-safe. 
+type Iterator struct { + list *Skiplist + nd *node + key base.InternalKey + lower []byte + upper []byte +} + +// Iterator implements the base.InternalIterator interface. +var _ base.InternalIterator = (*Iterator)(nil) + +var iterPool = sync.Pool{ + New: func() interface{} { + return &Iterator{} + }, +} + +// Close resets the iterator. +func (it *Iterator) Close() error { + it.list = nil + it.nd = nil + it.lower = nil + it.upper = nil + iterPool.Put(it) + return nil +} + +func (it *Iterator) String() string { + return "memtable" +} + +// Error returns any accumulated error. +func (it *Iterator) Error() error { + return nil +} + +// SeekGE moves the iterator to the first entry whose key is greater than or +// equal to the given key. Returns the key and value if the iterator is +// pointing at a valid entry, and (nil, nil) otherwise. Note that SeekGE only +// checks the upper bound. It is up to the caller to ensure that key is greater +// than or equal to the lower bound. +func (it *Iterator) SeekGE(key []byte, flags base.SeekGEFlags) (*base.InternalKey, base.LazyValue) { + if flags.TrySeekUsingNext() { + if it.nd == it.list.tail { + // Iterator is done. + return nil, base.LazyValue{} + } + less := it.list.cmp(it.key.UserKey, key) < 0 + // Arbitrary constant. By measuring the seek cost as a function of the + // number of elements in the skip list, and fitting to a model, we + // could adjust the number of nexts based on the current size of the + // skip list. + const numNexts = 5 + for i := 0; less && i < numNexts; i++ { + k, _ := it.Next() + if k == nil { + // Iterator is done. 
+ return nil, base.LazyValue{} + } + less = it.list.cmp(it.key.UserKey, key) < 0 + } + if !less { + return &it.key, base.MakeInPlaceValue(it.value()) + } + } + _, it.nd, _ = it.seekForBaseSplice(key) + if it.nd == it.list.tail { + return nil, base.LazyValue{} + } + it.decodeKey() + if it.upper != nil && it.list.cmp(it.upper, it.key.UserKey) <= 0 { + it.nd = it.list.tail + return nil, base.LazyValue{} + } + return &it.key, base.MakeInPlaceValue(it.value()) +} + +// SeekPrefixGE moves the iterator to the first entry whose key is greater than +// or equal to the given key. This method is equivalent to SeekGE and is +// provided so that an arenaskl.Iterator implements the +// internal/base.InternalIterator interface. +func (it *Iterator) SeekPrefixGE( + prefix, key []byte, flags base.SeekGEFlags, +) (*base.InternalKey, base.LazyValue) { + return it.SeekGE(key, flags) +} + +// SeekLT moves the iterator to the last entry whose key is less than the given +// key. Returns the key and value if the iterator is pointing at a valid entry, +// and (nil, nil) otherwise. Note that SeekLT only checks the lower bound. It +// is up to the caller to ensure that key is less than the upper bound. +func (it *Iterator) SeekLT(key []byte, flags base.SeekLTFlags) (*base.InternalKey, base.LazyValue) { + // NB: the top-level Iterator has already adjusted key based on + // the upper-bound. + it.nd, _, _ = it.seekForBaseSplice(key) + if it.nd == it.list.head { + return nil, base.LazyValue{} + } + it.decodeKey() + if it.lower != nil && it.list.cmp(it.lower, it.key.UserKey) > 0 { + it.nd = it.list.head + return nil, base.LazyValue{} + } + return &it.key, base.MakeInPlaceValue(it.value()) +} + +// First seeks position at the first entry in list. Returns the key and value +// if the iterator is pointing at a valid entry, and (nil, nil) otherwise. Note +// that First only checks the upper bound. It is up to the caller to ensure +// that key is greater than or equal to the lower bound (e.g. 
via a call to SeekGE(lower)). +func (it *Iterator) First() (*base.InternalKey, base.LazyValue) { + it.nd = it.list.getNext(it.list.head, 0) + if it.nd == it.list.tail { + return nil, base.LazyValue{} + } + it.decodeKey() + if it.upper != nil && it.list.cmp(it.upper, it.key.UserKey) <= 0 { + it.nd = it.list.tail + return nil, base.LazyValue{} + } + return &it.key, base.MakeInPlaceValue(it.value()) +} + +// Last seeks position at the last entry in list. Returns the key and value if +// the iterator is pointing at a valid entry, and (nil, nil) otherwise. Note +// that Last only checks the lower bound. It is up to the caller to ensure that +// key is less than the upper bound (e.g. via a call to SeekLT(upper)). +func (it *Iterator) Last() (*base.InternalKey, base.LazyValue) { + it.nd = it.list.getPrev(it.list.tail, 0) + if it.nd == it.list.head { + return nil, base.LazyValue{} + } + it.decodeKey() + if it.lower != nil && it.list.cmp(it.lower, it.key.UserKey) > 0 { + it.nd = it.list.head + return nil, base.LazyValue{} + } + return &it.key, base.MakeInPlaceValue(it.value()) +} + +// Next advances to the next position. Returns the key and value if the +// iterator is pointing at a valid entry, and (nil, nil) otherwise. +// Note: flushIterator.Next mirrors the implementation of Iterator.Next +// due to performance. Keep the two in sync. +func (it *Iterator) Next() (*base.InternalKey, base.LazyValue) { + it.nd = it.list.getNext(it.nd, 0) + if it.nd == it.list.tail { + return nil, base.LazyValue{} + } + it.decodeKey() + if it.upper != nil && it.list.cmp(it.upper, it.key.UserKey) <= 0 { + it.nd = it.list.tail + return nil, base.LazyValue{} + } + return &it.key, base.MakeInPlaceValue(it.value()) +} + +// NextPrefix advances to the next position with a new prefix. Returns the key +// and value if the iterator is pointing at a valid entry, and (nil, nil) +// otherwise. 
+func (it *Iterator) NextPrefix(succKey []byte) (*base.InternalKey, base.LazyValue) { + return it.SeekGE(succKey, base.SeekGEFlagsNone.EnableTrySeekUsingNext()) +} + +// Prev moves to the previous position. Returns the key and value if the +// iterator is pointing at a valid entry, and (nil, nil) otherwise. +func (it *Iterator) Prev() (*base.InternalKey, base.LazyValue) { + it.nd = it.list.getPrev(it.nd, 0) + if it.nd == it.list.head { + return nil, base.LazyValue{} + } + it.decodeKey() + if it.lower != nil && it.list.cmp(it.lower, it.key.UserKey) > 0 { + it.nd = it.list.head + return nil, base.LazyValue{} + } + return &it.key, base.MakeInPlaceValue(it.value()) +} + +// value returns the value at the current position. +func (it *Iterator) value() []byte { + return it.nd.getValue(it.list.arena) +} + +// Head true iff the iterator is positioned at the sentinel head node. +func (it *Iterator) Head() bool { + return it.nd == it.list.head +} + +// Tail true iff the iterator is positioned at the sentinel tail node. +func (it *Iterator) Tail() bool { + return it.nd == it.list.tail +} + +// SetBounds sets the lower and upper bounds for the iterator. Note that the +// result of Next and Prev will be undefined until the iterator has been +// repositioned with SeekGE, SeekPrefixGE, SeekLT, First, or Last. +func (it *Iterator) SetBounds(lower, upper []byte) { + it.lower = lower + it.upper = upper +} + +// SetContext implements base.InternalIterator. 
+func (it *Iterator) SetContext(_ context.Context) {} + +func (it *Iterator) decodeKey() { + it.key.UserKey = it.list.arena.getBytes(it.nd.keyOffset, it.nd.keySize) + it.key.Trailer = it.nd.keyTrailer +} + +func (it *Iterator) seekForBaseSplice(key []byte) (prev, next *node, found bool) { + ikey := base.MakeSearchKey(key) + level := int(it.list.Height() - 1) + + prev = it.list.head + for { + prev, next, found = it.list.findSpliceForLevel(ikey, level, prev) + + if found { + if level != 0 { + // next is pointing at the target node, but we need to find previous on + // the bottom level. + prev = it.list.getPrev(next, 0) + } + break + } + + if level == 0 { + break + } + + level-- + } + + return +} diff --git a/pebble/internal/arenaskl/node.go b/pebble/internal/arenaskl/node.go new file mode 100644 index 0000000..d464bc5 --- /dev/null +++ b/pebble/internal/arenaskl/node.go @@ -0,0 +1,133 @@ +/* + * Copyright 2017 Dgraph Labs, Inc. and Contributors + * Modifications copyright (C) 2017 Andy Kimball and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package arenaskl + +import ( + "math" + "sync/atomic" + + "github.com/cockroachdb/pebble/internal/base" +) + +// MaxNodeSize returns the maximum space needed for a node with the specified +// key and value sizes. This could overflow a uint32, which is why a uint64 +// is used here. If a key/value overflows a uint32, it should not be added to +// the skiplist. 
+func MaxNodeSize(keySize, valueSize uint32) uint64 { + const maxPadding = nodeAlignment - 1 + return uint64(maxNodeSize) + uint64(keySize) + uint64(valueSize) + maxPadding +} + +type links struct { + nextOffset atomic.Uint32 + prevOffset atomic.Uint32 +} + +func (l *links) init(prevOffset, nextOffset uint32) { + l.nextOffset.Store(nextOffset) + l.prevOffset.Store(prevOffset) +} + +type node struct { + // Immutable fields, so no need to lock to access key. + keyOffset uint32 + keySize uint32 + keyTrailer uint64 + valueSize uint32 + allocSize uint32 + + // Most nodes do not need to use the full height of the tower, since the + // probability of each successive level decreases exponentially. Because + // these elements are never accessed, they do not need to be allocated. + // Therefore, when a node is allocated in the arena, its memory footprint + // is deliberately truncated to not include unneeded tower elements. + // + // All accesses to elements should use CAS operations, with no need to lock. 
+ tower [maxHeight]links +} + +func newNode( + arena *Arena, height uint32, key base.InternalKey, value []byte, +) (nd *node, err error) { + if height < 1 || height > maxHeight { + panic("height cannot be less than one or greater than the max height") + } + keySize := len(key.UserKey) + if int64(keySize) > math.MaxUint32 { + panic("key is too large") + } + valueSize := len(value) + if int64(len(value)) > math.MaxUint32 { + panic("value is too large") + } + if int64(len(value))+int64(keySize)+int64(maxNodeSize) > math.MaxUint32 { + panic("combined key and value size is too large") + } + + nd, err = newRawNode(arena, height, uint32(keySize), uint32(valueSize)) + if err != nil { + return + } + nd.keyTrailer = key.Trailer + copy(nd.getKeyBytes(arena), key.UserKey) + copy(nd.getValue(arena), value) + return +} + +func newRawNode(arena *Arena, height uint32, keySize, valueSize uint32) (nd *node, err error) { + // Compute the amount of the tower that will never be used, since the height + // is less than maxHeight. 
+ unusedSize := uint32((maxHeight - int(height)) * linksSize) + nodeSize := uint32(maxNodeSize) - unusedSize + + nodeOffset, allocSize, err := arena.alloc(nodeSize+keySize+valueSize, nodeAlignment, unusedSize) + if err != nil { + return + } + + nd = (*node)(arena.getPointer(nodeOffset)) + nd.keyOffset = nodeOffset + nodeSize + nd.keySize = keySize + nd.valueSize = valueSize + nd.allocSize = allocSize + return +} + +func (n *node) getKeyBytes(arena *Arena) []byte { + return arena.getBytes(n.keyOffset, n.keySize) +} + +func (n *node) getValue(arena *Arena) []byte { + return arena.getBytes(n.keyOffset+n.keySize, uint32(n.valueSize)) +} + +func (n *node) nextOffset(h int) uint32 { + return n.tower[h].nextOffset.Load() +} + +func (n *node) prevOffset(h int) uint32 { + return n.tower[h].prevOffset.Load() +} + +func (n *node) casNextOffset(h int, old, val uint32) bool { + return n.tower[h].nextOffset.CompareAndSwap(old, val) +} + +func (n *node) casPrevOffset(h int, old, val uint32) bool { + return n.tower[h].prevOffset.CompareAndSwap(old, val) +} diff --git a/pebble/internal/arenaskl/race_test.go b/pebble/internal/arenaskl/race_test.go new file mode 100644 index 0000000..b9310c9 --- /dev/null +++ b/pebble/internal/arenaskl/race_test.go @@ -0,0 +1,42 @@ +//go:build race +// +build race + +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package arenaskl + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +// TestNodeArenaEnd tests allocating a node at the boundary of an arena. In Go +// 1.14 when the race detector is running, Go will also perform some pointer +// alignment checks. It will detect alignment issues, for example #667 where a +// node's memory would straddle the arena boundary, with unused regions of the +// node struct dipping into unallocated memory. 
This test is only run when the +// race build tag is provided. +func TestNodeArenaEnd(t *testing.T) { + ikey := makeIkey("a") + val := []byte("b") + + // Rather than hardcode an arena size at just the right size, try + // allocating using successively larger arena sizes until we allocate + // successfully. The prior attempt will have exercised the right code + // path. + for i := uint32(1); i < 256; i++ { + a := newArena(i) + _, err := newNode(a, 1, ikey, val) + if err == nil { + // We reached an arena size big enough to allocate a node. + // If there's an issue at the boundary, the race detector would + // have found it by now. + t.Log(i) + break + } + require.Equal(t, ErrArenaFull, err) + } +} diff --git a/pebble/internal/arenaskl/skl.go b/pebble/internal/arenaskl/skl.go new file mode 100644 index 0000000..ef1ebfc --- /dev/null +++ b/pebble/internal/arenaskl/skl.go @@ -0,0 +1,464 @@ +/* + * Copyright 2017 Dgraph Labs, Inc. and Contributors + * Modifications copyright (C) 2017 Andy Kimball and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +Adapted from RocksDB inline skiplist. + +Key differences: +- No optimization for sequential inserts (no "prev"). +- No custom comparator. +- Support overwrites. This requires care when we see the same key when inserting. + For RocksDB or LevelDB, overwrites are implemented as a newer sequence number in the key, so + there is no need for values. We don't intend to support versioning. 
In-place updates of values + would be more efficient. +- We discard all non-concurrent code. +- We do not support Splices. This simplifies the code a lot. +- No AllocateNode or other pointer arithmetic. +- We combine the findLessThan, findGreaterOrEqual, etc into one function. +*/ + +/* +Further adapted from Badger: https://github.com/dgraph-io/badger. + +Key differences: +- Support for previous pointers - doubly linked lists. Note that it's up to higher + level code to deal with the intermediate state that occurs during insertion, + where node A is linked to node B, but node B is not yet linked back to node A. +- Iterator includes mutator functions. +*/ + +package arenaskl // import "github.com/cockroachdb/pebble/internal/arenaskl" + +import ( + "math" + "runtime" + "sync/atomic" + "unsafe" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/fastrand" +) + +const ( + maxHeight = 20 + maxNodeSize = int(unsafe.Sizeof(node{})) + linksSize = int(unsafe.Sizeof(links{})) + pValue = 1 / math.E +) + +// ErrRecordExists indicates that an entry with the specified key already +// exists in the skiplist. Duplicate entries are not directly supported and +// instead must be handled by the user by appending a unique version suffix to +// keys. +var ErrRecordExists = errors.New("record with this key already exists") + +// Skiplist is a fast, concurrent skiplist implementation that supports forward +// and backward iteration. See batchskl.Skiplist for a non-concurrent +// skiplist. Keys and values are immutable once added to the skiplist and +// deletion is not supported. Instead, higher-level code is expected to add new +// entries that shadow existing entries and perform deletion via tombstones. It +// is up to the user to process these shadow entries and tombstones +// appropriately during retrieval. 
+type Skiplist struct { + arena *Arena + cmp base.Compare + head *node + tail *node + height atomic.Uint32 // Current height. 1 <= height <= maxHeight. CAS. + + // If set to true by tests, then extra delays are added to make it easier to + // detect unusual race conditions. + testing bool +} + +// Inserter TODO(peter) +type Inserter struct { + spl [maxHeight]splice + height uint32 +} + +// Add TODO(peter) +func (ins *Inserter) Add(list *Skiplist, key base.InternalKey, value []byte) error { + return list.addInternal(key, value, ins) +} + +var ( + probabilities [maxHeight]uint32 +) + +func init() { + // Precompute the skiplist probabilities so that only a single random number + // needs to be generated and so that the optimal pvalue can be used (inverse + // of Euler's number). + p := float64(1.0) + for i := 0; i < maxHeight; i++ { + probabilities[i] = uint32(float64(math.MaxUint32) * p) + p *= pValue + } +} + +// NewSkiplist constructs and initializes a new, empty skiplist. All nodes, keys, +// and values in the skiplist will be allocated from the given arena. +func NewSkiplist(arena *Arena, cmp base.Compare) *Skiplist { + skl := &Skiplist{} + skl.Reset(arena, cmp) + return skl +} + +// Reset the skiplist to empty and re-initialize. +func (s *Skiplist) Reset(arena *Arena, cmp base.Compare) { + // Allocate head and tail nodes. + head, err := newRawNode(arena, maxHeight, 0, 0) + if err != nil { + panic("arenaSize is not large enough to hold the head node") + } + head.keyOffset = 0 + + tail, err := newRawNode(arena, maxHeight, 0, 0) + if err != nil { + panic("arenaSize is not large enough to hold the tail node") + } + tail.keyOffset = 0 + + // Link all head/tail levels together. 
+ headOffset := arena.getPointerOffset(unsafe.Pointer(head)) + tailOffset := arena.getPointerOffset(unsafe.Pointer(tail)) + for i := 0; i < maxHeight; i++ { + head.tower[i].nextOffset.Store(tailOffset) + tail.tower[i].prevOffset.Store(headOffset) + } + + *s = Skiplist{ + arena: arena, + cmp: cmp, + head: head, + tail: tail, + } + s.height.Store(1) +} + +// Height returns the height of the highest tower within any of the nodes that +// have ever been allocated as part of this skiplist. +func (s *Skiplist) Height() uint32 { return s.height.Load() } + +// Arena returns the arena backing this skiplist. +func (s *Skiplist) Arena() *Arena { return s.arena } + +// Size returns the number of bytes that have allocated from the arena. +func (s *Skiplist) Size() uint32 { return s.arena.Size() } + +// Add adds a new key if it does not yet exist. If the key already exists, then +// Add returns ErrRecordExists. If there isn't enough room in the arena, then +// Add returns ErrArenaFull. +func (s *Skiplist) Add(key base.InternalKey, value []byte) error { + var ins Inserter + return s.addInternal(key, value, &ins) +} + +func (s *Skiplist) addInternal(key base.InternalKey, value []byte, ins *Inserter) error { + if s.findSplice(key, ins) { + // Found a matching node, but handle case where it's been deleted. + return ErrRecordExists + } + + if s.testing { + // Add delay to make it easier to test race between this thread + // and another thread that sees the intermediate state between + // finding the splice and using it. + runtime.Gosched() + } + + nd, height, err := s.newNode(key, value) + if err != nil { + return err + } + + ndOffset := s.arena.getPointerOffset(unsafe.Pointer(nd)) + + // We always insert from the base level and up. After you add a node in base + // level, we cannot create a node in the level above because it would have + // discovered the node in the base level. 
+ var found bool + var invalidateSplice bool + for i := 0; i < int(height); i++ { + prev := ins.spl[i].prev + next := ins.spl[i].next + + if prev == nil { + // New node increased the height of the skiplist, so assume that the + // new level has not yet been populated. + if next != nil { + panic("next is expected to be nil, since prev is nil") + } + + prev = s.head + next = s.tail + } + + // +----------------+ +------------+ +----------------+ + // | prev | | nd | | next | + // | prevNextOffset |---->| | | | + // | |<----| prevOffset | | | + // | | | nextOffset |---->| | + // | | | |<----| nextPrevOffset | + // +----------------+ +------------+ +----------------+ + // + // 1. Initialize prevOffset and nextOffset to point to prev and next. + // 2. CAS prevNextOffset to repoint from next to nd. + // 3. CAS nextPrevOffset to repoint from prev to nd. + for { + prevOffset := s.arena.getPointerOffset(unsafe.Pointer(prev)) + nextOffset := s.arena.getPointerOffset(unsafe.Pointer(next)) + nd.tower[i].init(prevOffset, nextOffset) + + // Check whether next has an updated link to prev. If it does not, + // that can mean one of two things: + // 1. The thread that added the next node hasn't yet had a chance + // to add the prev link (but will shortly). + // 2. Another thread has added a new node between prev and next. + nextPrevOffset := next.prevOffset(i) + if nextPrevOffset != prevOffset { + // Determine whether #1 or #2 is true by checking whether prev + // is still pointing to next. As long as the atomic operations + // have at least acquire/release semantics (no need for + // sequential consistency), this works, as it is equivalent to + // the "publication safety" pattern. + prevNextOffset := prev.nextOffset(i) + if prevNextOffset == nextOffset { + // Ok, case #1 is true, so help the other thread along by + // updating the next node's prev link. 
+ next.casPrevOffset(i, nextPrevOffset, prevOffset) + } + } + + if prev.casNextOffset(i, nextOffset, ndOffset) { + // Managed to insert nd between prev and next, so update the next + // node's prev link and go to the next level. + if s.testing { + // Add delay to make it easier to test race between this thread + // and another thread that sees the intermediate state between + // setting next and setting prev. + runtime.Gosched() + } + + next.casPrevOffset(i, prevOffset, ndOffset) + break + } + + // CAS failed. We need to recompute prev and next. It is unlikely to + // be helpful to try to use a different level as we redo the search, + // because it is unlikely that lots of nodes are inserted between prev + // and next. + prev, next, found = s.findSpliceForLevel(key, i, prev) + if found { + if i != 0 { + panic("how can another thread have inserted a node at a non-base level?") + } + + return ErrRecordExists + } + invalidateSplice = true + } + } + + // If we had to recompute the splice for a level, invalidate the entire + // cached splice. + if invalidateSplice { + ins.height = 0 + } else { + // The splice was valid. We inserted a node between spl[i].prev and + // spl[i].next. Optimistically update spl[i].prev for use in a subsequent + // call to add. + for i := uint32(0); i < height; i++ { + ins.spl[i].prev = nd + } + } + + return nil +} + +// NewIter returns a new Iterator object. The lower and upper bound parameters +// control the range of keys the iterator will return. Specifying for nil for +// lower or upper bound disables the check for that boundary. Note that lower +// bound is not checked on {SeekGE,First} and upper bound is not check on +// {SeekLT,Last}. The user is expected to perform that check. Note that it is +// safe for an iterator to be copied by value. 
+func (s *Skiplist) NewIter(lower, upper []byte) *Iterator { + it := iterPool.Get().(*Iterator) + *it = Iterator{list: s, nd: s.head, lower: lower, upper: upper} + return it +} + +// NewFlushIter returns a new flushIterator, which is similar to an Iterator +// but also sets the current number of the bytes that have been iterated +// through. +func (s *Skiplist) NewFlushIter(bytesFlushed *uint64) base.InternalIterator { + return &flushIterator{ + Iterator: Iterator{list: s, nd: s.head}, + bytesIterated: bytesFlushed, + } +} + +func (s *Skiplist) newNode( + key base.InternalKey, value []byte, +) (nd *node, height uint32, err error) { + height = s.randomHeight() + nd, err = newNode(s.arena, height, key, value) + if err != nil { + return + } + + // Try to increase s.height via CAS. + listHeight := s.Height() + for height > listHeight { + if s.height.CompareAndSwap(listHeight, height) { + // Successfully increased skiplist.height. + break + } + + listHeight = s.Height() + } + + return +} + +func (s *Skiplist) randomHeight() uint32 { + rnd := fastrand.Uint32() + + h := uint32(1) + for h < maxHeight && rnd <= probabilities[h] { + h++ + } + + return h +} + +func (s *Skiplist) findSplice(key base.InternalKey, ins *Inserter) (found bool) { + listHeight := s.Height() + var level int + + prev := s.head + if ins.height < listHeight { + // Our cached height is less than the list height, which means there were + // inserts that increased the height of the list. Recompute the splice from + // scratch. + ins.height = listHeight + level = int(ins.height) + } else { + // Our cached height is equal to the list height. + for ; level < int(listHeight); level++ { + spl := &ins.spl[level] + if s.getNext(spl.prev, level) != spl.next { + // One or more nodes have been inserted between the splice at this + // level. + continue + } + if spl.prev != s.head && !s.keyIsAfterNode(spl.prev, key) { + // Key lies before splice. 
+ level = int(listHeight) + break + } + if spl.next != s.tail && s.keyIsAfterNode(spl.next, key) { + // Key lies after splice. + level = int(listHeight) + break + } + // The splice brackets the key! + prev = spl.prev + break + } + } + + for level = level - 1; level >= 0; level-- { + var next *node + prev, next, found = s.findSpliceForLevel(key, level, prev) + if next == nil { + next = s.tail + } + ins.spl[level].init(prev, next) + } + + return +} + +func (s *Skiplist) findSpliceForLevel( + key base.InternalKey, level int, start *node, +) (prev, next *node, found bool) { + prev = start + + for { + // Assume prev.key < key. + next = s.getNext(prev, level) + if next == s.tail { + // Tail node, so done. + break + } + + offset, size := next.keyOffset, next.keySize + nextKey := s.arena.buf[offset : offset+size] + cmp := s.cmp(key.UserKey, nextKey) + if cmp < 0 { + // We are done for this level, since prev.key < key < next.key. + break + } + if cmp == 0 { + // User-key equality. + if key.Trailer == next.keyTrailer { + // Internal key equality. + found = true + break + } + if key.Trailer > next.keyTrailer { + // We are done for this level, since prev.key < key < next.key. + break + } + } + + // Keep moving right on this level. + prev = next + } + + return +} + +func (s *Skiplist) keyIsAfterNode(nd *node, key base.InternalKey) bool { + ndKey := s.arena.buf[nd.keyOffset : nd.keyOffset+nd.keySize] + cmp := s.cmp(ndKey, key.UserKey) + if cmp < 0 { + return true + } + if cmp > 0 { + return false + } + // User-key equality. + if key.Trailer == nd.keyTrailer { + // Internal key equality. 
+ return false + } + return key.Trailer < nd.keyTrailer +} + +func (s *Skiplist) getNext(nd *node, h int) *node { + offset := nd.tower[h].nextOffset.Load() + return (*node)(s.arena.getPointer(offset)) +} + +func (s *Skiplist) getPrev(nd *node, h int) *node { + offset := nd.tower[h].prevOffset.Load() + return (*node)(s.arena.getPointer(offset)) +} diff --git a/pebble/internal/arenaskl/skl_test.go b/pebble/internal/arenaskl/skl_test.go new file mode 100644 index 0000000..6e74a4a --- /dev/null +++ b/pebble/internal/arenaskl/skl_test.go @@ -0,0 +1,972 @@ +/* + * Copyright 2017 Dgraph Labs, Inc. and Contributors + * Modifications copyright (C) 2017 Andy Kimball and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package arenaskl + +import ( + "bytes" + "encoding/binary" + "fmt" + "strconv" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/cockroachdb/pebble/internal/base" + "github.com/stretchr/testify/require" + "golang.org/x/exp/rand" +) + +const arenaSize = 1 << 20 + +// iterAdapter adapts the new Iterator API which returns the key and value from +// positioning methods (Seek*, First, Last, Next, Prev) to the old API which +// returned a boolean corresponding to Valid. Only used by test code. 
+type iterAdapter struct { + *Iterator + key *base.InternalKey + val []byte +} + +func newIterAdapter(iter *Iterator) *iterAdapter { + return &iterAdapter{ + Iterator: iter, + } +} + +func (i *iterAdapter) update(key *base.InternalKey, val base.LazyValue) bool { + i.key = key + i.val = val.InPlaceValue() + return i.key != nil +} + +func (i *iterAdapter) String() string { + return "iter-adapter" +} + +func (i *iterAdapter) SeekGE(key []byte, flags base.SeekGEFlags) bool { + return i.update(i.Iterator.SeekGE(key, flags)) +} + +func (i *iterAdapter) SeekPrefixGE(prefix, key []byte, flags base.SeekGEFlags) bool { + return i.update(i.Iterator.SeekPrefixGE(prefix, key, flags)) +} + +func (i *iterAdapter) SeekLT(key []byte, flags base.SeekLTFlags) bool { + return i.update(i.Iterator.SeekLT(key, flags)) +} + +func (i *iterAdapter) First() bool { + return i.update(i.Iterator.First()) +} + +func (i *iterAdapter) Last() bool { + return i.update(i.Iterator.Last()) +} + +func (i *iterAdapter) Next() bool { + return i.update(i.Iterator.Next()) +} + +func (i *iterAdapter) Prev() bool { + return i.update(i.Iterator.Prev()) +} + +func (i *iterAdapter) Key() base.InternalKey { + return *i.key +} + +func (i *iterAdapter) Value() []byte { + return i.val +} + +func (i *iterAdapter) Valid() bool { + return i.key != nil +} + +func makeIntKey(i int) base.InternalKey { + return base.InternalKey{UserKey: []byte(fmt.Sprintf("%05d", i))} +} + +func makeKey(s string) []byte { + return []byte(s) +} + +func makeIkey(s string) base.InternalKey { + return base.InternalKey{UserKey: []byte(s)} +} + +func makeValue(i int) []byte { + return []byte(fmt.Sprintf("v%05d", i)) +} + +func makeInserterAdd(s *Skiplist) func(key base.InternalKey, value []byte) error { + ins := &Inserter{} + return func(key base.InternalKey, value []byte) error { + return ins.Add(s, key, value) + } +} + +// length iterates over skiplist to give exact size. 
+func length(s *Skiplist) int { + count := 0 + + it := newIterAdapter(s.NewIter(nil, nil)) + for valid := it.First(); valid; valid = it.Next() { + count++ + } + + return count +} + +// length iterates over skiplist in reverse order to give exact size. +func lengthRev(s *Skiplist) int { + count := 0 + + it := newIterAdapter(s.NewIter(nil, nil)) + for valid := it.Last(); valid; valid = it.Prev() { + count++ + } + + return count +} + +func TestEmpty(t *testing.T) { + key := makeKey("aaa") + l := NewSkiplist(newArena(arenaSize), bytes.Compare) + it := newIterAdapter(l.NewIter(nil, nil)) + + require.False(t, it.Valid()) + + it.First() + require.False(t, it.Valid()) + + it.Last() + require.False(t, it.Valid()) + + require.False(t, it.SeekGE(key, base.SeekGEFlagsNone)) + require.False(t, it.Valid()) +} + +func TestFull(t *testing.T) { + l := NewSkiplist(newArena(1000), bytes.Compare) + + foundArenaFull := false + for i := 0; i < 100; i++ { + err := l.Add(makeIntKey(i), makeValue(i)) + if err == ErrArenaFull { + foundArenaFull = true + break + } + } + + require.True(t, foundArenaFull) + + err := l.Add(makeIkey("someval"), nil) + require.Equal(t, ErrArenaFull, err) +} + +// TestBasic tests single-threaded seeks and adds. +func TestBasic(t *testing.T) { + for _, inserter := range []bool{false, true} { + t.Run(fmt.Sprintf("inserter=%t", inserter), func(t *testing.T) { + l := NewSkiplist(newArena(arenaSize), bytes.Compare) + it := newIterAdapter(l.NewIter(nil, nil)) + + add := l.Add + if inserter { + add = makeInserterAdd(l) + } + + // Try adding values. 
+ add(makeIkey("key1"), makeValue(1)) + add(makeIkey("key3"), makeValue(3)) + add(makeIkey("key2"), makeValue(2)) + + require.True(t, it.SeekGE(makeKey("key"), base.SeekGEFlagsNone)) + require.True(t, it.Valid()) + require.NotEqual(t, "key", it.Key().UserKey) + + require.True(t, it.SeekGE(makeKey("key1"), base.SeekGEFlagsNone)) + require.EqualValues(t, "key1", it.Key().UserKey) + require.EqualValues(t, makeValue(1), it.Value()) + + require.True(t, it.SeekGE(makeKey("key2"), base.SeekGEFlagsNone)) + require.EqualValues(t, "key2", it.Key().UserKey) + require.EqualValues(t, makeValue(2), it.Value()) + + require.True(t, it.SeekGE(makeKey("key3"), base.SeekGEFlagsNone)) + require.EqualValues(t, "key3", it.Key().UserKey) + require.EqualValues(t, makeValue(3), it.Value()) + + key := makeIkey("a") + key.SetSeqNum(1) + add(key, nil) + key.SetSeqNum(2) + add(key, nil) + + require.True(t, it.SeekGE(makeKey("a"), base.SeekGEFlagsNone)) + require.True(t, it.Valid()) + require.EqualValues(t, "a", it.Key().UserKey) + require.EqualValues(t, 2, it.Key().SeqNum()) + + require.True(t, it.Next()) + require.True(t, it.Valid()) + require.EqualValues(t, "a", it.Key().UserKey) + require.EqualValues(t, 1, it.Key().SeqNum()) + + key = makeIkey("b") + key.SetSeqNum(2) + add(key, nil) + key.SetSeqNum(1) + add(key, nil) + + require.True(t, it.SeekGE(makeKey("b"), base.SeekGEFlagsNone)) + require.True(t, it.Valid()) + require.EqualValues(t, "b", it.Key().UserKey) + require.EqualValues(t, 2, it.Key().SeqNum()) + + require.True(t, it.Next()) + require.True(t, it.Valid()) + require.EqualValues(t, "b", it.Key().UserKey) + require.EqualValues(t, 1, it.Key().SeqNum()) + }) + } +} + +// TestConcurrentBasic tests concurrent writes followed by concurrent reads. 
+func TestConcurrentBasic(t *testing.T) { + const n = 1000 + + for _, inserter := range []bool{false, true} { + t.Run(fmt.Sprintf("inserter=%t", inserter), func(t *testing.T) { + // Set testing flag to make it easier to trigger unusual race conditions. + l := NewSkiplist(newArena(arenaSize), bytes.Compare) + l.testing = true + + var wg sync.WaitGroup + for i := 0; i < n; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + + if inserter { + var ins Inserter + ins.Add(l, makeIntKey(i), makeValue(i)) + } else { + l.Add(makeIntKey(i), makeValue(i)) + } + }(i) + } + wg.Wait() + + // Check values. Concurrent reads. + for i := 0; i < n; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + + it := newIterAdapter(l.NewIter(nil, nil)) + require.True(t, it.SeekGE(makeKey(fmt.Sprintf("%05d", i)), base.SeekGEFlagsNone)) + require.EqualValues(t, fmt.Sprintf("%05d", i), it.Key().UserKey) + }(i) + } + wg.Wait() + require.Equal(t, n, length(l)) + require.Equal(t, n, lengthRev(l)) + }) + } +} + +// TestConcurrentOneKey will read while writing to one single key. +func TestConcurrentOneKey(t *testing.T) { + const n = 100 + key := makeKey("thekey") + ikey := makeIkey("thekey") + + for _, inserter := range []bool{false, true} { + t.Run(fmt.Sprintf("inserter=%t", inserter), func(t *testing.T) { + // Set testing flag to make it easier to trigger unusual race conditions. + l := NewSkiplist(newArena(arenaSize), bytes.Compare) + l.testing = true + + var wg sync.WaitGroup + writeDone := make(chan struct{}, 1) + for i := 0; i < n; i++ { + wg.Add(1) + go func(i int) { + defer func() { + wg.Done() + select { + case writeDone <- struct{}{}: + default: + } + }() + + if inserter { + var ins Inserter + ins.Add(l, ikey, makeValue(i)) + } else { + l.Add(ikey, makeValue(i)) + } + }(i) + } + // Wait until at least some write made it such that reads return a value. 
+ <-writeDone + var sawValue atomic.Int32 + for i := 0; i < n; i++ { + wg.Add(1) + go func() { + defer wg.Done() + + it := newIterAdapter(l.NewIter(nil, nil)) + it.SeekGE(key, base.SeekGEFlagsNone) + require.True(t, it.Valid()) + require.True(t, bytes.Equal(key, it.Key().UserKey)) + + sawValue.Add(1) + v, err := strconv.Atoi(string(it.Value()[1:])) + require.NoError(t, err) + require.True(t, 0 <= v && v < n) + }() + } + wg.Wait() + require.Equal(t, int32(n), sawValue.Load()) + require.Equal(t, 1, length(l)) + require.Equal(t, 1, lengthRev(l)) + }) + } +} + +func TestSkiplistAdd(t *testing.T) { + for _, inserter := range []bool{false, true} { + t.Run(fmt.Sprintf("inserter=%t", inserter), func(t *testing.T) { + l := NewSkiplist(newArena(arenaSize), bytes.Compare) + it := newIterAdapter(l.NewIter(nil, nil)) + + add := l.Add + if inserter { + add = makeInserterAdd(l) + } + + // Add nil key and value (treated same as empty). + err := add(base.InternalKey{}, nil) + require.Nil(t, err) + require.True(t, it.SeekGE([]byte{}, base.SeekGEFlagsNone)) + require.EqualValues(t, []byte{}, it.Key().UserKey) + require.EqualValues(t, []byte{}, it.Value()) + + l = NewSkiplist(newArena(arenaSize), bytes.Compare) + it = newIterAdapter(l.NewIter(nil, nil)) + + add = l.Add + if inserter { + add = makeInserterAdd(l) + } + + // Add empty key and value (treated same as nil). + err = add(makeIkey(""), []byte{}) + require.Nil(t, err) + require.True(t, it.SeekGE([]byte{}, base.SeekGEFlagsNone)) + require.EqualValues(t, []byte{}, it.Key().UserKey) + require.EqualValues(t, []byte{}, it.Value()) + + // Add to empty list. + err = add(makeIntKey(2), makeValue(2)) + require.Nil(t, err) + require.True(t, it.SeekGE(makeKey("00002"), base.SeekGEFlagsNone)) + require.EqualValues(t, "00002", it.Key().UserKey) + require.EqualValues(t, makeValue(2), it.Value()) + + // Add first element in non-empty list. 
+ err = add(makeIntKey(1), makeValue(1)) + require.Nil(t, err) + require.True(t, it.SeekGE(makeKey("00001"), base.SeekGEFlagsNone)) + require.EqualValues(t, "00001", it.Key().UserKey) + require.EqualValues(t, makeValue(1), it.Value()) + + // Add last element in non-empty list. + err = add(makeIntKey(4), makeValue(4)) + require.Nil(t, err) + require.True(t, it.SeekGE(makeKey("00004"), base.SeekGEFlagsNone)) + require.EqualValues(t, "00004", it.Key().UserKey) + require.EqualValues(t, makeValue(4), it.Value()) + + // Add element in middle of list. + err = add(makeIntKey(3), makeValue(3)) + require.Nil(t, err) + require.True(t, it.SeekGE(makeKey("00003"), base.SeekGEFlagsNone)) + require.EqualValues(t, "00003", it.Key().UserKey) + require.EqualValues(t, makeValue(3), it.Value()) + + // Try to add element that already exists. + err = add(makeIntKey(2), nil) + require.Equal(t, ErrRecordExists, err) + require.EqualValues(t, "00003", it.Key().UserKey) + require.EqualValues(t, makeValue(3), it.Value()) + + require.Equal(t, 5, length(l)) + require.Equal(t, 5, lengthRev(l)) + }) + } +} + +// TestConcurrentAdd races between adding same nodes. +func TestConcurrentAdd(t *testing.T) { + for _, inserter := range []bool{false, true} { + t.Run(fmt.Sprintf("inserter=%t", inserter), func(t *testing.T) { + const n = 100 + + // Set testing flag to make it easier to trigger unusual race conditions. 
+ l := NewSkiplist(newArena(arenaSize), bytes.Compare) + l.testing = true + + start := make([]sync.WaitGroup, n) + end := make([]sync.WaitGroup, n) + + for i := 0; i < n; i++ { + start[i].Add(1) + end[i].Add(2) + } + + for f := 0; f < 2; f++ { + go func(f int) { + it := newIterAdapter(l.NewIter(nil, nil)) + add := l.Add + if inserter { + add = makeInserterAdd(l) + } + + for i := 0; i < n; i++ { + start[i].Wait() + + key := makeIntKey(i) + if add(key, nil) == nil { + require.True(t, it.SeekGE(key.UserKey, base.SeekGEFlagsNone)) + require.EqualValues(t, key, it.Key()) + } + + end[i].Done() + } + }(f) + } + + for i := 0; i < n; i++ { + start[i].Done() + end[i].Wait() + } + + require.Equal(t, n, length(l)) + require.Equal(t, n, lengthRev(l)) + }) + } +} + +// TestIteratorNext tests a basic iteration over all nodes from the beginning. +func TestIteratorNext(t *testing.T) { + const n = 100 + l := NewSkiplist(newArena(arenaSize), bytes.Compare) + it := newIterAdapter(l.NewIter(nil, nil)) + + require.False(t, it.Valid()) + + it.First() + require.False(t, it.Valid()) + + for i := n - 1; i >= 0; i-- { + l.Add(makeIntKey(i), makeValue(i)) + } + + it.First() + for i := 0; i < n; i++ { + require.True(t, it.Valid()) + require.EqualValues(t, makeIntKey(i), it.Key()) + require.EqualValues(t, makeValue(i), it.Value()) + it.Next() + } + require.False(t, it.Valid()) +} + +// TestIteratorPrev tests a basic iteration over all nodes from the end. 
+func TestIteratorPrev(t *testing.T) { + const n = 100 + l := NewSkiplist(newArena(arenaSize), bytes.Compare) + it := newIterAdapter(l.NewIter(nil, nil)) + + require.False(t, it.Valid()) + + it.Last() + require.False(t, it.Valid()) + + var ins Inserter + for i := 0; i < n; i++ { + ins.Add(l, makeIntKey(i), makeValue(i)) + } + + it.Last() + for i := n - 1; i >= 0; i-- { + require.True(t, it.Valid()) + require.EqualValues(t, makeIntKey(i), it.Key()) + require.EqualValues(t, makeValue(i), it.Value()) + it.Prev() + } + require.False(t, it.Valid()) +} + +func TestIteratorSeekGEAndSeekPrefixGE(t *testing.T) { + const n = 100 + l := NewSkiplist(newArena(arenaSize), bytes.Compare) + it := newIterAdapter(l.NewIter(nil, nil)) + + require.False(t, it.Valid()) + it.First() + require.False(t, it.Valid()) + // 1000, 1010, 1020, ..., 1990. + + var ins Inserter + for i := n - 1; i >= 0; i-- { + v := i*10 + 1000 + ins.Add(l, makeIntKey(v), makeValue(v)) + } + + require.True(t, it.SeekGE(makeKey(""), base.SeekGEFlagsNone)) + require.True(t, it.Valid()) + require.EqualValues(t, "01000", it.Key().UserKey) + require.EqualValues(t, "v01000", it.Value()) + + require.True(t, it.SeekGE(makeKey("01000"), base.SeekGEFlagsNone)) + require.True(t, it.Valid()) + require.EqualValues(t, "01000", it.Key().UserKey) + require.EqualValues(t, "v01000", it.Value()) + + require.True(t, it.SeekGE(makeKey("01005"), base.SeekGEFlagsNone)) + require.True(t, it.Valid()) + require.EqualValues(t, "01010", it.Key().UserKey) + require.EqualValues(t, "v01010", it.Value()) + + require.True(t, it.SeekGE(makeKey("01010"), base.SeekGEFlagsNone)) + require.True(t, it.Valid()) + require.EqualValues(t, "01010", it.Key().UserKey) + require.EqualValues(t, "v01010", it.Value()) + + require.False(t, it.SeekGE(makeKey("99999"), base.SeekGEFlagsNone)) + require.False(t, it.Valid()) + + // Test SeekGE with trySeekUsingNext optimization. 
+ { + require.True(t, it.SeekGE(makeKey("01000"), base.SeekGEFlagsNone)) + require.True(t, it.Valid()) + require.EqualValues(t, "01000", it.Key().UserKey) + require.EqualValues(t, "v01000", it.Value()) + + // Seeking to the same key. + require.True(t, it.SeekGE(makeKey("01000"), base.SeekGEFlagsNone.EnableTrySeekUsingNext())) + require.True(t, it.Valid()) + require.EqualValues(t, "01000", it.Key().UserKey) + require.EqualValues(t, "v01000", it.Value()) + + // Seeking to a nearby key that can be reached using Next. + require.True(t, it.SeekGE(makeKey("01020"), base.SeekGEFlagsNone.EnableTrySeekUsingNext())) + require.True(t, it.Valid()) + require.EqualValues(t, "01020", it.Key().UserKey) + require.EqualValues(t, "v01020", it.Value()) + + // Seeking to a key that cannot be reached using Next. + require.True(t, it.SeekGE(makeKey("01200"), base.SeekGEFlagsNone.EnableTrySeekUsingNext())) + require.True(t, it.Valid()) + require.EqualValues(t, "01200", it.Key().UserKey) + require.EqualValues(t, "v01200", it.Value()) + + // Seeking to an earlier key, but the caller lies. Incorrect result. + require.True(t, it.SeekGE(makeKey("01100"), base.SeekGEFlagsNone.EnableTrySeekUsingNext())) + require.True(t, it.Valid()) + require.EqualValues(t, "01200", it.Key().UserKey) + require.EqualValues(t, "v01200", it.Value()) + + // Telling the truth works. + require.True(t, it.SeekGE(makeKey("01100"), base.SeekGEFlagsNone)) + require.True(t, it.Valid()) + require.EqualValues(t, "01100", it.Key().UserKey) + require.EqualValues(t, "v01100", it.Value()) + } + + // Test SeekPrefixGE with trySeekUsingNext optimization. + { + require.True(t, it.SeekPrefixGE(makeKey("01000"), makeKey("01000"), base.SeekGEFlagsNone)) + require.True(t, it.Valid()) + require.EqualValues(t, "01000", it.Key().UserKey) + require.EqualValues(t, "v01000", it.Value()) + + // Seeking to the same key. 
+ require.True(t, it.SeekPrefixGE(makeKey("01000"), makeKey("01000"), base.SeekGEFlagsNone.EnableTrySeekUsingNext())) + require.True(t, it.Valid()) + require.EqualValues(t, "01000", it.Key().UserKey) + require.EqualValues(t, "v01000", it.Value()) + + // Seeking to a nearby key that can be reached using Next. + require.True(t, it.SeekPrefixGE(makeKey("01020"), makeKey("01020"), base.SeekGEFlagsNone.EnableTrySeekUsingNext())) + require.True(t, it.Valid()) + require.EqualValues(t, "01020", it.Key().UserKey) + require.EqualValues(t, "v01020", it.Value()) + + // Seeking to a key that cannot be reached using Next. + require.True(t, it.SeekPrefixGE(makeKey("01200"), makeKey("01200"), base.SeekGEFlagsNone.EnableTrySeekUsingNext())) + require.True(t, it.Valid()) + require.EqualValues(t, "01200", it.Key().UserKey) + require.EqualValues(t, "v01200", it.Value()) + + // Seeking to an earlier key, but the caller lies. Incorrect result. + require.True(t, it.SeekPrefixGE(makeKey("01100"), makeKey("01100"), base.SeekGEFlagsNone.EnableTrySeekUsingNext())) + require.True(t, it.Valid()) + require.EqualValues(t, "01200", it.Key().UserKey) + require.EqualValues(t, "v01200", it.Value()) + + // Telling the truth works. + require.True(t, it.SeekPrefixGE(makeKey("01100"), makeKey("01100"), base.SeekGEFlagsNone)) + require.True(t, it.Valid()) + require.EqualValues(t, "01100", it.Key().UserKey) + require.EqualValues(t, "v01100", it.Value()) + } + + // Test seek for empty key. 
+ ins.Add(l, base.InternalKey{}, nil) + require.True(t, it.SeekGE([]byte{}, base.SeekGEFlagsNone)) + require.True(t, it.Valid()) + require.EqualValues(t, "", it.Key().UserKey) + + require.True(t, it.SeekGE(makeKey(""), base.SeekGEFlagsNone)) + require.True(t, it.Valid()) + require.EqualValues(t, "", it.Key().UserKey) +} + +func TestIteratorSeekLT(t *testing.T) { + const n = 100 + l := NewSkiplist(newArena(arenaSize), bytes.Compare) + it := newIterAdapter(l.NewIter(nil, nil)) + + require.False(t, it.Valid()) + it.First() + require.False(t, it.Valid()) + // 1000, 1010, 1020, ..., 1990. + var ins Inserter + for i := n - 1; i >= 0; i-- { + v := i*10 + 1000 + ins.Add(l, makeIntKey(v), makeValue(v)) + } + + require.False(t, it.SeekLT(makeKey(""), base.SeekLTFlagsNone)) + require.False(t, it.Valid()) + + require.False(t, it.SeekLT(makeKey("01000"), base.SeekLTFlagsNone)) + require.False(t, it.Valid()) + + require.True(t, it.SeekLT(makeKey("01001"), base.SeekLTFlagsNone)) + require.True(t, it.Valid()) + require.EqualValues(t, "01000", it.Key().UserKey) + require.EqualValues(t, "v01000", it.Value()) + + require.True(t, it.SeekLT(makeKey("01005"), base.SeekLTFlagsNone)) + require.True(t, it.Valid()) + require.EqualValues(t, "01000", it.Key().UserKey) + require.EqualValues(t, "v01000", it.Value()) + + require.True(t, it.SeekLT(makeKey("01991"), base.SeekLTFlagsNone)) + require.True(t, it.Valid()) + require.EqualValues(t, "01990", it.Key().UserKey) + require.EqualValues(t, "v01990", it.Value()) + + require.True(t, it.SeekLT(makeKey("99999"), base.SeekLTFlagsNone)) + require.True(t, it.Valid()) + require.EqualValues(t, "01990", it.Key().UserKey) + require.EqualValues(t, "v01990", it.Value()) + + // Test seek for empty key. 
+ ins.Add(l, base.InternalKey{}, nil) + require.False(t, it.SeekLT([]byte{}, base.SeekLTFlagsNone)) + require.False(t, it.Valid()) + + require.True(t, it.SeekLT(makeKey("\x01"), base.SeekLTFlagsNone)) + require.True(t, it.Valid()) + require.EqualValues(t, "", it.Key().UserKey) +} + +// TODO(peter): test First and Last. +func TestIteratorBounds(t *testing.T) { + l := NewSkiplist(newArena(arenaSize), bytes.Compare) + for i := 1; i < 10; i++ { + require.NoError(t, l.Add(makeIntKey(i), makeValue(i))) + } + + key := func(i int) []byte { + return makeIntKey(i).UserKey + } + + it := newIterAdapter(l.NewIter(key(3), key(7))) + + // SeekGE within the lower and upper bound succeeds. + for i := 3; i <= 6; i++ { + k := key(i) + require.True(t, it.SeekGE(k, base.SeekGEFlagsNone)) + require.EqualValues(t, string(k), string(it.Key().UserKey)) + } + + // SeekGE before the lower bound still succeeds (only the upper bound is + // checked). + for i := 1; i < 3; i++ { + k := key(i) + require.True(t, it.SeekGE(k, base.SeekGEFlagsNone)) + require.EqualValues(t, string(k), string(it.Key().UserKey)) + } + + // SeekGE beyond the upper bound fails. + for i := 7; i < 10; i++ { + require.False(t, it.SeekGE(key(i), base.SeekGEFlagsNone)) + } + + require.True(t, it.SeekGE(key(6), base.SeekGEFlagsNone)) + require.EqualValues(t, "00006", it.Key().UserKey) + require.EqualValues(t, "v00006", it.Value()) + + // Next into the upper bound fails. + require.False(t, it.Next()) + + // SeekLT within the lower and upper bound succeeds. + for i := 4; i <= 7; i++ { + require.True(t, it.SeekLT(key(i), base.SeekLTFlagsNone)) + require.EqualValues(t, string(key(i-1)), string(it.Key().UserKey)) + } + + // SeekLT beyond the upper bound still succeeds (only the lower bound is + // checked). + for i := 8; i < 9; i++ { + require.True(t, it.SeekLT(key(8), base.SeekLTFlagsNone)) + require.EqualValues(t, string(key(i-1)), string(it.Key().UserKey)) + } + + // SeekLT before the lower bound fails. 
+ for i := 1; i < 4; i++ { + require.False(t, it.SeekLT(key(i), base.SeekLTFlagsNone)) + } + + require.True(t, it.SeekLT(key(4), base.SeekLTFlagsNone)) + require.EqualValues(t, "00003", it.Key().UserKey) + require.EqualValues(t, "v00003", it.Value()) + + // Prev into the lower bound fails. + require.False(t, it.Prev()) +} + +func TestBytesIterated(t *testing.T) { + l := NewSkiplist(newArena(arenaSize), bytes.Compare) + emptySize := l.arena.Size() + for i := 0; i < 200; i++ { + bytesIterated := l.bytesIterated(t) + expected := uint64(l.arena.Size() - emptySize) + if bytesIterated != expected { + t.Fatalf("bytesIterated: got %d, want %d", bytesIterated, expected) + } + l.Add(base.InternalKey{UserKey: []byte{byte(i)}}, nil) + } +} + +// bytesIterated returns the number of bytes iterated in the skiplist. +func (s *Skiplist) bytesIterated(t *testing.T) (bytesIterated uint64) { + x := s.NewFlushIter(&bytesIterated) + var prevIterated uint64 + for key, _ := x.First(); key != nil; key, _ = x.Next() { + if bytesIterated < prevIterated { + t.Fatalf("bytesIterated moved backward: %d < %d", bytesIterated, prevIterated) + } + prevIterated = bytesIterated + } + if x.Close() != nil { + return 0 + } + return bytesIterated +} + +func randomKey(rng *rand.Rand, b []byte) base.InternalKey { + key := rng.Uint32() + key2 := rng.Uint32() + binary.LittleEndian.PutUint32(b, key) + binary.LittleEndian.PutUint32(b[4:], key2) + return base.InternalKey{UserKey: b} +} + +// Standard test. Some fraction is read. Some fraction is write. Writes have +// to go through mutex lock. 
+func BenchmarkReadWrite(b *testing.B) { + for i := 0; i <= 10; i++ { + readFrac := float32(i) / 10.0 + b.Run(fmt.Sprintf("frac_%d", i*10), func(b *testing.B) { + l := NewSkiplist(newArena(uint32((b.N+2)*maxNodeSize)), bytes.Compare) + b.ResetTimer() + var count int + b.RunParallel(func(pb *testing.PB) { + it := l.NewIter(nil, nil) + rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) + buf := make([]byte, 8) + + for pb.Next() { + if rng.Float32() < readFrac { + key, _ := it.SeekGE(randomKey(rng, buf).UserKey, base.SeekGEFlagsNone) + if key != nil { + _ = key + count++ + } + } else { + _ = l.Add(randomKey(rng, buf), nil) + } + } + }) + }) + } +} + +func BenchmarkOrderedWrite(b *testing.B) { + l := NewSkiplist(newArena(8<<20), bytes.Compare) + var ins Inserter + buf := make([]byte, 8) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + binary.BigEndian.PutUint64(buf, uint64(i)) + if err := ins.Add(l, base.InternalKey{UserKey: buf}, nil); err == ErrArenaFull { + b.StopTimer() + l = NewSkiplist(newArena(uint32((b.N+2)*maxNodeSize)), bytes.Compare) + ins = Inserter{} + b.StartTimer() + } + } +} + +func BenchmarkIterNext(b *testing.B) { + l := NewSkiplist(newArena(64<<10), bytes.Compare) + rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) + buf := make([]byte, 8) + for { + if err := l.Add(randomKey(rng, buf), nil); err == ErrArenaFull { + break + } + } + + it := l.NewIter(nil, nil) + b.ResetTimer() + for i := 0; i < b.N; i++ { + key, _ := it.Next() + if key == nil { + key, _ = it.First() + } + _ = key + } +} + +func BenchmarkIterPrev(b *testing.B) { + l := NewSkiplist(newArena(64<<10), bytes.Compare) + rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) + buf := make([]byte, 8) + for { + if err := l.Add(randomKey(rng, buf), nil); err == ErrArenaFull { + break + } + } + + it := l.NewIter(nil, nil) + _, _ = it.Last() + b.ResetTimer() + for i := 0; i < b.N; i++ { + key, _ := it.Prev() + if key == nil { + key, _ = it.Last() + } + _ = key + } 
+} + +// BenchmarkSeekPrefixGE looks at the performance of repeated calls to +// SeekPrefixGE, with different skip distances and different settings of +// trySeekUsingNext. +func BenchmarkSeekPrefixGE(b *testing.B) { + l := NewSkiplist(newArena(64<<10), bytes.Compare) + var count int + // count was measured to be 1279. + for count = 0; ; count++ { + if err := l.Add(makeIntKey(count), makeValue(count)); err == ErrArenaFull { + break + } + } + for _, skip := range []int{1, 2, 4, 8, 16} { + for _, useNext := range []bool{false, true} { + b.Run(fmt.Sprintf("skip=%d/use-next=%t", skip, useNext), func(b *testing.B) { + it := l.NewIter(nil, nil) + j := 0 + var k []byte + makeKey := func() { + k = []byte(fmt.Sprintf("%05d", j)) + } + makeKey() + it.SeekPrefixGE(k, k, base.SeekGEFlagsNone) + b.ResetTimer() + for i := 0; i < b.N; i++ { + j += skip + var flags base.SeekGEFlags + if useNext { + flags = flags.EnableTrySeekUsingNext() + } + if j >= count { + j = 0 + flags = flags.DisableTrySeekUsingNext() + } + makeKey() + it.SeekPrefixGE(k, k, flags) + } + }) + } + } +} + +// Standard test. Some fraction is read. Some fraction is write. Writes have +// to go through mutex lock. 
+// func BenchmarkReadWriteMap(b *testing.B) { +// for i := 0; i <= 10; i++ { +// readFrac := float32(i) / 10.0 +// b.Run(fmt.Sprintf("frac_%d", i*10), func(b *testing.B) { +// m := make(map[string]struct{}) +// var mutex sync.RWMutex +// b.ResetTimer() +// var count int +// b.RunParallel(func(pb *testing.PB) { +// rng := rand.New(rand.NewSource(time.Now().UnixNano())) +// for pb.Next() { +// if rng.Float32() < readFrac { +// mutex.RLock() +// _, ok := m[string(randomKey(rng))] +// mutex.RUnlock() +// if ok { +// count++ +// } +// } else { +// mutex.Lock() +// m[string(randomKey(rng))] = struct{}{} +// mutex.Unlock() +// } +// } +// }) +// }) +// } +// } diff --git a/pebble/internal/base/cleaner.go b/pebble/internal/base/cleaner.go new file mode 100644 index 0000000..b86d455 --- /dev/null +++ b/pebble/internal/base/cleaner.go @@ -0,0 +1,60 @@ +// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package base + +import "github.com/cockroachdb/pebble/vfs" + +// Cleaner cleans obsolete files. +type Cleaner interface { + Clean(fs vfs.FS, fileType FileType, path string) error +} + +// NeedsFileContents is implemented by a cleaner that needs the contents of the +// files that it is being asked to clean. +type NeedsFileContents interface { + needsFileContents() +} + +// DeleteCleaner deletes file. +type DeleteCleaner struct{} + +// Clean removes file. +func (DeleteCleaner) Clean(fs vfs.FS, fileType FileType, path string) error { + return fs.Remove(path) +} + +func (DeleteCleaner) String() string { + return "delete" +} + +// ArchiveCleaner archives file instead delete. +type ArchiveCleaner struct{} + +var _ NeedsFileContents = ArchiveCleaner{} + +// Clean archives file. 
+func (ArchiveCleaner) Clean(fs vfs.FS, fileType FileType, path string) error { + switch fileType { + case FileTypeLog, FileTypeManifest, FileTypeTable: + destDir := fs.PathJoin(fs.PathDir(path), "archive") + + if err := fs.MkdirAll(destDir, 0755); err != nil { + return err + } + + destPath := fs.PathJoin(destDir, fs.PathBase(path)) + return fs.Rename(path, destPath) + + default: + return fs.Remove(path) + } +} + +func (ArchiveCleaner) String() string { + return "archive" +} + +func (ArchiveCleaner) needsFileContents() { +} diff --git a/pebble/internal/base/comparer.go b/pebble/internal/base/comparer.go new file mode 100644 index 0000000..a630962 --- /dev/null +++ b/pebble/internal/base/comparer.go @@ -0,0 +1,260 @@ +// Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package base + +import ( + "bytes" + "encoding/binary" + "fmt" + "strconv" + "unicode/utf8" +) + +// Compare returns -1, 0, or +1 depending on whether a is 'less than', 'equal +// to' or 'greater than' b. The two arguments can only be 'equal' if their +// contents are exactly equal. Furthermore, the empty slice must be 'less than' +// any non-empty slice. Compare is used to compare user keys, such as those +// passed as arguments to the various DB methods, as well as those returned +// from Separator, Successor, and Split. +type Compare func(a, b []byte) int + +// Equal returns true if a and b are equivalent. For a given Compare, +// Equal(a,b) must return true iff Compare(a,b) returns zero, that is, +// Equal is a (potentially faster) specialization of Compare. +type Equal func(a, b []byte) bool + +// AbbreviatedKey returns a fixed length prefix of a user key such that AbbreviatedKey(a) +// < AbbreviatedKey(b) iff a < b and AbbreviatedKey(a) > AbbreviatedKey(b) iff a > b. 
If +// AbbreviatedKey(a) == AbbreviatedKey(b) an additional comparison is required to +// determine if the two keys are actually equal. +// +// This helps optimize indexed batch comparisons for cache locality. If a Split +// function is specified, AbbreviatedKey usually returns the first eight bytes +// of the user key prefix in the order that gives the correct ordering. +type AbbreviatedKey func(key []byte) uint64 + +// FormatKey returns a formatter for the user key. +type FormatKey func(key []byte) fmt.Formatter + +// FormatValue returns a formatter for the user value. The key is also +// specified for the value formatter in order to support value formatting that +// is dependent on the key. +type FormatValue func(key, value []byte) fmt.Formatter + +// Separator is used to construct SSTable index blocks. A trivial implementation +// is `return a`, but appending fewer bytes leads to smaller SSTables. +// +// Given keys a, b for which Compare(a, b) < 0, Separator returns a key k such +// that: +// +// 1. Compare(a, k) <= 0, and +// 2. Compare(k, b) < 0. +// +// As a special case, b may be nil in which case the second condition is dropped. +// +// For example, if dst, a and b are the []byte equivalents of the strings +// "aqua", "black" and "blue", then the result may be "aquablb". +// Similarly, if the arguments were "aqua", "green" and "", then the result +// may be "aquah". +type Separator func(dst, a, b []byte) []byte + +// Successor returns a shortened key given a key a, such that Compare(k, a) >= +// 0. A simple implementation may return a unchanged. The dst parameter may be +// used to store the returned key, though it is valid to pass nil. The returned +// key must be valid to pass to Compare. +type Successor func(dst, a []byte) []byte + +// ImmediateSuccessor is invoked with a prefix key ([Split(a) == len(a)]) and +// returns the smallest key that is larger than the given prefix a. 
+// ImmediateSuccessor must return a prefix key k such that: +// +// Split(k) == len(k) and Compare(k, a) > 0 +// +// and there exists no representable k2 such that: +// +// Split(k2) == len(k2) and Compare(k2, a) > 0 and Compare(k2, k) < 0 +// +// As an example, an implementation built on the natural byte ordering using +// bytes.Compare could append a `\0` to `a`. +// +// The dst parameter may be used to store the returned key, though it is valid +// to pass nil. The returned key must be valid to pass to Compare. +type ImmediateSuccessor func(dst, a []byte) []byte + +// Split returns the length of the prefix of the user key that corresponds to +// the key portion of an MVCC encoding scheme to enable the use of prefix bloom +// filters. +// +// The method will only ever be called with valid MVCC keys, that is, keys that +// the user could potentially store in the database. Pebble does not know which +// keys are MVCC keys and which are not, and may call Split on both MVCC keys +// and non-MVCC keys. +// +// A trivial MVCC scheme is one in which Split() returns len(a). This +// corresponds to assigning a constant version to each key in the database. For +// performance reasons, it is preferable to use a `nil` split in this case. +// +// The returned prefix must have the following properties: +// +// 1. The prefix must be a byte prefix: +// +// bytes.HasPrefix(a, prefix(a)) +// +// 2. A key consisting of just a prefix must sort before all other keys with +// that prefix: +// +// Compare(prefix(a), a) < 0 if len(suffix(a)) > 0 +// +// 3. Prefixes must be used to order keys before suffixes: +// +// If Compare(a, b) <= 0, then Compare(prefix(a), prefix(b)) <= 0 +// +// 4. Suffixes themselves must be valid keys and comparable, respecting the same +// ordering as within a key. 
+// +// If Compare(prefix(a), prefix(b)) == 0, then Compare(suffix(a), suffix(b)) == Compare(a, b) +type Split func(a []byte) int + +// Comparer defines a total ordering over the space of []byte keys: a 'less +// than' relationship. +type Comparer struct { + Compare Compare + Equal Equal + AbbreviatedKey AbbreviatedKey + FormatKey FormatKey + FormatValue FormatValue + Separator Separator + Split Split + Successor Successor + ImmediateSuccessor ImmediateSuccessor + + // Name is the name of the comparer. + // + // The Level-DB on-disk format stores the comparer name, and opening a + // database with a different comparer from the one it was created with + // will result in an error. + Name string +} + +// DefaultFormatter is the default implementation of user key formatting: +// non-ASCII data is formatted as escaped hexadecimal values. +var DefaultFormatter = func(key []byte) fmt.Formatter { + return FormatBytes(key) +} + +// DefaultComparer is the default implementation of the Comparer interface. +// It uses the natural ordering, consistent with bytes.Compare. +var DefaultComparer = &Comparer{ + Compare: bytes.Compare, + Equal: bytes.Equal, + + AbbreviatedKey: func(key []byte) uint64 { + if len(key) >= 8 { + return binary.BigEndian.Uint64(key) + } + var v uint64 + for _, b := range key { + v <<= 8 + v |= uint64(b) + } + return v << uint(8*(8-len(key))) + }, + + FormatKey: DefaultFormatter, + + Separator: func(dst, a, b []byte) []byte { + i, n := SharedPrefixLen(a, b), len(dst) + dst = append(dst, a...) + + min := len(a) + if min > len(b) { + min = len(b) + } + if i >= min { + // Do not shorten if one string is a prefix of the other. + return dst + } + + if a[i] >= b[i] { + // b is smaller than a or a is already the shortest possible. 
+ return dst + } + + if i < len(b)-1 || a[i]+1 < b[i] { + i += n + dst[i]++ + return dst[:i+1] + } + + i += n + 1 + for ; i < len(dst); i++ { + if dst[i] != 0xff { + dst[i]++ + return dst[:i+1] + } + } + return dst + }, + + Successor: func(dst, a []byte) (ret []byte) { + for i := 0; i < len(a); i++ { + if a[i] != 0xff { + dst = append(dst, a[:i+1]...) + dst[len(dst)-1]++ + return dst + } + } + // a is a run of 0xffs, leave it alone. + return append(dst, a...) + }, + + ImmediateSuccessor: func(dst, a []byte) (ret []byte) { + return append(append(dst, a...), 0x00) + }, + + // This name is part of the C++ Level-DB implementation's default file + // format, and should not be changed. + Name: "leveldb.BytewiseComparator", +} + +// SharedPrefixLen returns the largest i such that a[:i] equals b[:i]. +// This function can be useful in implementing the Comparer interface. +func SharedPrefixLen(a, b []byte) int { + i, n := 0, len(a) + if n > len(b) { + n = len(b) + } + asUint64 := func(c []byte, i int) uint64 { + return binary.LittleEndian.Uint64(c[i:]) + } + for i < n-7 && asUint64(a, i) == asUint64(b, i) { + i += 8 + } + for i < n && a[i] == b[i] { + i++ + } + return i +} + +// FormatBytes formats a byte slice using hexadecimal escapes for non-ASCII +// data. +type FormatBytes []byte + +const lowerhex = "0123456789abcdef" + +// Format implements the fmt.Formatter interface. +func (p FormatBytes) Format(s fmt.State, c rune) { + buf := make([]byte, 0, len(p)) + for _, b := range p { + if b < utf8.RuneSelf && strconv.IsPrint(rune(b)) { + buf = append(buf, b) + continue + } + buf = append(buf, `\x`...) + buf = append(buf, lowerhex[b>>4]) + buf = append(buf, lowerhex[b&0xF]) + } + s.Write(buf) +} diff --git a/pebble/internal/base/comparer_test.go b/pebble/internal/base/comparer_test.go new file mode 100644 index 0000000..ae49a31 --- /dev/null +++ b/pebble/internal/base/comparer_test.go @@ -0,0 +1,117 @@ +// Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. 
Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package base + +import ( + "fmt" + "slices" + "testing" + "time" + + "golang.org/x/exp/rand" +) + +func TestDefAppendSeparator(t *testing.T) { + testCases := []struct { + a, b, want string + }{ + // Examples from the doc comments. + {"black", "blue", "blb"}, + {"green", "", "green"}, + // Non-empty b values. The C++ Level-DB code calls these separators. + {"", "2", ""}, + {"1", "2", "1"}, + {"1", "29", "2"}, + {"13", "19", "14"}, + {"13", "99", "2"}, + {"135", "19", "14"}, + {"1357", "19", "14"}, + {"1357", "2", "14"}, + {"13\xff", "14", "13\xff"}, + {"13\xff", "19", "14"}, + {"1\xff\xff", "19", "1\xff\xff"}, + {"1\xff\xff", "2", "1\xff\xff"}, + {"1\xff\xff", "9", "2"}, + // Empty b values. The C++ Level-DB code calls these successors. + {"", "", ""}, + {"1", "", "1"}, + {"11", "", "11"}, + {"11\xff", "", "11\xff"}, + {"1\xff", "", "1\xff"}, + {"1\xff\xff", "", "1\xff\xff"}, + {"\xff", "", "\xff"}, + {"\xff\xff", "", "\xff\xff"}, + {"\xff\xff\xff", "", "\xff\xff\xff"}, + } + for _, tc := range testCases { + t.Run("", func(t *testing.T) { + got := string(DefaultComparer.Separator(nil, []byte(tc.a), []byte(tc.b))) + if got != tc.want { + t.Errorf("a, b = %q, %q: got %q, want %q", tc.a, tc.b, got, tc.want) + } + }) + } +} + +func TestAbbreviatedKey(t *testing.T) { + rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) + randBytes := func(size int) []byte { + data := make([]byte, size) + for i := range data { + data[i] = byte(rng.Int() & 0xff) + } + return data + } + + keys := make([][]byte, 10000) + for i := range keys { + keys[i] = randBytes(rng.Intn(16)) + } + slices.SortFunc(keys, DefaultComparer.Compare) + + for i := 1; i < len(keys); i++ { + last := DefaultComparer.AbbreviatedKey(keys[i-1]) + cur := DefaultComparer.AbbreviatedKey(keys[i]) + cmp := DefaultComparer.Compare(keys[i-1], keys[i]) + if cmp == 0 { + if last != cur { + t.Fatalf("expected 
equal abbreviated keys: %x[%x] != %x[%x]", + last, keys[i-1], cur, keys[i]) + } + } else { + if last > cur { + t.Fatalf("unexpected abbreviated key ordering: %x[%x] > %x[%x]", + last, keys[i-1], cur, keys[i]) + } + } + } +} + +func BenchmarkAbbreviatedKey(b *testing.B) { + rng := rand.New(rand.NewSource(1449168817)) + randBytes := func(size int) []byte { + data := make([]byte, size) + for i := range data { + data[i] = byte(rng.Int() & 0xff) + } + return data + } + keys := make([][]byte, 10000) + for i := range keys { + keys[i] = randBytes(8) + } + + b.ResetTimer() + var sum uint64 + for i := 0; i < b.N; i++ { + j := i % len(keys) + sum += DefaultComparer.AbbreviatedKey(keys[j]) + } + + if testing.Verbose() { + // Ensure the compiler doesn't optimize away our benchmark. + fmt.Println(sum) + } +} diff --git a/pebble/internal/base/error.go b/pebble/internal/base/error.go new file mode 100644 index 0000000..6ef7783 --- /dev/null +++ b/pebble/internal/base/error.go @@ -0,0 +1,28 @@ +// Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package base + +import "github.com/cockroachdb/errors" + +// ErrNotFound means that a get or delete call did not find the requested key. +var ErrNotFound = errors.New("pebble: not found") + +// ErrCorruption is a marker to indicate that data in a file (WAL, MANIFEST, +// sstable) isn't in the expected format. +var ErrCorruption = errors.New("pebble: corruption") + +// MarkCorruptionError marks given error as a corruption error. +func MarkCorruptionError(err error) error { + if errors.Is(err, ErrCorruption) { + return err + } + return errors.Mark(err, ErrCorruption) +} + +// CorruptionErrorf formats according to a format specifier and returns +// the string as an error value that is marked as a corruption error. 
+func CorruptionErrorf(format string, args ...interface{}) error { + return errors.Mark(errors.Newf(format, args...), ErrCorruption) +} diff --git a/pebble/internal/base/filenames.go b/pebble/internal/base/filenames.go new file mode 100644 index 0000000..06098ab --- /dev/null +++ b/pebble/internal/base/filenames.go @@ -0,0 +1,202 @@ +// Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package base + +import ( + "fmt" + "strconv" + "strings" + + "github.com/cockroachdb/errors/oserror" + "github.com/cockroachdb/pebble/vfs" + "github.com/cockroachdb/redact" +) + +// FileNum is an internal DB identifier for a file. +type FileNum uint64 + +// String returns a string representation of the file number. +func (fn FileNum) String() string { return fmt.Sprintf("%06d", fn) } + +// SafeFormat implements redact.SafeFormatter. +func (fn FileNum) SafeFormat(w redact.SafePrinter, _ rune) { + w.Printf("%06d", redact.SafeUint(fn)) +} + +// DiskFileNum converts a FileNum to a DiskFileNum. DiskFileNum should only be +// called if the caller can ensure that the FileNum belongs to a physical file +// on disk. These could be manifests, log files, physical sstables on disk, the +// options file, but not virtual sstables. +func (fn FileNum) DiskFileNum() DiskFileNum { + return DiskFileNum(fn) +} + +// A DiskFileNum is just a FileNum belonging to a file which exists on disk. +// Note that a FileNum is an internal DB identifier and it could belong to files +// which don't exist on disk. An example would be virtual sstable FileNums. +// Converting a DiskFileNum to a FileNum is always valid, whereas converting a +// FileNum to DiskFileNum may not be valid and care should be taken to prove +// that the FileNum actually exists on disk. 
+type DiskFileNum uint64 + +func (dfn DiskFileNum) String() string { return fmt.Sprintf("%06d", dfn) } + +// SafeFormat implements redact.SafeFormatter. +func (dfn DiskFileNum) SafeFormat(w redact.SafePrinter, verb rune) { + w.Printf("%06d", redact.SafeUint(dfn)) +} + +// FileNum converts a DiskFileNum to a FileNum. This conversion is always valid. +func (dfn DiskFileNum) FileNum() FileNum { + return FileNum(dfn) +} + +// FileType enumerates the types of files found in a DB. +type FileType int + +// The FileType enumeration. +const ( + FileTypeLog FileType = iota + FileTypeLock + FileTypeTable + FileTypeManifest + FileTypeCurrent + FileTypeOptions + FileTypeOldTemp + FileTypeTemp +) + +// MakeFilename builds a filename from components. +func MakeFilename(fileType FileType, dfn DiskFileNum) string { + switch fileType { + case FileTypeLog: + return fmt.Sprintf("%s.log", dfn) + case FileTypeLock: + return "LOCK" + case FileTypeTable: + return fmt.Sprintf("%s.sst", dfn) + case FileTypeManifest: + return fmt.Sprintf("MANIFEST-%s", dfn) + case FileTypeCurrent: + return "CURRENT" + case FileTypeOptions: + return fmt.Sprintf("OPTIONS-%s", dfn) + case FileTypeOldTemp: + return fmt.Sprintf("CURRENT.%s.dbtmp", dfn) + case FileTypeTemp: + return fmt.Sprintf("temporary.%s.dbtmp", dfn) + } + panic("unreachable") +} + +// MakeFilepath builds a filepath from components. +func MakeFilepath(fs vfs.FS, dirname string, fileType FileType, dfn DiskFileNum) string { + return fs.PathJoin(dirname, MakeFilename(fileType, dfn)) +} + +// ParseFilename parses the components from a filename. 
+func ParseFilename(fs vfs.FS, filename string) (fileType FileType, dfn DiskFileNum, ok bool) { + filename = fs.PathBase(filename) + switch { + case filename == "CURRENT": + return FileTypeCurrent, 0, true + case filename == "LOCK": + return FileTypeLock, 0, true + case strings.HasPrefix(filename, "MANIFEST-"): + dfn, ok = parseDiskFileNum(filename[len("MANIFEST-"):]) + if !ok { + break + } + return FileTypeManifest, dfn, true + case strings.HasPrefix(filename, "OPTIONS-"): + dfn, ok = parseDiskFileNum(filename[len("OPTIONS-"):]) + if !ok { + break + } + return FileTypeOptions, dfn, ok + case strings.HasPrefix(filename, "CURRENT.") && strings.HasSuffix(filename, ".dbtmp"): + s := strings.TrimSuffix(filename[len("CURRENT."):], ".dbtmp") + dfn, ok = parseDiskFileNum(s) + if !ok { + break + } + return FileTypeOldTemp, dfn, ok + case strings.HasPrefix(filename, "temporary.") && strings.HasSuffix(filename, ".dbtmp"): + s := strings.TrimSuffix(filename[len("temporary."):], ".dbtmp") + dfn, ok = parseDiskFileNum(s) + if !ok { + break + } + return FileTypeTemp, dfn, ok + default: + i := strings.IndexByte(filename, '.') + if i < 0 { + break + } + dfn, ok = parseDiskFileNum(filename[:i]) + if !ok { + break + } + switch filename[i+1:] { + case "sst": + return FileTypeTable, dfn, true + case "log": + return FileTypeLog, dfn, true + } + } + return 0, dfn, false +} + +func parseDiskFileNum(s string) (dfn DiskFileNum, ok bool) { + u, err := strconv.ParseUint(s, 10, 64) + if err != nil { + return dfn, false + } + return DiskFileNum(u), true +} + +// A Fataler fatals a process with a message when called. +type Fataler interface { + Fatalf(format string, args ...interface{}) +} + +// MustExist checks if err is an error indicating a file does not exist. +// If it is, it lists the containing directory's files to annotate the error +// with counts of the various types of files and invokes the provided fataler. +// See cockroachdb/cockroach#56490. 
+func MustExist(fs vfs.FS, filename string, fataler Fataler, err error) { + if err == nil || !oserror.IsNotExist(err) { + return + } + + ls, lsErr := fs.List(fs.PathDir(filename)) + if lsErr != nil { + // TODO(jackson): if oserror.IsNotExist(lsErr), then the data directory + // doesn't exist anymore. Another process likely deleted it before + // killing the process. We want to fatal the process, but without + // triggering error reporting like Sentry. + fataler.Fatalf("%s:\norig err: %s\nlist err: %s", redact.Safe(fs.PathBase(filename)), err, lsErr) + } + var total, unknown, tables, logs, manifests int + total = len(ls) + for _, f := range ls { + typ, _, ok := ParseFilename(fs, f) + if !ok { + unknown++ + continue + } + switch typ { + case FileTypeTable: + tables++ + case FileTypeLog: + logs++ + case FileTypeManifest: + manifests++ + } + } + + fataler.Fatalf("%s:\n%s\ndirectory contains %d files, %d unknown, %d tables, %d logs, %d manifests", + fs.PathBase(filename), err, total, unknown, tables, logs, manifests) +} diff --git a/pebble/internal/base/filenames_test.go b/pebble/internal/base/filenames_test.go new file mode 100644 index 0000000..07b7430 --- /dev/null +++ b/pebble/internal/base/filenames_test.go @@ -0,0 +1,114 @@ +// Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file.
+ +package base + +import ( + "bytes" + "fmt" + "os" + "testing" + + "github.com/cockroachdb/pebble/vfs" + "github.com/cockroachdb/redact" + "github.com/stretchr/testify/require" +) + +func TestParseFilename(t *testing.T) { + testCases := map[string]bool{ + "000000.log": true, + "000000.log.zip": false, + "000000..log": false, + "a000000.log": false, + "abcdef.log": false, + "000001ldb": false, + "000001.sst": true, + "CURRENT": true, + "CURRaNT": false, + "LOCK": true, + "xLOCK": false, + "x.LOCK": false, + "MANIFEST": false, + "MANIFEST123456": false, + "MANIFEST-": false, + "MANIFEST-123456": true, + "MANIFEST-123456.doc": false, + "OPTIONS": false, + "OPTIONS123456": false, + "OPTIONS-": false, + "OPTIONS-123456": true, + "OPTIONS-123456.doc": false, + "CURRENT.123456": false, + "CURRENT.dbtmp": false, + "CURRENT.123456.dbtmp": true, + "temporary.123456.dbtmp": true, + } + fs := vfs.NewMem() + for tc, want := range testCases { + _, _, got := ParseFilename(fs, fs.PathJoin("foo", tc)) + if got != want { + t.Errorf("%q: got %v, want %v", tc, got, want) + } + } +} + +func TestFilenameRoundTrip(t *testing.T) { + testCases := map[FileType]bool{ + // CURRENT and LOCK files aren't numbered. + FileTypeCurrent: false, + FileTypeLock: false, + // The remaining file types are numbered. 
+ FileTypeLog: true, + FileTypeManifest: true, + FileTypeTable: true, + FileTypeOptions: true, + FileTypeOldTemp: true, + FileTypeTemp: true, + } + fs := vfs.NewMem() + for fileType, numbered := range testCases { + fileNums := []FileNum{0} + if numbered { + fileNums = []FileNum{0, 1, 2, 3, 10, 42, 99, 1001} + } + for _, fileNum := range fileNums { + filename := MakeFilepath(fs, "foo", fileType, fileNum.DiskFileNum()) + gotFT, gotFN, gotOK := ParseFilename(fs, filename) + if !gotOK { + t.Errorf("could not parse %q", filename) + continue + } + if gotFT != fileType || gotFN.FileNum() != fileNum { + t.Errorf("filename=%q: got %v, %v, want %v, %v", filename, gotFT, gotFN, fileType, fileNum) + continue + } + } + } +} + +type bufferFataler struct { + buf bytes.Buffer +} + +func (b *bufferFataler) Fatalf(msg string, args ...interface{}) { + fmt.Fprintf(&b.buf, msg, args...) +} + +func TestMustExist(t *testing.T) { + err := os.ErrNotExist + fs := vfs.Default + var buf bufferFataler + filename := fs.PathJoin("..", "..", "testdata", "db-stage-4", "000000.sst") + + MustExist(fs, filename, &buf, err) + require.Equal(t, `000000.sst: +file does not exist +directory contains 9 files, 2 unknown, 1 tables, 1 logs, 2 manifests`, buf.buf.String()) +} + +func TestRedactFileNum(t *testing.T) { + // Ensure that redaction never redacts file numbers. + require.Equal(t, redact.RedactableString("000005"), redact.Sprint(FileNum(5))) + require.Equal(t, redact.RedactableString("000005"), redact.Sprint(DiskFileNum(5))) +} diff --git a/pebble/internal/base/internal.go b/pebble/internal/base/internal.go new file mode 100644 index 0000000..db691ee --- /dev/null +++ b/pebble/internal/base/internal.go @@ -0,0 +1,502 @@ +// Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package base // import "github.com/cockroachdb/pebble/internal/base" + +import ( + "encoding/binary" + "fmt" + "strconv" + "strings" + + "github.com/cockroachdb/redact" + "github.com/cockroachdb/pebble/shims/cmp" +) + +const ( + // SeqNumZero is the zero sequence number, set by compactions if they can + // guarantee there are no keys underneath an internal key. + SeqNumZero = uint64(0) + // SeqNumStart is the first sequence number assigned to a key. Sequence + // numbers 1-9 are reserved for potential future use. + SeqNumStart = uint64(10) +) + +// InternalKeyKind enumerates the kind of key: a deletion tombstone, a set +// value, a merged value, etc. +type InternalKeyKind uint8 + +// These constants are part of the file format, and should not be changed. +const ( + InternalKeyKindDelete InternalKeyKind = 0 + InternalKeyKindSet InternalKeyKind = 1 + InternalKeyKindMerge InternalKeyKind = 2 + InternalKeyKindLogData InternalKeyKind = 3 + //InternalKeyKindColumnFamilyDeletion InternalKeyKind = 4 + //InternalKeyKindColumnFamilyValue InternalKeyKind = 5 + //InternalKeyKindColumnFamilyMerge InternalKeyKind = 6 + + // InternalKeyKindSingleDelete (SINGLEDEL) is a performance optimization + // solely for compactions (to reduce write amp and space amp). Readers other + // than compactions should treat SINGLEDEL as equivalent to a DEL. + // Historically, it was simpler for readers other than compactions to treat + // SINGLEDEL as equivalent to DEL, but as of the introduction of + // InternalKeyKindSSTableInternalObsoleteBit, this is also necessary for + // correctness. 
+ InternalKeyKindSingleDelete InternalKeyKind = 7 + //InternalKeyKindColumnFamilySingleDelete InternalKeyKind = 8 + //InternalKeyKindBeginPrepareXID InternalKeyKind = 9 + //InternalKeyKindEndPrepareXID InternalKeyKind = 10 + //InternalKeyKindCommitXID InternalKeyKind = 11 + //InternalKeyKindRollbackXID InternalKeyKind = 12 + //InternalKeyKindNoop InternalKeyKind = 13 + //InternalKeyKindColumnFamilyRangeDelete InternalKeyKind = 14 + InternalKeyKindRangeDelete InternalKeyKind = 15 + //InternalKeyKindColumnFamilyBlobIndex InternalKeyKind = 16 + //InternalKeyKindBlobIndex InternalKeyKind = 17 + + // InternalKeyKindSeparator is a key used for separator / successor keys + // written to sstable block indexes. + // + // NOTE: the RocksDB value has been repurposed. This was done to ensure that + // keys written to block indexes with value "17" (when 17 happened to be the + // max value, and InternalKeyKindMax was therefore set to 17), remain stable + // when new key kinds are supported in Pebble. + InternalKeyKindSeparator InternalKeyKind = 17 + + // InternalKeyKindSetWithDelete keys are SET keys that have met with a + // DELETE or SINGLEDEL key in a prior compaction. This key kind is + // specific to Pebble. See + // https://github.com/cockroachdb/pebble/issues/1255. + InternalKeyKindSetWithDelete InternalKeyKind = 18 + + // InternalKeyKindRangeKeyDelete removes all range keys within a key range. + // See the internal/rangekey package for more details. + InternalKeyKindRangeKeyDelete InternalKeyKind = 19 + // InternalKeyKindRangeKeySet and InternalKeyKindRangeUnset represent + // keys that set and unset values associated with ranges of key + // space. See the internal/rangekey package for more details. + InternalKeyKindRangeKeyUnset InternalKeyKind = 20 + InternalKeyKindRangeKeySet InternalKeyKind = 21 + + // InternalKeyKindIngestSST is used to distinguish a batch that corresponds to + // the WAL entry for ingested sstables that are added to the flushable + // queue. 
This InternalKeyKind cannot appear, amongst other key kinds in a + // batch, or in an sstable. + InternalKeyKindIngestSST InternalKeyKind = 22 + + // InternalKeyKindDeleteSized keys behave identically to + // InternalKeyKindDelete keys, except that they hold an associated uint64 + // value indicating the (len(key)+len(value)) of the shadowed entry the + // tombstone is expected to delete. This value is used to inform compaction + // heuristics, but is not required to be accurate for correctness. + InternalKeyKindDeleteSized InternalKeyKind = 23 + + // This maximum value isn't part of the file format. Future extensions may + // increase this value. + // + // When constructing an internal key to pass to DB.Seek{GE,LE}, + // internalKeyComparer sorts decreasing by kind (after sorting increasing by + // user key and decreasing by sequence number). Thus, use InternalKeyKindMax, + // which sorts 'less than or equal to' any other valid internalKeyKind, when + // searching for any kind of internal key formed by a certain user key and + // seqNum. + InternalKeyKindMax InternalKeyKind = 23 + + // Internal to the sstable format. Not exposed by any sstable iterator. + // Declared here to prevent definition of valid key kinds that set this bit. + InternalKeyKindSSTableInternalObsoleteBit InternalKeyKind = 64 + InternalKeyKindSSTableInternalObsoleteMask InternalKeyKind = 191 + + // InternalKeyZeroSeqnumMaxTrailer is the largest trailer with a + // zero sequence number. + InternalKeyZeroSeqnumMaxTrailer = uint64(255) + + // A marker for an invalid key. + InternalKeyKindInvalid InternalKeyKind = InternalKeyKindSSTableInternalObsoleteMask + + // InternalKeySeqNumBatch is a bit that is set on batch sequence numbers + // which prevents those entries from being excluded from iteration. + InternalKeySeqNumBatch = uint64(1 << 55) + + // InternalKeySeqNumMax is the largest valid sequence number. 
+ InternalKeySeqNumMax = uint64(1<<56 - 1) + + // InternalKeyRangeDeleteSentinel is the marker for a range delete sentinel + // key. This sequence number and kind are used for the upper stable boundary + // when a range deletion tombstone is the largest key in an sstable. This is + // necessary because sstable boundaries are inclusive, while the end key of a + // range deletion tombstone is exclusive. + InternalKeyRangeDeleteSentinel = (InternalKeySeqNumMax << 8) | uint64(InternalKeyKindRangeDelete) + + // InternalKeyBoundaryRangeKey is the marker for a range key boundary. This + // sequence number and kind are used during interleaved range key and point + // iteration to allow an iterator to stop at range key start keys where + // there exists no point key. + InternalKeyBoundaryRangeKey = (InternalKeySeqNumMax << 8) | uint64(InternalKeyKindRangeKeySet) +) + +// Assert InternalKeyKindSSTableInternalObsoleteBit > InternalKeyKindMax +const _ = uint(InternalKeyKindSSTableInternalObsoleteBit - InternalKeyKindMax - 1) + +var internalKeyKindNames = []string{ + InternalKeyKindDelete: "DEL", + InternalKeyKindSet: "SET", + InternalKeyKindMerge: "MERGE", + InternalKeyKindLogData: "LOGDATA", + InternalKeyKindSingleDelete: "SINGLEDEL", + InternalKeyKindRangeDelete: "RANGEDEL", + InternalKeyKindSeparator: "SEPARATOR", + InternalKeyKindSetWithDelete: "SETWITHDEL", + InternalKeyKindRangeKeySet: "RANGEKEYSET", + InternalKeyKindRangeKeyUnset: "RANGEKEYUNSET", + InternalKeyKindRangeKeyDelete: "RANGEKEYDEL", + InternalKeyKindIngestSST: "INGESTSST", + InternalKeyKindDeleteSized: "DELSIZED", + InternalKeyKindInvalid: "INVALID", +} + +func (k InternalKeyKind) String() string { + if int(k) < len(internalKeyKindNames) { + return internalKeyKindNames[k] + } + return fmt.Sprintf("UNKNOWN:%d", k) +} + +// SafeFormat implements redact.SafeFormatter. 
+func (k InternalKeyKind) SafeFormat(w redact.SafePrinter, _ rune) { + w.Print(redact.SafeString(k.String())) +} + +// InternalKey is a key used for the in-memory and on-disk partial DBs that +// make up a pebble DB. +// +// It consists of the user key (as given by the code that uses package pebble) +// followed by 8-bytes of metadata: +// - 1 byte for the type of internal key: delete or set, +// - 7 bytes for a uint56 sequence number, in little-endian format. +type InternalKey struct { + UserKey []byte + Trailer uint64 +} + +// InvalidInternalKey is an invalid internal key for which Valid() will return +// false. +var InvalidInternalKey = MakeInternalKey(nil, 0, InternalKeyKindInvalid) + +// MakeInternalKey constructs an internal key from a specified user key, +// sequence number and kind. +func MakeInternalKey(userKey []byte, seqNum uint64, kind InternalKeyKind) InternalKey { + return InternalKey{ + UserKey: userKey, + Trailer: (seqNum << 8) | uint64(kind), + } +} + +// MakeTrailer constructs an internal key trailer from the specified sequence +// number and kind. +func MakeTrailer(seqNum uint64, kind InternalKeyKind) uint64 { + return (seqNum << 8) | uint64(kind) +} + +// MakeSearchKey constructs an internal key that is appropriate for searching +// for the specified user key. The search key contains the maximal sequence +// number and kind ensuring that it sorts before any other internal keys for +// the same user key. +func MakeSearchKey(userKey []byte) InternalKey { + return InternalKey{ + UserKey: userKey, + Trailer: (InternalKeySeqNumMax << 8) | uint64(InternalKeyKindMax), + } +} + +// MakeRangeDeleteSentinelKey constructs an internal key that is a range +// deletion sentinel key, used as the upper boundary for an sstable when a +// range deletion is the largest key in an sstable.
+func MakeRangeDeleteSentinelKey(userKey []byte) InternalKey { + return InternalKey{ + UserKey: userKey, + Trailer: InternalKeyRangeDeleteSentinel, + } +} + +// MakeExclusiveSentinelKey constructs an internal key that is an +// exclusive sentinel key, used as the upper boundary for an sstable +// when a ranged key is the largest key in an sstable. +func MakeExclusiveSentinelKey(kind InternalKeyKind, userKey []byte) InternalKey { + return InternalKey{ + UserKey: userKey, + Trailer: (InternalKeySeqNumMax << 8) | uint64(kind), + } +} + +var kindsMap = map[string]InternalKeyKind{ + "DEL": InternalKeyKindDelete, + "SINGLEDEL": InternalKeyKindSingleDelete, + "RANGEDEL": InternalKeyKindRangeDelete, + "LOGDATA": InternalKeyKindLogData, + "SET": InternalKeyKindSet, + "MERGE": InternalKeyKindMerge, + "INVALID": InternalKeyKindInvalid, + "SEPARATOR": InternalKeyKindSeparator, + "SETWITHDEL": InternalKeyKindSetWithDelete, + "RANGEKEYSET": InternalKeyKindRangeKeySet, + "RANGEKEYUNSET": InternalKeyKindRangeKeyUnset, + "RANGEKEYDEL": InternalKeyKindRangeKeyDelete, + "INGESTSST": InternalKeyKindIngestSST, + "DELSIZED": InternalKeyKindDeleteSized, +} + +// ParseInternalKey parses the string representation of an internal key. The +// format is ... If the seq-num starts with a "b" it +// is marked as a batch-seq-num (i.e. the InternalKeySeqNumBatch bit is set). +func ParseInternalKey(s string) InternalKey { + x := strings.Split(s, ".") + ukey := x[0] + kind, ok := kindsMap[x[1]] + if !ok { + panic(fmt.Sprintf("unknown kind: %q", x[1])) + } + j := 0 + if x[2][0] == 'b' { + j = 1 + } + seqNum, _ := strconv.ParseUint(x[2][j:], 10, 64) + if x[2][0] == 'b' { + seqNum |= InternalKeySeqNumBatch + } + return MakeInternalKey([]byte(ukey), seqNum, kind) +} + +// ParseKind parses the string representation of an internal key kind. 
+func ParseKind(s string) InternalKeyKind { + kind, ok := kindsMap[s] + if !ok { + panic(fmt.Sprintf("unknown kind: %q", s)) + } + return kind +} + +// InternalTrailerLen is the number of bytes used to encode InternalKey.Trailer. +const InternalTrailerLen = 8 + +// DecodeInternalKey decodes an encoded internal key. See InternalKey.Encode(). +func DecodeInternalKey(encodedKey []byte) InternalKey { + n := len(encodedKey) - InternalTrailerLen + var trailer uint64 + if n >= 0 { + trailer = binary.LittleEndian.Uint64(encodedKey[n:]) + encodedKey = encodedKey[:n:n] + } else { + trailer = uint64(InternalKeyKindInvalid) + encodedKey = nil + } + return InternalKey{ + UserKey: encodedKey, + Trailer: trailer, + } +} + +// InternalCompare compares two internal keys using the specified comparison +// function. For equal user keys, internal keys compare in descending sequence +// number order. For equal user keys and sequence numbers, internal keys +// compare in descending kind order (this may happen in practice among range +// keys). +func InternalCompare(userCmp Compare, a, b InternalKey) int { + if x := userCmp(a.UserKey, b.UserKey); x != 0 { + return x + } + // Reverse order for trailer comparison. + return cmp.Compare(b.Trailer, a.Trailer) +} + +// Encode encodes the receiver into the buffer. The buffer must be large enough +// to hold the encoded data. See InternalKey.Size(). +func (k InternalKey) Encode(buf []byte) { + i := copy(buf, k.UserKey) + binary.LittleEndian.PutUint64(buf[i:], k.Trailer) +} + +// EncodeTrailer returns the trailer encoded to an 8-byte array. +func (k InternalKey) EncodeTrailer() [8]byte { + var buf [8]byte + binary.LittleEndian.PutUint64(buf[:], k.Trailer) + return buf +} + +// Separator returns a separator key such that k <= x && x < other, where less +// than is consistent with the Compare function. The buf parameter may be used +// to store the returned InternalKey.UserKey, though it is valid to pass a +// nil. 
See the Separator type for details on separator keys. +func (k InternalKey) Separator( + cmp Compare, sep Separator, buf []byte, other InternalKey, +) InternalKey { + buf = sep(buf, k.UserKey, other.UserKey) + if len(buf) <= len(k.UserKey) && cmp(k.UserKey, buf) < 0 { + // The separator user key is physically shorter than k.UserKey (if it is + // longer, we'll continue to use "k"), but logically after. Tack on the max + // sequence number to the shortened user key. Note that we could tack on + // any sequence number and kind here to create a valid separator key. We + // use the max sequence number to match the behavior of LevelDB and + // RocksDB. + return MakeInternalKey(buf, InternalKeySeqNumMax, InternalKeyKindSeparator) + } + return k +} + +// Successor returns a successor key such that k <= x. A simple implementation +// may return k unchanged. The buf parameter may be used to store the returned +// InternalKey.UserKey, though it is valid to pass a nil. +func (k InternalKey) Successor(cmp Compare, succ Successor, buf []byte) InternalKey { + buf = succ(buf, k.UserKey) + if len(buf) <= len(k.UserKey) && cmp(k.UserKey, buf) < 0 { + // The successor user key is physically shorter that k.UserKey (if it is + // longer, we'll continue to use "k"), but logically after. Tack on the max + // sequence number to the shortened user key. Note that we could tack on + // any sequence number and kind here to create a valid separator key. We + // use the max sequence number to match the behavior of LevelDB and + // RocksDB. + return MakeInternalKey(buf, InternalKeySeqNumMax, InternalKeyKindSeparator) + } + return k +} + +// Size returns the encoded size of the key. +func (k InternalKey) Size() int { + return len(k.UserKey) + 8 +} + +// SetSeqNum sets the sequence number component of the key. +func (k *InternalKey) SetSeqNum(seqNum uint64) { + k.Trailer = (seqNum << 8) | (k.Trailer & 0xff) +} + +// SeqNum returns the sequence number component of the key. 
+func (k InternalKey) SeqNum() uint64 { + return k.Trailer >> 8 +} + +// SeqNumFromTrailer returns the sequence number component of a trailer. +func SeqNumFromTrailer(t uint64) uint64 { + return t >> 8 +} + +// Visible returns true if the key is visible at the specified snapshot +// sequence number. +func (k InternalKey) Visible(snapshot, batchSnapshot uint64) bool { + return Visible(k.SeqNum(), snapshot, batchSnapshot) +} + +// Visible returns true if a key with the provided sequence number is visible at +// the specified snapshot sequence numbers. +func Visible(seqNum uint64, snapshot, batchSnapshot uint64) bool { + // There are two snapshot sequence numbers, one for committed keys and one + // for batch keys. If a seqNum is less than `snapshot`, then seqNum + // corresponds to a committed key that is visible. If seqNum has its batch + // bit set, then seqNum corresponds to an uncommitted batch key. Its + // visible if its snapshot is less than batchSnapshot. + // + // There's one complication. The maximal sequence number + // (`InternalKeySeqNumMax`) is used across Pebble for exclusive sentinel + // keys and other purposes. The maximal sequence number has its batch bit + // set, but it can never be < `batchSnapshot`, since there is no expressible + // larger snapshot. We dictate that the maximal sequence number is always + // visible. + return seqNum < snapshot || + ((seqNum&InternalKeySeqNumBatch) != 0 && seqNum < batchSnapshot) || + seqNum == InternalKeySeqNumMax +} + +// SetKind sets the kind component of the key. +func (k *InternalKey) SetKind(kind InternalKeyKind) { + k.Trailer = (k.Trailer &^ 0xff) | uint64(kind) +} + +// Kind returns the kind component of the key. +func (k InternalKey) Kind() InternalKeyKind { + return TrailerKind(k.Trailer) +} + +// TrailerKind returns the key kind of the key trailer. +func TrailerKind(trailer uint64) InternalKeyKind { + return InternalKeyKind(trailer & 0xff) +} + +// Valid returns true if the key has a valid kind. 
+func (k InternalKey) Valid() bool { + return k.Kind() <= InternalKeyKindMax +} + +// Clone clones the storage for the UserKey component of the key. +func (k InternalKey) Clone() InternalKey { + if len(k.UserKey) == 0 { + return k + } + return InternalKey{ + UserKey: append([]byte(nil), k.UserKey...), + Trailer: k.Trailer, + } +} + +// CopyFrom converts this InternalKey into a clone of the passed-in InternalKey, +// reusing any space already used for the current UserKey. +func (k *InternalKey) CopyFrom(k2 InternalKey) { + k.UserKey = append(k.UserKey[:0], k2.UserKey...) + k.Trailer = k2.Trailer +} + +// String returns a string representation of the key. +func (k InternalKey) String() string { + return fmt.Sprintf("%s#%d,%d", FormatBytes(k.UserKey), k.SeqNum(), k.Kind()) +} + +// Pretty returns a formatter for the key. +func (k InternalKey) Pretty(f FormatKey) fmt.Formatter { + return prettyInternalKey{k, f} +} + +// IsExclusiveSentinel returns whether this internal key excludes point keys +// with the same user key if used as an end boundary. See the comment on +// InternalKeyRangeDeleteSentinel. +func (k InternalKey) IsExclusiveSentinel() bool { + switch kind := k.Kind(); kind { + case InternalKeyKindRangeDelete: + return k.Trailer == InternalKeyRangeDeleteSentinel + case InternalKeyKindRangeKeyDelete, InternalKeyKindRangeKeyUnset, InternalKeyKindRangeKeySet: + return (k.Trailer >> 8) == InternalKeySeqNumMax + default: + return false + } +} + +type prettyInternalKey struct { + InternalKey + formatKey FormatKey +} + +func (k prettyInternalKey) Format(s fmt.State, c rune) { + if seqNum := k.SeqNum(); seqNum == InternalKeySeqNumMax { + fmt.Fprintf(s, "%s#inf,%s", k.formatKey(k.UserKey), k.Kind()) + } else { + fmt.Fprintf(s, "%s#%d,%s", k.formatKey(k.UserKey), k.SeqNum(), k.Kind()) + } +} + +// ParsePrettyInternalKey parses the pretty string representation of an +// internal key. The format is <user-key>#<seq-num>,<kind>.
+func ParsePrettyInternalKey(s string) InternalKey { + x := strings.FieldsFunc(s, func(c rune) bool { return c == '#' || c == ',' }) + ukey := x[0] + kind, ok := kindsMap[x[2]] + if !ok { + panic(fmt.Sprintf("unknown kind: %q", x[2])) + } + var seqNum uint64 + if x[1] == "max" || x[1] == "inf" { + seqNum = InternalKeySeqNumMax + } else { + seqNum, _ = strconv.ParseUint(x[1], 10, 64) + } + return MakeInternalKey([]byte(ukey), seqNum, kind) +} diff --git a/pebble/internal/base/internal_test.go b/pebble/internal/base/internal_test.go new file mode 100644 index 0000000..39466cd --- /dev/null +++ b/pebble/internal/base/internal_test.go @@ -0,0 +1,226 @@ +// Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package base + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func (k InternalKey) encodedString() string { + buf := make([]byte, k.Size()) + k.Encode(buf) + return string(buf) +} + +func TestInternalKey(t *testing.T) { + k := MakeInternalKey([]byte("foo"), 0x08070605040302, 1) + if got, want := k.encodedString(), "foo\x01\x02\x03\x04\x05\x06\x07\x08"; got != want { + t.Fatalf("k = %q want %q", got, want) + } + if !k.Valid() { + t.Fatalf("invalid key") + } + if got, want := string(k.UserKey), "foo"; got != want { + t.Errorf("ukey = %q want %q", got, want) + } + if got, want := k.Kind(), InternalKeyKind(1); got != want { + t.Errorf("kind = %d want %d", got, want) + } + if got, want := k.SeqNum(), uint64(0x08070605040302); got != want { + t.Errorf("seqNum = %d want %d", got, want) + } +} + +func TestInvalidInternalKey(t *testing.T) { + testCases := []string{ + "", + "\x01\x02\x03\x04\x05\x06\x07", + "foo", + "foo\x08\x07\x06\x05\x04\x03\x02", + "foo\x18\x07\x06\x05\x04\x03\x02\x01", + } + for _, tc := range testCases { + k := DecodeInternalKey([]byte(tc)) + if k.Valid() { + t.Errorf("%q is a valid key, want invalid", 
tc) + } + // Invalid key kind because the key doesn't have an 8 byte trailer. + if k.Kind() == InternalKeyKindInvalid && k.UserKey != nil { + t.Errorf("expected nil UserKey after decoding encodedKey=%q", tc) + } + } +} + +func TestInternalKeyComparer(t *testing.T) { + // keys are some internal keys, in sorted order. + keys := []string{ + // The remaining test keys are all valid. + "" + "\x01\xff\xff\xff\xff\xff\xff\xff", + "" + "\x00\xff\xff\xff\xff\xff\xff\xff", + "" + "\x01\x01\x00\x00\x00\x00\x00\x00", + "" + "\x00\x01\x00\x00\x00\x00\x00\x00", + // Invalid internal keys have no user key, but have trailer "\xff \x00 \x00 \x00 \x00 \x00 \x00 \x00" + // i.e. seqNum 0 and kind 255 (InternalKeyKindInvalid). + "", + "" + "\x01\x00\x00\x00\x00\x00\x00\x00", + "" + "\x00\x00\x00\x00\x00\x00\x00\x00", + "\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + "\x00blue" + "\x01\x11\x00\x00\x00\x00\x00\x00", + "bl\x00ue" + "\x01\x11\x00\x00\x00\x00\x00\x00", + "blue" + "\x01\x11\x00\x00\x00\x00\x00\x00", + "blue\x00" + "\x01\x11\x00\x00\x00\x00\x00\x00", + "green" + "\xff\x11\x00\x00\x00\x00\x00\x00", + "green" + "\x01\x11\x00\x00\x00\x00\x00\x00", + "green" + "\x01\x00\x00\x00\x00\x00\x00\x00", + "red" + "\x01\xff\xff\xff\xff\xff\xff\xff", + "red" + "\x01\x72\x73\x74\x75\x76\x77\x78", + "red" + "\x01\x00\x00\x00\x00\x00\x00\x11", + "red" + "\x01\x00\x00\x00\x00\x00\x11\x00", + "red" + "\x01\x00\x00\x00\x00\x11\x00\x00", + "red" + "\x01\x00\x00\x00\x11\x00\x00\x00", + "red" + "\x01\x00\x00\x11\x00\x00\x00\x00", + "red" + "\x01\x00\x11\x00\x00\x00\x00\x00", + "red" + "\x01\x11\x00\x00\x00\x00\x00\x00", + "red" + "\x00\x11\x00\x00\x00\x00\x00\x00", + "red" + "\x00\x00\x00\x00\x00\x00\x00\x00", + "\xfe" + "\x01\xff\xff\xff\xff\xff\xff\xff", + "\xfe" + "\x00\x00\x00\x00\x00\x00\x00\x00", + "\xff" + "\x01\xff\xff\xff\xff\xff\xff\xff", + "\xff" + "\x00\x00\x00\x00\x00\x00\x00\x00", + "\xff\x40" + "\x01\xff\xff\xff\xff\xff\xff\xff", + "\xff\x40" + "\x00\x00\x00\x00\x00\x00\x00\x00", + 
"\xff\xff" + "\x01\xff\xff\xff\xff\xff\xff\xff", + "\xff\xff" + "\x00\x00\x00\x00\x00\x00\x00\x00", + } + c := DefaultComparer.Compare + for i := range keys { + for j := range keys { + ik := DecodeInternalKey([]byte(keys[i])) + jk := DecodeInternalKey([]byte(keys[j])) + got := InternalCompare(c, ik, jk) + want := 0 + if i < j { + want = -1 + } else if i > j { + want = +1 + } + if got != want { + t.Errorf("i=%d, j=%d, keys[i]=%q, keys[j]=%q: got %d, want %d", + i, j, keys[i], keys[j], got, want) + } + } + } +} + +func TestKindsRoundtrip(t *testing.T) { + for kindNum, prettied := range internalKeyKindNames { + if prettied == "" { + continue + } + kind := InternalKeyKind(kindNum) + got := ParseKind(kind.String()) + require.Equal(t, got, kind) + } +} + +func TestInternalKeySeparator(t *testing.T) { + testCases := []struct { + a string + b string + expected string + }{ + {"foo.SET.100", "foo.SET.99", "foo.SET.100"}, + {"foo.SET.100", "foo.SET.100", "foo.SET.100"}, + {"foo.SET.100", "foo.DEL.100", "foo.SET.100"}, + {"foo.SET.100", "foo.SET.101", "foo.SET.100"}, + {"foo.SET.100", "bar.SET.99", "foo.SET.100"}, + {"foo.SET.100", "hello.SET.200", "g.SEPARATOR.72057594037927935"}, + {"ABC1AAAAA.SET.100", "ABC2ABB.SET.200", "ABC2.SEPARATOR.72057594037927935"}, + {"AAA1AAA.SET.100", "AAA2AA.SET.200", "AAA2.SEPARATOR.72057594037927935"}, + {"AAA1AAA.SET.100", "AAA4.SET.200", "AAA2.SEPARATOR.72057594037927935"}, + {"AAA1AAA.SET.100", "AAA2.SET.200", "AAA1B.SEPARATOR.72057594037927935"}, + {"AAA1AAA.SET.100", "AAA2A.SET.200", "AAA2.SEPARATOR.72057594037927935"}, + {"AAA1.SET.100", "AAA2.SET.200", "AAA1.SET.100"}, + {"foo.SET.100", "foobar.SET.200", "foo.SET.100"}, + {"foobar.SET.100", "foo.SET.200", "foobar.SET.100"}, + {"foo.INGESTSST.100", "foo.INGESTSST.99", "foo.INGESTSST.100"}, + } + d := DefaultComparer + for _, c := range testCases { + t.Run("", func(t *testing.T) { + a := ParseInternalKey(c.a) + b := ParseInternalKey(c.b) + expected := ParseInternalKey(c.expected) + result 
:= a.Separator(d.Compare, d.Separator, nil, b) + if cmp := InternalCompare(d.Compare, expected, result); cmp != 0 { + t.Fatalf("expected %s, but found %s", expected, result) + } + }) + } +} + +func TestIsExclusiveSentinel(t *testing.T) { + userKey := []byte("foo") + testCases := []struct { + name string + key InternalKey + want bool + }{ + { + name: "rangedel; max seqnum", + key: MakeInternalKey(userKey, InternalKeySeqNumMax, InternalKeyKindRangeKeyDelete), + want: true, + }, + { + name: "rangedel; non-max seqnum", + key: MakeInternalKey(userKey, 42, InternalKeyKindRangeKeyDelete), + want: false, + }, + { + name: "rangekeyset; max seqnum", + key: MakeInternalKey(userKey, InternalKeySeqNumMax, InternalKeyKindRangeKeySet), + want: true, + }, + { + name: "rangekeyset; non-max seqnum", + key: MakeInternalKey(userKey, 42, InternalKeyKindRangeKeySet), + want: false, + }, + { + name: "rangekeyunset; max seqnum", + key: MakeInternalKey(userKey, InternalKeySeqNumMax, InternalKeyKindRangeKeyUnset), + want: true, + }, + { + name: "rangekeyunset; non-max seqnum", + key: MakeInternalKey(userKey, 42, InternalKeyKindRangeKeyUnset), + want: false, + }, + { + name: "rangekeydel; max seqnum", + key: MakeInternalKey(userKey, InternalKeySeqNumMax, InternalKeyKindRangeKeyDelete), + want: true, + }, + { + name: "rangekeydel; non-max seqnum", + key: MakeInternalKey(userKey, 42, InternalKeyKindRangeKeyDelete), + want: false, + }, + { + name: "neither rangedel nor rangekey", + key: MakeInternalKey(userKey, InternalKeySeqNumMax, InternalKeyKindSet), + want: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + got := tc.key.IsExclusiveSentinel() + require.Equal(t, tc.want, got) + }) + } +} diff --git a/pebble/internal/base/iterator.go b/pebble/internal/base/iterator.go new file mode 100644 index 0000000..1b72432 --- /dev/null +++ b/pebble/internal/base/iterator.go @@ -0,0 +1,414 @@ +// Copyright 2019 The LevelDB-Go and Pebble Authors. 
All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package base + +import ( + "context" + "fmt" + "time" +) + +// InternalIterator iterates over a DB's key/value pairs in key order. Unlike +// the Iterator interface, the returned keys are InternalKeys composed of the +// user-key, a sequence number and a key kind. In forward iteration, key/value +// pairs for identical user-keys are returned in descending sequence order. In +// reverse iteration, key/value pairs for identical user-keys are returned in +// ascending sequence order. +// +// InternalIterators provide 5 absolute positioning methods and 2 relative +// positioning methods. The absolute positioning methods are: +// +// - SeekGE +// - SeekPrefixGE +// - SeekLT +// - First +// - Last +// +// The relative positioning methods are: +// +// - Next +// - Prev +// +// The relative positioning methods can be used in conjunction with any of the +// absolute positioning methods with one exception: SeekPrefixGE does not +// support reverse iteration via Prev. It is undefined to call relative +// positioning methods without ever calling an absolute positioning method. +// +// InternalIterators can optionally implement a prefix iteration mode. This +// mode is entered by calling SeekPrefixGE and exited by any other absolute +// positioning method (SeekGE, SeekLT, First, Last). When in prefix iteration +// mode, a call to Next will advance to the next key which has the same +// "prefix" as the one supplied to SeekPrefixGE. Note that "prefix" in this +// context is not a strict byte prefix, but defined by byte equality for the +// result of the Comparer.Split method. An InternalIterator is not required to +// support prefix iteration mode, and can implement SeekPrefixGE by forwarding +// to SeekGE. 
When the iteration prefix is exhausted, it is not valid to call +// Next on an internal iterator that's already returned (nil,nilv) or a key +// beyond the prefix. +// +// Bounds, [lower, upper), can be set on iterators, either using the SetBounds() +// function in the interface, or in implementation specific ways during iterator +// creation. The forward positioning routines (SeekGE, First, and Next) only +// check the upper bound. The reverse positioning routines (SeekLT, Last, and +// Prev) only check the lower bound. It is up to the caller to ensure that the +// forward positioning routines respect the lower bound and the reverse +// positioning routines respect the upper bound (i.e. calling SeekGE instead of +// First if there is a lower bound, and SeekLT instead of Last if there is an +// upper bound). This imposition is done in order to elevate that enforcement to +// the caller (generally pebble.Iterator or pebble.mergingIter) rather than +// having it duplicated in every InternalIterator implementation. +// +// Additionally, the caller needs to ensure that SeekGE/SeekPrefixGE are not +// called with a key > the upper bound, and SeekLT is not called with a key < +// the lower bound. InternalIterator implementations are required to respect +// the iterator bounds, never returning records outside of the bounds with one +// exception: an iterator may generate synthetic RANGEDEL marker records. See +// levelIter.syntheticBoundary for the sole existing example of this behavior. +// Specifically, levelIter can return synthetic keys whose user key is equal to +// the lower/upper bound. +// +// The bounds provided to an internal iterator must remain valid until a +// subsequent call to SetBounds has returned. This requirement exists so that +// iterator implementations may compare old and new bounds to apply low-level +// optimizations. The pebble.Iterator satisfies this requirement by maintaining +// two bound buffers and switching between them. 
+// +// An iterator must be closed after use, but it is not necessary to read an +// iterator until exhaustion. +// +// An iterator is not goroutine-safe, but it is safe to use multiple iterators +// concurrently, either in separate goroutines or switching between the +// iterators in a single goroutine. +// +// It is also safe to use an iterator concurrently with modifying its +// underlying DB, if that DB permits modification. However, the resultant +// key/value pairs are not guaranteed to be a consistent snapshot of that DB +// at a particular point in time. +// +// InternalIterators accumulate errors encountered during operation, exposing +// them through the Error method. All of the absolute positioning methods +// reset any accumulated error before positioning. Relative positioning +// methods return without advancing if the iterator has accumulated an error. +// +// nilv == shorthand for LazyValue{}, which represents a nil value. +type InternalIterator interface { + // SeekGE moves the iterator to the first key/value pair whose key is greater + // than or equal to the given key. Returns the key and value if the iterator + // is pointing at a valid entry, and (nil, nilv) otherwise. Note that SeekGE + // only checks the upper bound. It is up to the caller to ensure that key + // is greater than or equal to the lower bound. + SeekGE(key []byte, flags SeekGEFlags) (*InternalKey, LazyValue) + + // SeekPrefixGE moves the iterator to the first key/value pair whose key is + // greater than or equal to the given key. Returns the key and value if the + // iterator is pointing at a valid entry, and (nil, nilv) otherwise. Note that + // SeekPrefixGE only checks the upper bound. It is up to the caller to ensure + // that key is greater than or equal to the lower bound. + // + // The prefix argument is used by some InternalIterator implementations (e.g. + // sstable.Reader) to avoid expensive operations. 
A user-defined Split + // function must be supplied to the Comparer for the DB. The supplied prefix + // will be the prefix of the given key returned by that Split function. If + // the iterator is able to determine that no key with the prefix exists, it + // can return (nil,nilv). Unlike SeekGE, this is not an indication that + // iteration is exhausted. + // + // Note that the iterator may return keys not matching the prefix. It is up + // to the caller to check if the prefix matches. + // + // Calling SeekPrefixGE places the receiver into prefix iteration mode. Once + // in this mode, reverse iteration may not be supported and will return an + // error. Note that pebble/Iterator.SeekPrefixGE has this same restriction on + // not supporting reverse iteration in prefix iteration mode until a + // different positioning routine (SeekGE, SeekLT, First or Last) switches the + // iterator out of prefix iteration. + SeekPrefixGE(prefix, key []byte, flags SeekGEFlags) (*InternalKey, LazyValue) + + // SeekLT moves the iterator to the last key/value pair whose key is less + // than the given key. Returns the key and value if the iterator is pointing + // at a valid entry, and (nil, nilv) otherwise. Note that SeekLT only checks + // the lower bound. It is up to the caller to ensure that key is less than + // the upper bound. + SeekLT(key []byte, flags SeekLTFlags) (*InternalKey, LazyValue) + + // First moves the iterator the the first key/value pair. Returns the key and + // value if the iterator is pointing at a valid entry, and (nil, nilv) + // otherwise. Note that First only checks the upper bound. It is up to the + // caller to ensure that First() is not called when there is a lower bound, + // and instead call SeekGE(lower). + First() (*InternalKey, LazyValue) + + // Last moves the iterator the the last key/value pair. Returns the key and + // value if the iterator is pointing at a valid entry, and (nil, nilv) + // otherwise. Note that Last only checks the lower bound. 
It is up to the + // caller to ensure that Last() is not called when there is an upper bound, + // and instead call SeekLT(upper). + Last() (*InternalKey, LazyValue) + + // Next moves the iterator to the next key/value pair. Returns the key and + // value if the iterator is pointing at a valid entry, and (nil, nilv) + // otherwise. Note that Next only checks the upper bound. It is up to the + // caller to ensure that key is greater than or equal to the lower bound. + // + // It is valid to call Next when the iterator is positioned before the first + // key/value pair due to either a prior call to SeekLT or Prev which returned + // (nil, nilv). It is not allowed to call Next when the previous call to SeekGE, + // SeekPrefixGE or Next returned (nil, nilv). + Next() (*InternalKey, LazyValue) + + // NextPrefix moves the iterator to the next key/value pair with a different + // prefix than the key at the current iterator position. Returns the key and + // value if the iterator is pointing at a valid entry, and (nil, nil) + // otherwise. Note that NextPrefix only checks the upper bound. It is up to + // the caller to ensure that key is greater than or equal to the lower + // bound. + // + // NextPrefix is passed the immediate successor to the current prefix key. A + // valid implementation of NextPrefix is to call SeekGE with succKey. + // + // It is not allowed to call NextPrefix when the previous call was a reverse + // positioning operation or a call to a forward positioning method that + // returned (nil, nilv). It is also not allowed to call NextPrefix when the + // iterator is in prefix iteration mode. + NextPrefix(succKey []byte) (*InternalKey, LazyValue) + + // Prev moves the iterator to the previous key/value pair. Returns the key + // and value if the iterator is pointing at a valid entry, and (nil, nilv) + // otherwise. Note that Prev only checks the lower bound. It is up to the + // caller to ensure that key is less than the upper bound. 
+ // + // It is valid to call Prev when the iterator is positioned after the last + // key/value pair due to either a prior call to SeekGE or Next which returned + // (nil, nilv). It is not allowed to call Prev when the previous call to SeekLT + // or Prev returned (nil, nilv). + Prev() (*InternalKey, LazyValue) + + // Error returns any accumulated error. It may not include errors returned + // to the client when calling LazyValue.Value(). + Error() error + + // Close closes the iterator and returns any accumulated error. Exhausting + // all the key/value pairs in a table is not considered to be an error. + // It is valid to call Close multiple times. Other methods should not be + // called after the iterator has been closed. + Close() error + + // SetBounds sets the lower and upper bounds for the iterator. Note that the + // result of Next and Prev will be undefined until the iterator has been + // repositioned with SeekGE, SeekPrefixGE, SeekLT, First, or Last. + // + // The bounds provided must remain valid until a subsequent call to + // SetBounds has returned. This requirement exists so that iterator + // implementations may compare old and new bounds to apply low-level + // optimizations. + SetBounds(lower, upper []byte) + + // SetContext replaces the context provided at iterator creation, or the + // last one provided by SetContext. + SetContext(ctx context.Context) + + fmt.Stringer +} + +// SeekGEFlags holds flags that may configure the behavior of a forward seek. +// Not all flags are relevant to all iterators. +type SeekGEFlags uint8 + +const ( + seekGEFlagTrySeekUsingNext uint8 = iota + seekGEFlagRelativeSeek + seekGEFlagBatchJustRefreshed +) + +// SeekGEFlagsNone is the default value of SeekGEFlags, with all flags disabled. 
+const SeekGEFlagsNone = SeekGEFlags(0) + +// TrySeekUsingNext indicates whether a performance optimization was enabled +// by a caller, indicating the caller has not done any action to move this +// iterator beyond the first key that would be found if this iterator were to +// honestly do the intended seek. For example, say the caller did a +// SeekGE(k1...), followed by SeekGE(k2...) where k1 <= k2, without any +// intermediate positioning calls. The caller can safely specify true for this +// parameter in the second call. As another example, say the caller did do one +// call to Next between the two Seek calls, and k1 < k2. Again, the caller can +// safely specify a true value for this parameter. Note that a false value is +// always safe. The callee is free to ignore the true value if its +// implementation does not permit this optimization. +// +// We make the caller do this determination since a string comparison of k1, k2 +// is not necessarily cheap, and there may be many iterators in the iterator +// stack. Doing it once at the root of the iterator stack is cheaper. +// +// This optimization could also be applied to SeekLT (where it would be +// trySeekUsingPrev). We currently only do it for SeekPrefixGE and SeekGE +// because this is where this optimization helps the performance of CockroachDB. +// The SeekLT cases in CockroachDB are typically accompanied with bounds that +// change between seek calls, and is optimized inside certain iterator +// implementations, like singleLevelIterator, without any extra parameter +// passing (though the same amortization of string comparisons could be done to +// improve that optimization, by making the root of the iterator stack do it). 
+func (s SeekGEFlags) TrySeekUsingNext() bool { return (s & (1 << seekGEFlagTrySeekUsingNext)) != 0 } + +// RelativeSeek is set when in the course of a forward positioning operation, a +// higher-level iterator seeks a lower-level iterator to a larger key than the +// one at the current iterator position. +// +// Concretely, this occurs when the merging iterator observes a range deletion +// covering the key at a level's current position, and the merging iterator +// seeks the level to the range deletion's end key. During lazy-combined +// iteration, this flag signals to the level iterator that the seek is NOT an +// absolute-positioning operation from the perspective of the pebble.Iterator, +// and the level iterator must look for range keys in tables between the current +// iterator position and the new seeked position. +func (s SeekGEFlags) RelativeSeek() bool { return (s & (1 << seekGEFlagRelativeSeek)) != 0 } + +// BatchJustRefreshed is set by Seek[Prefix]GE when an iterator's view of an +// indexed batch was just refreshed. It serves as a signal to the batch iterator +// to ignore the TrySeekUsingNext optimization, because the external knowledge +// imparted by the TrySeekUsingNext flag does not apply to the batch iterator's +// position. See (pebble.Iterator).batchJustRefreshed. +func (s SeekGEFlags) BatchJustRefreshed() bool { return (s & (1 << seekGEFlagBatchJustRefreshed)) != 0 } + +// EnableTrySeekUsingNext returns the provided flags with the +// try-seek-using-next optimization enabled. See TrySeekUsingNext for an +// explanation of this optimization. +func (s SeekGEFlags) EnableTrySeekUsingNext() SeekGEFlags { + return s | (1 << seekGEFlagTrySeekUsingNext) +} + +// DisableTrySeekUsingNext returns the provided flags with the +// try-seek-using-next optimization disabled. 
+func (s SeekGEFlags) DisableTrySeekUsingNext() SeekGEFlags { + return s &^ (1 << seekGEFlagTrySeekUsingNext) +} + +// EnableRelativeSeek returns the provided flags with the relative-seek flag +// enabled. See RelativeSeek for an explanation of this flag's use. +func (s SeekGEFlags) EnableRelativeSeek() SeekGEFlags { + return s | (1 << seekGEFlagRelativeSeek) +} + +// DisableRelativeSeek returns the provided flags with the relative-seek flag +// disabled. +func (s SeekGEFlags) DisableRelativeSeek() SeekGEFlags { + return s &^ (1 << seekGEFlagRelativeSeek) +} + +// EnableBatchJustRefreshed returns the provided flags with the +// batch-just-refreshed bit set. See BatchJustRefreshed for an explanation of +// this flag. +func (s SeekGEFlags) EnableBatchJustRefreshed() SeekGEFlags { + return s | (1 << seekGEFlagBatchJustRefreshed) +} + +// DisableBatchJustRefreshed returns the provided flags with the +// batch-just-refreshed bit unset. +func (s SeekGEFlags) DisableBatchJustRefreshed() SeekGEFlags { + return s &^ (1 << seekGEFlagBatchJustRefreshed) +} + +// SeekLTFlags holds flags that may configure the behavior of a reverse seek. +// Not all flags are relevant to all iterators. +type SeekLTFlags uint8 + +const ( + seekLTFlagRelativeSeek uint8 = iota +) + +// SeekLTFlagsNone is the default value of SeekLTFlags, with all flags disabled. +const SeekLTFlagsNone = SeekLTFlags(0) + +// RelativeSeek is set when in the course of a reverse positioning operation, a +// higher-level iterator seeks a lower-level iterator to a smaller key than the +// one at the current iterator position. +// +// Concretely, this occurs when the merging iterator observes a range deletion +// covering the key at a level's current position, and the merging iterator +// seeks the level to the range deletion's start key. 
During lazy-combined +// iteration, this flag signals to the level iterator that the seek is NOT an +// absolute-positioning operation from the perspective of the pebble.Iterator, +// and the level iterator must look for range keys in tables between the current +// iterator position and the new seeked position. +func (s SeekLTFlags) RelativeSeek() bool { return s&(1<= len(ref) { + // Verify that ref matches the flag predicates. + for j := 0; j < i; j++ { + if got := flags[j].pred(); ref[j] != got { + t.Errorf("%s() = %t, want %t", flags[j].label, got, ref[j]) + } + } + return + } + + // flag i remains unset. + t.Run(fmt.Sprintf("%s begin unset", flags[i].label), func(t *testing.T) { + checkCombination(t, i+1, flags, ref) + }) + + // set flag i + ref[i] = true + flags[i].set() + t.Run(fmt.Sprintf("%s set", flags[i].label), func(t *testing.T) { + checkCombination(t, i+1, flags, ref) + }) + + // unset flag i + ref[i] = false + flags[i].unset() + t.Run(fmt.Sprintf("%s unset", flags[i].label), func(t *testing.T) { + checkCombination(t, i+1, flags, ref) + }) +} diff --git a/pebble/internal/base/lazy_value.go b/pebble/internal/base/lazy_value.go new file mode 100644 index 0000000..cc6d56d --- /dev/null +++ b/pebble/internal/base/lazy_value.go @@ -0,0 +1,287 @@ +// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package base + +import "github.com/cockroachdb/pebble/internal/invariants" + +// A value can have user-defined attributes that are a function of the value +// byte slice. For now, we only support "short attributes", which can be +// encoded in 3 bits. We will likely extend this to "long attributes" later +// for values that are even more expensive to access than those in value +// blocks in the same sstable. 
+// +// When a sstable writer chooses not to store a value together with the key, +// it can call the ShortAttributeExtractor to extract the attribute and store +// it together with the key. This allows for cheap retrieval of +// AttributeAndLen on the read-path, without doing a more expensive retrieval +// of the value. In general, the extraction code may want to also look at the +// key to decide how to treat the value, hence the key* parameters. +// +// Write path performance: The ShortAttributeExtractor func cannot be inlined, +// so we will pay the cost of this function call. However, we will only pay +// this when (a) the value is not being stored together with the key, and (b) +// the key-value pair is being initially written to the DB, or a compaction is +// transitioning the key-value pair from being stored together to being stored +// separately. + +// ShortAttribute encodes a user-specified attribute of the value. +type ShortAttribute uint8 + +// MaxShortAttribute is the maximum value of the short attribute (3 bits). +const MaxShortAttribute = 7 + +// ShortAttributeExtractor is an extractor that given the value, will return +// the ShortAttribute. +type ShortAttributeExtractor func( + key []byte, keyPrefixLen int, value []byte) (ShortAttribute, error) + +// AttributeAndLen represents the pair of value length and the short +// attribute. +type AttributeAndLen struct { + ValueLen int32 + ShortAttribute ShortAttribute +} + +// LazyValue represents a value that may not already have been extracted. +// Currently, it can represent either an in-place value (stored with the key) +// or a value stored in the value section. However, the interface is general +// enough to support values that are stored in separate files. +// +// LazyValue is used in the InternalIterator interface, such that all +// positioning calls return (*InternalKey, LazyValue). 
It is also exposed via +// the public Iterator for callers that need to remember a recent but not +// necessarily latest LazyValue, in case they need the actual value in the +// future. An example is a caller that is iterating in reverse and looking for +// the latest MVCC version for a key -- it cannot identify the latest MVCC +// version without stepping to the previous key-value pair e.g. +// storage.pebbleMVCCScanner in CockroachDB. +// +// Performance note: It is important for this struct to not exceed a sizeof 32 +// bytes, for optimizing the common case of the in-place value. Prior to +// introducing LazyValue, we were passing around a []byte which is 24 bytes. +// Passing a 40 byte or larger struct causes performance to drop by 75% on +// some benchmarks that do tight iteration loops. +// +// Memory management: +// This is subtle, but important for performance. +// +// A LazyValue returned by an InternalIterator or Iterator is unstable in that +// repositioning the iterator will invalidate the memory inside it. A caller +// wishing to maintain that LazyValue needs to call LazyValue.Clone(). Note +// that this does not fetch the value if it is not in-place. Clone() should +// ideally not be called if LazyValue.Value() has been called, since the +// cloned LazyValue will forget the extracted/fetched value, and calling +// Value() on this clone will cause the value to be extracted again. That is, +// Clone() does not make any promise about the memory stability of the +// underlying value. +// +// A user of an iterator that calls LazyValue.Value() wants as much as +// possible for the returned value []byte to point to iterator owned memory. +// +// 1. [P1] The underlying iterator that owns that memory also needs a promise +// from that user that at any time there is at most one value []byte slice +// that the caller is expecting it to maintain. 
Otherwise, the underlying +// iterator has to maintain multiple such []byte slices which results in +// more complicated and inefficient code. +// +// 2. [P2] The underlying iterator, in order to make the promise that it is +// maintaining the one value []byte slice, also needs a way to know when +// it is relieved of that promise. One way it is relieved of that promise +// is by being told that it is being repositioned. Typically, the owner of +// the value []byte slice is a sstable iterator, and it will know that it +// is relieved of the promise when it is repositioned. However, consider +// the case where the caller has used LazyValue.Clone() and repositioned +// the iterator (which is actually a tree of iterators). In this case the +// underlying sstable iterator may not even be open. LazyValue.Value() +// will still work (at a higher cost), but since the sstable iterator is +// not open, it does not have a mechanism to know when the retrieved value +// is no longer in use. We refer to this situation as "not satisfying P2". +// To handle this situation, the LazyValue.Value() method accepts a caller +// owned buffer, that the callee will use if needed. The callee explicitly +// tells the caller whether the []byte slice for the value is now owned by +// the caller. This will be true if the callee attempted to use buf and +// either successfully used it or allocated a new []byte slice. +// +// To ground the above in reality, we consider three examples of callers of +// LazyValue.Value(): +// +// - Iterator: it calls LazyValue.Value for its own use when merging values. +// When merging during reverse iteration, it may have cloned the LazyValue. +// In this case it calls LazyValue.Value() on the cloned value, merges it, +// and then calls LazyValue.Value() on the current iterator position and +// merges it. So it is honoring P1. +// +// - Iterator on behalf of Iterator clients: The Iterator.Value() method +// needs to call LazyValue.Value(). 
The client of Iterator is satisfying P1 +// because of the inherent Iterator interface constraint, i.e., it is calling +// Iterator.Value() on the current Iterator position. It is possible that +// the Iterator has cloned this LazyValue (for the reverse iteration case), +// which the client is unaware of, so the underlying sstable iterator may +// not be able to satisfy P2. This is ok because Iterator will call +// LazyValue.Value with its (reusable) owned buffer. +// +// - CockroachDB's pebbleMVCCScanner: This will use LazyValues from Iterator +// since during reverse iteration in order to find the highest version that +// satisfies a read it needs to clone the LazyValue, step back the iterator +// and then decide whether it needs the value from the previously cloned +// LazyValue. The pebbleMVCCScanner will satisfy P1. The P2 story is +// similar to the previous case in that it will call LazyValue.Value with +// its (reusable) owned buffer. +// +// Corollary: callers that directly use InternalIterator can know that they +// have done nothing to interfere with promise P2 can pass in a nil buf and be +// sure that it will not trigger an allocation. +// +// Repeated calling of LazyValue.Value: +// This is ok as long as the caller continues to satisfy P1. The previously +// fetched value will be remembered inside LazyValue to avoid fetching again. +// So if the caller's buffer is used the first time the value was fetched, it +// is still in use. +// +// LazyValue fields are visible outside the package for use in +// InternalIterator implementations and in Iterator, but not meant for direct +// use by users of Pebble. +type LazyValue struct { + // ValueOrHandle represents a value, or a handle to be passed to ValueFetcher. + // - Fetcher == nil: ValueOrHandle is a value. + // - Fetcher != nil: ValueOrHandle is a handle and Fetcher.Attribute is + // initialized. 
+ // The ValueOrHandle exposed by InternalIterator or Iterator may not be stable + // if the iterator is stepped. To make it stable, make a copy using Clone. + ValueOrHandle []byte + // Fetcher provides support for fetching an actually lazy value. + Fetcher *LazyFetcher +} + +// LazyFetcher supports fetching a lazy value. +// +// Fetcher and Attribute are to be initialized at creation time. The fields +// are arranged to reduce the sizeof this struct. +type LazyFetcher struct { + // Fetcher, given a handle, returns the value. + Fetcher ValueFetcher + err error + value []byte + // Attribute includes the short attribute and value length. + Attribute AttributeAndLen + fetched bool + callerOwned bool +} + +// ValueFetcher is an interface for fetching a value. +type ValueFetcher interface { + // Fetch returns the value, given the handle. It is acceptable to call the + // ValueFetcher.Fetch as long as the DB is open. However, one should assume + // there is a fast-path when the iterator tree has not moved off the sstable + // iterator that initially provided this LazyValue. Hence, to utilize this + // fast-path the caller should try to decide whether it needs the value or + // not as soon as possible, with minimal possible stepping of the iterator. + // + // buf will be used if the fetcher cannot satisfy P2 (see earlier comment). + // If the fetcher attempted to use buf *and* len(buf) was insufficient, it + // will allocate a new slice for the value. In either case it will set + // callerOwned to true. + Fetch( + handle []byte, valLen int32, buf []byte) (val []byte, callerOwned bool, err error) +} + +// Value returns the underlying value. +func (lv *LazyValue) Value(buf []byte) (val []byte, callerOwned bool, err error) { + if lv.Fetcher == nil { + return lv.ValueOrHandle, false, nil + } + // Do the rest of the work in a separate method to attempt mid-stack + // inlining of Value(). 
Unfortunately, this still does not inline since the + // cost of 85 exceeds the budget of 80. + // + // TODO(sumeer): Packing the return values into a struct{[]byte error bool} + // causes it to be below the budget. Consider this if we need to recover + // more performance. I suspect that inlining this only matters in + // micro-benchmarks, and in actual use cases in CockroachDB it will not + // matter because there is substantial work done with a fetched value. + return lv.fetchValue(buf) +} + +// INVARIANT: lv.Fetcher != nil +func (lv *LazyValue) fetchValue(buf []byte) (val []byte, callerOwned bool, err error) { + f := lv.Fetcher + if !f.fetched { + f.fetched = true + f.value, f.callerOwned, f.err = f.Fetcher.Fetch( + lv.ValueOrHandle, lv.Fetcher.Attribute.ValueLen, buf) + } + return f.value, f.callerOwned, f.err +} + +// InPlaceValue returns the value under the assumption that it is in-place. +// This is for Pebble-internal code. +func (lv *LazyValue) InPlaceValue() []byte { + if invariants.Enabled && lv.Fetcher != nil { + panic("value must be in-place") + } + return lv.ValueOrHandle +} + +// Len returns the length of the value. +func (lv *LazyValue) Len() int { + if lv.Fetcher == nil { + return len(lv.ValueOrHandle) + } + return int(lv.Fetcher.Attribute.ValueLen) +} + +// TryGetShortAttribute returns the ShortAttribute and a bool indicating +// whether the ShortAttribute was populated. +func (lv *LazyValue) TryGetShortAttribute() (ShortAttribute, bool) { + if lv.Fetcher == nil { + return 0, false + } + return lv.Fetcher.Attribute.ShortAttribute, true +} + +// Clone creates a stable copy of the LazyValue, by appending bytes to buf. +// The fetcher parameter must be non-nil and may be over-written and used +// inside the returned LazyValue -- this is needed to avoid an allocation. +// Most callers have at most K cloned LazyValues, where K is hard-coded, so +// they can have a pool of exactly K LazyFetcher structs they can reuse in +// these calls. 
The alternative of allocating LazyFetchers from a sync.Pool is +// not viable since we have no code trigger for returning to the pool +// (LazyValues are simply GC'd). +// +// NB: It is highly preferable that LazyValue.Value() has not been called, +// since the Clone will forget any previously extracted value, and a future +// call to Value will cause it to be fetched again. We do this since we don't +// want to reason about whether or not to clone an already extracted value +// inside the Fetcher (we don't). Property P1 applies here too: if lv1.Value() +// has been called, and then lv2 is created as a clone of lv1, then calling +// lv2.Value() can invalidate any backing memory maintained inside the fetcher +// for lv1 (even though these are the same values). We initially prohibited +// calling LazyValue.Clone() if LazyValue.Value() has been called, but there +// is at least one complex caller (pebbleMVCCScanner inside CockroachDB) where +// it is not easy to prove this invariant. +func (lv *LazyValue) Clone(buf []byte, fetcher *LazyFetcher) (LazyValue, []byte) { + var lvCopy LazyValue + if lv.Fetcher != nil { + *fetcher = LazyFetcher{ + Fetcher: lv.Fetcher.Fetcher, + Attribute: lv.Fetcher.Attribute, + // Not copying anything that has been extracted. + } + lvCopy.Fetcher = fetcher + } + vLen := len(lv.ValueOrHandle) + if vLen == 0 { + return lvCopy, buf + } + bufLen := len(buf) + buf = append(buf, lv.ValueOrHandle...) + lvCopy.ValueOrHandle = buf[bufLen : bufLen+vLen] + return lvCopy, buf +} + +// MakeInPlaceValue constructs an in-place value. +func MakeInPlaceValue(val []byte) LazyValue { + return LazyValue{ValueOrHandle: val} +} diff --git a/pebble/internal/base/lazy_value_test.go b/pebble/internal/base/lazy_value_test.go new file mode 100644 index 0000000..82ad51c --- /dev/null +++ b/pebble/internal/base/lazy_value_test.go @@ -0,0 +1,74 @@ +// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. 
Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package base + +import ( + "bytes" + "testing" + "unsafe" + + "github.com/stretchr/testify/require" +) + +type valueFetcherFunc func( + handle []byte, valLen int32, buf []byte) (val []byte, callerOwned bool, err error) + +func (v valueFetcherFunc) Fetch( + handle []byte, valLen int32, buf []byte, +) (val []byte, callerOwned bool, err error) { + return v(handle, valLen, buf) +} + +func TestLazyValue(t *testing.T) { + // Both 40 and 48 bytes makes iteration benchmarks like + // BenchmarkIteratorScan/keys=1000,r-amp=1,key-types=points-only 75% + // slower. + require.True(t, unsafe.Sizeof(LazyValue{}) <= 32) + + fooBytes1 := []byte("foo") + fooLV1 := MakeInPlaceValue(fooBytes1) + require.Equal(t, 3, fooLV1.Len()) + _, hasAttr := fooLV1.TryGetShortAttribute() + require.False(t, hasAttr) + fooLV2, fooBytes2 := fooLV1.Clone(nil, &LazyFetcher{}) + require.Equal(t, 3, fooLV2.Len()) + _, hasAttr = fooLV2.TryGetShortAttribute() + require.False(t, hasAttr) + require.Equal(t, fooLV1.InPlaceValue(), fooLV2.InPlaceValue()) + getValue := func(lv LazyValue, expectedCallerOwned bool) []byte { + v, callerOwned, err := lv.Value(nil) + require.NoError(t, err) + require.Equal(t, expectedCallerOwned, callerOwned) + return v + } + require.Equal(t, getValue(fooLV1, false), getValue(fooLV2, false)) + fooBytes2[0] = 'b' + require.False(t, bytes.Equal(fooLV1.InPlaceValue(), fooLV2.InPlaceValue())) + + for _, callerOwned := range []bool{false, true} { + numCalls := 0 + fooLV3 := LazyValue{ + ValueOrHandle: []byte("foo-handle"), + Fetcher: &LazyFetcher{ + Fetcher: valueFetcherFunc( + func(handle []byte, valLen int32, buf []byte) ([]byte, bool, error) { + numCalls++ + require.Equal(t, []byte("foo-handle"), handle) + require.Equal(t, int32(3), valLen) + return fooBytes1, callerOwned, nil + }), + Attribute: AttributeAndLen{ValueLen: 3, ShortAttribute: 7}, + }, + } + require.Equal(t, 
[]byte("foo"), getValue(fooLV3, callerOwned)) + require.Equal(t, 1, numCalls) + require.Equal(t, []byte("foo"), getValue(fooLV3, callerOwned)) + require.Equal(t, 1, numCalls) + require.Equal(t, 3, fooLV3.Len()) + attr, hasAttr := fooLV3.TryGetShortAttribute() + require.True(t, hasAttr) + require.Equal(t, ShortAttribute(7), attr) + } +} diff --git a/pebble/internal/base/logger.go b/pebble/internal/base/logger.go new file mode 100644 index 0000000..5448137 --- /dev/null +++ b/pebble/internal/base/logger.go @@ -0,0 +1,158 @@ +// Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package base + +import ( + "bytes" + "context" + "fmt" + "log" + "os" + "runtime" + "sync" + + "github.com/cockroachdb/pebble/internal/invariants" +) + +// Logger defines an interface for writing log messages. +type Logger interface { + Infof(format string, args ...interface{}) + Errorf(format string, args ...interface{}) + Fatalf(format string, args ...interface{}) +} +type defaultLogger struct{} + +// DefaultLogger logs to the Go stdlib logs. +var DefaultLogger defaultLogger + +var _ Logger = DefaultLogger + +// Infof implements the Logger.Infof interface. +func (defaultLogger) Infof(format string, args ...interface{}) { + _ = log.Output(2, fmt.Sprintf(format, args...)) +} + +// Errorf implements the Logger.Errorf interface. +func (defaultLogger) Errorf(format string, args ...interface{}) { + _ = log.Output(2, fmt.Sprintf(format, args...)) +} + +// Fatalf implements the Logger.Fatalf interface. +func (defaultLogger) Fatalf(format string, args ...interface{}) { + _ = log.Output(2, fmt.Sprintf(format, args...)) + os.Exit(1) +} + +// InMemLogger implements Logger using an in-memory buffer (used for testing). +// The buffer can be read via String() and cleared via Reset(). 
+type InMemLogger struct { + mu struct { + sync.Mutex + buf bytes.Buffer + } +} + +var _ Logger = (*InMemLogger)(nil) + +// Reset clears the internal buffer. +func (b *InMemLogger) Reset() { + b.mu.Lock() + defer b.mu.Unlock() + b.mu.buf.Reset() +} + +// String returns the current internal buffer. +func (b *InMemLogger) String() string { + b.mu.Lock() + defer b.mu.Unlock() + return b.mu.buf.String() +} + +// Infof is part of the Logger interface. +func (b *InMemLogger) Infof(format string, args ...interface{}) { + s := fmt.Sprintf(format, args...) + b.mu.Lock() + defer b.mu.Unlock() + b.mu.buf.Write([]byte(s)) + if n := len(s); n == 0 || s[n-1] != '\n' { + b.mu.buf.Write([]byte("\n")) + } +} + +// Errorf is part of the Logger interface. +func (b *InMemLogger) Errorf(format string, args ...interface{}) { + b.Infof(format, args...) +} + +// Fatalf is part of the Logger interface. +func (b *InMemLogger) Fatalf(format string, args ...interface{}) { + b.Infof(format, args...) + runtime.Goexit() +} + +// LoggerAndTracer defines an interface for logging and tracing. +type LoggerAndTracer interface { + Logger + // Eventf formats and emits a tracing log, if tracing is enabled in the + // current context. + Eventf(ctx context.Context, format string, args ...interface{}) + // IsTracingEnabled returns true if tracing is enabled. It can be used as an + // optimization to avoid calling Eventf (which will be a noop when tracing + // is not enabled) to avoid the overhead of boxing the args. + IsTracingEnabled(ctx context.Context) bool +} + +// LoggerWithNoopTracer wraps a logger and does no tracing. +type LoggerWithNoopTracer struct { + Logger +} + +var _ LoggerAndTracer = &LoggerWithNoopTracer{} + +// Eventf implements LoggerAndTracer. +func (*LoggerWithNoopTracer) Eventf(ctx context.Context, format string, args ...interface{}) { + if invariants.Enabled && ctx == nil { + panic("Eventf context is nil") + } +} + +// IsTracingEnabled implements LoggerAndTracer. 
+func (*LoggerWithNoopTracer) IsTracingEnabled(ctx context.Context) bool { + if invariants.Enabled && ctx == nil { + panic("IsTracingEnabled ctx is nil") + } + return false +} + +// NoopLoggerAndTracer does no logging and tracing. Remember that struct{} is +// special cased in Go and does not incur an allocation when it backs the +// interface LoggerAndTracer. +type NoopLoggerAndTracer struct{} + +var _ LoggerAndTracer = NoopLoggerAndTracer{} + +// Infof implements LoggerAndTracer. +func (l NoopLoggerAndTracer) Infof(format string, args ...interface{}) {} + +// Errorf implements LoggerAndTracer. +func (l NoopLoggerAndTracer) Errorf(format string, args ...interface{}) {} + +// Fatalf implements LoggerAndTracer. +func (l NoopLoggerAndTracer) Fatalf(format string, args ...interface{}) {} + +// Eventf implements LoggerAndTracer. +func (l NoopLoggerAndTracer) Eventf(ctx context.Context, format string, args ...interface{}) { + if invariants.Enabled && ctx == nil { + panic("Eventf context is nil") + } +} + +// IsTracingEnabled implements LoggerAndTracer. +func (l NoopLoggerAndTracer) IsTracingEnabled(ctx context.Context) bool { + if invariants.Enabled && ctx == nil { + panic("IsTracingEnabled ctx is nil") + } + return false +} diff --git a/pebble/internal/base/merger.go b/pebble/internal/base/merger.go new file mode 100644 index 0000000..757d150 --- /dev/null +++ b/pebble/internal/base/merger.go @@ -0,0 +1,133 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package base + +import "io" + +// Merge creates a ValueMerger for the specified key initialized with the value +// of one merge operand. +type Merge func(key, value []byte) (ValueMerger, error) + +// ValueMerger receives merge operands one by one. 
The operand received is either +// newer or older than all operands received so far as indicated by the function +// names, `MergeNewer()` and `MergeOlder()`. Once all operands have been received, +// the client will invoke `Finish()` to obtain the final result. The order of +// a merge is not changed after the first call to `MergeNewer()` or +// `MergeOlder()`, i.e. the same method is used to submit all operands. +// +// The implementation may choose to merge values into the result immediately upon +// receiving each operand, or buffer operands until Finish() is called. For example, +// buffering may be useful to avoid (de)serializing partial merge results. +// +// The merge operation must be associative. That is, for the values A, B, C: +// +// Merge(A).MergeOlder(B).MergeOlder(C) == Merge(C).MergeNewer(B).MergeNewer(A) +// +// Examples of merge operators are integer addition, list append, and string +// concatenation. +type ValueMerger interface { + // MergeNewer adds an operand that is newer than all existing operands. + // The caller retains ownership of value. + // + // If an error is returned the merge is aborted and no other methods must + // be called. + MergeNewer(value []byte) error + + // MergeOlder adds an operand that is older than all existing operands. + // The caller retains ownership of value. + // + // If an error is returned the merge is aborted and no other methods must + // be called. + MergeOlder(value []byte) error + + // Finish does any final processing of the added operands and returns a + // result. The caller can assume the returned byte slice will not be mutated. + // + // Finish must be the last function called on the ValueMerger. The caller + // must not call any other ValueMerger functions after calling Finish. + // + // If `includesBase` is true, the oldest merge operand was part of the + // merge. 
This will always be true during normal iteration, but may be +// false during compaction when only a subset of operands may be +// available. Note that `includesBase` is set to true conservatively: a false +// value means that we could not definitely determine that the base merge +// operand was included. + // + // If a Closer is returned, the returned slice will remain valid until it is +// closed. The caller must arrange for the closer to be eventually closed. + Finish(includesBase bool) ([]byte, io.Closer, error) +} + +// DeletableValueMerger is an extension to ValueMerger which allows indicating that the +// result of a merge operation is non-existent. Such non-existent entries will eventually +// be deleted during compaction. Note that during compaction, non-existence of the result +// of a merge means that the merge operands will not result in any record being output. +// This is not the same as transforming the merge operands into a deletion tombstone, as +// older merge operands will still be visible during iteration. Deletion of the merge operands +// in this way is akin to the way a SingleDelete+Set combine into non-existence while leaving +// older records for the same key unaffected. +type DeletableValueMerger interface { + ValueMerger + + // DeletableFinish enables a value merger to indicate that the result of a merge operation + // is non-existent. See Finish for a description of includesBase. + DeletableFinish(includesBase bool) (value []byte, delete bool, closer io.Closer, err error) +} + +// Merger defines an associative merge operation. The merge operation merges +// two or more values for a single key. A merge operation is requested by +// writing a value using {Batch,DB}.Merge(). The value at that key is merged +// with any existing value. It is valid to Set a value at a key and then Merge +// a new value. Similar to non-merged values, a merged value can be deleted by +// either Delete or DeleteRange. 
+// +// The merge operation is invoked when a merge value is encountered during a +// read, either during a compaction or during iteration. +type Merger struct { + Merge Merge + + // Name is the name of the merger. + // + // Pebble stores the merger name on disk, and opening a database with a + // different merger from the one it was created with will result in an error. + Name string +} + +// AppendValueMerger concatenates merge operands in order from oldest to newest. +type AppendValueMerger struct { + buf []byte +} + +// MergeNewer appends value to the result. +func (a *AppendValueMerger) MergeNewer(value []byte) error { + a.buf = append(a.buf, value...) + return nil +} + +// MergeOlder prepends value to the result, which involves allocating a new buffer. +func (a *AppendValueMerger) MergeOlder(value []byte) error { + buf := make([]byte, len(a.buf)+len(value)) + copy(buf, value) + copy(buf[len(value):], a.buf) + a.buf = buf + return nil +} + +// Finish returns the buffer that was constructed on-demand in `Merge{OlderNewer}()` calls. +func (a *AppendValueMerger) Finish(includesBase bool) ([]byte, io.Closer, error) { + return a.buf, nil, nil +} + +// DefaultMerger is the default implementation of the Merger interface. It +// concatenates the two values to merge. +var DefaultMerger = &Merger{ + Merge: func(key, value []byte) (ValueMerger, error) { + res := &AppendValueMerger{} + res.buf = append(res.buf, value...) + return res, nil + }, + + Name: "pebble.concatenate", +} diff --git a/pebble/internal/base/metrics.go b/pebble/internal/base/metrics.go new file mode 100644 index 0000000..520edc3 --- /dev/null +++ b/pebble/internal/base/metrics.go @@ -0,0 +1,98 @@ +// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package base + +import "time" + +// ThroughputMetric is used to measure the byte throughput of some component +// that performs work in a single-threaded manner. The throughput can be +// approximated by Bytes/(WorkDuration+IdleTime). The idle time is represented +// separately, so that the user of this metric could approximate the peak +// throughput as Bytes/WorkTime. The metric is designed to be cumulative (see +// Merge). +type ThroughputMetric struct { + // Bytes is the bytes processed by the component. + Bytes int64 + // WorkDuration is the duration that the component spent doing work. + WorkDuration time.Duration + // IdleDuration is the duration that the component was idling, waiting for + // work. + IdleDuration time.Duration +} + +// Merge accumulates the information from another throughput metric. +func (tm *ThroughputMetric) Merge(x ThroughputMetric) { + tm.Bytes += x.Bytes + tm.WorkDuration += x.WorkDuration + tm.IdleDuration += x.IdleDuration +} + +// Subtract subtracts the information from another ThroughputMetric. +func (tm *ThroughputMetric) Subtract(x ThroughputMetric) { + tm.Bytes -= x.Bytes + tm.WorkDuration -= x.WorkDuration + tm.IdleDuration -= x.IdleDuration +} + +// PeakRate returns the approximate peak rate if there was no idling. +func (tm *ThroughputMetric) PeakRate() int64 { + if tm.Bytes == 0 { + return 0 + } + return int64((float64(tm.Bytes) / float64(tm.WorkDuration)) * float64(time.Second)) +} + +// Rate returns the observed rate. +func (tm *ThroughputMetric) Rate() int64 { + if tm.Bytes == 0 { + return 0 + } + return int64((float64(tm.Bytes) / float64(tm.WorkDuration+tm.IdleDuration)) * + float64(time.Second)) +} + +// Utilization returns a fraction [0, 1.0] indicating the percent of time +// work was performed. 
+func (tm *ThroughputMetric) Utilization() float64 { + if tm.WorkDuration == 0 { + return 0 + } + return float64(tm.WorkDuration) / float64(tm.WorkDuration+tm.IdleDuration) +} + +// GaugeSampleMetric is used to measure a gauge value (e.g. queue length) by +// accumulating samples of that gauge. +type GaugeSampleMetric struct { + // The sum of all the samples. + sampleSum int64 + // The number of samples. + count int64 +} + +// AddSample adds the given sample. +func (gsm *GaugeSampleMetric) AddSample(sample int64) { + gsm.sampleSum += sample + gsm.count++ +} + +// Merge accumulates the information from another gauge metric. +func (gsm *GaugeSampleMetric) Merge(x GaugeSampleMetric) { + gsm.sampleSum += x.sampleSum + gsm.count += x.count +} + +// Subtract subtracts the information from another gauge metric. +func (gsm *GaugeSampleMetric) Subtract(x GaugeSampleMetric) { + gsm.sampleSum -= x.sampleSum + gsm.count -= x.count +} + +// Mean returns the mean value. +func (gsm *GaugeSampleMetric) Mean() float64 { + if gsm.count == 0 { + return 0 + } + return float64(gsm.sampleSum) / float64(gsm.count) +} diff --git a/pebble/internal/base/metrics_test.go b/pebble/internal/base/metrics_test.go new file mode 100644 index 0000000..90e3166 --- /dev/null +++ b/pebble/internal/base/metrics_test.go @@ -0,0 +1,79 @@ +// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package base + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +func TestThroughputMetric(t *testing.T) { + m1 := ThroughputMetric{ + Bytes: 10, + WorkDuration: time.Millisecond, + IdleDuration: 9 * time.Millisecond, + } + var m2 ThroughputMetric + m2.Merge(m1) + require.Equal(t, m1, m2) + m2.Merge(m1) + doubleM1 := ThroughputMetric{ + Bytes: 2 * m1.Bytes, + WorkDuration: 2 * m1.WorkDuration, + IdleDuration: 2 * m1.IdleDuration, + } + require.Equal(t, doubleM1, m2) + require.EqualValues(t, 10*100, m1.Rate()) + require.EqualValues(t, 10*1000, m1.PeakRate()) +} + +func TestThroughputMetric_Subtract(t *testing.T) { + m1 := ThroughputMetric{ + Bytes: 10, + WorkDuration: time.Millisecond, + IdleDuration: 9 * time.Millisecond, + } + m2 := ThroughputMetric{ + Bytes: 100, + WorkDuration: time.Millisecond, + IdleDuration: 90 * time.Millisecond, + } + + m2.Subtract(m1) + require.Equal(t, int64(90), m2.Bytes) + require.Equal(t, 0*time.Millisecond, m2.WorkDuration) + require.Equal(t, 81*time.Millisecond, m2.IdleDuration) +} + +func TestGaugeSampleMetric(t *testing.T) { + g1 := GaugeSampleMetric{} + g1.AddSample(10) + g1.AddSample(20) + g2 := GaugeSampleMetric{} + g2.Merge(g1) + g2.AddSample(60) + require.EqualValues(t, 30, g2.Mean()) + require.EqualValues(t, 3, g2.count) + require.EqualValues(t, 15, g1.Mean()) + require.EqualValues(t, 2, g1.count) +} + +func TestGaugeSampleMetricSubtract(t *testing.T) { + g1 := GaugeSampleMetric{} + g2 := GaugeSampleMetric{} + g1.AddSample(10) + g1.AddSample(20) + g1.AddSample(0) + + g2.AddSample(10) + + g1.Subtract(g2) + + require.Equal(t, int64(20), g1.sampleSum) + require.Equal(t, int64(2), g1.count) + +} diff --git a/pebble/internal/base/options.go b/pebble/internal/base/options.go new file mode 100644 index 0000000..316717e --- /dev/null +++ b/pebble/internal/base/options.go @@ -0,0 +1,76 @@ +// Copyright 2011 The LevelDB-Go and Pebble Authors. All rights reserved. 
Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package base + +// SSTable block defaults. +const ( + DefaultBlockRestartInterval = 16 + DefaultBlockSize = 4096 + DefaultBlockSizeThreshold = 90 +) + +// FilterType is the level at which to apply a filter: block or table. +type FilterType int + +// The available filter types. +const ( + TableFilter FilterType = iota +) + +func (t FilterType) String() string { + switch t { + case TableFilter: + return "table" + } + return "unknown" +} + +// FilterWriter provides an interface for creating filter blocks. See +// FilterPolicy for more details about filters. +type FilterWriter interface { + // AddKey adds a key to the current filter block. + AddKey(key []byte) + + // Finish appends to dst an encoded filter that holds the current set of + // keys. The writer state is reset after the call to Finish allowing the + // writer to be reused for the creation of additional filters. + Finish(dst []byte) []byte +} + +// FilterPolicy is an algorithm for probabilistically encoding a set of keys. +// The canonical implementation is a Bloom filter. +// +// Every FilterPolicy has a name. This names the algorithm itself, not any one +// particular instance. Aspects specific to a particular instance, such as the +// set of keys or any other parameters, will be encoded in the []byte filter +// returned by NewWriter. +// +// The name may be written to files on disk, along with the filter data. To use +// these filters, the FilterPolicy name at the time of writing must equal the +// name at the time of reading. If they do not match, the filters will be +// ignored, which will not affect correctness but may affect performance. +type FilterPolicy interface { + // Name names the filter policy. + Name() string + + // MayContain returns whether the encoded filter may contain given key. + // False positives are possible, where it returns true for keys not in the + // original set. 
+ MayContain(ftype FilterType, filter, key []byte) bool + + // NewWriter creates a new FilterWriter. + NewWriter(ftype FilterType) FilterWriter +} + +// BlockPropertyFilter is used in an Iterator to filter sstables and blocks +// within the sstable. It should not maintain any per-sstable state, and must +// be thread-safe. +type BlockPropertyFilter interface { + // Name returns the name of the block property collector. + Name() string + // Intersects returns true if the set represented by prop intersects with + // the set in the filter. + Intersects(prop []byte) (bool, error) +} diff --git a/pebble/internal/batchskl/README.md b/pebble/internal/batchskl/README.md new file mode 100644 index 0000000..1e0aa2d --- /dev/null +++ b/pebble/internal/batchskl/README.md @@ -0,0 +1,56 @@ +# batchskl + +Fast, non-concurrent skiplist implementation in Go that supports +forward and backward iteration. + +## Limitations + +* The interface is tailored for use in indexing pebble batches. Keys + and values are stored outside of the skiplist making the skiplist + awkward for general purpose use. +* Deletion is not supported. Instead, higher-level code is expected to + add deletion tombstones and needs to process those tombstones + appropriately. + +## Pedigree + +This code is based on Andy Kimball's arenaskl code. + +The arenaskl code is based on the skiplist found in Badger, a Go-based +KV store: + +https://github.com/dgraph-io/badger/tree/master/skl + +The skiplist in Badger is itself based on a C++ skiplist built for +Facebook's RocksDB: + +https://github.com/facebook/rocksdb/tree/master/memtable + +## Benchmarks + +The benchmarks consist of a mix of reads and writes executed in parallel. The +fraction of reads is indicated in the run name: "frac_X" indicates a run where +X percent of the operations are reads. 
+ +``` +name time/op +ReadWrite/frac_0 1.03µs ± 2% +ReadWrite/frac_10 1.32µs ± 1% +ReadWrite/frac_20 1.26µs ± 1% +ReadWrite/frac_30 1.18µs ± 1% +ReadWrite/frac_40 1.09µs ± 1% +ReadWrite/frac_50 987ns ± 2% +ReadWrite/frac_60 1.07µs ± 1% +ReadWrite/frac_70 909ns ± 1% +ReadWrite/frac_80 693ns ± 2% +ReadWrite/frac_90 599ns ± 2% +ReadWrite/frac_100 45.3ns ± 3% +``` + +Forward and backward iteration are also fast: + +``` +name time/op +IterNext 4.49ns ± 3% +IterPrev 4.48ns ± 3% +``` diff --git a/pebble/internal/batchskl/iterator.go b/pebble/internal/batchskl/iterator.go new file mode 100644 index 0000000..5917ed1 --- /dev/null +++ b/pebble/internal/batchskl/iterator.go @@ -0,0 +1,223 @@ +/* + * Copyright 2017 Dgraph Labs, Inc. and Contributors + * Modifications copyright (C) 2017 Andy Kimball and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package batchskl + +import "github.com/cockroachdb/pebble/internal/base" + +type splice struct { + prev uint32 + next uint32 +} + +// Iterator is an iterator over the skiplist object. Use Skiplist.NewIter +// to construct an iterator. The current state of the iterator can be cloned +// by simply value copying the struct. +type Iterator struct { + list *Skiplist + nd uint32 + key base.InternalKey + lower []byte + upper []byte +} + +// Close resets the iterator. 
+func (it *Iterator) Close() error { + it.list = nil + it.nd = 0 + return nil +} + +// SeekGE moves the iterator to the first entry whose key is greater than or +// equal to the given key. Returns true if the iterator is pointing at a valid +// entry and false otherwise. Note that SeekGE only checks the upper bound. It +// is up to the caller to ensure that key is greater than or equal to the lower +// bound. +func (it *Iterator) SeekGE(key []byte, flags base.SeekGEFlags) *base.InternalKey { + if flags.TrySeekUsingNext() { + if it.nd == it.list.tail { + // Iterator is done. + return nil + } + less := it.list.cmp(it.key.UserKey, key) < 0 + // Arbitrary constant. By measuring the seek cost as a function of the + // number of elements in the skip list, and fitting to a model, we + // could adjust the number of nexts based on the current size of the + // skip list. + const numNexts = 5 + for i := 0; less && i < numNexts; i++ { + k := it.Next() + if k == nil { + // Iterator is done. + return nil + } + less = it.list.cmp(k.UserKey, key) < 0 + } + if !less { + return &it.key + } + } + + _, it.nd = it.seekForBaseSplice(key, it.list.abbreviatedKey(key)) + if it.nd == it.list.tail { + return nil + } + nodeKey := it.list.getKey(it.nd) + if it.upper != nil && it.list.cmp(it.upper, nodeKey.UserKey) <= 0 { + it.nd = it.list.tail + return nil + } + it.key = nodeKey + return &it.key +} + +// SeekLT moves the iterator to the last entry whose key is less the given +// key. Returns true if the iterator is pointing at a valid entry and false +// otherwise. Note that SeekLT only checks the lower bound. It is up to the +// caller to ensure that key is less than the upper bound. 
+func (it *Iterator) SeekLT(key []byte) *base.InternalKey { + it.nd, _ = it.seekForBaseSplice(key, it.list.abbreviatedKey(key)) + if it.nd == it.list.head { + return nil + } + nodeKey := it.list.getKey(it.nd) + if it.lower != nil && it.list.cmp(it.lower, nodeKey.UserKey) > 0 { + it.nd = it.list.head + return nil + } + it.key = nodeKey + return &it.key +} + +// First seeks position at the first entry in list. Final state of iterator is +// Valid() iff list is not empty. Note that First only checks the upper +// bound. It is up to the caller to ensure that key is greater than or equal to +// the lower bound (e.g. via a call to SeekGE(lower)). +func (it *Iterator) First() *base.InternalKey { + it.nd = it.list.getNext(it.list.head, 0) + if it.nd == it.list.tail { + return nil + } + nodeKey := it.list.getKey(it.nd) + if it.upper != nil && it.list.cmp(it.upper, nodeKey.UserKey) <= 0 { + it.nd = it.list.tail + return nil + } + it.key = nodeKey + return &it.key +} + +// Last seeks position at the last entry in list. Final state of iterator is +// Valid() iff list is not empty. Note that Last only checks the lower +// bound. It is up to the caller to ensure that key is less than the upper +// bound (e.g. via a call to SeekLT(upper)). +func (it *Iterator) Last() *base.InternalKey { + it.nd = it.list.getPrev(it.list.tail, 0) + if it.nd == it.list.head { + return nil + } + nodeKey := it.list.getKey(it.nd) + if it.lower != nil && it.list.cmp(it.lower, nodeKey.UserKey) > 0 { + it.nd = it.list.head + return nil + } + it.key = nodeKey + return &it.key +} + +// Next advances to the next position. If there are no following nodes, then +// Valid() will be false after this call. 
+func (it *Iterator) Next() *base.InternalKey { + it.nd = it.list.getNext(it.nd, 0) + if it.nd == it.list.tail { + return nil + } + nodeKey := it.list.getKey(it.nd) + if it.upper != nil && it.list.cmp(it.upper, nodeKey.UserKey) <= 0 { + it.nd = it.list.tail + return nil + } + it.key = nodeKey + return &it.key +} + +// Prev moves to the previous position. If there are no previous nodes, then +// Valid() will be false after this call. +func (it *Iterator) Prev() *base.InternalKey { + it.nd = it.list.getPrev(it.nd, 0) + if it.nd == it.list.head { + return nil + } + nodeKey := it.list.getKey(it.nd) + if it.lower != nil && it.list.cmp(it.lower, nodeKey.UserKey) > 0 { + it.nd = it.list.head + return nil + } + it.key = nodeKey + return &it.key +} + +// Key returns the key at the current position. +func (it *Iterator) Key() *base.InternalKey { + return &it.key +} + +// KeyInfo returns the offset of the start of the record, the start of the key, +// and the end of the key. +func (it *Iterator) KeyInfo() (offset, keyStart, keyEnd uint32) { + n := it.list.node(it.nd) + return n.offset, n.keyStart, n.keyEnd +} + +// Head true iff the iterator is positioned at the sentinel head node. +func (it *Iterator) Head() bool { + return it.nd == it.list.head +} + +// Tail true iff the iterator is positioned at the sentinel tail node. +func (it *Iterator) Tail() bool { + return it.nd == it.list.tail +} + +// Valid returns nil iff the iterator is positioned at a valid node. +func (it *Iterator) Valid() bool { + return it.list != nil && it.nd != it.list.head && it.nd != it.list.tail +} + +func (it *Iterator) String() string { + return "batch" +} + +// SetBounds sets the lower and upper bounds for the iterator. Note that the +// result of Next and Prev will be undefined until the iterator has been +// repositioned with SeekGE, SeekLT, First, or Last. 
+func (it *Iterator) SetBounds(lower, upper []byte) { + it.lower = lower + it.upper = upper +} + +func (it *Iterator) seekForBaseSplice(key []byte, abbreviatedKey uint64) (prev, next uint32) { + prev = it.list.head + for level := it.list.height - 1; ; level-- { + prev, next = it.list.findSpliceForLevel(key, abbreviatedKey, level, prev) + if level == 0 { + break + } + } + + return +} diff --git a/pebble/internal/batchskl/skl.go b/pebble/internal/batchskl/skl.go new file mode 100644 index 0000000..f56d95c --- /dev/null +++ b/pebble/internal/batchskl/skl.go @@ -0,0 +1,442 @@ +/* + * Copyright 2017 Dgraph Labs, Inc. and Contributors + * Modifications copyright (C) 2017 Andy Kimball and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License") + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +Adapted from RocksDB inline skiplist. + +Key differences: +- No optimization for sequential inserts (no "prev"). +- No custom comparator. +- Support overwrites. This requires care when we see the same key when inserting. + For RocksDB or LevelDB, overwrites are implemented as a newer sequence number in the key, so + there is no need for values. We don't intend to support versioning. In-place updates of values + would be more efficient. +- We discard all non-concurrent code. +- We do not support Splices. This simplifies the code a lot. +- No AllocateNode or other pointer arithmetic. +- We combine the findLessThan, findGreaterOrEqual, etc into one function. 
+*/ + +/* +Further adapted from Badger: https://github.com/dgraph-io/badger. + +Key differences: +- Support for previous pointers - doubly linked lists. Note that it's up to higher + level code to deal with the intermediate state that occurs during insertion, + where node A is linked to node B, but node B is not yet linked back to node A. +- Iterator includes mutator functions. +*/ + +/* +Further adapted from arenaskl: https://github.com/andy-kimball/arenaskl + +Key differences: +- Removed support for deletion. +- Removed support for concurrency. +- External storage of keys. +- Node storage grows to an arbitrary size. +*/ + +package batchskl // import "github.com/cockroachdb/pebble/internal/batchskl" + +import ( + "bytes" + "encoding/binary" + "fmt" + "math" + "time" + "unsafe" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/constants" + "golang.org/x/exp/rand" +) + +const ( + maxHeight = 20 + maxNodeSize = uint64(unsafe.Sizeof(node{})) + linksSize = uint64(unsafe.Sizeof(links{})) + maxNodesSize = constants.MaxUint32OrInt +) + +var ( + // ErrExists indicates that a duplicate record was inserted. This should never + // happen for normal usage of batchskl as every key should have a unique + // sequence number. + ErrExists = errors.New("record with this key already exists") + + // ErrTooManyRecords is a sentinel error returned when the size of the raw + // nodes slice exceeds the maximum allowed size (currently 1 << 32 - 1). This + // corresponds to ~117 M skiplist entries. + ErrTooManyRecords = errors.New("too many records") +) + +type links struct { + next uint32 + prev uint32 +} + +type node struct { + // The offset of the start of the record in the storage. + offset uint32 + // The offset of the start and end of the key in storage. + keyStart uint32 + keyEnd uint32 + // A fixed 8-byte abbreviation of the key, used to avoid retrieval of the key + // during seek operations. 
The key retrieval can be expensive purely due to + // cache misses while the abbreviatedKey stored here will be in the same + // cache line as the key and the links making accessing and comparing against + // it almost free. + abbreviatedKey uint64 + // Most nodes do not need to use the full height of the link tower, since the + // probability of each successive level decreases exponentially. Because + // these elements are never accessed, they do not need to be allocated. + // Therefore, when a node is allocated, its memory footprint is deliberately + // truncated to not include unneeded link elements. + links [maxHeight]links +} + +// Skiplist is a fast, non-cocnurrent skiplist implementation that supports +// forward and backward iteration. See arenaskl.Skiplist for a concurrent +// skiplist. Keys and values are stored externally from the skiplist via the +// Storage interface. Deletion is not supported. Instead, higher-level code is +// expected to perform deletion via tombstones and needs to process those +// tombstones appropriately during retrieval operations. +type Skiplist struct { + storage *[]byte + cmp base.Compare + abbreviatedKey base.AbbreviatedKey + nodes []byte + head uint32 + tail uint32 + height uint32 // Current height: 1 <= height <= maxHeight + rand rand.PCGSource +} + +var ( + probabilities [maxHeight]uint32 +) + +func init() { + const pValue = 1 / math.E + + // Precompute the skiplist probabilities so that only a single random number + // needs to be generated and so that the optimal pvalue can be used (inverse + // of Euler's number). + p := float64(1.0) + for i := 0; i < maxHeight; i++ { + probabilities[i] = uint32(float64(math.MaxUint32) * p) + p *= pValue + } +} + +// NewSkiplist constructs and initializes a new, empty skiplist. 
+func NewSkiplist(storage *[]byte, cmp base.Compare, abbreviatedKey base.AbbreviatedKey) *Skiplist { + s := &Skiplist{} + s.Init(storage, cmp, abbreviatedKey) + return s +} + +// Reset the fields in the skiplist for reuse. +func (s *Skiplist) Reset() { + *s = Skiplist{ + nodes: s.nodes[:0], + height: 1, + } + const batchMaxRetainedSize = 1 << 20 // 1 MB + if cap(s.nodes) > batchMaxRetainedSize { + s.nodes = nil + } +} + +// Init the skiplist to empty and re-initialize. +func (s *Skiplist) Init(storage *[]byte, cmp base.Compare, abbreviatedKey base.AbbreviatedKey) { + *s = Skiplist{ + storage: storage, + cmp: cmp, + abbreviatedKey: abbreviatedKey, + nodes: s.nodes[:0], + height: 1, + } + s.rand.Seed(uint64(time.Now().UnixNano())) + + const initBufSize = 256 + if cap(s.nodes) < initBufSize { + s.nodes = make([]byte, 0, initBufSize) + } + + // Allocate head and tail nodes. While allocating a new node can fail, in the + // context of initializing the skiplist we consider it unrecoverable. + var err error + s.head, err = s.newNode(maxHeight, 0, 0, 0, 0) + if err != nil { + panic(err) + } + s.tail, err = s.newNode(maxHeight, 0, 0, 0, 0) + if err != nil { + panic(err) + } + + // Link all head/tail levels together. + headNode := s.node(s.head) + tailNode := s.node(s.tail) + for i := uint32(0); i < maxHeight; i++ { + headNode.links[i].next = s.tail + tailNode.links[i].prev = s.head + } +} + +// Add adds a new key to the skiplist if it does not yet exist. If the record +// already exists, then Add returns ErrRecordExists. 
+func (s *Skiplist) Add(keyOffset uint32) error { + data := (*s.storage)[keyOffset+1:] + v, n := binary.Uvarint(data) + if n <= 0 { + return errors.Errorf("corrupted batch entry: %d", errors.Safe(keyOffset)) + } + data = data[n:] + if v > uint64(len(data)) { + return errors.Errorf("corrupted batch entry: %d", errors.Safe(keyOffset)) + } + keyStart := 1 + keyOffset + uint32(n) + keyEnd := keyStart + uint32(v) + key := data[:v] + abbreviatedKey := s.abbreviatedKey(key) + + // spl holds the list of next and previous links for each level in the + // skiplist indicating where the new node will be inserted. + var spl [maxHeight]splice + + // Fast-path for in-order insertion of keys: compare the new key against the + // last key. + prev := s.getPrev(s.tail, 0) + if prevNode := s.node(prev); prev == s.head || + abbreviatedKey > prevNode.abbreviatedKey || + (abbreviatedKey == prevNode.abbreviatedKey && + s.cmp(key, (*s.storage)[prevNode.keyStart:prevNode.keyEnd]) > 0) { + for level := uint32(0); level < s.height; level++ { + spl[level].prev = s.getPrev(s.tail, level) + spl[level].next = s.tail + } + } else { + s.findSplice(key, abbreviatedKey, &spl) + } + + height := s.randomHeight() + // Increase s.height as necessary. + for ; s.height < height; s.height++ { + spl[s.height].next = s.tail + spl[s.height].prev = s.head + } + + // We always insert from the base level and up. After you add a node in base + // level, we cannot create a node in the level above because it would have + // discovered the node in the base level. + nd, err := s.newNode(height, keyOffset, keyStart, keyEnd, abbreviatedKey) + if err != nil { + return err + } + newNode := s.node(nd) + for level := uint32(0); level < height; level++ { + next := spl[level].next + prev := spl[level].prev + newNode.links[level].next = next + newNode.links[level].prev = prev + s.node(next).links[level].prev = nd + s.node(prev).links[level].next = nd + } + + return nil +} + +// NewIter returns a new Iterator object. 
The lower and upper bound parameters +// control the range of keys the iterator will return. Specifying for nil for +// lower or upper bound disables the check for that boundary. Note that lower +// bound is not checked on {SeekGE,First} and upper bound is not check on +// {SeekLT,Last}. The user is expected to perform that check. Note that it is +// safe for an iterator to be copied by value. +func (s *Skiplist) NewIter(lower, upper []byte) Iterator { + return Iterator{list: s, lower: lower, upper: upper} +} + +func (s *Skiplist) newNode( + height, + offset, keyStart, keyEnd uint32, abbreviatedKey uint64, +) (uint32, error) { + if height < 1 || height > maxHeight { + panic("height cannot be less than one or greater than the max height") + } + + unusedSize := uint64(maxHeight-int(height)) * linksSize + nodeOffset, err := s.alloc(uint32(maxNodeSize - unusedSize)) + if err != nil { + return 0, err + } + nd := s.node(nodeOffset) + + nd.offset = offset + nd.keyStart = keyStart + nd.keyEnd = keyEnd + nd.abbreviatedKey = abbreviatedKey + return nodeOffset, nil +} + +func (s *Skiplist) alloc(size uint32) (uint32, error) { + offset := uint64(len(s.nodes)) + + // We only have a need for memory up to offset + size, but we never want + // to allocate a node whose tail points into unallocated memory. + minAllocSize := offset + maxNodeSize + if uint64(cap(s.nodes)) < minAllocSize { + allocSize := uint64(cap(s.nodes)) * 2 + if allocSize < minAllocSize { + allocSize = minAllocSize + } + // Cap the allocation at the max allowed size to avoid wasted capacity. + if allocSize > maxNodesSize { + // The new record may still not fit within the allocation, in which case + // we return early with an error. This avoids the panic below when we + // resize the slice. It also avoids the allocation and copy. 
+ if uint64(offset)+uint64(size) > maxNodesSize { + return 0, errors.Wrapf(ErrTooManyRecords, + "alloc of new record (size=%d) would overflow uint32 (current size=%d)", + uint64(offset)+uint64(size), offset, + ) + } + allocSize = maxNodesSize + } + tmp := make([]byte, len(s.nodes), allocSize) + copy(tmp, s.nodes) + s.nodes = tmp + } + + newSize := uint32(offset) + size + s.nodes = s.nodes[:newSize] + return uint32(offset), nil +} + +func (s *Skiplist) node(offset uint32) *node { + return (*node)(unsafe.Pointer(&s.nodes[offset])) +} + +func (s *Skiplist) randomHeight() uint32 { + rnd := uint32(s.rand.Uint64()) + h := uint32(1) + for h < maxHeight && rnd <= probabilities[h] { + h++ + } + return h +} + +func (s *Skiplist) findSplice(key []byte, abbreviatedKey uint64, spl *[maxHeight]splice) { + prev := s.head + + for level := s.height - 1; ; level-- { + // The code in this loop is the same as findSpliceForLevel(). For some + // reason, calling findSpliceForLevel() here is much much slower than the + // inlined code below. The excess time is also caught up in the final + // return statement which makes little sense. Revisit when in go1.14 or + // later if inlining improves. + + next := s.getNext(prev, level) + for next != s.tail { + // Assume prev.key < key. + nextNode := s.node(next) + nextAbbreviatedKey := nextNode.abbreviatedKey + if abbreviatedKey < nextAbbreviatedKey { + // We are done for this level, since prev.key < key < next.key. + break + } + if abbreviatedKey == nextAbbreviatedKey { + if s.cmp(key, (*s.storage)[nextNode.keyStart:nextNode.keyEnd]) <= 0 { + // We are done for this level, since prev.key < key <= next.key. + break + } + } + + // Keep moving right on this level. 
+ prev = next + next = nextNode.links[level].next + } + + spl[level].prev = prev + spl[level].next = next + if level == 0 { + break + } + } +} + +func (s *Skiplist) findSpliceForLevel( + key []byte, abbreviatedKey uint64, level, start uint32, +) (prev, next uint32) { + prev = start + next = s.getNext(prev, level) + + for next != s.tail { + // Assume prev.key < key. + nextNode := s.node(next) + nextAbbreviatedKey := nextNode.abbreviatedKey + if abbreviatedKey < nextAbbreviatedKey { + // We are done for this level, since prev.key < key < next.key. + break + } + if abbreviatedKey == nextAbbreviatedKey { + if s.cmp(key, (*s.storage)[nextNode.keyStart:nextNode.keyEnd]) <= 0 { + // We are done for this level, since prev.key < key < next.key. + break + } + } + + // Keep moving right on this level. + prev = next + next = nextNode.links[level].next + } + + return +} + +func (s *Skiplist) getKey(nd uint32) base.InternalKey { + n := s.node(nd) + kind := base.InternalKeyKind((*s.storage)[n.offset]) + key := (*s.storage)[n.keyStart:n.keyEnd] + return base.MakeInternalKey(key, uint64(n.offset)|base.InternalKeySeqNumBatch, kind) +} + +func (s *Skiplist) getNext(nd, h uint32) uint32 { + return s.node(nd).links[h].next +} + +func (s *Skiplist) getPrev(nd, h uint32) uint32 { + return s.node(nd).links[h].prev +} + +func (s *Skiplist) debug() string { + var buf bytes.Buffer + for level := uint32(0); level < s.height; level++ { + var count int + for nd := s.head; nd != s.tail; nd = s.getNext(nd, level) { + count++ + } + fmt.Fprintf(&buf, "%d: %d\n", level, count) + } + return buf.String() +} + +// Silence unused warning. +var _ = (*Skiplist).debug diff --git a/pebble/internal/batchskl/skl_test.go b/pebble/internal/batchskl/skl_test.go new file mode 100644 index 0000000..4f67a8b --- /dev/null +++ b/pebble/internal/batchskl/skl_test.go @@ -0,0 +1,539 @@ +/* + * Copyright 2017 Dgraph Labs, Inc. 
and Contributors + * Modifications copyright (C) 2017 Andy Kimball and Contributors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package batchskl + +import ( + "bytes" + "encoding/binary" + "fmt" + "testing" + "time" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" + "github.com/stretchr/testify/require" + "golang.org/x/exp/rand" +) + +// iterAdapter adapts the new Iterator API which returns the key and value from +// positioning methods (Seek*, First, Last, Next, Prev) to the old API which +// returned a boolean corresponding to Valid. Only used by test code. 
+type iterAdapter struct { + Iterator +} + +func (i *iterAdapter) verify(key *base.InternalKey) bool { + valid := key != nil + if valid != i.Valid() { + panic(fmt.Sprintf("inconsistent valid: %t != %t", valid, i.Valid())) + } + if valid { + if base.InternalCompare(bytes.Compare, *key, i.Key()) != 0 { + panic(fmt.Sprintf("inconsistent key: %s != %s", *key, i.Key())) + } + } + return valid +} + +func (i *iterAdapter) SeekGE(key []byte) bool { + return i.verify(i.Iterator.SeekGE(key, base.SeekGEFlagsNone)) +} + +func (i *iterAdapter) SeekLT(key []byte) bool { + return i.verify(i.Iterator.SeekLT(key)) +} + +func (i *iterAdapter) First() bool { + return i.verify(i.Iterator.First()) +} + +func (i *iterAdapter) Last() bool { + return i.verify(i.Iterator.Last()) +} + +func (i *iterAdapter) Next() bool { + return i.verify(i.Iterator.Next()) +} + +func (i *iterAdapter) Prev() bool { + return i.verify(i.Iterator.Prev()) +} + +func (i *iterAdapter) Key() base.InternalKey { + return *i.Iterator.Key() +} + +// length iterates over skiplist to give exact size. +func length(s *Skiplist) int { + count := 0 + + it := iterAdapter{s.NewIter(nil, nil)} + for valid := it.First(); valid; valid = it.Next() { + count++ + } + + return count +} + +// length iterates over skiplist in reverse order to give exact size. +func lengthRev(s *Skiplist) int { + count := 0 + + it := iterAdapter{s.NewIter(nil, nil)} + for valid := it.Last(); valid; valid = it.Prev() { + count++ + } + + return count +} + +func makeKey(s string) []byte { + return []byte(s) +} + +type testStorage struct { + data []byte +} + +func (d *testStorage) add(key string) uint32 { + offset := uint32(len(d.data)) + d.data = append(d.data, uint8(base.InternalKeyKindSet)) + var buf [binary.MaxVarintLen64]byte + n := binary.PutUvarint(buf[:], uint64(len(key))) + d.data = append(d.data, buf[:n]...) + d.data = append(d.data, key...) 
+ return offset +} + +func (d *testStorage) addBytes(key []byte) uint32 { + offset := uint32(len(d.data)) + d.data = append(d.data, uint8(base.InternalKeyKindSet)) + var buf [binary.MaxVarintLen64]byte + n := binary.PutUvarint(buf[:], uint64(len(key))) + d.data = append(d.data, buf[:n]...) + d.data = append(d.data, key...) + return offset +} + +func newTestSkiplist(storage *testStorage) *Skiplist { + return NewSkiplist(&storage.data, base.DefaultComparer.Compare, + base.DefaultComparer.AbbreviatedKey) +} + +func TestEmpty(t *testing.T) { + key := makeKey("aaa") + l := newTestSkiplist(&testStorage{}) + it := iterAdapter{l.NewIter(nil, nil)} + + require.False(t, it.Valid()) + + it.First() + require.False(t, it.Valid()) + + it.Last() + require.False(t, it.Valid()) + + require.False(t, it.SeekGE(key)) + require.False(t, it.Valid()) +} + +// TestBasic tests seeks and adds. +func TestBasic(t *testing.T) { + d := &testStorage{} + l := newTestSkiplist(d) + it := iterAdapter{l.NewIter(nil, nil)} + + // Try adding values. + require.Nil(t, l.Add(d.add("key1"))) + require.Nil(t, l.Add(d.add("key2"))) + require.Nil(t, l.Add(d.add("key3"))) + + require.True(t, it.SeekGE(makeKey("key"))) + require.EqualValues(t, "key1", it.Key().UserKey) + + require.True(t, it.SeekGE(makeKey("key1"))) + require.EqualValues(t, "key1", it.Key().UserKey) + + require.True(t, it.SeekGE(makeKey("key2"))) + require.EqualValues(t, "key2", it.Key().UserKey) + + require.True(t, it.SeekGE(makeKey("key3"))) + require.EqualValues(t, "key3", it.Key().UserKey) + + require.True(t, it.SeekGE(makeKey("key2"))) + require.True(t, it.SeekGE(makeKey("key3"))) +} + +func TestSkiplistAdd(t *testing.T) { + d := &testStorage{} + l := newTestSkiplist(d) + it := iterAdapter{l.NewIter(nil, nil)} + + // Add empty key. + require.Nil(t, l.Add(d.add(""))) + require.EqualValues(t, []byte(nil), it.Key().UserKey) + require.True(t, it.First()) + require.EqualValues(t, []byte{}, it.Key().UserKey) + + // Add to empty list. 
+ require.Nil(t, l.Add(d.add("00002"))) + require.True(t, it.SeekGE(makeKey("00002"))) + require.EqualValues(t, "00002", it.Key().UserKey) + + // Add first element in non-empty list. + require.Nil(t, l.Add(d.add("00001"))) + require.True(t, it.SeekGE(makeKey("00001"))) + require.EqualValues(t, "00001", it.Key().UserKey) + + // Add last element in non-empty list. + require.Nil(t, l.Add(d.add("00004"))) + require.True(t, it.SeekGE(makeKey("00004"))) + require.EqualValues(t, "00004", it.Key().UserKey) + + // Add element in middle of list. + require.Nil(t, l.Add(d.add("00003"))) + require.True(t, it.SeekGE(makeKey("00003"))) + require.EqualValues(t, "00003", it.Key().UserKey) + + // Try to add element that already exists. + require.Nil(t, l.Add(d.add("00002"))) + require.Equal(t, 6, length(l)) + require.Equal(t, 6, lengthRev(l)) +} + +func TestSkiplistAdd_Overflow(t *testing.T) { + // Regression test for cockroachdb/pebble#1258. The length of the nodes buffer + // cannot exceed the maximum allowable size. + d := &testStorage{} + l := newTestSkiplist(d) + + // Simulate a full nodes slice. This speeds up the test significantly, as + // opposed to adding data to the list. + l.nodes = make([]byte, maxNodesSize) + + // Adding a new node to the list would overflow the nodes slice. Note that it + // is the size of a new node struct that is relevant here, rather than the + // size of the data being added to the list. + err := l.Add(d.add("too much!")) + require.Error(t, err) + require.True(t, errors.Is(err, ErrTooManyRecords)) +} + +// TestIteratorNext tests a basic iteration over all nodes from the beginning. 
+func TestIteratorNext(t *testing.T) { + const n = 100 + d := &testStorage{} + l := newTestSkiplist(d) + it := iterAdapter{l.NewIter(nil, nil)} + + require.False(t, it.Valid()) + + it.First() + require.False(t, it.Valid()) + + for i := n - 1; i >= 0; i-- { + require.Nil(t, l.Add(d.add(fmt.Sprintf("%05d", i)))) + } + + it.First() + for i := 0; i < n; i++ { + require.True(t, it.Valid()) + require.EqualValues(t, fmt.Sprintf("%05d", i), it.Key().UserKey) + it.Next() + } + require.False(t, it.Valid()) +} + +// // TestIteratorPrev tests a basic iteration over all nodes from the end. +func TestIteratorPrev(t *testing.T) { + const n = 100 + d := &testStorage{} + l := newTestSkiplist(d) + it := iterAdapter{l.NewIter(nil, nil)} + + require.False(t, it.Valid()) + + it.Last() + require.False(t, it.Valid()) + + for i := 0; i < n; i++ { + l.Add(d.add(fmt.Sprintf("%05d", i))) + } + + it.Last() + for i := n - 1; i >= 0; i-- { + require.True(t, it.Valid()) + require.EqualValues(t, fmt.Sprintf("%05d", i), string(it.Key().UserKey)) + it.Prev() + } + require.False(t, it.Valid()) +} + +func TestIteratorSeekGE(t *testing.T) { + const n = 1000 + d := &testStorage{} + l := newTestSkiplist(d) + it := iterAdapter{l.NewIter(nil, nil)} + + require.False(t, it.Valid()) + it.First() + require.False(t, it.Valid()) + // 1000, 1010, 1020, ..., 1990. 
+ for i := n - 1; i >= 0; i-- { + require.Nil(t, l.Add(d.add(fmt.Sprintf("%05d", i*10+1000)))) + } + + require.True(t, it.SeekGE(makeKey(""))) + require.True(t, it.Valid()) + require.EqualValues(t, "01000", it.Key().UserKey) + + require.True(t, it.SeekGE(makeKey("01000"))) + require.True(t, it.Valid()) + require.EqualValues(t, "01000", it.Key().UserKey) + + require.True(t, it.SeekGE(makeKey("01005"))) + require.True(t, it.Valid()) + require.EqualValues(t, "01010", it.Key().UserKey) + + require.True(t, it.SeekGE(makeKey("01010"))) + require.True(t, it.Valid()) + require.EqualValues(t, "01010", it.Key().UserKey) + + require.False(t, it.SeekGE(makeKey("99999"))) + require.False(t, it.Valid()) + + // Test seek for empty key. + require.Nil(t, l.Add(d.add(""))) + require.True(t, it.SeekGE([]byte{})) + require.True(t, it.Valid()) + + require.True(t, it.SeekGE(makeKey(""))) + require.True(t, it.Valid()) +} + +func TestIteratorSeekLT(t *testing.T) { + const n = 100 + d := &testStorage{} + l := newTestSkiplist(d) + it := iterAdapter{l.NewIter(nil, nil)} + + require.False(t, it.Valid()) + it.First() + require.False(t, it.Valid()) + // 1000, 1010, 1020, ..., 1990. + for i := n - 1; i >= 0; i-- { + require.Nil(t, l.Add(d.add(fmt.Sprintf("%05d", i*10+1000)))) + } + + require.False(t, it.SeekLT(makeKey(""))) + require.False(t, it.Valid()) + + require.False(t, it.SeekLT(makeKey("01000"))) + require.False(t, it.Valid()) + + require.True(t, it.SeekLT(makeKey("01001"))) + require.EqualValues(t, "01000", it.Key().UserKey) + require.True(t, it.Valid()) + + require.True(t, it.SeekLT(makeKey("01005"))) + require.EqualValues(t, "01000", it.Key().UserKey) + require.True(t, it.Valid()) + + require.True(t, it.SeekLT(makeKey("01991"))) + require.EqualValues(t, "01990", it.Key().UserKey) + require.True(t, it.Valid()) + + require.True(t, it.SeekLT(makeKey("99999"))) + require.True(t, it.Valid()) + require.EqualValues(t, "01990", it.Key().UserKey) + + // Test seek for empty key. 
+ require.Nil(t, l.Add(d.add(""))) + require.False(t, it.SeekLT([]byte{})) + require.False(t, it.Valid()) + require.True(t, it.SeekLT(makeKey("\x01"))) + require.True(t, it.Valid()) + require.EqualValues(t, "", it.Key().UserKey) +} + +// TODO(peter): test First and Last. +func TestIteratorBounds(t *testing.T) { + d := &testStorage{} + l := newTestSkiplist(d) + for i := 1; i < 10; i++ { + require.NoError(t, l.Add(d.add(fmt.Sprintf("%05d", i)))) + } + + it := iterAdapter{l.NewIter(makeKey("00003"), makeKey("00007"))} + + // SeekGE within the lower and upper bound succeeds. + for i := 3; i <= 6; i++ { + key := makeKey(fmt.Sprintf("%05d", i)) + require.True(t, it.SeekGE(key)) + require.EqualValues(t, string(key), string(it.Key().UserKey)) + } + + // SeekGE before the lower bound still succeeds (only the upper bound is + // checked). + for i := 1; i < 3; i++ { + key := makeKey(fmt.Sprintf("%05d", i)) + require.True(t, it.SeekGE(key)) + require.EqualValues(t, string(key), string(it.Key().UserKey)) + } + + // SeekGE beyond the upper bound fails. + for i := 7; i < 10; i++ { + key := makeKey(fmt.Sprintf("%05d", i)) + require.False(t, it.SeekGE(key)) + } + + require.True(t, it.SeekGE(makeKey("00006"))) + require.EqualValues(t, "00006", it.Key().UserKey) + + // Next into the upper bound fails. + require.False(t, it.Next()) + + // SeekLT within the lower and upper bound succeeds. + for i := 4; i <= 7; i++ { + key := makeKey(fmt.Sprintf("%05d", i)) + require.True(t, it.SeekLT(key)) + require.EqualValues(t, fmt.Sprintf("%05d", i-1), string(it.Key().UserKey)) + } + + // SeekLT beyond the upper bound still succeeds (only the lower bound is + // checked). + for i := 8; i < 9; i++ { + key := makeKey(fmt.Sprintf("%05d", i)) + require.True(t, it.SeekLT(key)) + require.EqualValues(t, fmt.Sprintf("%05d", i-1), string(it.Key().UserKey)) + } + + // SeekLT before the lower bound fails. 
+ for i := 1; i < 4; i++ { + key := makeKey(fmt.Sprintf("%05d", i)) + require.False(t, it.SeekLT(key)) + } + + require.True(t, it.SeekLT(makeKey("00004"))) + require.EqualValues(t, "00003", it.Key().UserKey) + + // Prev into the lower bound fails. + require.False(t, it.Prev()) +} + +func randomKey(rng *rand.Rand, b []byte) []byte { + key := rng.Uint32() + key2 := rng.Uint32() + binary.LittleEndian.PutUint32(b, key) + binary.LittleEndian.PutUint32(b[4:], key2) + return b +} + +// Standard test. Some fraction is read. Some fraction is write. Writes have +// to go through mutex lock. +func BenchmarkReadWrite(b *testing.B) { + for i := 0; i <= 10; i++ { + readFrac := float32(i) / 10.0 + b.Run(fmt.Sprintf("frac_%d", i*10), func(b *testing.B) { + var buf [8]byte + d := &testStorage{ + data: make([]byte, 0, b.N*10), + } + l := newTestSkiplist(d) + it := l.NewIter(nil, nil) + rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + key := randomKey(rng, buf[:]) + if rng.Float32() < readFrac { + _ = it.SeekGE(key, base.SeekGEFlagsNone) + } else { + offset := d.addBytes(buf[:]) + _ = l.Add(offset) + } + } + b.StopTimer() + }) + } +} + +func BenchmarkOrderedWrite(b *testing.B) { + var buf [8]byte + d := &testStorage{ + data: make([]byte, 0, b.N*10), + } + l := newTestSkiplist(d) + + b.ResetTimer() + for i := 0; i < b.N; i++ { + binary.BigEndian.PutUint64(buf[:], uint64(i)) + offset := d.addBytes(buf[:]) + _ = l.Add(offset) + } +} + +func BenchmarkIterNext(b *testing.B) { + var buf [8]byte + d := &testStorage{ + data: make([]byte, 0, 64<<10), + } + l := newTestSkiplist(d) + + rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) + for len(d.data)+20 < cap(d.data) { + key := randomKey(rng, buf[:]) + offset := d.addBytes(key) + err := l.Add(offset) + require.NoError(b, err) + } + + it := l.NewIter(nil, nil) + b.ResetTimer() + for i := 0; i < b.N; i++ { + if !it.Valid() { + it.First() + } + it.Next() + } +} + +func 
BenchmarkIterPrev(b *testing.B) { + var buf [8]byte + d := &testStorage{ + data: make([]byte, 0, 64<<10), + } + l := newTestSkiplist(d) + + rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) + for len(d.data)+20 < cap(d.data) { + key := randomKey(rng, buf[:]) + offset := d.addBytes(key) + err := l.Add(offset) + require.NoError(b, err) + } + + it := l.NewIter(nil, nil) + b.ResetTimer() + for i := 0; i < b.N; i++ { + if !it.Valid() { + it.Last() + } + it.Prev() + } +} diff --git a/pebble/internal/bytealloc/bytealloc.go b/pebble/internal/bytealloc/bytealloc.go new file mode 100644 index 0000000..b905270 --- /dev/null +++ b/pebble/internal/bytealloc/bytealloc.go @@ -0,0 +1,69 @@ +// Copyright 2016 The Cockroach Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +// implied. See the License for the specific language governing +// permissions and limitations under the License. + +package bytealloc + +import "github.com/cockroachdb/pebble/internal/rawalloc" + +// An A provides chunk allocation of []byte, amortizing the overhead of each +// allocation. Because the underlying storage for the slices is shared, they +// should share a similar lifetime in order to avoid pinning large amounts of +// memory unnecessarily. The allocator itself is a []byte where cap() indicates +// the total amount of memory and len() is the amount already allocated. The +// size of the buffer to allocate from is grown exponentially when it runs out +// of room up to a maximum size (chunkAllocMaxSize). 
+type A []byte
+
+const chunkAllocMinSize = 512
+const chunkAllocMaxSize = 512 << 10 // 512 KB
+
+func (a A) reserve(n int) A {
+	allocSize := cap(a) * 2
+	if allocSize < chunkAllocMinSize {
+		allocSize = chunkAllocMinSize
+	} else if allocSize > chunkAllocMaxSize {
+		allocSize = chunkAllocMaxSize
+	}
+	if allocSize < n {
+		allocSize = n
+	}
+	return rawalloc.New(0, allocSize)
+}
+
+// Alloc allocates a new chunk of memory with the specified length.
+func (a A) Alloc(n int) (A, []byte) {
+	if cap(a)-len(a) < n {
+		a = a.reserve(n)
+	}
+	p := len(a)
+	r := a[p : p+n : p+n]
+	a = a[:p+n]
+	return a, r
+}
+
+// Copy allocates a new chunk of memory, initializing it from src.
+func (a A) Copy(src []byte) (A, []byte) {
+	var alloc []byte
+	a, alloc = a.Alloc(len(src))
+	copy(alloc, src)
+	return a, alloc
+}
+
+// Reset returns the current chunk, resetting allocated memory back to none.
+// Future allocations will use memory previously allocated by previous calls to
+// Alloc or Copy, so the caller must know that none of the previously
+// allocated byte slices are still in use.
+func (a A) Reset() A {
+	return a[:0]
+}
diff --git a/pebble/internal/cache/LICENSE b/pebble/internal/cache/LICENSE
new file mode 100644
index 0000000..daa739e
--- /dev/null
+++ b/pebble/internal/cache/LICENSE
@@ -0,0 +1,21 @@
+The MIT License
+
+Copyright (c) 2018 Damian Gryski
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/pebble/internal/cache/cgo_disabled.go b/pebble/internal/cache/cgo_disabled.go new file mode 100644 index 0000000..0e75574 --- /dev/null +++ b/pebble/internal/cache/cgo_disabled.go @@ -0,0 +1,10 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +//go:build !cgo +// +build !cgo + +package cache + +const cgoEnabled = false diff --git a/pebble/internal/cache/cgo_enabled.go b/pebble/internal/cache/cgo_enabled.go new file mode 100644 index 0000000..b7014cb --- /dev/null +++ b/pebble/internal/cache/cgo_enabled.go @@ -0,0 +1,10 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +//go:build cgo +// +build cgo + +package cache + +const cgoEnabled = true diff --git a/pebble/internal/cache/clockpro.go b/pebble/internal/cache/clockpro.go new file mode 100644 index 0000000..cdae6a9 --- /dev/null +++ b/pebble/internal/cache/clockpro.go @@ -0,0 +1,909 @@ +// Copyright 2018. All rights reserved. Use of this source code is governed by +// an MIT-style license that can be found in the LICENSE file. + +// Package cache implements the CLOCK-Pro caching algorithm. +// +// CLOCK-Pro is a patent-free alternative to the Adaptive Replacement Cache, +// https://en.wikipedia.org/wiki/Adaptive_replacement_cache. 
+// It is an approximation of LIRS ( https://en.wikipedia.org/wiki/LIRS_caching_algorithm ), +// much like the CLOCK page replacement algorithm is an approximation of LRU. +// +// This implementation is based on the python code from https://bitbucket.org/SamiLehtinen/pyclockpro . +// +// Slides describing the algorithm: http://fr.slideshare.net/huliang64/clockpro +// +// The original paper: http://static.usenix.org/event/usenix05/tech/general/full_papers/jiang/jiang_html/html.html +// +// It is MIT licensed, like the original. +package cache // import "github.com/cockroachdb/pebble/internal/cache" + +import ( + "fmt" + "os" + "runtime" + "runtime/debug" + "strings" + "sync" + "sync/atomic" + + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/invariants" +) + +type fileKey struct { + // id is the namespace for fileNums. + id uint64 + fileNum base.DiskFileNum +} + +type key struct { + fileKey + offset uint64 +} + +// file returns the "file key" for the receiver. This is the key used for the +// shard.files map. +func (k key) file() key { + k.offset = 0 + return k +} + +func (k key) String() string { + return fmt.Sprintf("%d/%d/%d", k.id, k.fileNum, k.offset) +} + +// Handle provides a strong reference to a value in the cache. The reference +// does not pin the value in the cache, but it does prevent the underlying byte +// slice from being reused. +type Handle struct { + value *Value +} + +// Get returns the value stored in handle. +func (h Handle) Get() []byte { + if h.value != nil { + // NB: We don't increment shard.hits in this code path because we only want + // to record a hit when the handle is retrieved from the cache. + return h.value.buf + } + return nil +} + +// Release releases the reference to the cache entry. 
+func (h Handle) Release() { + h.value.release() +} + +type shard struct { + hits atomic.Int64 + misses atomic.Int64 + + mu sync.RWMutex + + reservedSize int64 + maxSize int64 + coldTarget int64 + blocks robinHoodMap // fileNum+offset -> block + files robinHoodMap // fileNum -> list of blocks + + // The blocks and files maps store values in manually managed memory that is + // invisible to the Go GC. This is fine for Value and entry objects that are + // stored in manually managed memory, but when the "invariants" build tag is + // set, all Value and entry objects are Go allocated and the entries map will + // contain a reference to every entry. + entries map[*entry]struct{} + + handHot *entry + handCold *entry + handTest *entry + + sizeHot int64 + sizeCold int64 + sizeTest int64 + + // The count fields are used exclusively for asserting expectations. + // We've seen infinite looping (cockroachdb/cockroach#70154) that + // could be explained by a corrupted sizeCold. Through asserting on + // these fields, we hope to gain more insight from any future + // reproductions. + countHot int64 + countCold int64 + countTest int64 +} + +func (c *shard) Get(id uint64, fileNum base.DiskFileNum, offset uint64) Handle { + c.mu.RLock() + var value *Value + if e := c.blocks.Get(key{fileKey{id, fileNum}, offset}); e != nil { + value = e.acquireValue() + if value != nil { + e.referenced.Store(true) + } + } + c.mu.RUnlock() + if value == nil { + c.misses.Add(1) + return Handle{} + } + c.hits.Add(1) + return Handle{value: value} +} + +func (c *shard) Set(id uint64, fileNum base.DiskFileNum, offset uint64, value *Value) Handle { + if n := value.refs(); n != 1 { + panic(fmt.Sprintf("pebble: Value has already been added to the cache: refs=%d", n)) + } + + c.mu.Lock() + defer c.mu.Unlock() + + k := key{fileKey{id, fileNum}, offset} + e := c.blocks.Get(k) + + switch { + case e == nil: + // no cache entry? 
add it + e = newEntry(c, k, int64(len(value.buf))) + e.setValue(value) + if c.metaAdd(k, e) { + value.ref.trace("add-cold") + c.sizeCold += e.size + c.countCold++ + } else { + value.ref.trace("skip-cold") + e.free() + e = nil + } + + case e.peekValue() != nil: + // cache entry was a hot or cold page + e.setValue(value) + e.referenced.Store(true) + delta := int64(len(value.buf)) - e.size + e.size = int64(len(value.buf)) + if e.ptype == etHot { + value.ref.trace("add-hot") + c.sizeHot += delta + } else { + value.ref.trace("add-cold") + c.sizeCold += delta + } + c.evict() + + default: + // cache entry was a test page + c.sizeTest -= e.size + c.countTest-- + c.metaDel(e).release() + c.metaCheck(e) + + e.size = int64(len(value.buf)) + c.coldTarget += e.size + if c.coldTarget > c.targetSize() { + c.coldTarget = c.targetSize() + } + + e.referenced.Store(false) + e.setValue(value) + e.ptype = etHot + if c.metaAdd(k, e) { + value.ref.trace("add-hot") + c.sizeHot += e.size + c.countHot++ + } else { + value.ref.trace("skip-hot") + e.free() + e = nil + } + } + + c.checkConsistency() + + // Values are initialized with a reference count of 1. That reference count + // is being transferred to the returned Handle. + return Handle{value: value} +} + +func (c *shard) checkConsistency() { + // See the comment above the count{Hot,Cold,Test} fields. 
+ switch { + case c.sizeHot < 0 || c.sizeCold < 0 || c.sizeTest < 0 || c.countHot < 0 || c.countCold < 0 || c.countTest < 0: + panic(fmt.Sprintf("pebble: unexpected negative: %d (%d bytes) hot, %d (%d bytes) cold, %d (%d bytes) test", + c.countHot, c.sizeHot, c.countCold, c.sizeCold, c.countTest, c.sizeTest)) + case c.sizeHot > 0 && c.countHot == 0: + panic(fmt.Sprintf("pebble: mismatch %d hot size, %d hot count", c.sizeHot, c.countHot)) + case c.sizeCold > 0 && c.countCold == 0: + panic(fmt.Sprintf("pebble: mismatch %d cold size, %d cold count", c.sizeCold, c.countCold)) + case c.sizeTest > 0 && c.countTest == 0: + panic(fmt.Sprintf("pebble: mismatch %d test size, %d test count", c.sizeTest, c.countTest)) + } +} + +// Delete deletes the cached value for the specified file and offset. +func (c *shard) Delete(id uint64, fileNum base.DiskFileNum, offset uint64) { + // The common case is there is nothing to delete, so do a quick check with + // shared lock. + k := key{fileKey{id, fileNum}, offset} + c.mu.RLock() + exists := c.blocks.Get(k) != nil + c.mu.RUnlock() + if !exists { + return + } + + var deletedValue *Value + func() { + c.mu.Lock() + defer c.mu.Unlock() + + e := c.blocks.Get(k) + if e == nil { + return + } + deletedValue = c.metaEvict(e) + c.checkConsistency() + }() + // Now that the mutex has been dropped, release the reference which will + // potentially free the memory associated with the previous cached value. + deletedValue.release() +} + +// EvictFile evicts all of the cache values for the specified file. +func (c *shard) EvictFile(id uint64, fileNum base.DiskFileNum) { + fkey := key{fileKey{id, fileNum}, 0} + for c.evictFileRun(fkey) { + // Sched switch to give another goroutine an opportunity to acquire the + // shard mutex. + runtime.Gosched() + } +} + +func (c *shard) evictFileRun(fkey key) (moreRemaining bool) { + // If most of the file's blocks are held in the block cache, evicting all + // the blocks may take a while. 
We don't want to block the entire cache + // shard, forcing concurrent readers to wait until we're finished. We drop + // the mutex every [blocksPerMutexAcquisition] blocks to give other + // goroutines an opportunity to make progress. + const blocksPerMutexAcquisition = 5 + c.mu.Lock() + + // Releasing a value may result in free-ing it back to the memory allocator. + // This can have a nontrivial cost that we'd prefer to not pay while holding + // the shard mutex, so we collect the evicted values in a local slice and + // only release them in a defer after dropping the cache mutex. + var obsoleteValuesAlloc [blocksPerMutexAcquisition]*Value + obsoleteValues := obsoleteValuesAlloc[:0] + defer func() { + c.mu.Unlock() + for _, v := range obsoleteValues { + v.release() + } + }() + + blocks := c.files.Get(fkey) + if blocks == nil { + // No blocks for this file. + return false + } + + // b is the current head of the doubly linked list, and n is the entry after b. + for b, n := blocks, (*entry)(nil); len(obsoleteValues) < cap(obsoleteValues); b = n { + n = b.fileLink.next + obsoleteValues = append(obsoleteValues, c.metaEvict(b)) + if b == n { + // b == n represents the case where b was the last entry remaining + // in the doubly linked list, which is why it pointed at itself. So + // no more entries left. + c.checkConsistency() + return false + } + } + // Exhausted blocksPerMutexAcquisition. + return true +} + +func (c *shard) Free() { + c.mu.Lock() + defer c.mu.Unlock() + + // NB: we use metaDel rather than metaEvict in order to avoid the expensive + // metaCheck call when the "invariants" build tag is specified. + for c.handHot != nil { + e := c.handHot + c.metaDel(c.handHot).release() + e.free() + } + + c.blocks.free() + c.files.free() +} + +func (c *shard) Reserve(n int) { + c.mu.Lock() + defer c.mu.Unlock() + c.reservedSize += int64(n) + + // Changing c.reservedSize will either increase or decrease + // the targetSize. 
But we want coldTarget to be in the range + // [0, targetSize]. So, if c.targetSize decreases, make sure + // that the coldTarget fits within the limits. + targetSize := c.targetSize() + if c.coldTarget > targetSize { + c.coldTarget = targetSize + } + + c.evict() + c.checkConsistency() +} + +// Size returns the current space used by the cache. +func (c *shard) Size() int64 { + c.mu.RLock() + size := c.sizeHot + c.sizeCold + c.mu.RUnlock() + return size +} + +func (c *shard) targetSize() int64 { + target := c.maxSize - c.reservedSize + // Always return a positive integer for targetSize. This is so that we don't + // end up in an infinite loop in evict(), in cases where reservedSize is + // greater than or equal to maxSize. + if target < 1 { + return 1 + } + return target +} + +// Add the entry to the cache, returning true if the entry was added and false +// if it would not fit in the cache. +func (c *shard) metaAdd(key key, e *entry) bool { + c.evict() + if e.size > c.targetSize() { + // The entry is larger than the target cache size. + return false + } + + c.blocks.Put(key, e) + if entriesGoAllocated { + // Go allocated entries need to be referenced from Go memory. The entries + // map provides that reference. + c.entries[e] = struct{}{} + } + + if c.handHot == nil { + // first element + c.handHot = e + c.handCold = e + c.handTest = e + } else { + c.handHot.link(e) + } + + if c.handCold == c.handHot { + c.handCold = c.handCold.prev() + } + + fkey := key.file() + if fileBlocks := c.files.Get(fkey); fileBlocks == nil { + c.files.Put(fkey, e) + } else { + fileBlocks.linkFile(e) + } + return true +} + +// Remove the entry from the cache. This removes the entry from the blocks map, +// the files map, and ensures that hand{Hot,Cold,Test} are not pointing at the +// entry. Returns the deleted value that must be released, if any. 
+func (c *shard) metaDel(e *entry) (deletedValue *Value) { + if value := e.peekValue(); value != nil { + value.ref.trace("metaDel") + } + // Remove the pointer to the value. + deletedValue = e.val + e.val = nil + + c.blocks.Delete(e.key) + if entriesGoAllocated { + // Go allocated entries need to be referenced from Go memory. The entries + // map provides that reference. + delete(c.entries, e) + } + + if e == c.handHot { + c.handHot = c.handHot.prev() + } + if e == c.handCold { + c.handCold = c.handCold.prev() + } + if e == c.handTest { + c.handTest = c.handTest.prev() + } + + if e.unlink() == e { + // This was the last entry in the cache. + c.handHot = nil + c.handCold = nil + c.handTest = nil + } + + fkey := e.key.file() + if next := e.unlinkFile(); e == next { + c.files.Delete(fkey) + } else { + c.files.Put(fkey, next) + } + return deletedValue +} + +// Check that the specified entry is not referenced by the cache. +func (c *shard) metaCheck(e *entry) { + if invariants.Enabled { + if _, ok := c.entries[e]; ok { + fmt.Fprintf(os.Stderr, "%p: %s unexpectedly found in entries map\n%s", + e, e.key, debug.Stack()) + os.Exit(1) + } + if c.blocks.findByValue(e) != nil { + fmt.Fprintf(os.Stderr, "%p: %s unexpectedly found in blocks map\n%s\n%s", + e, e.key, &c.blocks, debug.Stack()) + os.Exit(1) + } + if c.files.findByValue(e) != nil { + fmt.Fprintf(os.Stderr, "%p: %s unexpectedly found in files map\n%s\n%s", + e, e.key, &c.files, debug.Stack()) + os.Exit(1) + } + // NB: c.hand{Hot,Cold,Test} are pointers into a single linked list. We + // only have to traverse one of them to check all of them. + var countHot, countCold, countTest int64 + var sizeHot, sizeCold, sizeTest int64 + for t := c.handHot.next(); t != nil; t = t.next() { + // Recompute count{Hot,Cold,Test} and size{Hot,Cold,Test}. 
+ switch t.ptype { + case etHot: + countHot++ + sizeHot += t.size + case etCold: + countCold++ + sizeCold += t.size + case etTest: + countTest++ + sizeTest += t.size + } + if e == t { + fmt.Fprintf(os.Stderr, "%p: %s unexpectedly found in blocks list\n%s", + e, e.key, debug.Stack()) + os.Exit(1) + } + if t == c.handHot { + break + } + } + if countHot != c.countHot || countCold != c.countCold || countTest != c.countTest || + sizeHot != c.sizeHot || sizeCold != c.sizeCold || sizeTest != c.sizeTest { + fmt.Fprintf(os.Stderr, `divergence of Hot,Cold,Test statistics + cache's statistics: hot %d, %d, cold %d, %d, test %d, %d + recalculated statistics: hot %d, %d, cold %d, %d, test %d, %d\n%s`, + c.countHot, c.sizeHot, c.countCold, c.sizeCold, c.countTest, c.sizeTest, + countHot, sizeHot, countCold, sizeCold, countTest, sizeTest, + debug.Stack()) + os.Exit(1) + } + } +} + +func (c *shard) metaEvict(e *entry) (evictedValue *Value) { + switch e.ptype { + case etHot: + c.sizeHot -= e.size + c.countHot-- + case etCold: + c.sizeCold -= e.size + c.countCold-- + case etTest: + c.sizeTest -= e.size + c.countTest-- + } + evictedValue = c.metaDel(e) + c.metaCheck(e) + e.free() + return evictedValue +} + +func (c *shard) evict() { + for c.targetSize() <= c.sizeHot+c.sizeCold && c.handCold != nil { + c.runHandCold(c.countCold, c.sizeCold) + } +} + +func (c *shard) runHandCold(countColdDebug, sizeColdDebug int64) { + // countColdDebug and sizeColdDebug should equal c.countCold and + // c.sizeCold. They're parameters only to aid in debugging of + // cockroachdb/cockroach#70154. Since they're parameters, their + // arguments will appear within stack traces should we encounter + // a reproduction. 
+	if c.countCold != countColdDebug || c.sizeCold != sizeColdDebug {
+		panic(fmt.Sprintf("runHandCold: cold count and size are %d, %d, arguments are %d and %d",
+			c.countCold, c.sizeCold, countColdDebug, sizeColdDebug))
+	}
+
+	e := c.handCold
+	if e.ptype == etCold {
+		if e.referenced.Load() {
+			e.referenced.Store(false)
+			e.ptype = etHot
+			c.sizeCold -= e.size
+			c.countCold--
+			c.sizeHot += e.size
+			c.countHot++
+		} else {
+			e.setValue(nil)
+			e.ptype = etTest
+			c.sizeCold -= e.size
+			c.countCold--
+			c.sizeTest += e.size
+			c.countTest++
+			for c.targetSize() < c.sizeTest && c.handTest != nil {
+				c.runHandTest()
+			}
+		}
+	}
+
+	c.handCold = c.handCold.next()
+
+	for c.targetSize()-c.coldTarget <= c.sizeHot && c.handHot != nil {
+		c.runHandHot()
+	}
+}
+
+func (c *shard) runHandHot() {
+	if c.handHot == c.handTest && c.handTest != nil {
+		c.runHandTest()
+		if c.handHot == nil {
+			return
+		}
+	}
+
+	e := c.handHot
+	if e.ptype == etHot {
+		if e.referenced.Load() {
+			e.referenced.Store(false)
+		} else {
+			e.ptype = etCold
+			c.sizeHot -= e.size
+			c.countHot--
+			c.sizeCold += e.size
+			c.countCold++
+		}
+	}
+
+	c.handHot = c.handHot.next()
+}
+
+func (c *shard) runHandTest() {
+	if c.sizeCold > 0 && c.handTest == c.handCold && c.handCold != nil {
+		// sizeCold is > 0, so assert that countCold > 0. See the
+		// comment above count{Hot,Cold,Test}.
+		if c.countCold == 0 {
+			panic(fmt.Sprintf("pebble: mismatch %d cold size, %d cold count", c.sizeCold, c.countCold))
+		}
+
+		c.runHandCold(c.countCold, c.sizeCold)
+		if c.handTest == nil {
+			return
+		}
+	}
+
+	e := c.handTest
+	if e.ptype == etTest {
+		c.sizeTest -= e.size
+		c.countTest--
+		c.coldTarget -= e.size
+		if c.coldTarget < 0 {
+			c.coldTarget = 0
+		}
+		c.metaDel(e).release()
+		c.metaCheck(e)
+		e.free()
+	}
+
+	c.handTest = c.handTest.next()
+}
+
+// Metrics holds metrics for the cache.
+type Metrics struct {
+	// The number of bytes inuse by the cache.
+ Size int64 + // The count of objects (blocks or tables) in the cache. + Count int64 + // The number of cache hits. + Hits int64 + // The number of cache misses. + Misses int64 +} + +// Cache implements Pebble's sharded block cache. The Clock-PRO algorithm is +// used for page replacement +// (http://static.usenix.org/event/usenix05/tech/general/full_papers/jiang/jiang_html/html.html). In +// order to provide better concurrency, 4 x NumCPUs shards are created, with +// each shard being given 1/n of the target cache size. The Clock-PRO algorithm +// is run independently on each shard. +// +// Blocks are keyed by an (id, fileNum, offset) triple. The ID is a namespace +// for file numbers and allows a single Cache to be shared between multiple +// Pebble instances. The fileNum and offset refer to an sstable file number and +// the offset of the block within the file. Because sstables are immutable and +// file numbers are never reused, (fileNum,offset) are unique for the lifetime +// of a Pebble instance. +// +// In addition to maintaining a map from (fileNum,offset) to data, each shard +// maintains a map of the cached blocks for a particular fileNum. This allows +// efficient eviction of all of the blocks for a file which is used when an +// sstable is deleted from disk. +// +// # Memory Management +// +// In order to reduce pressure on the Go GC, manual memory management is +// performed for the data stored in the cache. Manual memory management is +// performed by calling into C.{malloc,free} to allocate memory. Cache.Values +// are reference counted and the memory backing a manual value is freed when +// the reference count drops to 0. +// +// Manual memory management brings the possibility of memory leaks. It is +// imperative that every Handle returned by Cache.{Get,Set} is eventually +// released. The "invariants" build tag enables a leak detection facility that +// places a GC finalizer on cache.Value. 
When the cache.Value finalizer is run,
+// if the underlying buffer is still present a leak has occurred. The "tracing"
+// build tag enables tracing of cache.Value reference count manipulation and
+// eases finding where a leak has occurred. These two facilities are usually
+// used in combination by specifying `-tags invariants,tracing`. Note that
+// "tracing" produces a significant slowdown, while "invariants" does not.
+type Cache struct {
+	refs    atomic.Int64
+	maxSize int64
+	idAlloc atomic.Uint64
+	shards  []shard
+
+	// Traces recorded by Cache.trace. Used for debugging.
+	tr struct {
+		sync.Mutex
+		msgs []string
+	}
+}
+
+// New creates a new cache of the specified size. Memory for the cache is
+// allocated on demand, not during initialization. The cache is created with a
+// reference count of 1. Each DB it is associated with adds a reference, so the
+// creator of the cache should usually release their reference after the DB is
+// created.
+//
+//	c := cache.New(...)
+//	defer c.Unref()
+//	d, err := pebble.Open(pebble.Options{Cache: c})
+func New(size int64) *Cache {
+	// How many cache shards should we create?
+	//
+	// Note that the probability two processors will try to access the same
+	// shard at the same time increases superlinearly with the number of
+	// processors (Eg, consider the birthday problem where each CPU is a person,
+	// and each shard is a possible birthday).
+	//
+	// We could consider growing the number of shards superlinearly, but
+	// increasing the shard count may reduce the effectiveness of the caching
+	// algorithm if frequently-accessed blocks are insufficiently distributed
+	// across shards. If a shard's size is smaller than a single frequently
+	// scanned sstable, then the shard will be unable to hold the entire
+	// frequently-scanned table in memory despite other shards still holding
+	// infrequently accessed blocks.
+ // + // Experimentally, we've observed contention contributing to tail latencies + // at 2 shards per processor. For now we use 4 shards per processor, + // recognizing this may not be final word. + m := 4 * runtime.GOMAXPROCS(0) + + // In tests we can use large CPU machines with small cache sizes and have + // many caches in existence at a time. If sharding into m shards would + // produce too small shards, constrain the number of shards to 4. + const minimumShardSize = 4 << 20 // 4 MiB + if m > 4 && int(size)/m < minimumShardSize { + m = 4 + } + return newShards(size, m) +} + +func newShards(size int64, shards int) *Cache { + c := &Cache{ + maxSize: size, + shards: make([]shard, shards), + } + c.refs.Store(1) + c.idAlloc.Store(1) + c.trace("alloc", c.refs.Load()) + for i := range c.shards { + c.shards[i] = shard{ + maxSize: size / int64(len(c.shards)), + coldTarget: size / int64(len(c.shards)), + } + if entriesGoAllocated { + c.shards[i].entries = make(map[*entry]struct{}) + } + c.shards[i].blocks.init(16) + c.shards[i].files.init(16) + } + + // Note: this is a no-op if invariants are disabled or race is enabled. + invariants.SetFinalizer(c, func(obj interface{}) { + c := obj.(*Cache) + if v := c.refs.Load(); v != 0 { + c.tr.Lock() + fmt.Fprintf(os.Stderr, + "pebble: cache (%p) has non-zero reference count: %d\n", c, v) + if len(c.tr.msgs) > 0 { + fmt.Fprintf(os.Stderr, "%s\n", strings.Join(c.tr.msgs, "\n")) + } + c.tr.Unlock() + os.Exit(1) + } + }) + return c +} + +func (c *Cache) getShard(id uint64, fileNum base.DiskFileNum, offset uint64) *shard { + if id == 0 { + panic("pebble: 0 cache ID is invalid") + } + + // Inlined version of fnv.New64 + Write. 
+ const offset64 = 14695981039346656037 + const prime64 = 1099511628211 + + h := uint64(offset64) + for i := 0; i < 8; i++ { + h *= prime64 + h ^= uint64(id & 0xff) + id >>= 8 + } + fileNumVal := uint64(fileNum.FileNum()) + for i := 0; i < 8; i++ { + h *= prime64 + h ^= uint64(fileNumVal) & 0xff + fileNumVal >>= 8 + } + for i := 0; i < 8; i++ { + h *= prime64 + h ^= uint64(offset & 0xff) + offset >>= 8 + } + + return &c.shards[h%uint64(len(c.shards))] +} + +// Ref adds a reference to the cache. The cache only remains valid as long a +// reference is maintained to it. +func (c *Cache) Ref() { + v := c.refs.Add(1) + if v <= 1 { + panic(fmt.Sprintf("pebble: inconsistent reference count: %d", v)) + } + c.trace("ref", v) +} + +// Unref releases a reference on the cache. +func (c *Cache) Unref() { + v := c.refs.Add(-1) + c.trace("unref", v) + switch { + case v < 0: + panic(fmt.Sprintf("pebble: inconsistent reference count: %d", v)) + case v == 0: + for i := range c.shards { + c.shards[i].Free() + } + } +} + +// Get retrieves the cache value for the specified file and offset, returning +// nil if no value is present. +func (c *Cache) Get(id uint64, fileNum base.DiskFileNum, offset uint64) Handle { + return c.getShard(id, fileNum, offset).Get(id, fileNum, offset) +} + +// Set sets the cache value for the specified file and offset, overwriting an +// existing value if present. A Handle is returned which provides faster +// retrieval of the cached value than Get (lock-free and avoidance of the map +// lookup). The value must have been allocated by Cache.Alloc. +func (c *Cache) Set(id uint64, fileNum base.DiskFileNum, offset uint64, value *Value) Handle { + return c.getShard(id, fileNum, offset).Set(id, fileNum, offset, value) +} + +// Delete deletes the cached value for the specified file and offset. 
+func (c *Cache) Delete(id uint64, fileNum base.DiskFileNum, offset uint64) { + c.getShard(id, fileNum, offset).Delete(id, fileNum, offset) +} + +// EvictFile evicts all of the cache values for the specified file. +func (c *Cache) EvictFile(id uint64, fileNum base.DiskFileNum) { + if id == 0 { + panic("pebble: 0 cache ID is invalid") + } + for i := range c.shards { + c.shards[i].EvictFile(id, fileNum) + } +} + +// MaxSize returns the max size of the cache. +func (c *Cache) MaxSize() int64 { + return c.maxSize +} + +// Size returns the current space used by the cache. +func (c *Cache) Size() int64 { + var size int64 + for i := range c.shards { + size += c.shards[i].Size() + } + return size +} + +// Alloc allocates a byte slice of the specified size, possibly reusing +// previously allocated but unused memory. The memory backing the value is +// manually managed. The caller MUST either add the value to the cache (via +// Cache.Set), or release the value (via Cache.Free). Failure to do so will +// result in a memory leak. +func Alloc(n int) *Value { + return newValue(n) +} + +// Free frees the specified value. The buffer associated with the value will +// possibly be reused, making it invalid to use the buffer after calling +// Free. Do not call Free on a value that has been added to the cache. +func Free(v *Value) { + if n := v.refs(); n > 1 { + panic(fmt.Sprintf("pebble: Value has been added to the cache: refs=%d", n)) + } + v.release() +} + +// Reserve N bytes in the cache. This effectively shrinks the size of the cache +// by N bytes, without actually consuming any memory. The returned closure +// should be invoked to release the reservation. +func (c *Cache) Reserve(n int) func() { + // Round-up the per-shard reservation. Most reservations should be large, so + // this probably doesn't matter in practice. 
+ shardN := (n + len(c.shards) - 1) / len(c.shards) + for i := range c.shards { + c.shards[i].Reserve(shardN) + } + return func() { + if shardN == -1 { + panic("pebble: cache reservation already released") + } + for i := range c.shards { + c.shards[i].Reserve(-shardN) + } + shardN = -1 + } +} + +// Metrics returns the metrics for the cache. +func (c *Cache) Metrics() Metrics { + var m Metrics + for i := range c.shards { + s := &c.shards[i] + s.mu.RLock() + m.Count += int64(s.blocks.Count()) + m.Size += s.sizeHot + s.sizeCold + s.mu.RUnlock() + m.Hits += s.hits.Load() + m.Misses += s.misses.Load() + } + return m +} + +// NewID returns a new ID to be used as a namespace for cached file +// blocks. +func (c *Cache) NewID() uint64 { + return c.idAlloc.Add(1) +} diff --git a/pebble/internal/cache/clockpro_normal.go b/pebble/internal/cache/clockpro_normal.go new file mode 100644 index 0000000..ae49938 --- /dev/null +++ b/pebble/internal/cache/clockpro_normal.go @@ -0,0 +1,10 @@ +// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +//go:build !tracing +// +build !tracing + +package cache + +func (c *Cache) trace(_ string, _ int64) {} diff --git a/pebble/internal/cache/clockpro_test.go b/pebble/internal/cache/clockpro_test.go new file mode 100644 index 0000000..5ec7b7f --- /dev/null +++ b/pebble/internal/cache/clockpro_test.go @@ -0,0 +1,279 @@ +// Copyright 2018. All rights reserved. Use of this source code is governed by +// an MIT-style license that can be found in the LICENSE file. 
+ +package cache + +import ( + "bufio" + "bytes" + "fmt" + "os" + "runtime" + "strconv" + "sync" + "testing" + "time" + + "github.com/cockroachdb/pebble/internal/base" + "github.com/stretchr/testify/require" + "golang.org/x/exp/rand" +) + +func TestCache(t *testing.T) { + // Test data was generated from the python code + f, err := os.Open("testdata/cache") + require.NoError(t, err) + + cache := newShards(200, 1) + defer cache.Unref() + + scanner := bufio.NewScanner(f) + line := 1 + + for scanner.Scan() { + fields := bytes.Fields(scanner.Bytes()) + + key, err := strconv.Atoi(string(fields[0])) + require.NoError(t, err) + + wantHit := fields[1][0] == 'h' + + var hit bool + h := cache.Get(1, base.FileNum(uint64(key)).DiskFileNum(), 0) + if v := h.Get(); v == nil { + value := Alloc(1) + value.Buf()[0] = fields[0][0] + cache.Set(1, base.FileNum(uint64(key)).DiskFileNum(), 0, value).Release() + } else { + hit = true + if !bytes.Equal(v, fields[0][:1]) { + t.Errorf("%d: cache returned bad data: got %s , want %s\n", line, v, fields[0][:1]) + } + } + h.Release() + if hit != wantHit { + t.Errorf("%d: cache hit mismatch: got %v, want %v\n", line, hit, wantHit) + } + line++ + } +} + +func testValue(cache *Cache, s string, repeat int) *Value { + b := bytes.Repeat([]byte(s), repeat) + v := Alloc(len(b)) + copy(v.Buf(), b) + return v +} + +func TestCacheDelete(t *testing.T) { + cache := newShards(100, 1) + defer cache.Unref() + + cache.Set(1, base.FileNum(0).DiskFileNum(), 0, testValue(cache, "a", 5)).Release() + cache.Set(1, base.FileNum(1).DiskFileNum(), 0, testValue(cache, "a", 5)).Release() + cache.Set(1, base.FileNum(2).DiskFileNum(), 0, testValue(cache, "a", 5)).Release() + if expected, size := int64(15), cache.Size(); expected != size { + t.Fatalf("expected cache size %d, but found %d", expected, size) + } + cache.Delete(1, base.FileNum(1).DiskFileNum(), 0) + if expected, size := int64(10), cache.Size(); expected != size { + t.Fatalf("expected cache size %d, but found %d", 
expected, size) + } + if h := cache.Get(1, base.FileNum(0).DiskFileNum(), 0); h.Get() == nil { + t.Fatalf("expected to find block 0/0") + } else { + h.Release() + } + if h := cache.Get(1, base.FileNum(1).DiskFileNum(), 0); h.Get() != nil { + t.Fatalf("expected to not find block 1/0") + } else { + h.Release() + } + // Deleting a non-existing block does nothing. + cache.Delete(1, base.FileNum(1).DiskFileNum(), 0) + if expected, size := int64(10), cache.Size(); expected != size { + t.Fatalf("expected cache size %d, but found %d", expected, size) + } +} + +func TestEvictFile(t *testing.T) { + cache := newShards(100, 1) + defer cache.Unref() + + cache.Set(1, base.FileNum(0).DiskFileNum(), 0, testValue(cache, "a", 5)).Release() + cache.Set(1, base.FileNum(1).DiskFileNum(), 0, testValue(cache, "a", 5)).Release() + cache.Set(1, base.FileNum(2).DiskFileNum(), 0, testValue(cache, "a", 5)).Release() + cache.Set(1, base.FileNum(2).DiskFileNum(), 1, testValue(cache, "a", 5)).Release() + cache.Set(1, base.FileNum(2).DiskFileNum(), 2, testValue(cache, "a", 5)).Release() + if expected, size := int64(25), cache.Size(); expected != size { + t.Fatalf("expected cache size %d, but found %d", expected, size) + } + cache.EvictFile(1, base.FileNum(0).DiskFileNum()) + if expected, size := int64(20), cache.Size(); expected != size { + t.Fatalf("expected cache size %d, but found %d", expected, size) + } + cache.EvictFile(1, base.FileNum(1).DiskFileNum()) + if expected, size := int64(15), cache.Size(); expected != size { + t.Fatalf("expected cache size %d, but found %d", expected, size) + } + cache.EvictFile(1, base.FileNum(2).DiskFileNum()) + if expected, size := int64(0), cache.Size(); expected != size { + t.Fatalf("expected cache size %d, but found %d", expected, size) + } +} + +func TestEvictAll(t *testing.T) { + // Verify that it is okay to evict all of the data from a cache. Previously + // this would trigger a nil-pointer dereference. 
+ cache := newShards(100, 1) + defer cache.Unref() + + cache.Set(1, base.FileNum(0).DiskFileNum(), 0, testValue(cache, "a", 101)).Release() + cache.Set(1, base.FileNum(1).DiskFileNum(), 0, testValue(cache, "a", 101)).Release() +} + +func TestMultipleDBs(t *testing.T) { + cache := newShards(100, 1) + defer cache.Unref() + + cache.Set(1, base.FileNum(0).DiskFileNum(), 0, testValue(cache, "a", 5)).Release() + cache.Set(2, base.FileNum(0).DiskFileNum(), 0, testValue(cache, "b", 5)).Release() + if expected, size := int64(10), cache.Size(); expected != size { + t.Fatalf("expected cache size %d, but found %d", expected, size) + } + cache.EvictFile(1, base.FileNum(0).DiskFileNum()) + if expected, size := int64(5), cache.Size(); expected != size { + t.Fatalf("expected cache size %d, but found %d", expected, size) + } + h := cache.Get(1, base.FileNum(0).DiskFileNum(), 0) + if v := h.Get(); v != nil { + t.Fatalf("expected not present, but found %s", v) + } + h = cache.Get(2, base.FileNum(0).DiskFileNum(), 0) + if v := h.Get(); string(v) != "bbbbb" { + t.Fatalf("expected bbbbb, but found %s", v) + } else { + h.Release() + } +} + +func TestZeroSize(t *testing.T) { + cache := newShards(0, 1) + defer cache.Unref() + + cache.Set(1, base.FileNum(0).DiskFileNum(), 0, testValue(cache, "a", 5)).Release() +} + +func TestReserve(t *testing.T) { + cache := newShards(4, 2) + defer cache.Unref() + + cache.Set(1, base.FileNum(0).DiskFileNum(), 0, testValue(cache, "a", 1)).Release() + cache.Set(2, base.FileNum(0).DiskFileNum(), 0, testValue(cache, "a", 1)).Release() + require.EqualValues(t, 2, cache.Size()) + r := cache.Reserve(1) + require.EqualValues(t, 0, cache.Size()) + cache.Set(1, base.FileNum(0).DiskFileNum(), 0, testValue(cache, "a", 1)).Release() + cache.Set(2, base.FileNum(0).DiskFileNum(), 0, testValue(cache, "a", 1)).Release() + cache.Set(3, base.FileNum(0).DiskFileNum(), 0, testValue(cache, "a", 1)).Release() + cache.Set(4, base.FileNum(0).DiskFileNum(), 0, testValue(cache, "a", 
1)).Release() + require.EqualValues(t, 2, cache.Size()) + r() + require.EqualValues(t, 2, cache.Size()) + cache.Set(1, base.FileNum(0).DiskFileNum(), 0, testValue(cache, "a", 1)).Release() + cache.Set(2, base.FileNum(0).DiskFileNum(), 0, testValue(cache, "a", 1)).Release() + require.EqualValues(t, 4, cache.Size()) +} + +func TestReserveDoubleRelease(t *testing.T) { + cache := newShards(100, 1) + defer cache.Unref() + + r := cache.Reserve(10) + r() + + result := func() (result string) { + defer func() { + if v := recover(); v != nil { + result = fmt.Sprint(v) + } + }() + r() + return "" + }() + const expected = "pebble: cache reservation already released" + if expected != result { + t.Fatalf("expected %q, but found %q", expected, result) + } +} + +func TestCacheStressSetExisting(t *testing.T) { + cache := newShards(1, 1) + defer cache.Unref() + + var wg sync.WaitGroup + for i := 0; i < 10; i++ { + wg.Add(1) + go func(i int) { + defer wg.Done() + for j := 0; j < 10000; j++ { + cache.Set(1, base.FileNum(0).DiskFileNum(), uint64(i), testValue(cache, "a", 1)).Release() + runtime.Gosched() + } + }(i) + } + wg.Wait() +} + +func BenchmarkCacheGet(b *testing.B) { + const size = 100000 + + cache := newShards(size, 1) + defer cache.Unref() + + for i := 0; i < size; i++ { + v := testValue(cache, "a", 1) + cache.Set(1, base.FileNum(0).DiskFileNum(), uint64(i), v).Release() + } + + b.ResetTimer() + b.RunParallel(func(pb *testing.PB) { + rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) + + for pb.Next() { + h := cache.Get(1, base.FileNum(0).DiskFileNum(), uint64(rng.Intn(size))) + if h.Get() == nil { + b.Fatal("failed to lookup value") + } + h.Release() + } + }) +} + +func TestReserveColdTarget(t *testing.T) { + // If coldTarget isn't updated when we call shard.Reserve, + // then we unnecessarily remove nodes from the + // cache. 
+ + cache := newShards(100, 1) + defer cache.Unref() + + for i := 0; i < 50; i++ { + cache.Set(uint64(i+1), base.FileNum(0).DiskFileNum(), 0, testValue(cache, "a", 1)).Release() + } + + if cache.Size() != 50 { + require.Equal(t, 50, cache.Size(), "nodes were unnecessarily evicted from the cache") + } + + // There won't be enough space left for 50 nodes in the cache after + // we call shard.Reserve. This should trigger a call to evict. + cache.Reserve(51) + + // If we don't update coldTarget in Reserve then the cache gets emptied to + // size 0. In shard.Evict, we loop until shard.Size() < shard.targetSize(). + // Therefore, 100 - 51 = 49, but we evict one more node. + if cache.Size() != 48 { + t.Fatalf("expected positive cache size %d, but found %d", 48, cache.Size()) + } +} diff --git a/pebble/internal/cache/clockpro_tracing.go b/pebble/internal/cache/clockpro_tracing.go new file mode 100644 index 0000000..d14c1cd --- /dev/null +++ b/pebble/internal/cache/clockpro_tracing.go @@ -0,0 +1,20 @@ +// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +//go:build tracing +// +build tracing + +package cache + +import ( + "fmt" + "runtime/debug" +) + +func (c *Cache) trace(msg string, refs int64) { + s := fmt.Sprintf("%s: refs=%d\n%s", msg, refs, debug.Stack()) + c.tr.Lock() + c.tr.msgs = append(c.tr.msgs, s) + c.tr.Unlock() +} diff --git a/pebble/internal/cache/entry.go b/pebble/internal/cache/entry.go new file mode 100644 index 0000000..a49fde6 --- /dev/null +++ b/pebble/internal/cache/entry.go @@ -0,0 +1,155 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package cache + +import "sync/atomic" + +type entryType int8 + +const ( + etTest entryType = iota + etCold + etHot +) + +func (p entryType) String() string { + switch p { + case etTest: + return "test" + case etCold: + return "cold" + case etHot: + return "hot" + } + return "unknown" +} + +// entry holds the metadata for a cache entry. The memory for an entry is +// allocated from manually managed memory. +// +// Using manual memory management for entries is technically a violation of the +// Cgo pointer rules: +// +// https://golang.org/cmd/cgo/#hdr-Passing_pointers +// +// Specifically, Go pointers should not be stored in C allocated memory. The +// reason for this rule is that the Go GC will not look at C allocated memory +// to find pointers to Go objects. If the only reference to a Go object is +// stored in C allocated memory, the object will be reclaimed. The shard field +// of the entry struct points to a Go allocated object, thus the +// violation. What makes this "safe" is that the Cache guarantees that there +// are other pointers to the shard which will keep it alive. +type entry struct { + key key + // The value associated with the entry. The entry holds a reference on the + // value which is maintained by entry.setValue(). + val *Value + blockLink struct { + next *entry + prev *entry + } + fileLink struct { + next *entry + prev *entry + } + size int64 + ptype entryType + // referenced is atomically set to indicate that this entry has been accessed + // since the last time one of the clock hands swept it. + referenced atomic.Bool + shard *shard + // Reference count for the entry. The entry is freed when the reference count + // drops to zero. 
+ ref refcnt +} + +func newEntry(s *shard, key key, size int64) *entry { + e := entryAllocNew() + *e = entry{ + key: key, + size: size, + ptype: etCold, + shard: s, + } + e.blockLink.next = e + e.blockLink.prev = e + e.fileLink.next = e + e.fileLink.prev = e + e.ref.init(1) + return e +} + +func (e *entry) free() { + e.setValue(nil) + *e = entry{} + entryAllocFree(e) +} + +func (e *entry) next() *entry { + if e == nil { + return nil + } + return e.blockLink.next +} + +func (e *entry) prev() *entry { + if e == nil { + return nil + } + return e.blockLink.prev +} + +func (e *entry) link(s *entry) { + s.blockLink.prev = e.blockLink.prev + s.blockLink.prev.blockLink.next = s + s.blockLink.next = e + s.blockLink.next.blockLink.prev = s +} + +func (e *entry) unlink() *entry { + next := e.blockLink.next + e.blockLink.prev.blockLink.next = e.blockLink.next + e.blockLink.next.blockLink.prev = e.blockLink.prev + e.blockLink.prev = e + e.blockLink.next = e + return next +} + +func (e *entry) linkFile(s *entry) { + s.fileLink.prev = e.fileLink.prev + s.fileLink.prev.fileLink.next = s + s.fileLink.next = e + s.fileLink.next.fileLink.prev = s +} + +func (e *entry) unlinkFile() *entry { + next := e.fileLink.next + e.fileLink.prev.fileLink.next = e.fileLink.next + e.fileLink.next.fileLink.prev = e.fileLink.prev + e.fileLink.prev = e + e.fileLink.next = e + return next +} + +func (e *entry) setValue(v *Value) { + if v != nil { + v.acquire() + } + old := e.val + e.val = v + old.release() +} + +func (e *entry) peekValue() *Value { + return e.val +} + +func (e *entry) acquireValue() *Value { + v := e.val + if v != nil { + v.acquire() + } + return v +} diff --git a/pebble/internal/cache/entry_invariants.go b/pebble/internal/cache/entry_invariants.go new file mode 100644 index 0000000..31c54e4 --- /dev/null +++ b/pebble/internal/cache/entry_invariants.go @@ -0,0 +1,38 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. 
Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. +// +//go:build (invariants && !race) || (tracing && !race) +// +build invariants,!race tracing,!race + +package cache + +import ( + "fmt" + "os" + + "github.com/cockroachdb/pebble/internal/invariants" +) + +// When the "invariants" or "tracing" build tags are enabled, we need to +// allocate entries using the Go allocator so entry.val properly maintains a +// reference to the Value. +const entriesGoAllocated = true + +func entryAllocNew() *entry { + e := &entry{} + // Note: this is a no-op if invariants and tracing are disabled or race is + // enabled. + invariants.SetFinalizer(e, func(obj interface{}) { + e := obj.(*entry) + if v := e.ref.refs(); v != 0 { + fmt.Fprintf(os.Stderr, "%p: cache entry has non-zero reference count: %d\n%s", + e, v, e.ref.traces()) + os.Exit(1) + } + }) + return e +} + +func entryAllocFree(e *entry) { +} diff --git a/pebble/internal/cache/entry_normal.go b/pebble/internal/cache/entry_normal.go new file mode 100644 index 0000000..92afb04 --- /dev/null +++ b/pebble/internal/cache/entry_normal.go @@ -0,0 +1,103 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. +// +//go:build (!invariants && !tracing) || race +// +build !invariants,!tracing race + +package cache + +import ( + "runtime" + "sync" + "unsafe" + + "github.com/cockroachdb/pebble/internal/invariants" + "github.com/cockroachdb/pebble/internal/manual" +) + +const ( + entrySize = int(unsafe.Sizeof(entry{})) + entryAllocCacheLimit = 128 + // Avoid using runtime.SetFinalizer in race builds as finalizers tickle a bug + // in the Go race detector in go1.15 and earlier versions. This requires that + // entries are Go allocated rather than manually allocated. 
+ // + // If cgo is disabled we need to allocate the entries using the Go allocator + // and it violates the Go GC rules to put Go pointers (such as the entry + // pointer fields) into untyped memory (i.e. a []byte). + entriesGoAllocated = invariants.RaceEnabled || !cgoEnabled +) + +var entryAllocPool = sync.Pool{ + New: func() interface{} { + return newEntryAllocCache() + }, +} + +func entryAllocNew() *entry { + a := entryAllocPool.Get().(*entryAllocCache) + e := a.alloc() + entryAllocPool.Put(a) + return e +} + +func entryAllocFree(e *entry) { + a := entryAllocPool.Get().(*entryAllocCache) + a.free(e) + entryAllocPool.Put(a) +} + +type entryAllocCache struct { + entries []*entry +} + +func newEntryAllocCache() *entryAllocCache { + c := &entryAllocCache{} + if !entriesGoAllocated { + // Note the use of a "real" finalizer here (as opposed to a build tag-gated + // no-op finalizer). Without the finalizer, objects released from the pool + // and subsequently GC'd by the Go runtime would fail to have their manually + // allocated memory freed, which results in a memory leak. 
+ // lint:ignore SetFinalizer + runtime.SetFinalizer(c, freeEntryAllocCache) + } + return c +} + +func freeEntryAllocCache(obj interface{}) { + c := obj.(*entryAllocCache) + for i, e := range c.entries { + c.dealloc(e) + c.entries[i] = nil + } +} + +func (c *entryAllocCache) alloc() *entry { + n := len(c.entries) + if n == 0 { + if entriesGoAllocated { + return &entry{} + } + b := manual.New(entrySize) + return (*entry)(unsafe.Pointer(&b[0])) + } + e := c.entries[n-1] + c.entries = c.entries[:n-1] + return e +} + +func (c *entryAllocCache) dealloc(e *entry) { + if !entriesGoAllocated { + buf := (*[manual.MaxArrayLen]byte)(unsafe.Pointer(e))[:entrySize:entrySize] + manual.Free(buf) + } +} + +func (c *entryAllocCache) free(e *entry) { + if len(c.entries) == entryAllocCacheLimit { + c.dealloc(e) + return + } + c.entries = append(c.entries, e) +} diff --git a/pebble/internal/cache/refcnt_normal.go b/pebble/internal/cache/refcnt_normal.go new file mode 100644 index 0000000..9ab3348 --- /dev/null +++ b/pebble/internal/cache/refcnt_normal.go @@ -0,0 +1,59 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +//go:build !tracing +// +build !tracing + +package cache + +import ( + "fmt" + "sync/atomic" + + "github.com/cockroachdb/redact" +) + +// refcnt provides an atomic reference count. This version is used when the +// "tracing" build tag is not enabled. See refcnt_tracing.go for the "tracing" +// enabled version. +type refcnt struct { + val atomic.Int32 +} + +// initialize the reference count to the specified value. 
+func (v *refcnt) init(val int32) { + v.val.Store(val) +} + +func (v *refcnt) refs() int32 { + return v.val.Load() +} + +func (v *refcnt) acquire() { + switch v := v.val.Add(1); { + case v <= 1: + panic(redact.Safe(fmt.Sprintf("pebble: inconsistent reference count: %d", v))) + } +} + +func (v *refcnt) release() bool { + switch v := v.val.Add(-1); { + case v < 0: + panic(redact.Safe(fmt.Sprintf("pebble: inconsistent reference count: %d", v))) + case v == 0: + return true + default: + return false + } +} + +func (v *refcnt) trace(msg string) { +} + +func (v *refcnt) traces() string { + return "" +} + +// Silence unused warning. +var _ = (*refcnt)(nil).traces diff --git a/pebble/internal/cache/refcnt_tracing.go b/pebble/internal/cache/refcnt_tracing.go new file mode 100644 index 0000000..1d5e6c0 --- /dev/null +++ b/pebble/internal/cache/refcnt_tracing.go @@ -0,0 +1,66 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +//go:build tracing +// +build tracing + +package cache + +import ( + "fmt" + "runtime/debug" + "strings" + "sync" + "sync/atomic" +) + +// refcnt provides an atomic reference count, along with a tracing facility for +// debugging logic errors in manipulating the reference count. This version is +// used when the "tracing" build tag is enabled. 
+type refcnt struct { + val atomic.Int32 + sync.Mutex + msgs []string +} + +func (v *refcnt) init(val int32) { + v.val.Store(val) + v.trace("init") +} + +func (v *refcnt) refs() int32 { + return v.val.Load() +} + +func (v *refcnt) acquire() { + switch n := v.val.Add(1); { + case n <= 1: + panic(fmt.Sprintf("pebble: inconsistent reference count: %d", n)) + } + v.trace("acquire") +} + +func (v *refcnt) release() bool { + n := v.val.Add(-1) + switch { + case n < 0: + panic(fmt.Sprintf("pebble: inconsistent reference count: %d", n)) + } + v.trace("release") + return n == 0 +} + +func (v *refcnt) trace(msg string) { + s := fmt.Sprintf("%s: refs=%d\n%s", msg, v.refs(), debug.Stack()) + v.Lock() + v.msgs = append(v.msgs, s) + v.Unlock() +} + +func (v *refcnt) traces() string { + v.Lock() + s := strings.Join(v.msgs, "\n") + v.Unlock() + return s +} diff --git a/pebble/internal/cache/robin_hood.go b/pebble/internal/cache/robin_hood.go new file mode 100644 index 0000000..6e093fd --- /dev/null +++ b/pebble/internal/cache/robin_hood.go @@ -0,0 +1,320 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package cache + +import ( + "fmt" + "math/bits" + "os" + "runtime/debug" + "strings" + "time" + "unsafe" + + "github.com/cockroachdb/pebble/internal/invariants" + "github.com/cockroachdb/pebble/internal/manual" +) + +var hashSeed = uint64(time.Now().UnixNano()) + +// Fibonacci hash: https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/ +func robinHoodHash(k key, shift uint32) uint32 { + const m = 11400714819323198485 + h := hashSeed + h ^= k.id * m + h ^= uint64(k.fileNum.FileNum()) * m + h ^= k.offset * m + return uint32(h >> shift) +} + +type robinHoodEntry struct { + key key + // Note that value may point to a Go allocated object (if the "invariants" + // build tag was specified), even though the memory for the entry itself is + // manually managed. This is technically a violation of the Cgo pointer rules: + // + // https://golang.org/cmd/cgo/#hdr-Passing_pointers + // + // Specifically, Go pointers should not be stored in C allocated memory. The + // reason for this rule is that the Go GC will not look at C allocated memory + // to find pointers to Go objects. If the only reference to a Go object is + // stored in C allocated memory, the object will be reclaimed. What makes + // this "safe" is that the Cache guarantees that there are other pointers to + // the entry and shard which will keep them alive. In particular, every Go + // allocated entry in the cache is referenced by the shard.entries map. And + // every shard is referenced by the Cache.shards map. + value *entry + // The distance the entry is from its desired position. 
+ dist uint32 +} + +type robinHoodEntries struct { + ptr unsafe.Pointer + len uint32 +} + +func newRobinHoodEntries(n uint32) robinHoodEntries { + size := uintptr(n) * unsafe.Sizeof(robinHoodEntry{}) + return robinHoodEntries{ + ptr: unsafe.Pointer(&(manual.New(int(size)))[0]), + len: n, + } +} + +func (e robinHoodEntries) at(i uint32) *robinHoodEntry { + return (*robinHoodEntry)(unsafe.Pointer(uintptr(e.ptr) + + uintptr(i)*unsafe.Sizeof(robinHoodEntry{}))) +} + +func (e robinHoodEntries) free() { + size := uintptr(e.len) * unsafe.Sizeof(robinHoodEntry{}) + buf := (*[manual.MaxArrayLen]byte)(e.ptr)[:size:size] + manual.Free(buf) +} + +// robinHoodMap is an implementation of Robin Hood hashing. Robin Hood hashing +// is an open-address hash table using linear probing. The twist is that the +// linear probe distance is reduced by moving existing entries when inserting +// and deleting. This is accomplished by keeping track of how far an entry is +// from its "desired" slot (hash of key modulo number of slots). During +// insertion, if the new entry being inserted is farther from its desired slot +// than the target entry, we swap the target and new entry. This effectively +// steals from the "rich" target entry and gives to the "poor" new entry (thus +// the origin of the name). +// +// An extension over the base Robin Hood hashing idea comes from +// https://probablydance.com/2017/02/26/i-wrote-the-fastest-hashtable/. A cap +// is placed on the max distance an entry can be from its desired slot. When +// this threshold is reached during insertion, the size of the table is doubled +// and insertion is restarted. Additionally, the entries slice is given "max +// dist" extra entries on the end. The very last entry in the entries slice is +// never used and acts as a sentinel which terminates loops. The previous +// maxDist-1 entries act as the extra entries. 
For example, if the size of the +// table is 2, maxDist is computed as 4 and the actual size of the entry slice +// is 6. +// +// +---+---+---+---+---+---+ +// | 0 | 1 | 2 | 3 | 4 | 5 | +// +---+---+---+---+---+---+ +// ^ +// size +// +// In this scenario, the target entry for a key will always be in the range +// [0,1]. Valid entries may reside in the range [0,4] due to the linear probing +// of up to maxDist entries. The entry at index 5 will never contain a value, +// and instead acts as a sentinel (its distance is always 0). The max distance +// threshold is set to log2(num-entries). This ensures that retrieval is O(log +// N), though note that N is the number of total entries, not the count of +// valid entries. +// +// Deletion is implemented via the backward shift delete mechanism instead of +// tombstones. This preserves the performance of the table in the presence of +// deletions. See +// http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion +// for details. +type robinHoodMap struct { + entries robinHoodEntries + size uint32 + shift uint32 + count uint32 + maxDist uint32 +} + +func maxDistForSize(size uint32) uint32 { + desired := uint32(bits.Len32(size)) + if desired < 4 { + desired = 4 + } + return desired +} + +func newRobinHoodMap(initialCapacity int) *robinHoodMap { + m := &robinHoodMap{} + m.init(initialCapacity) + + // Note: this is a no-op if invariants are disabled or race is enabled. 
+ invariants.SetFinalizer(m, func(obj interface{}) { + m := obj.(*robinHoodMap) + if m.entries.ptr != nil { + fmt.Fprintf(os.Stderr, "%p: robin-hood map not freed\n", m) + os.Exit(1) + } + }) + return m +} + +func (m *robinHoodMap) init(initialCapacity int) { + if initialCapacity < 1 { + initialCapacity = 1 + } + targetSize := 1 << (uint(bits.Len(uint(2*initialCapacity-1))) - 1) + m.rehash(uint32(targetSize)) +} + +func (m *robinHoodMap) free() { + if m.entries.ptr != nil { + m.entries.free() + m.entries.ptr = nil + } +} + +func (m *robinHoodMap) rehash(size uint32) { + oldEntries := m.entries + + m.size = size + m.shift = uint32(64 - bits.Len32(m.size-1)) + m.maxDist = maxDistForSize(size) + m.entries = newRobinHoodEntries(size + m.maxDist) + m.count = 0 + + for i := uint32(0); i < oldEntries.len; i++ { + e := oldEntries.at(i) + if e.value != nil { + m.Put(e.key, e.value) + } + } + + if oldEntries.ptr != nil { + oldEntries.free() + } +} + +// Find an entry containing the specified value. This is intended to be used +// from debug and test code. +func (m *robinHoodMap) findByValue(v *entry) *robinHoodEntry { + for i := uint32(0); i < m.entries.len; i++ { + e := m.entries.at(i) + if e.value == v { + return e + } + } + return nil +} + +func (m *robinHoodMap) Count() int { + return int(m.count) +} + +func (m *robinHoodMap) Put(k key, v *entry) { + maybeExists := true + n := robinHoodEntry{key: k, value: v, dist: 0} + for i := robinHoodHash(k, m.shift); ; i++ { + e := m.entries.at(i) + if maybeExists && k == e.key { + // Entry already exists: overwrite. + e.value = n.value + m.checkEntry(i) + return + } + + if e.value == nil { + // Found an empty entry: insert here. + *e = n + m.count++ + m.checkEntry(i) + return + } + + if e.dist < n.dist { + // Swap the new entry with the current entry because the current is + // rich. We then continue to loop, looking for a new location for the + // current entry. 
Note that this is also the not-found condition for + // retrieval, which means that "k" is not present in the map. See Get(). + n, *e = *e, n + m.checkEntry(i) + maybeExists = false + } + + // The new entry gradually moves away from its ideal position. + n.dist++ + + // If we've reached the max distance threshold, grow the table and restart + // the insertion. + if n.dist == m.maxDist { + m.rehash(2 * m.size) + i = robinHoodHash(n.key, m.shift) - 1 + n.dist = 0 + maybeExists = false + } + } +} + +func (m *robinHoodMap) Get(k key) *entry { + var dist uint32 + for i := robinHoodHash(k, m.shift); ; i++ { + e := m.entries.at(i) + if k == e.key { + // Found. + return e.value + } + if e.dist < dist { + // Not found. + return nil + } + dist++ + } +} + +func (m *robinHoodMap) Delete(k key) { + var dist uint32 + for i := robinHoodHash(k, m.shift); ; i++ { + e := m.entries.at(i) + if k == e.key { + m.checkEntry(i) + // We found the entry to delete. Shift the following entries backwards + // until the next empty value or entry with a zero distance. Note that + // empty values are guaranteed to have "dist == 0". + m.count-- + for j := i + 1; ; j++ { + t := m.entries.at(j) + if t.dist == 0 { + *e = robinHoodEntry{} + return + } + e.key = t.key + e.value = t.value + e.dist = t.dist - 1 + e = t + m.checkEntry(j) + } + } + if dist > e.dist { + // Not found. 
+ return + } + dist++ + } +} + +func (m *robinHoodMap) checkEntry(i uint32) { + if invariants.Enabled { + e := m.entries.at(i) + if e.value != nil { + pos := robinHoodHash(e.key, m.shift) + if (uint32(i) - pos) != e.dist { + fmt.Fprintf(os.Stderr, "%d: invalid dist=%d, expected %d: %s\n%s", + i, e.dist, uint32(i)-pos, e.key, debug.Stack()) + os.Exit(1) + } + if e.dist > m.maxDist { + fmt.Fprintf(os.Stderr, "%d: invalid dist=%d > maxDist=%d: %s\n%s", + i, e.dist, m.maxDist, e.key, debug.Stack()) + os.Exit(1) + } + } + } +} + +func (m *robinHoodMap) String() string { + var buf strings.Builder + fmt.Fprintf(&buf, "count: %d\n", m.count) + for i := uint32(0); i < m.entries.len; i++ { + e := m.entries.at(i) + if e.value != nil { + fmt.Fprintf(&buf, "%d: [%s,%p,%d]\n", i, e.key, e.value, e.dist) + } + } + return buf.String() +} diff --git a/pebble/internal/cache/robin_hood_test.go b/pebble/internal/cache/robin_hood_test.go new file mode 100644 index 0000000..d72c1b3 --- /dev/null +++ b/pebble/internal/cache/robin_hood_test.go @@ -0,0 +1,241 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package cache + +import ( + "fmt" + "io" + "runtime" + "testing" + "time" + + "github.com/cockroachdb/pebble/internal/base" + "golang.org/x/exp/rand" +) + +func TestRobinHoodMap(t *testing.T) { + rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) + rhMap := newRobinHoodMap(0) + defer rhMap.free() + + goMap := make(map[key]*entry) + + randomKey := func() key { + n := rng.Intn(len(goMap)) + for k := range goMap { + if n == 0 { + return k + } + n-- + } + return key{} + } + + ops := 10000 + rng.Intn(10000) + for i := 0; i < ops; i++ { + var which float64 + if len(goMap) > 0 { + which = rng.Float64() + } + + switch { + case which < 0.4: + // 40% insert. 
+ var k key + k.id = rng.Uint64() + k.fileNum = base.FileNum(rng.Uint64()).DiskFileNum() + k.offset = rng.Uint64() + e := &entry{} + goMap[k] = e + rhMap.Put(k, e) + if len(goMap) != rhMap.Count() { + t.Fatalf("map sizes differ: %d != %d", len(goMap), rhMap.Count()) + } + + case which < 0.1: + // 10% overwrite. + k := randomKey() + e := &entry{} + goMap[k] = e + rhMap.Put(k, e) + if len(goMap) != rhMap.Count() { + t.Fatalf("map sizes differ: %d != %d", len(goMap), rhMap.Count()) + } + + case which < 0.75: + // 25% delete. + k := randomKey() + delete(goMap, k) + rhMap.Delete(k) + if len(goMap) != rhMap.Count() { + t.Fatalf("map sizes differ: %d != %d", len(goMap), rhMap.Count()) + } + + default: + // 25% lookup. + k := randomKey() + v := goMap[k] + u := rhMap.Get(k) + if v != u { + t.Fatalf("%s: expected %p, but found %p", k, v, u) + } + } + } + + t.Logf("map size: %d", len(goMap)) +} + +const benchSize = 1 << 20 + +func BenchmarkGoMapInsert(b *testing.B) { + rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) + keys := make([]key, benchSize) + for i := range keys { + keys[i].fileNum = base.FileNum(rng.Uint64n(1 << 20)).DiskFileNum() + keys[i].offset = uint64(rng.Intn(1 << 20)) + } + b.ResetTimer() + + var m map[key]*entry + for i, j := 0, 0; i < b.N; i, j = i+1, j+1 { + if m == nil || j == len(keys) { + b.StopTimer() + m = make(map[key]*entry, len(keys)) + j = 0 + b.StartTimer() + } + m[keys[j]] = nil + } +} + +func BenchmarkRobinHoodInsert(b *testing.B) { + rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) + keys := make([]key, benchSize) + for i := range keys { + keys[i].fileNum = base.FileNum(rng.Uint64n(1 << 20)).DiskFileNum() + keys[i].offset = uint64(rng.Intn(1 << 20)) + } + e := &entry{} + b.ResetTimer() + + var m *robinHoodMap + for i, j := 0, 0; i < b.N; i, j = i+1, j+1 { + if m == nil || j == len(keys) { + b.StopTimer() + m = newRobinHoodMap(len(keys)) + j = 0 + b.StartTimer() + } + m.Put(keys[j], e) + } + + runtime.KeepAlive(e) +} 
+ +func BenchmarkGoMapLookupHit(b *testing.B) { + rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) + keys := make([]key, benchSize) + m := make(map[key]*entry, len(keys)) + e := &entry{} + for i := range keys { + keys[i].fileNum = base.FileNum(rng.Uint64n(1 << 20)).DiskFileNum() + keys[i].offset = uint64(rng.Intn(1 << 20)) + m[keys[i]] = e + } + b.ResetTimer() + + var p *entry + for i, j := 0, 0; i < b.N; i, j = i+1, j+1 { + if j == len(keys) { + j = 0 + } + p = m[keys[j]] + } + + if testing.Verbose() { + fmt.Fprintln(io.Discard, p) + } +} + +func BenchmarkRobinHoodLookupHit(b *testing.B) { + rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) + keys := make([]key, benchSize) + m := newRobinHoodMap(len(keys)) + e := &entry{} + for i := range keys { + keys[i].fileNum = base.FileNum(rng.Uint64n(1 << 20)).DiskFileNum() + keys[i].offset = uint64(rng.Intn(1 << 20)) + m.Put(keys[i], e) + } + b.ResetTimer() + + var p *entry + for i, j := 0, 0; i < b.N; i, j = i+1, j+1 { + if j == len(keys) { + j = 0 + } + p = m.Get(keys[j]) + } + + if testing.Verbose() { + fmt.Fprintln(io.Discard, p) + } + runtime.KeepAlive(e) +} + +func BenchmarkGoMapLookupMiss(b *testing.B) { + rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) + keys := make([]key, benchSize) + m := make(map[key]*entry, len(keys)) + e := &entry{} + for i := range keys { + keys[i].id = 1 + keys[i].fileNum = base.FileNum(rng.Uint64n(1 << 20)).DiskFileNum() + keys[i].offset = uint64(rng.Intn(1 << 20)) + m[keys[i]] = e + keys[i].id = 2 + } + b.ResetTimer() + + var p *entry + for i, j := 0, 0; i < b.N; i, j = i+1, j+1 { + if j == len(keys) { + j = 0 + } + p = m[keys[j]] + } + + if testing.Verbose() { + fmt.Fprintln(io.Discard, p) + } +} + +func BenchmarkRobinHoodLookupMiss(b *testing.B) { + rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) + keys := make([]key, benchSize) + m := newRobinHoodMap(len(keys)) + e := &entry{} + for i := range keys { + keys[i].id = 1 + 
keys[i].fileNum = base.FileNum(rng.Uint64n(1 << 20)).DiskFileNum() + keys[i].offset = uint64(rng.Intn(1 << 20)) + m.Put(keys[i], e) + keys[i].id = 2 + } + b.ResetTimer() + + var p *entry + for i, j := 0, 0; i < b.N; i, j = i+1, j+1 { + if j == len(keys) { + j = 0 + } + p = m.Get(keys[j]) + } + + if testing.Verbose() { + fmt.Fprintln(io.Discard, p) + } + runtime.KeepAlive(e) +} diff --git a/pebble/internal/cache/testdata/cache b/pebble/internal/cache/testdata/cache new file mode 100644 index 0000000..fc7b218 --- /dev/null +++ b/pebble/internal/cache/testdata/cache @@ -0,0 +1,99991 @@ +0 m +1 m +2 m +3 m +1 h +1 h +4 m +5 m +6 m +1 h +7 m +8 m +9 m +4 h +10 m +4 h +1 h +11 m +10 h +4 h +12 m +4 h +13 m +1 h +4 h +14 m +15 m +16 m +17 m +18 m +19 m +4 h +1 h +1 h +4 h +4 h +20 m +21 m +10 h +22 m +4 h +4 h +4 h +4 h +8 h +23 m +4 h +24 m +1 h +1 h +1 h +4 h +25 m +26 m +27 m +4 h +28 m +29 m +10 h +30 m +1 h +4 h +1 h +11 h +31 m +4 h +3 h +4 h +1 h +31 h +10 h +32 m +33 m +10 h +34 m +35 m +36 m +37 m +4 h +4 h +1 h +1 h +38 m +39 m +40 m +41 m +42 m +43 d +10 h +4 h +4 h +44 m +45 m +46 m +1 h +1 h +1 h +4 h +10 h +47 m +1 h +1 h +48 m +1 h +49 m +10 h +11 h +4 h +50 m +27 h +10 h +10 h +51 m +11 h +3 h +4 h +52 m +10 h +53 m +1 h +1 h +3 h +54 m +55 m +56 m +57 m +58 m +59 m +60 m +4 h +4 h +1 h +61 m +1 h +1 h +62 m +63 m +1 h +64 m +65 m +66 m +36 h +67 m +4 h +4 h +59 h +68 m +10 h +69 m +4 h +4 h +4 h +70 m +1 h +71 m +1 h +72 m +10 h +73 m +4 h +74 m +10 h +4 h +11 h +4 h +10 h +75 m +76 m +4 h +77 m +78 m +1 h +79 m +80 m +81 m +4 h +4 h +82 m +83 m +84 m +1 h +85 m +1 h +4 h +10 h +86 m +1 h +87 m +11 h +4 h +59 h +1 h +88 m +89 m +90 m +91 m +4 h +4 h +92 m +3 h +4 h +4 h +4 h +93 m +1 h +94 m +10 h +95 m +1 h +82 h +96 m +82 h +1 h +10 h +10 h +97 m +1 h +4 h +4 h +98 m +31 h +99 m +100 m +41 h +10 h +101 m +102 m +1 h +103 m +11 h +104 m +105 m +57 h +106 m +4 h +107 m +83 h +4 h +10 h +108 m +1 h +109 m +1 h +110 m +1 h +10 h +111 m +4 h +4 h +4 h +10 h 
+112 m +10 h +113 m +4 h +114 m +115 m +116 m +1 h +57 h +117 m +118 m +4 h +1 h +1 h +1 h +10 h +10 h +119 m +4 h +120 m +1 h +1 h +4 h +69 h +4 h +121 m +73 h +122 m +4 h +123 m +4 h +124 m +1 h +4 h +45 h +10 h +10 h +125 m +4 h +109 h +1 h +126 m +82 h +1 h +127 m +11 h +1 h +10 h +10 h +4 h +1 h +1 h +4 h +128 m +10 h +4 h +10 h +129 m +83 h +4 h +1 h +130 m +131 m +132 m +1 h +133 m +134 m +135 m +136 m +1 h +137 m +4 h +4 h +4 h +11 h +4 h +4 h +138 m +139 m +140 m +141 m +4 h +1 h +4 h +65 h +4 h +142 m +4 h +11 h +124 h +143 m +4 h +144 m +145 m +10 h +97 h +146 m +147 m +4 h +148 m +10 h +4 h +82 h +1 h +4 h +3 h +149 m +65 h +150 m +4 h +151 m +152 m +1 h +153 m +91 h +59 h +4 h +4 h +154 m +1 h +4 h +155 m +156 m +157 m +25 h +4 h +158 m +159 m +3 h +82 h +160 m +4 h +161 m +4 h +4 h +162 m +74 h +163 m +4 h +10 h +1 h +4 h +164 m +10 h +165 m +166 m +4 h +167 m +4 h +4 h +74 h +10 h +10 h +1 h +4 h +168 m +169 m +4 h +4 h +143 h +4 h +55 h +170 m +171 m +10 h +11 h +124 h +124 h +1 h +1 h +172 m +4 h +173 m +174 m +4 h +1 h +124 h +4 h +123 h +4 h +4 h +104 h +82 h +175 m +176 m +94 h +4 h +1 h +177 m +1 h +178 m +1 h +179 m +45 h +10 h +4 h +1 h +1 h +1 h +180 m +146 h +181 m +25 h +4 h +182 m +183 m +184 m +185 m +4 h +4 h +11 h +1 h +186 m +143 h +4 h +187 m +188 m +189 m +190 m +57 h +1 h +147 h +10 h +1 h +10 h +4 h +1 h +1 h +10 h +191 m +192 m +10 h +193 m +22 h +1 h +194 m +195 m +25 h +196 m +197 m +4 h +4 h +198 m +92 h +4 h +199 m +147 h +4 h +200 m +201 m +202 m +65 h +4 h +1 h +203 m +10 h +204 m +205 m +4 h +206 m +207 m +1 h +1 h +208 m +4 h +209 m +4 h +124 h +45 h +210 m +59 h +211 m +212 m +4 h +213 m +1 h +10 h +41 h +4 h +109 h +10 h +214 m +215 m +124 h +4 h +1 h +216 m +10 h +4 h +11 h +109 h +4 h +4 h +217 m +10 h +4 h +8 h +1 h +10 h +4 h +4 h +218 m +36 h +219 m +4 h +1 h +220 m +221 m +4 h +4 h +10 h +222 m +4 h +73 h +3 h +51 h +223 m +158 h +11 h +224 m +59 h +11 h +10 h +97 h +74 h +4 h +225 m +102 h +1 h +56 h +4 h +4 h +4 
h +226 m +4 h +1 h +227 m +4 h +228 m +229 m +109 h +10 h +230 m +1 h +10 h +231 m +1 h +232 m +10 h +1 h +10 h +82 h +233 m +4 h +4 h +234 m +1 h +4 h +235 m +12 m +10 h +28 m +236 m +237 m +1 h +10 h +238 m +10 h +4 h +239 m +113 h +10 h +40 m +240 m +10 h +4 h +4 h +10 h +1 h +1 h +10 h +82 h +4 h +241 m +4 h +4 h +242 m +243 m +110 h +31 h +244 m +245 m +246 m +4 h +1 h +247 m +248 m +4 h +1 h +1 h +249 m +4 h +124 h +119 h +4 h +4 h +10 h +4 h +10 h +123 h +250 m +251 m +10 h +4 h +252 m +253 m +254 m +238 h +8 h +1 h +1 h +4 h +57 h +4 h +255 m +4 h +10 h +4 h +41 h +10 h +256 m +257 m +92 h +129 h +258 m +125 h +57 h +10 h +97 h +4 h +1 h +1 h +31 h +259 m +1 h +260 m +4 h +4 h +1 h +261 m +196 h +262 m +1 h +1 h +263 m +1 h +3 h +1 h +264 m +265 m +1 h +45 h +82 h +10 h +266 m +267 m +268 m +4 h +262 h +10 h +269 m +4 h +270 m +271 m +272 m +11 h +140 h +10 h +4 h +10 h +273 m +82 h +25 h +4 h +274 m +1 h +10 h +4 h +275 m +1 h +10 h +276 m +4 h +1 h +277 m +10 h +10 h +4 h +4 h +108 m +4 h +278 m +4 h +279 m +4 h +10 h +1 h +57 h +1 h +1 h +57 h +3 h +1 h +10 h +4 h +1 h +280 m +11 h +4 h +1 h +281 m +282 m +10 h +283 m +284 m +285 m +143 h +4 h +1 h +124 h +57 h +12 h +266 h +4 h +4 h +104 h +4 h +11 h +4 h +4 h +286 m +109 h +4 h +4 h +10 h +4 h +287 m +1 h +288 m +79 m +125 h +94 h +139 h +289 m +290 m +4 h +1 h +1 h +1 h +291 m +292 m +293 m +4 h +4 h +294 m +295 m +1 h +10 h +11 h +10 h +92 h +112 m +11 h +10 h +167 h +1 h +296 m +4 h +297 m +73 h +298 m +258 h +65 h +4 h +1 h +4 h +299 m +4 h +1 h +1 h +74 h +300 m +1 h +1 h +10 h +82 h +301 m +83 h +302 m +139 h +4 h +4 h +278 h +303 m +304 m +147 h +305 m +4 h +306 m +1 h +135 m +1 h +4 h +1 h +4 h +307 m +4 h +57 h +10 h +1 h +11 h +25 h +125 h +57 h +4 h +1 h +4 h +4 h +308 m +1 h +10 h +309 m +1 h +310 m +83 h +119 h +311 m +312 m +313 m +4 h +119 h +314 m +1 h +4 h +315 m +316 m +317 m +10 h +10 h +1 h +4 h +59 h +318 m +1 h +319 m +1 h +4 h +10 h +4 h +320 m +109 h +321 m +10 h +322 m +323 m 
+108 h +10 h +324 m +10 h +4 h +135 h +1 h +325 m +4 h +10 h +272 h +1 h +10 h +4 h +1 h +64 m +326 m +119 h +327 m +328 m +329 m +60 m +36 h +330 m +119 h +4 h +331 m +332 m +4 h +10 h +4 h +333 m +10 h +10 h +4 h +1 h +1 h +10 h +1 h +1 h +334 m +335 m +1 h +65 h +167 h +336 m +143 h +266 h +114 m +337 m +10 h +3 h +124 h +338 m +4 h +339 m +10 h +10 h +340 m +82 h +10 h +11 h +83 h +10 h +4 h +4 h +4 h +295 h +10 h +4 h +1 h +1 h +341 m +1 h +342 m +343 m +57 h +1 h +79 h +110 h +1 h +1 h +344 m +82 h +181 m +11 h +345 m +10 h +1 h +1 h +1 h +346 m +1 h +347 m +1 h +13 m +4 h +348 m +349 m +10 h +13 h +4 h +4 h +1 h +4 h +11 h +350 m +4 h +351 m +10 h +4 h +352 m +353 m +104 h +4 h +82 h +354 m +1 h +4 h +124 h +355 m +11 h +295 h +250 h +12 h +10 h +356 m +109 h +10 h +357 m +1 h +10 h +73 h +25 h +358 m +1 h +1 h +57 h +4 h +359 m +11 h +360 m +1 h +10 h +31 h +361 m +59 h +1 h +4 h +4 h +1 h +362 m +1 h +12 h +41 h +363 m +1 h +10 h +11 h +4 h +147 h +114 h +10 h +10 h +5 m +10 h +10 h +364 m +365 m +1 h +10 h +11 h +4 h +366 m +1 h +4 h +4 h +1 h +367 m +368 m +12 h +147 h +129 h +65 h +10 h +369 m +10 h +82 h +4 h +4 h +370 m +25 h +1 h +371 m +10 h +119 h +4 h +372 m +94 h +373 m +4 h +1 h +4 h +374 m +12 h +4 h +10 h +375 m +28 h +61 m +4 h +114 h +147 h +195 m +376 m +1 h +377 m +378 m +170 m +4 h +10 h +196 h +4 h +1 h +4 h +379 m +380 m +4 h +381 m +4 h +144 m +41 h +1 h +382 m +383 m +384 m +1 h +190 m +112 h +10 h +10 h +10 h +385 m +1 h +1 h +4 h +10 h +1 h +4 h +386 m +4 h +4 h +31 h +10 h +4 h +10 h +74 h +387 m +1 h +388 m +31 h +10 h +389 m +10 h +3 h +10 h +383 h +10 h +1 h +110 h +390 m +10 h +391 m +392 m +25 h +4 h +238 h +10 h +393 m +394 m +4 h +10 h +22 m +4 h +282 h +1 h +4 h +1 h +395 m +396 m +230 m +1 h +1 h +4 h +1 h +10 h +4 h +4 h +195 h +92 h +307 h +10 h +397 m +4 h +4 h +1 h +1 h +4 h +229 m +398 m +4 h +1 h +4 h +4 h +399 m +45 h +79 h +4 h +1 h +27 m +140 h +1 h +400 m +1 h +10 h +1 h +48 m +401 m +402 m +403 m +404 m +4 h +10 
h +405 m +1 h +406 m +1 h +1 h +407 m +408 m +10 h +4 h +270 m +195 h +4 h +1 h +409 m +4 h +135 h +1 h +4 h +3 h +410 m +4 h +411 m +4 h +10 h +412 m +10 h +413 m +10 h +138 m +104 h +4 h +172 m +1 h +4 h +1 h +414 m +4 h +4 h +4 h +11 h +195 h +1 h +83 h +109 h +1 h +10 h +10 h +10 h +4 h +415 m +4 h +10 h +416 m +1 h +1 h +4 h +417 m +10 h +10 h +4 h +4 h +368 h +74 h +10 h +65 h +295 h +383 h +4 h +4 h +82 h +10 h +25 h +11 h +64 h +143 h +418 m +10 h +92 h +419 m +420 m +421 m +1 h +59 h +57 h +4 h +422 m +10 h +258 h +423 m +424 m +1 h +4 h +1 h +11 h +425 m +426 m +10 h +4 h +4 h +4 h +427 m +1 h +56 h +1 h +428 m +279 m +429 m +11 h +1 h +430 m +83 h +124 h +4 h +4 h +1 h +1 h +11 h +1 h +31 h +10 h +1 h +266 h +4 h +431 m +432 m +10 h +1 h +1 h +433 m +1 h +11 h +1 h +4 h +1 h +434 m +143 h +4 h +112 h +435 m +436 m +437 m +1 h +10 h +4 h +1 h +438 m +439 m +10 h +110 h +1 h +440 m +10 h +12 h +1 h +441 m +4 h +442 m +1 h +1 h +147 h +4 h +4 h +10 h +1 h +4 h +443 m +10 h +444 m +4 h +445 m +4 h +10 h +1 h +4 h +79 h +74 h +1 h +31 h +1 h +146 h +446 m +10 h +10 h +1 h +164 m +31 h +4 h +83 h +4 h +82 h +4 h +4 h +4 h +10 h +1 h +447 m +10 h +11 h +10 h +104 h +448 m +449 m +450 m +1 h +451 m +1 h +41 h +36 h +452 m +82 h +453 m +10 h +1 h +371 h +4 h +454 m +455 m +31 h +1 h +10 h +456 m +10 h +358 h +457 m +74 h +458 m +10 h +459 m +4 h +65 h +12 h +10 h +4 h +460 m +4 h +11 h +156 m +125 h +118 m +1 h +10 h +10 h +461 m +4 h +114 h +11 h +4 h +462 m +31 h +124 h +463 m +464 m +1 h +1 h +109 h +135 h +10 h +1 h +1 h +465 m +1 h +74 h +466 m +4 h +467 m +4 h +4 h +10 h +468 m +11 h +4 h +1 h +10 h +1 h +469 m +1 h +4 h +4 h +1 h +10 h +195 h +10 h +470 m +471 m +4 h +472 m +4 h +125 h +4 h +146 h +172 h +473 m +4 h +10 h +59 h +4 h +31 h +4 h +474 m +4 h +475 m +4 h +266 h +8 h +4 h +27 h +57 h +476 m +477 m +478 m +10 h +4 h +1 h +10 h +479 m +371 h +1 h +4 h +59 h +94 h +4 h +1 h +480 m +481 m +4 h +482 m +483 m +1 h +484 m +1 h +169 m +1 h +4 h +4 h +4 
h +266 h +4 h +4 h +4 h +485 m +41 h +124 h +1 h +1 h +25 h +486 m +487 m +25 h +488 m +1 h +1 h +489 m +4 h +1 h +25 h +4 h +10 h +490 m +4 h +4 h +224 m +97 h +491 m +10 h +22 h +492 m +10 h +493 m +1 h +494 m +1 h +10 h +170 h +495 m +1 h +83 h +496 m +497 m +498 m +4 h +11 h +114 h +1 h +499 m +500 m +4 h +196 h +82 h +1 h +1 h +57 h +10 h +501 m +1 h +10 h +1 h +1 h +27 h +83 h +502 m +4 h +503 m +4 h +4 h +332 m +1 h +4 h +4 h +238 h +504 m +505 m +195 h +83 h +10 h +156 h +4 h +506 m +4 h +507 m +4 h +1 h +4 h +1 h +307 h +59 h +508 m +1 h +97 h +4 h +509 m +10 h +510 m +10 h +1 h +4 h +125 h +185 m +4 h +511 m +4 h +4 h +10 h +4 h +347 m +92 h +190 m +169 h +196 h +110 h +1 h +4 h +10 h +10 h +1 h +11 h +1 h +512 m +4 h +513 m +514 m +10 h +515 m +516 m +1 h +517 m +57 h +138 h +11 h +330 m +1 h +4 h +518 m +4 h +10 h +82 h +31 h +1 h +10 h +1 h +519 m +1 h +4 h +1 h +4 h +1 h +1 h +109 h +10 h +520 m +55 m +521 m +522 m +523 m +297 m +108 h +10 h +10 h +31 h +164 h +524 m +97 h +525 m +526 m +41 h +4 h +4 h +1 h +124 h +4 h +1 h +527 m +195 h +528 m +529 m +4 h +109 h +241 m +4 h +4 h +4 h +1 h +530 m +146 h +10 h +135 h +11 h +4 h +358 h +169 h +531 m +1 h +532 m +4 h +533 m +4 h +4 h +94 h +10 h +4 h +4 h +55 h +1 h +266 h +1 h +10 h +1 h +4 h +82 h +4 h +534 m +535 m +10 h +10 h +4 h +181 m +10 h +536 m +3 h +10 h +1 h +1 h +1 h +10 h +4 h +1 h +265 m +537 m +10 h +82 h +56 h +538 m +4 h +539 m +1 h +92 h +56 h +540 m +541 m +1 h +10 h +1 h +542 m +4 h +64 h +543 m +82 h +544 m +4 h +258 h +4 h +545 m +1 h +170 h +10 h +4 h +297 h +368 h +1 h +1 h +10 h +190 h +135 h +1 h +1 h +546 m +4 h +4 h +307 h +4 h +1 h +169 h +4 h +4 h +143 h +10 h +1 h +547 m +4 h +4 h +10 h +10 h +97 h +548 m +1 h +549 m +550 m +1 h +1 h +551 m +4 h +8 h +4 h +4 h +1 h +552 m +4 h +553 m +538 h +1 h +554 m +1 h +555 m +10 h +4 h +10 h +556 m +10 h +10 h +557 m +558 m +4 h +125 h +184 m +559 m +560 m +4 h +10 h +92 h +10 h +561 m +562 m +57 h +3 h +4 h +4 h +563 m +190 h +4 h 
+10 h +4 h +564 m +4 h +4 h +59 h +1 h +565 m +10 h +4 h +1 h +566 m +41 h +1 h +1 h +143 h +567 m +10 h +10 h +568 m +82 h +1 h +10 h +4 h +569 m +353 m +570 m +110 h +571 m +572 m +31 h +82 h +573 m +574 m +55 h +1 h +1 h +4 h +4 h +386 m +575 m +576 m +93 m +4 h +577 m +4 h +578 m +579 m +580 m +4 h +4 h +581 m +4 h +582 m +4 h +10 h +4 h +583 m +4 h +1 h +4 h +4 h +584 m +143 h +45 h +1 h +4 h +585 m +59 h +4 h +11 h +586 m +587 m +4 h +588 m +1 h +10 h +10 h +1 h +589 m +1 h +590 m +11 h +1 h +4 h +591 m +4 h +592 m +4 h +4 h +593 m +10 h +594 m +595 m +4 h +241 h +596 m +1 h +443 m +1 h +1 h +597 m +598 m +4 h +10 h +10 h +36 h +599 m +1 h +13 h +4 h +4 h +229 m +1 h +1 h +1 h +4 h +10 h +1 h +10 h +600 m +601 m +4 h +104 h +11 h +4 h +11 h +1 h +10 h +602 m +1 h +83 h +386 h +4 h +83 h +10 h +158 h +603 m +604 m +605 m +83 h +4 h +10 h +83 h +606 m +607 m +10 h +109 h +4 h +25 h +608 m +609 m +4 h +10 h +10 h +610 m +31 h +611 m +10 h +10 h +4 h +612 m +330 h +613 m +10 h +10 h +79 h +614 m +518 h +92 h +4 h +615 m +147 h +4 h +10 h +616 m +4 h +4 h +113 m +617 m +4 h +4 h +108 h +618 m +11 h +1 h +1 h +619 m +620 m +307 h +4 h +4 h +11 h +109 h +10 h +621 m +622 m +386 h +4 h +1 h +274 m +10 h +10 h +623 m +97 h +10 h +10 h +3 h +4 h +4 h +4 h +1 h +443 h +624 m +1 h +1 h +10 h +170 h +3 h +625 m +626 m +4 h +4 h +11 h +4 h +59 h +31 h +627 m +143 h +628 m +13 h +629 m +10 h +1 h +4 h +1 h +4 h +630 m +56 h +10 h +631 m +129 h +22 h +27 h +1 h +4 h +632 m +1 h +25 h +1 h +4 h +82 h +633 m +4 h +634 m +1 h +635 m +636 m +274 h +114 h +1 h +637 m +1 h +638 m +639 m +97 h +10 h +10 h +332 h +4 h +12 h +640 m +368 h +450 m +641 m +11 h +4 h +92 h +4 h +4 h +642 m +4 h +459 m +10 h +1 h +643 m +1 h +1 h +10 h +270 m +3 h +644 m +536 m +10 h +4 h +645 m +646 m +647 m +1 h +114 h +5 m +1 h +10 h +65 h +224 h +170 h +648 m +82 h +10 h +4 h +649 m +1 h +10 h +1 h +650 m +11 h +651 m +1 h +652 m +653 m +4 h +654 m +97 h +109 h +83 h +10 h +56 h +146 h +4 h +10 h +65 
h +4 h +4 h +655 m +656 m +4 h +1 h +657 m +11 h +11 h +1 h +270 h +25 h +10 h +1 h +147 h +658 m +64 h +1 h +1 h +1 h +59 h +1 h +659 m +660 m +10 h +27 h +661 m +4 h +662 m +1 h +1 h +4 h +104 h +663 m +10 h +4 h +664 m +10 h +665 m +666 m +11 h +278 h +10 h +181 h +10 h +667 m +4 h +668 m +1 h +669 m +330 h +670 m +671 m +1 h +64 h +4 h +11 h +11 h +672 m +4 h +4 h +673 m +10 h +4 h +674 m +675 m +4 h +676 m +677 m +4 h +10 h +4 h +97 h +4 h +1 h +1 h +1 h +1 h +10 h +678 m +4 h +10 h +114 h +679 m +4 h +59 h +59 h +4 h +4 h +1 h +4 h +4 h +680 m +4 h +65 h +45 h +4 h +41 h +73 h +4 h +31 h +1 h +681 m +10 h +109 h +146 h +22 h +682 m +683 m +4 h +684 m +10 h +10 h +685 m +10 h +10 h +4 h +4 h +4 h +686 m +1 h +4 h +687 m +1 h +688 m +1 h +1 h +4 h +1 h +10 h +434 m +689 m +690 m +41 h +4 h +691 m +4 h +13 h +692 m +4 h +1 h +693 m +10 h +10 h +694 m +1 h +10 h +695 m +59 h +41 h +1 h +4 h +696 m +1 h +10 h +113 h +697 m +698 m +23 m +11 h +699 m +10 h +700 m +94 h +701 m +640 h +702 m +250 h +10 h +1 h +703 m +4 h +1 h +1 h +4 h +10 h +10 h +704 m +265 h +4 h +10 h +74 h +147 h +705 m +4 h +4 h +706 m +59 h +707 m +4 h +569 m +4 h +135 h +4 h +708 m +4 h +1 h +709 m +31 h +1 h +143 h +4 h +710 m +711 m +11 h +57 h +1 h +4 h +1 h +110 h +10 h +712 m +4 h +713 m +12 h +1 h +714 m +541 m +10 h +1 h +97 h +10 h +1 h +359 m +1 h +715 m +716 m +1 h +10 h +717 m +10 h +4 h +57 h +1 h +10 h +1 h +4 h +4 h +4 h +1 h +1 h +718 m +1 h +10 h +295 h +719 m +720 m +4 h +119 m +4 h +11 h +266 h +721 m +4 h +36 h +722 m +1 h +4 h +4 h +723 m +4 h +724 m +353 h +1 h +10 h +1 h +195 h +10 h +1 h +250 h +725 m +726 m +31 h +727 m +4 h +196 h +1 h +36 h +4 h +493 m +575 m +4 h +728 m +1 h +146 h +729 m +1 h +82 h +1 h +4 h +730 m +59 h +731 m +10 h +119 h +4 h +1 h +732 m +10 h +1 h +10 h +1 h +733 m +82 h +4 h +11 h +1 h +4 h +4 h +4 h +4 h +734 m +10 h +36 h +10 h +4 h +27 h +1 h +735 m +736 m +79 h +45 h +737 m +10 h +4 h +10 h +1 h +4 h +10 h +10 h +4 h +1 h +1 h +25 h +738 m 
+1 h +10 h +739 m +27 h +167 h +4 h +740 m +10 h +10 h +692 h +1 h +57 h +741 m +4 h +156 h +4 h +10 h +1 h +4 h +4 h +464 m +1 h +59 h +4 h +742 m +1 h +1 h +743 m +744 m +169 h +25 h +4 h +1 h +10 h +295 h +745 m +250 h +12 h +9 m +746 m +747 m +124 h +748 m +4 h +749 m +307 h +92 h +4 h +10 h +4 h +10 h +1 h +1 h +750 m +174 m +1 h +278 h +1 h +8 h +258 h +751 m +4 h +4 h +1 h +752 m +65 h +10 h +1 h +753 m +258 h +4 h +59 h +164 h +4 h +754 m +755 m +82 h +1 h +10 h +4 h +10 h +1 h +756 m +57 h +1 h +48 m +757 m +1 h +276 m +82 h +1 h +758 m +1 h +358 h +4 h +759 m +760 m +4 h +4 h +761 m +238 h +717 h +762 m +10 h +4 h +241 h +10 h +1 h +4 h +10 h +10 h +4 h +446 m +763 m +1 h +764 m +83 h +4 h +765 m +766 m +767 m +4 h +768 m +158 h +238 h +1 h +1 h +83 h +82 h +82 h +4 h +109 h +4 h +31 h +1 h +4 h +109 h +769 m +770 m +112 h +229 h +1 h +31 h +771 m +4 h +11 h +204 m +1 h +4 h +4 h +204 h +772 m +272 h +4 h +4 h +1 h +83 h +536 h +773 m +4 h +774 m +4 h +3 h +775 m +776 m +718 h +57 h +1 h +46 m +777 m +1 h +778 m +82 h +82 h +4 h +1 h +779 m +124 h +97 h +266 h +780 m +781 m +4 h +10 h +4 h +782 m +11 h +783 m +4 h +1 h +784 m +10 h +10 h +785 m +4 h +31 h +786 m +787 m +4 h +1 h +10 h +4 h +788 m +4 h +789 m +790 m +143 h +10 h +3 h +110 h +791 m +1 h +10 h +27 h +792 m +4 h +1 h +4 h +1 h +4 h +793 m +143 h +4 h +4 h +4 h +794 m +795 m +13 h +4 h +125 h +4 h +4 h +796 m +4 h +94 h +195 h +4 h +36 h +1 h +4 h +4 h +59 h +4 h +174 h +797 m +289 m +82 h +4 h +798 m +123 h +1 h +10 h +4 h +799 m +1 h +800 m +4 h +801 m +146 h +55 h +802 m +3 h +10 h +83 h +82 h +803 m +4 h +1 h +1 h +1 h +4 h +804 m +82 h +57 h +278 h +805 m +94 h +1 h +4 h +25 h +806 m +57 h +1 h +4 h +4 h +807 m +1 h +443 h +808 m +1 h +1 h +809 m +13 h +1 h +64 h +4 h +810 m +1 h +1 h +811 m +57 h +812 m +4 h +41 h +10 h +813 m +814 m +11 h +4 h +45 h +4 h +25 h +204 h +4 h +1 h +41 h +28 h +815 m +4 h +4 h +10 h +816 m +817 m +1 h +4 h +172 h +4 h +208 m +818 m +819 m +435 m +820 m +4 h 
+821 m +124 h +4 h +4 h +186 m +4 h +1 h +536 h +4 h +123 h +822 m +123 h +10 h +1 h +10 h +1 h +4 h +10 h +1 h +823 m +158 h +824 m +1 h +825 m +4 h +826 m +4 h +4 h +1 h +4 h +4 h +827 m +828 m +4 h +109 h +4 h +4 h +10 h +64 h +4 h +829 m +1 h +1 h +10 h +4 h +830 m +57 h +57 h +4 h +1 h +831 m +832 m +1 h +833 m +10 h +1 h +834 m +10 h +11 h +835 m +4 h +10 h +4 h +299 m +299 h +55 h +1 h +118 h +74 h +4 h +104 h +10 h +56 h +10 h +10 h +836 m +4 h +124 h +1 h +119 h +1 h +1 h +94 h +10 h +44 m +837 m +82 h +1 h +838 m +109 h +10 h +83 h +1 h +839 m +83 h +840 m +841 m +11 h +1 h +11 h +10 h +10 h +718 h +4 h +842 m +843 m +1 h +4 h +281 m +77 m +45 h +10 h +332 h +844 m +97 h +4 h +25 h +845 m +846 m +10 h +847 m +265 h +848 m +10 h +10 h +4 h +849 m +1 h +850 m +4 h +157 m +1 h +851 m +852 m +4 h +4 h +853 m +854 m +855 m +4 h +1 h +856 m +10 h +4 h +857 m +358 h +4 h +59 h +858 m +4 h +3 h +10 h +4 h +1 h +4 h +10 h +172 h +12 h +114 h +4 h +146 h +57 h +859 m +82 h +4 h +860 m +1 h +861 m +1 h +10 h +4 h +862 m +146 h +4 h +4 h +1 h +533 m +10 h +863 m +1 h +11 h +1 h +12 h +1 h +83 h +864 m +4 h +10 h +4 h +4 h +1 h +865 m +10 h +866 m +3 h +867 m +289 h +4 h +10 h +10 h +868 m +1 h +1 h +10 h +13 h +1 h +4 h +1 h +170 h +146 h +4 h +4 h +94 h +1 h +869 m +1 h +870 m +1 h +1 h +871 m +1 h +4 h +92 h +1 h +4 h +872 m +873 m +874 m +8 h +358 h +28 h +875 m +4 h +119 h +4 h +876 m +83 h +1 h +57 h +279 m +4 h +1 h +1 h +400 m +4 h +536 h +11 h +172 h +877 m +4 h +4 h +10 h +10 h +1 h +4 h +1 h +69 m +1 h +1 h +878 m +879 m +1 h +10 h +4 h +1 h +880 m +59 h +881 m +10 h +25 h +882 m +4 h +1 h +10 h +4 h +1 h +10 h +883 m +884 m +276 m +4 h +125 h +4 h +885 m +124 h +4 h +125 h +1 h +59 h +1 h +1 h +4 h +4 h +886 m +1 h +887 m +196 h +888 m +79 h +27 h +11 h +889 m +4 h +10 h +4 h +4 h +41 h +890 m +4 h +4 h +10 h +891 m +892 m +90 m +893 m +894 m +91 m +10 h +1 h +59 h +1 h +4 h +1 h +1 h +1 h +895 m +56 h +4 h +1 h +896 m +897 m +898 m +4 h +4 h +11 h +82 h 
+1 h +899 m +258 h +4 h +900 m +901 m +902 m +109 h +1 h +4 h +903 m +135 h +57 h +94 h +1 h +158 h +297 h +56 h +904 m +905 m +4 h +906 m +687 m +1 h +25 h +196 h +1 h +10 h +907 m +1 h +10 h +4 h +1 h +170 h +11 h +1 h +110 h +4 h +10 h +10 h +4 h +908 m +1 h +61 m +10 h +1 h +4 h +909 m +1 h +185 h +910 m +10 h +911 m +105 m +912 m +10 h +10 h +83 h +10 h +4 h +59 h +913 m +10 h +192 m +1 h +1 h +4 h +4 h +4 h +124 h +914 m +4 h +915 m +10 h +916 m +10 h +83 h +1 h +4 h +1 h +307 h +172 h +917 m +143 h +1 h +4 h +918 m +196 h +1 h +4 h +874 h +82 h +919 m +920 g +1 h +1 h +1 h +4 h +55 h +1 h +921 m +922 m +170 h +1 h +923 m +10 h +10 h +4 h +1 h +924 m +192 h +857 h +11 h +10 h +4 h +925 m +926 m +927 m +1 h +11 h +10 h +928 m +4 h +1 h +4 h +104 h +929 m +1 h +10 h +82 h +930 m +196 h +4 h +931 m +3 h +932 m +933 m +10 h +1 h +934 m +10 h +164 h +266 h +4 h +935 m +10 h +4 h +1 h +447 m +936 m +937 m +4 h +938 m +4 h +4 h +4 h +4 h +4 h +939 m +124 h +83 h +940 m +1 h +36 h +941 m +383 h +10 h +942 m +83 h +4 h +11 h +1 h +4 h +4 h +4 h +1 h +1 h +943 m +4 h +4 h +944 m +10 h +1 h +3 h +945 m +10 h +10 h +10 h +1 h +946 m +1 h +1 h +55 h +4 h +947 m +1 h +948 m +4 h +124 h +190 h +949 m +950 m +10 h +951 m +4 h +952 m +278 h +31 h +1 h +953 m +1 h +299 h +4 h +954 m +1 h +69 h +955 m +10 h +1 h +10 h +4 h +156 h +4 h +10 h +956 m +307 h +4 h +4 h +55 h +1 h +1 h +957 m +74 h +4 h +229 h +174 h +195 h +10 h +939 h +4 h +4 h +4 h +195 h +10 h +1 h +1 h +57 h +190 h +4 h +1 h +10 h +48 m +104 h +1 h +1 h +435 m +1 h +1 h +1 h +4 h +4 h +11 h +10 h +31 h +10 h +83 h +4 h +250 h +4 h +4 h +10 h +11 h +1 h +958 m +4 h +83 h +25 h +4 h +959 m +10 h +4 h +158 h +1 h +1 h +10 h +4 h +960 m +25 h +4 h +961 m +10 h +119 h +10 h +4 h +1 h +962 m +146 h +104 h +1 h +10 h +156 h +57 h +4 h +1 h +963 m +1 h +1 h +10 h +964 m +125 h +4 h +1 h +10 h +965 m +966 m +45 h +967 m +10 h +4 h +55 h +1 h +4 h +4 h +8 h +10 h +27 h +59 h +1 h +10 h +1 h +4 h +129 h +10 h +164 h +4 h 
+4 h +4 h +1 h +10 h +4 h +968 m +10 h +82 h +1 h +1 h +969 m +10 h +4 h +970 m +971 m +972 m +10 h +1 h +4 h +1 h +238 h +203 m +77 m +1 h +45 h +973 m +4 h +13 h +4 h +55 h +45 h +974 m +1 h +757 m +3 h +4 h +4 h +975 m +104 h +976 m +330 h +109 h +1 h +4 h +10 h +536 h +763 m +57 h +4 h +4 h +977 m +125 h +10 h +978 m +4 h +1 h +196 h +358 h +4 h +979 m +4 h +11 h +4 h +980 m +10 h +25 h +10 h +981 m +1 h +57 h +1 h +73 h +10 h +92 h +1 h +41 h +4 h +1 h +4 h +1 h +10 h +11 h +1 h +982 m +10 h +983 m +687 h +12 h +11 h +1 h +4 h +196 h +1 h +41 h +984 m +4 h +4 h +985 m +386 h +10 h +1 h +986 m +59 h +987 m +1 h +988 m +10 h +1 h +11 h +10 h +238 h +146 h +4 h +757 h +10 h +989 m +990 m +991 m +383 h +992 m +1 h +993 m +59 h +4 h +4 h +258 h +97 h +4 h +1 h +994 m +1 h +995 m +1 h +1 h +4 h +10 h +996 m +4 h +1 h +124 h +12 h +4 h +997 m +998 m +4 h +1 h +4 h +10 h +1 h +4 h +10 h +36 h +999 m +4 h +443 h +11 h +359 m +1 h +10 h +4 h +1000 m +4 h +10 h +1001 m +438 m +1 h +1002 m +1003 m +1004 m +109 h +1 h +1005 m +4 h +196 h +1006 m +97 h +1 h +4 h +10 h +10 h +1007 m +1008 m +83 h +104 h +1009 m +1 h +4 h +56 h +1010 m +477 m +1011 m +3 h +1012 m +56 h +10 h +12 h +4 h +3 h +25 h +1013 m +10 h +1 h +1 h +10 h +1014 m +1015 m +10 h +1016 m +279 m +10 h +4 h +4 h +4 h +1017 m +10 h +4 h +114 h +1 h +4 h +1 h +4 h +172 h +1 h +125 h +1 h +10 h +1018 m +4 h +258 h +4 h +10 h +10 h +74 h +1 h +25 h +10 h +124 h +581 m +195 h +1019 m +57 h +1020 m +319 m +109 h +1021 m +4 h +4 h +181 h +65 h +10 h +1022 m +92 h +1 h +79 h +109 h +278 h +10 h +27 h +4 h +40 h +1 h +1023 m +4 h +10 h +1024 m +250 h +74 h +1 h +250 h +1025 m +11 h +59 h +10 h +10 h +1026 m +10 h +4 h +143 h +267 m +4 h +1 h +478 m +25 h +1027 m +1028 m +1 h +1029 m +4 h +4 h +1030 m +1 h +4 h +1 h +83 h +4 h +10 h +955 m +10 h +1031 m +1 h +4 h +41 h +164 h +4 h +1032 m +4 h +1 h +1033 m +1034 m +1035 m +1036 m +1037 m +83 h +119 h +1 h +272 h +10 h +1038 m +4 h +11 h +1039 m +4 h +79 h +4 h +1040 m 
+28 h +10 h +11 h +1041 m +59 h +41 h +4 h +13 h +4 h +1042 m +1043 m +10 h +1 h +4 h +1 h +1044 m +1 h +1045 m +10 h +4 h +10 h +1046 m +1 h +1047 m +4 h +1048 m +83 h +1049 m +1 h +10 h +3 h +4 h +112 h +36 h +1 h +1 h +4 h +1003 h +1050 m +114 h +4 h +1051 m +1052 m +1 h +1053 m +3 h +11 h +1 h +1 h +4 h +1 h +1054 m +1055 m +4 h +10 h +1056 m +4 h +1027 h +150 m +4 h +73 h +1057 m +109 h +83 h +779 m +1 h +195 h +10 h +25 h +4 h +4 h +4 h +10 h +4 h +4 h +1 h +4 h +41 h +1 h +112 h +1 h +4 h +1 h +10 h +1058 m +1 h +1 h +4 h +1059 m +1060 m +1 h +4 h +10 h +278 h +59 h +41 h +10 h +1 h +10 h +97 h +10 h +4 h +1061 m +1062 m +10 h +1063 m +11 h +1064 m +10 h +4 h +10 h +64 h +10 h +279 h +10 h +1 h +250 h +45 h +10 h +10 h +11 h +1 h +1065 m +10 h +3 h +10 h +170 h +41 h +27 h +4 h +1066 m +1067 m +11 h +57 h +1 h +1 h +10 h +1068 m +41 h +92 h +1069 m +262 h +4 h +1 h +1 h +4 h +1070 m +386 h +4 h +1071 m +1 h +1 h +12 h +1 h +1072 m +1 h +114 h +4 h +10 h +4 h +57 h +403 m +1073 m +330 h +1074 m +4 h +1 h +109 h +10 h +4 h +10 h +1075 m +1 h +1 h +1076 m +94 h +10 h +1 h +11 h +45 h +10 h +4 h +10 h +1 h +4 h +4 h +1077 m +10 h +1 h +124 h +10 h +10 h +4 h +1 h +41 h +1078 m +146 h +4 h +1 h +1079 m +1080 m +1081 m +1 h +92 h +1 h +4 h +10 h +25 h +4 h +4 h +77 h +11 h +4 h +4 h +1082 m +1083 m +1 h +1 h +1084 m +172 h +10 h +885 m +4 h +1 h +3 h +4 h +1 h +10 h +104 h +124 h +11 h +1 h +10 h +4 h +4 h +10 h +1 h +1 h +1085 m +105 m +265 h +8 h +1086 m +1087 m +4 h +4 h +4 h +109 h +27 h +4 h +1 h +4 h +4 h +1088 m +10 h +4 h +10 h +1 h +57 h +1089 m +367 m +125 h +4 h +1 h +4 h +1090 m +1091 m +146 h +1092 m +1093 m +1094 m +1095 m +195 h +57 h +4 h +1096 m +4 h +4 h +4 h +10 h +1097 m +74 h +1098 m +97 h +1 h +4 h +169 h +13 h +1099 m +4 h +1 h +10 h +4 h +4 h +10 h +4 h +4 h +1100 m +1 h +10 h +173 m +4 h +1101 m +1102 m +4 h +1 h +1 h +1103 m +11 h +1104 m +4 h +1105 m +1 h +4 h +112 h +4 h +1 h +1 h +1106 m +59 h +1 h +82 h +10 h +1107 m +4 h +1108 m +55 
h +10 h +124 h +79 h +1109 m +358 h +258 h +1 h +1110 m +146 h +10 h +433 m +1111 m +56 h +1 h +4 h +4 h +1 h +1 h +4 h +4 h +1112 m +4 h +1 h +83 h +57 h +1113 m +10 h +4 h +13 h +434 h +1114 m +1 h +238 h +1115 m +1116 m +4 h +109 h +1117 m +10 h +1118 m +1 h +97 h +1 h +1119 m +1120 m +10 h +1121 m +4 h +4 h +4 h +4 h +4 h +36 h +1122 m +104 h +59 h +1123 m +10 h +4 h +129 h +1124 m +10 h +135 h +1125 m +1 h +1 h +1126 m +1 h +1 h +1127 m +1128 m +4 h +12 h +10 h +4 h +82 h +10 h +1 h +4 h +1 h +169 h +1 h +1129 m +1130 m +1 h +1131 m +1132 m +172 h +4 h +1133 m +10 h +10 h +1 h +1 h +1 h +1134 m +4 h +1 h +109 h +443 h +1 h +4 h +1135 m +1136 m +1137 m +1 h +1138 m +1 h +31 h +4 h +1139 m +1 h +10 h +4 h +4 h +1140 m +4 h +1 h +1 h +82 h +83 h +91 m +1141 m +4 h +1142 m +4 h +82 h +3 h +4 h +69 h +1143 m +4 h +1 h +1144 m +79 h +109 h +4 h +4 h +10 h +3 h +1145 m +82 h +297 h +4 h +1 h +1146 m +1 h +10 h +114 h +1 h +1147 m +13 h +1 h +3 h +12 h +4 h +4 h +1148 m +11 h +4 h +1 h +1149 m +79 h +4 h +1 h +124 h +10 h +64 h +10 h +4 h +10 h +1150 m +1151 m +41 h +1152 m +1153 m +4 h +1 h +4 h +1154 m +104 h +1 h +4 h +59 h +1155 m +1 h +338 m +59 h +1 h +4 h +1 h +1156 m +1157 m +112 h +1 h +4 h +1158 m +65 h +1 h +10 h +1159 m +1160 m +4 h +10 h +1161 m +1 h +109 h +4 h +11 h +4 h +1162 m +91 h +4 h +125 h +1 h +10 h +4 h +4 h +1 h +4 h +173 h +59 h +1163 m +124 h +1164 m +1165 m +104 h +73 h +1166 m +1167 m +1168 m +1169 m +10 h +4 h +10 h +22 h +82 h +11 h +1170 m +4 h +885 h +10 h +4 h +11 h +4 h +4 h +4 h +4 h +4 h +4 h +4 h +146 h +45 h +1171 m +1172 m +169 h +3 h +4 h +108 h +4 h +1 h +1173 m +4 h +114 h +1174 m +1 h +1175 m +4 h +1027 h +1176 m +10 h +4 h +1 h +97 h +75 m +4 h +10 h +83 h +4 h +25 h +1177 m +1178 m +536 h +4 h +4 h +10 h +109 h +73 h +1179 m +4 h +3 h +31 h +25 h +10 h +1180 m +10 h +1181 m +1182 m +4 h +1 h +1 h +4 h +1016 m +10 h +10 h +4 h +4 h +11 h +4 h +11 h +1183 m +1 h +1184 m +4 h +4 h +493 h +1 h +4 h +4 h +1 h +10 h +10 h +74 h 
+1 h +4 h +4 h +10 h +4 h +59 h +143 h +129 h +1185 m +10 h +1 h +1186 m +258 h +4 h +1187 m +1 h +10 h +1 h +1188 m +10 h +403 m +10 h +1189 m +1190 m +10 h +10 h +79 h +1 h +10 h +4 h +4 h +1191 m +10 h +1 h +10 h +94 h +4 h +11 h +4 h +1 h +1192 m +1 h +4 h +4 h +135 h +31 h +1193 m +4 h +4 h +1 h +368 h +10 h +4 h +73 h +4 h +10 h +4 h +1016 h +219 m +11 h +250 h +4 h +4 h +1 h +10 h +109 h +1194 m +1 h +12 h +1195 m +27 h +10 h +270 h +3 h +1 h +10 h +10 h +1062 m +10 h +297 h +1 h +170 h +282 h +57 h +10 h +1196 m +4 h +82 h +1197 m +1 h +1 h +1 h +13 h +1 h +1198 m +10 h +1199 m +1198 h +1 h +135 h +11 h +4 h +386 h +1200 m +4 h +10 h +57 h +1201 m +10 h +4 h +1202 m +10 h +1203 m +1204 m +4 h +1 h +11 h +649 m +41 h +1 h +469 m +172 h +74 h +4 h +1 h +4 h +1 h +94 h +1205 m +256 m +3 h +224 h +1206 m +11 h +1207 m +4 h +386 h +4 h +1 h +75 h +10 h +10 h +353 h +4 h +1208 m +4 h +4 h +1 h +10 h +10 h +4 h +4 h +1 h +1 h +10 h +1209 m +11 h +1210 m +4 h +4 h +135 h +11 h +1 h +10 h +31 h +4 h +327 m +1 h +4 h +4 h +1211 m +57 h +4 h +4 h +1 h +1212 m +1213 m +1 h +1 h +10 h +1 h +10 h +4 h +56 h +10 h +1214 m +1215 m +1216 m +196 h +11 h +97 h +1217 m +1218 m +1 h +4 h +4 h +1219 m +10 h +1220 m +1221 m +4 h +1222 m +1 h +123 h +10 h +1223 m +4 h +65 h +169 h +1224 m +10 h +4 h +1 h +10 h +1225 m +4 h +10 h +13 h +4 h +4 h +1226 m +250 h +1227 m +1 h +10 h +4 h +87 m +1228 m +4 h +4 h +4 h +353 h +138 h +10 h +10 h +1 h +1 h +10 h +1229 m +1230 m +4 h +4 h +1231 m +1 h +1232 m +1 h +1233 m +4 h +4 h +1 h +36 h +4 h +4 h +4 h +327 h +1 h +25 h +4 h +4 h +41 h +82 h +1 h +4 h +10 h +238 h +10 h +11 h +1 h +1 h +1 h +383 h +10 h +97 h +448 m +1234 m +1235 m +11 h +4 h +1 h +10 h +4 h +1 h +4 h +11 h +94 h +4 h +144 m +1236 m +10 h +125 h +1237 m +4 h +10 h +322 m +4 h +1 h +57 h +4 h +112 h +124 h +1238 m +1239 m +10 h +10 h +1240 m +1241 m +10 h +10 h +4 h +57 h +1242 m +1 h +258 h +109 h +170 h +1 h +1243 m +1 h +1 h +4 h +181 h +1244 m +10 h +1 h +4 h +4 h +1 
h +4 h +358 h +10 h +4 h +113 h +97 h +1245 m +4 h +25 h +4 h +238 h +1246 m +124 h +4 h +10 h +1 h +23 m +4 h +1247 m +1 h +332 h +1 h +83 h +92 h +124 h +1 h +1248 m +4 h +1249 m +4 h +1250 m +10 h +1251 m +1252 m +158 h +1253 m +4 h +10 h +1254 m +1255 m +1256 m +4 h +1 h +4 h +1257 m +4 h +158 h +10 h +1258 m +1 h +4 h +276 h +4 h +575 h +1259 m +4 h +4 h +1260 m +1 h +11 h +1261 m +8 h +10 h +1262 m +1263 m +1 h +1264 m +4 h +1265 m +3 h +264 m +4 h +1266 m +4 h +124 h +4 h +4 h +4 h +1 h +1 h +36 h +1267 m +1 h +1 h +1268 m +4 h +4 h +12 h +1 h +74 h +124 h +195 h +1269 m +10 h +358 h +10 h +109 h +190 h +4 h +10 h +1270 m +1271 m +1272 m +4 h +4 h +1 h +10 h +4 h +169 h +11 h +41 h +1 h +1273 m +1274 m +4 h +1275 m +1276 m +195 h +1 h +1277 m +10 h +1 h +1 h +1 h +1278 m +73 h +1 h +1279 m +1096 m +10 h +10 h +1 h +10 h +4 h +124 h +170 h +4 h +4 h +25 h +4 h +97 h +138 h +4 h +1280 m +10 h +8 h +10 h +1 h +196 h +195 h +1281 m +173 h +195 h +1282 m +1 h +1283 m +1284 m +4 h +1285 m +1286 m +57 h +1287 m +582 m +4 h +10 h +11 h +1288 m +1289 m +1 h +56 h +434 h +146 h +1290 m +1291 m +164 h +10 h +11 h +4 h +1 h +1292 m +4 h +1293 m +265 h +4 h +10 h +1294 m +327 h +1295 m +1 h +55 h +1296 m +538 h +1297 m +10 h +1 h +181 h +94 h +1 h +1 h +4 h +4 h +1298 m +4 h +10 h +1299 m +12 h +1300 m +10 h +114 h +10 h +1 h +124 h +119 h +4 h +1301 m +3 h +4 h +1 h +10 h +1302 m +1303 m +10 h +1 h +10 h +4 h +1304 m +1 h +4 h +4 h +147 h +1305 m +4 h +4 h +1 h +57 h +4 h +3 h +1306 m +4 h +41 h +1307 m +4 h +48 h +10 h +139 h +4 h +11 h +13 h +10 h +109 h +3 h +1308 m +10 h +1 h +1309 m +4 h +1310 m +4 h +119 h +1 h +4 h +4 h +1311 m +22 h +146 h +1312 m +4 h +1 h +258 h +4 h +1 h +1313 m +307 h +1314 m +135 h +10 h +124 h +4 h +10 h +4 h +146 h +10 h +184 m +4 h +135 h +10 h +1315 m +4 h +4 h +10 h +4 h +10 h +27 h +1 h +10 h +10 h +840 m +1 h +4 h +4 h +493 h +307 h +65 h +1 h +112 h +181 h +25 h +1316 m +4 h +447 m +4 h +4 h +140 h +1317 m +1318 m +109 h +11 h +1319 
m +1320 m +1 h +10 h +1321 m +123 h +4 h +173 h +1322 m +4 h +10 h +1323 m +4 h +10 h +135 h +1324 m +1 h +109 h +1325 m +1326 m +11 h +1327 m +1 h +1 h +158 h +10 h +144 m +36 h +57 h +4 h +10 h +10 h +278 h +1 h +4 h +10 h +124 h +1 h +4 h +4 h +4 h +4 h +4 h +1 h +10 h +1328 m +59 h +4 h +94 h +10 h +1329 m +1 h +1 h +4 h +1330 m +10 h +10 h +4 h +4 h +1 h +1 h +10 h +1331 m +1 h +10 h +82 h +57 h +4 h +4 h +1332 m +10 h +4 h +29 m +399 m +1 h +59 h +119 h +1 h +12 h +4 h +1333 m +629 m +1 h +1334 m +1335 m +1336 m +1 h +11 h +1337 m +10 h +1338 m +36 h +1 h +4 h +4 h +1339 m +1340 m +25 h +124 h +124 h +4 h +1 h +92 h +65 h +41 h +4 h +82 h +10 h +4 h +10 h +204 h +10 h +31 h +125 h +1341 m +4 h +1 h +10 h +123 h +276 h +77 h +170 h +1 h +12 h +10 h +10 h +10 h +1342 m +10 h +31 h +11 h +1 h +1 h +4 h +4 h +1343 m +10 h +1016 h +4 h +1344 m +4 h +1345 m +4 h +13 h +45 h +3 h +1346 m +1 h +10 h +1347 m +1348 m +1 h +1349 m +266 h +1 h +1350 m +4 h +1 h +59 h +1351 m +1 h +1 h +10 h +10 h +4 h +1352 m +4 h +1353 m +172 h +4 h +4 h +48 h +3 h +4 h +4 h +11 h +1354 m +1355 m +1356 m +4 h +4 h +1 h +10 h +1357 m +1 h +167 h +41 h +10 h +236 m +10 h +1358 m +1 h +10 h +1359 m +1 h +25 h +1360 m +10 h +3 h +4 h +1361 m +48 h +4 h +1362 m +119 h +1363 m +1 h +1 h +10 h +1364 m +10 h +258 h +82 h +82 h +4 h +195 h +4 h +1365 m +4 h +1366 m +10 h +10 h +170 h +55 h +25 h +1367 m +1368 m +4 h +11 h +1 h +124 h +1 h +4 h +113 h +10 h +339 m +12 h +11 h +1369 m +10 h +146 h +4 h +10 h +82 h +1 h +1370 m +1 h +181 h +1371 m +59 h +1 h +4 h +1372 m +1373 m +986 m +10 h +1 h +65 h +1374 m +1 h +10 h +10 h +1 h +1271 m +1375 m +1 h +92 h +1 h +157 m +1376 m +1377 m +1 h +1 h +82 h +13 h +65 h +4 h +1378 m +10 h +4 h +10 h +1379 m +630 m +737 m +1 h +1 h +4 h +4 h +97 h +4 h +4 h +4 h +1380 m +1261 m +1189 m +1381 m +1382 m +10 h +4 h +4 h +1 h +1383 m +4 h +57 h +1384 m +4 h +11 h +41 h +1385 m +65 h +4 h +11 h +83 h +4 h +1386 m +1387 m +1 h +1105 m +1388 m +135 h +1389 m 
+1390 m +1391 m +106 m +4 h +1392 m +10 h +173 h +1 h +1 h +1393 m +1394 m +1198 h +10 h +4 h +1 h +1 h +125 h +28 h +1 h +10 h +4 h +601 m +104 h +28 h +57 h +1 h +1395 m +104 h +1396 m +1 h +4 h +1397 m +4 h +10 h +41 h +1 h +57 h +10 h +4 h +12 h +124 h +4 h +1 h +4 h +1 h +4 h +4 h +124 h +4 h +10 h +10 h +1398 m +94 h +10 h +4 h +4 h +4 h +10 h +109 h +1399 m +1 h +1 h +1 h +10 h +1400 m +4 h +4 h +1 h +1401 m +74 h +4 h +1402 m +41 h +1 h +1 h +10 h +79 h +144 h +1 h +92 h +1403 m +4 h +4 h +1 h +82 h +1404 m +1405 m +1406 m +195 h +4 h +3 h +105 h +10 h +10 h +1 h +4 h +250 h +146 h +4 h +22 h +1 h +46 m +10 h +1407 m +31 h +4 h +367 m +1 h +1408 m +359 m +1409 m +140 h +4 h +74 h +91 h +124 h +1 h +10 h +158 h +1 h +4 h +4 h +4 h +10 h +4 h +1410 m +276 h +1411 m +1 h +59 h +74 h +4 h +11 h +146 h +4 h +10 h +1 h +4 h +4 h +118 h +4 h +1412 m +10 h +1 h +1413 m +4 h +1414 m +124 h +10 h +1415 m +10 h +1416 m +3 h +1027 h +1417 m +1 h +224 h +1 h +1418 m +4 h +282 h +4 h +1419 m +1420 m +1250 m +1421 m +1 h +10 h +4 h +1422 m +65 h +4 h +59 h +112 h +4 h +1309 m +94 h +1423 m +10 h +1424 m +1 h +190 h +4 h +4 h +1105 h +82 h +1425 m +1426 m +104 h +41 h +687 h +82 h +1 h +1 h +109 h +4 h +4 h +1427 m +12 h +4 h +1428 m +94 h +4 h +1 h +4 h +125 h +274 h +1429 m +4 h +258 h +192 h +1 h +123 h +125 h +83 h +4 h +129 h +173 h +1 h +1430 m +124 h +1431 m +79 h +3 h +10 h +4 h +1432 m +4 h +10 h +4 h +1433 m +1434 m +1 h +1435 m +10 h +4 h +4 h +1 h +11 h +1250 h +4 h +4 h +4 h +1436 m +1437 m +1 h +4 h +11 h +4 h +139 h +4 h +4 h +4 h +4 h +1438 m +10 h +4 h +1 h +1 h +270 h +1439 m +1440 m +1 h +124 h +1441 m +1442 m +10 h +1 h +4 h +4 h +10 h +4 h +10 h +1443 m +4 h +1444 m +4 h +4 h +10 h +4 h +278 h +1445 m +1446 m +4 h +1447 m +181 h +1448 m +1449 m +399 m +73 h +1 h +82 h +124 h +1 h +1 h +1450 m +1451 m +1452 m +4 h +1453 m +59 h +1454 m +1 h +367 h +10 h +4 h +4 h +1455 m +1456 m +615 m +929 m +1 h +4 h +4 h +11 h +1 h +4 h +1 h +1457 m +1458 m +1459 m 
+10 h +109 h +1460 m +1461 m +1462 m +27 h +4 h +169 h +4 h +1463 m +1464 m +1465 m +4 h +83 h +447 m +11 h +25 h +10 h +4 h +1466 m +12 h +4 h +25 h +164 h +332 h +1 h +11 h +10 h +1467 m +196 h +4 h +36 h +10 h +332 h +1468 m +73 h +258 h +1469 m +4 h +105 h +10 h +4 h +59 h +4 h +1 h +1470 m +1471 m +1 h +1 h +4 h +10 h +167 h +10 h +1 h +11 h +1472 m +1473 m +185 h +1474 m +1 h +4 h +4 h +4 h +36 h +1 h +1 h +238 h +1475 m +64 h +11 h +1476 m +59 h +4 h +1 h +1477 m +146 h +4 h +196 h +4 h +368 h +124 h +4 h +10 h +143 h +1478 m +4 h +1 h +146 h +4 h +65 h +97 h +1 h +1479 m +276 h +4 h +1478 h +1 h +196 h +4 h +1480 m +1481 m +1482 m +1483 m +119 h +57 h +399 h +1484 m +1485 m +1486 m +1487 m +135 h +4 h +1 h +4 h +935 m +73 h +158 h +3 h +59 h +4 h +173 h +1488 m +1 h +10 h +4 h +1 h +1489 m +4 h +1 h +1490 m +4 h +1 h +1491 m +1492 m +4 h +109 h +1493 m +1 h +10 h +1 h +36 h +1494 m +1495 m +10 h +1496 m +4 h +185 h +1497 m +1 h +4 h +1 h +1 h +146 h +4 h +64 h +412 m +4 h +4 h +1 h +266 h +1 h +4 h +1359 m +1498 m +1499 m +13 h +4 h +10 h +359 m +10 h +172 h +1 h +4 h +480 m +307 h +109 h +1 h +297 h +1500 m +25 h +1 h +10 h +3 h +1 h +10 h +4 h +1501 m +1 h +10 h +1 h +124 h +4 h +1502 m +10 h +158 h +4 h +4 h +1503 m +4 h +119 h +1 h +83 h +31 h +10 h +1 h +996 m +195 h +4 h +1504 m +1 h +4 h +737 m +1505 m +1506 m +57 h +1 h +10 h +770 m +1507 m +4 h +1508 m +173 h +1509 m +10 h +1 h +1510 m +1511 m +4 h +279 h +228 m +124 h +3 h +1512 m +4 h +1 h +172 h +1 h +10 h +1513 m +4 h +1514 m +40 h +10 h +41 h +10 h +1 h +124 h +1515 m +4 h +1516 m +55 h +1 h +4 h +1517 m +11 h +1518 m +4 h +4 h +1519 m +1520 m +4 h +10 h +1 h +1521 m +59 h +10 h +1522 m +1 h +1523 m +83 h +10 h +74 h +140 h +41 h +10 h +10 h +1 h +10 h +4 h +10 h +443 h +4 h +56 h +82 h +258 h +536 h +13 h +10 h +1524 m +10 h +4 h +10 h +4 h +1525 m +1 h +1362 m +1526 m +57 h +1527 m +266 h +4 h +1528 m +124 h +185 h +1 h +4 h +185 h +4 h +167 h +4 h +1 h +114 h +4 h +1 h +266 h +4 h +10 h +4 
h +1529 m +1530 m +1531 m +10 h +10 h +195 h +4 h +10 h +1532 m +601 m +1 h +1533 m +10 h +1218 m +1 h +4 h +1534 m +4 h +1535 m +4 h +82 h +11 h +1 h +1536 m +10 h +195 h +10 h +124 h +4 h +1 h +1537 m +1538 m +1539 m +4 h +276 h +114 h +4 h +1 h +4 h +10 h +258 h +1 h +1540 m +82 h +1003 h +92 h +1541 m +156 h +4 h +1542 m +4 h +4 h +40 h +13 h +1543 m +4 h +73 h +10 h +4 h +22 h +1544 m +1545 m +4 h +4 h +4 h +4 h +4 h +4 h +59 h +83 h +4 h +4 h +1546 m +601 h +1 h +10 h +4 h +1 h +10 h +1 h +1547 m +1 h +1 h +83 h +112 h +4 h +208 m +1 h +11 h +10 h +10 h +1 h +1 h +4 h +4 h +4 h +1548 m +857 h +4 h +4 h +1549 m +109 h +59 h +1550 m +10 h +45 h +65 h +1 h +25 h +4 h +4 h +1551 m +4 h +109 h +228 h +12 h +4 h +1552 m +1 h +1553 m +1 h +4 h +10 h +4 h +1554 m +4 h +27 h +1555 m +4 h +1 h +10 h +4 h +129 h +192 h +1556 m +25 h +1557 m +1 h +1558 m +1559 m +124 h +4 h +4 h +443 h +10 h +1560 m +10 h +1 h +1 h +1561 m +10 h +57 h +4 h +10 h +1562 m +4 h +4 h +1563 m +1564 m +10 h +10 h +1565 m +4 h +1566 m +1567 m +1568 m +1569 m +4 h +1570 m +83 h +4 h +4 h +1 h +1 h +167 h +371 h +97 h +1299 m +59 h +10 h +10 h +4 h +1571 m +4 h +4 h +109 h +11 h +1572 m +4 h +10 h +74 h +10 h +1573 m +4 h +10 h +1574 m +1575 m +1 h +124 h +10 h +10 h +1576 m +4 h +10 h +1359 h +13 h +104 h +59 h +1577 m +1027 h +1578 m +1579 m +1580 m +266 h +1 h +1581 m +196 h +295 h +1582 m +1 h +1 h +1583 m +1 h +1584 m +4 h +1 h +1344 m +1 h +1585 m +1 h +1 h +4 h +1586 m +11 h +36 h +97 h +1 h +1 h +25 h +1 h +987 m +241 h +1587 m +10 h +156 h +195 h +82 h +1 h +4 h +1 h +10 h +1588 m +278 h +1589 m +1590 m +1 h +109 h +1591 m +888 m +1 h +1592 m +4 h +13 h +4 h +12 h +823 m +1593 m +4 h +146 h +82 h +1 h +73 h +1594 m +1 h +1595 m +1596 m +1 h +1597 m +10 h +1598 m +73 h +4 h +10 h +109 h +1 h +31 h +1 h +1599 m +4 h +1600 m +1 h +1601 m +204 h +10 h +10 h +1602 m +1603 m +1 h +4 h +1604 m +1 h +10 h +1 h +1 h +4 h +10 h +10 h +1 h +1 h +1605 m +297 h +55 h +124 h +140 h +10 h +59 h +1606 m 
+10 h +4 h +73 h +4 h +4 h +4 h +276 h +10 h +1607 m +1 h +1 h +1 h +125 h +1 h +1608 m +10 h +433 m +615 m +1535 m +10 h +10 h +10 h +1609 m +69 h +4 h +536 h +1 h +1610 m +4 h +4 h +1611 m +1 h +1612 m +4 h +25 h +10 h +1613 m +4 h +1614 m +1 h +1615 m +3 h +196 h +25 h +41 h +1616 m +1617 m +181 h +1 h +4 h +4 h +1 h +124 h +10 h +10 h +1 h +1 h +1618 m +1 h +1 h +10 h +1 h +3 h +10 h +4 h +1619 m +1620 m +4 h +4 h +4 h +262 h +41 h +167 h +1621 m +4 h +10 h +1622 m +64 h +1623 m +167 h +1 h +4 h +124 h +4 h +1624 m +4 h +1 h +353 h +1625 m +1437 m +11 h +383 h +1 h +1626 m +135 h +4 h +1627 m +181 h +92 h +31 h +104 h +730 m +1628 m +4 h +11 h +1629 m +10 h +1 h +73 h +1630 m +1631 m +4 h +10 h +55 h +1 h +1632 m +4 h +1 h +4 h +256 m +1 h +4 h +4 h +1633 m +1 h +368 h +4 h +1 h +4 h +114 h +1634 m +1 h +1635 m +4 h +10 h +1636 m +4 h +172 h +10 h +190 h +10 h +124 h +1637 m +195 h +57 h +10 h +4 h +4 h +1638 m +4 h +1 h +181 h +1639 m +11 h +1640 m +1 h +1641 m +1642 m +1643 m +4 h +167 h +1644 m +10 h +1645 m +4 h +4 h +10 h +4 h +1646 m +10 h +4 h +10 h +307 h +64 h +692 h +368 h +1 h +386 h +41 h +538 h +265 h +1 h +59 h +1647 m +1648 m +4 h +1 h +28 h +443 h +186 m +125 h +10 h +1649 m +3 h +10 h +1650 m +4 h +1 h +10 h +1651 m +4 h +1652 m +4 h +10 h +1653 m +1 h +139 h +10 h +1654 m +4 h +83 h +4 h +1 h +443 h +10 h +10 h +4 h +1655 m +1 h +124 h +1656 m +307 h +10 h +1657 m +4 h +10 h +10 h +1658 m +1 h +4 h +79 h +1 h +4 h +1659 m +1660 m +59 h +1661 m +1662 m +1 h +36 h +4 h +1663 m +83 h +4 h +1 h +114 h +359 h +65 h +1664 m +1665 m +1666 m +10 h +1667 m +3 h +1668 m +1 h +59 h +45 h +1669 m +4 h +104 h +1670 m +1671 m +83 h +119 h +10 h +1 h +10 h +1 h +56 h +1672 m +1 h +10 h +241 h +575 h +4 h +4 h +536 h +109 h +167 h +266 h +4 h +4 h +1673 m +10 h +4 h +119 h +4 h +146 h +10 h +54 m +10 h +1 h +124 h +1 h +91 h +4 h +1 h +1674 m +1 h +59 h +1675 m +278 h +1 h +4 h +4 h +1676 m +1 h +1 h +1677 m +36 h +1 h +10 h +10 h +48 h +1 h +59 h +1678 m +40 
h +1 h +1 h +371 h +1 h +1679 m +4 h +1680 m +4 h +10 h +57 h +1 h +250 h +4 h +1 h +1 h +4 h +56 h +4 h +11 h +74 h +74 h +4 h +12 h +59 h +1681 m +10 h +1 h +1682 m +10 h +270 h +1 h +1 h +4 h +10 h +1 h +1683 m +69 h +3 h +1684 m +10 h +10 h +11 h +10 h +4 h +459 h +41 h +872 m +1 h +1685 m +4 h +1 h +1686 m +25 h +1 h +15 m +4 h +1687 m +109 h +104 h +1 h +1 h +1688 m +4 h +1689 m +10 h +10 h +4 h +295 h +1690 m +1 h +59 h +10 h +4 h +1 h +4 h +4 h +190 h +1691 m +1 h +1692 m +640 h +4 h +1 h +1 h +4 h +10 h +4 h +41 h +4 h +1 h +1693 m +4 h +36 h +25 h +4 h +4 h +1694 m +1695 m +1696 m +10 h +109 h +135 h +1 h +1697 m +1698 m +4 h +4 h +12 h +1699 m +1016 h +278 h +1 h +11 h +10 h +1 h +4 h +1700 m +1137 m +10 h +170 h +4 h +1701 m +1137 h +1074 m +1702 m +1703 m +4 h +125 h +4 h +10 h +64 h +1704 m +10 h +1 h +278 h +1 h +1705 m +238 h +687 h +1706 m +1 h +124 h +36 h +1707 m +388 m +1708 m +25 h +1 h +1709 m +10 h +57 h +570 m +90 m +1 h +10 h +1710 m +10 h +4 h +1711 m +10 h +1 h +264 m +4 h +4 h +1712 m +1713 m +1107 m +4 h +82 h +1 h +1714 m +1 h +104 h +359 h +4 h +11 h +1 h +1 h +11 h +10 h +285 m +4 h +1 h +10 h +1715 m +1 h +119 h +57 h +41 h +83 h +976 m +169 h +11 h +1716 m +1250 h +3 h +10 h +1 h +1717 m +1 h +65 h +124 h +649 m +1269 m +1718 m +57 h +656 m +1 h +112 h +1719 m +1720 m +4 h +1 h +10 h +1721 m +92 h +109 h +10 h +1722 m +1 h +1723 m +4 h +10 h +4 h +1261 h +4 h +4 h +4 h +10 h +10 h +640 h +1 h +185 h +1 h +1 h +11 h +94 h +1067 m +1724 m +10 h +1725 m +1726 m +1 h +4 h +190 h +4 h +181 h +4 h +4 h +57 h +57 h +185 h +10 h +1727 m +93 m +4 h +737 h +10 h +140 h +1728 m +338 m +4 h +1729 m +143 h +4 h +1 h +4 h +4 h +8 h +4 h +4 h +10 h +1730 m +13 h +367 h +4 h +3 h +1731 m +1732 m +1 h +23 m +4 h +272 h +31 h +10 h +97 h +1733 m +4 h +124 h +124 h +1 h +1734 m +157 m +1 h +258 h +1735 m +1736 m +1 h +1191 m +36 h +109 h +74 h +104 h +3 h +10 h +135 h +45 h +185 h +238 h +1737 m +4 h +10 h +4 h +1738 m +4 h +4 h +1 h +1 h +4 h +10 h 
+1739 m +1740 m +10 h +1 h +4 h +1741 m +1 h +1742 m +1 h +4 h +13 h +4 h +1321 m +59 h +4 h +164 h +157 h +4 h +4 h +1743 m +10 h +83 h +4 h +13 h +1744 m +1 h +1745 m +114 h +65 h +1261 h +10 h +359 h +1 h +83 h +10 h +1746 m +83 h +25 h +1747 m +1748 m +1749 m +156 h +1 h +1750 m +73 h +4 h +1751 m +1 h +1752 m +4 h +12 h +1753 m +4 h +109 h +10 h +1754 m +4 h +1755 m +27 h +4 h +10 h +1 h +10 h +10 h +10 h +57 h +1756 m +4 h +4 h +59 h +10 h +1757 m +1758 m +1759 m +181 h +1 h +1760 m +109 h +1 h +1406 m +1761 m +10 h +1 h +4 h +4 h +1762 m +10 h +1763 m +1764 m +10 h +1 h +11 h +1765 m +10 h +10 h +1 h +1 h +41 h +1766 m +1 h +11 h +11 h +4 h +109 h +1 h +1767 m +1768 m +25 h +4 h +1309 h +10 h +1 h +125 h +3 h +10 h +10 h +1769 m +1 h +1 h +1770 m +4 h +307 h +1 h +443 h +4 h +169 h +1771 m +1772 m +1 h +1773 m +1 h +4 h +10 h +1774 m +4 h +4 h +4 h +4 h +1775 m +124 h +64 h +1776 m +1777 m +1 h +1 h +4 h +1778 m +10 h +172 h +36 h +4 h +1779 m +41 h +601 h +104 h +4 h +4 h +97 h +1780 m +4 h +3 h +1781 m +4 h +4 h +1 h +57 h +1 h +10 h +59 h +41 h +1 h +1 h +4 h +25 h +158 h +1 h +10 h +10 h +1782 m +4 h +10 h +1783 m +1784 m +10 h +4 h +55 h +1785 m +1786 m +174 h +4 h +1 h +4 h +1787 m +10 h +4 h +1788 m +11 h +4 h +4 h +1789 m +1790 m +4 h +41 h +97 h +4 h +33 m +1791 m +1792 m +4 h +1 h +10 h +36 h +106 m +506 m +156 h +1793 m +1794 m +615 h +4 h +1 h +4 h +4 h +1795 m +4 h +1 h +1 h +83 h +1796 m +1797 m +1798 m +73 h +4 h +1799 m +82 h +109 h +1800 m +4 h +4 h +185 h +1 h +4 h +10 h +158 h +1801 m +1108 m +4 h +59 h +4 h +1802 m +1 h +399 h +1 h +1803 m +4 h +4 h +1804 m +83 h +113 h +4 h +4 h +4 h +1805 m +114 h +4 h +4 h +1406 m +119 h +124 h +1806 m +3 h +1 h +184 m +1 h +10 h +4 h +104 h +109 h +109 h +10 h +4 h +125 h +4 h +97 h +59 h +10 h +4 h +4 h +692 h +1807 m +4 h +3 h +4 h +1808 m +4 h +10 h +31 h +1809 m +4 h +113 h +1 h +123 h +1810 m +1811 m +4 h +1 h +11 h +10 h +3 h +4 h +1 h +82 h +1812 m +4 h +1 h +123 h +1 h +82 h +976 m +10 h +12 h 
+1 h +12 h +10 h +1 h +4 h +124 h +10 h +4 h +4 h +1 h +4 h +4 h +1813 m +55 h +1814 m +1359 h +4 h +1815 m +4 h +1816 m +3 h +388 m +820 m +1 h +1817 m +1 h +4 h +1 h +11 h +4 h +11 h +4 h +1818 m +1819 m +31 h +185 h +4 h +1820 m +4 h +1 h +1821 m +4 h +73 h +1261 h +124 h +1822 m +10 h +4 h +1823 m +25 h +10 h +1824 m +4 h +1825 m +578 m +10 h +109 h +59 h +186 m +10 h +1826 m +4 h +1 h +1827 m +1470 m +83 h +4 h +10 h +1 h +41 h +447 h +1828 m +4 h +1 h +10 h +10 h +22 h +4 h +11 h +1829 m +4 h +106 m +4 h +1830 m +10 h +1831 m +10 h +322 m +4 h +278 h +4 h +109 h +869 m +1 h +1832 m +1833 m +4 h +1 h +25 h +1834 m +1 h +143 h +4 h +10 h +1 h +4 h +1 h +1835 m +1836 m +56 h +1 h +1837 m +4 h +307 h +4 h +1 h +1 h +1838 m +986 m +4 h +1 h +1 h +1 h +185 h +1 h +4 h +157 h +1839 m +4 h +4 h +4 h +92 h +4 h +1840 m +1841 m +10 h +109 h +4 h +1842 m +443 h +1843 m +109 h +1 h +4 h +4 h +1 h +1844 m +31 h +10 h +1 h +124 h +59 h +1 h +4 h +4 h +4 h +3 h +10 h +4 h +1 h +1 h +10 h +4 h +10 h +25 h +4 h +359 h +1 h +83 h +1845 m +144 h +4 h +4 h +386 h +4 h +1846 m +135 h +1847 m +40 h +158 h +82 h +1 h +82 h +4 h +1848 m +4 h +56 h +1849 m +1 h +10 h +10 h +1850 m +10 h +1127 m +4 h +1851 m +74 h +1 h +1852 m +1853 m +1854 m +1855 m +4 h +11 h +1856 m +1857 m +31 h +1 h +4 h +295 h +1403 m +10 h +4 h +57 h +1030 m +1 h +104 h +1 h +1858 m +173 h +1859 m +82 h +1 h +1860 m +1030 h +1 h +15 m +112 h +8 h +185 h +1482 m +4 h +11 h +1861 m +10 h +4 h +10 h +4 h +1 h +4 h +10 h +328 m +4 h +10 h +976 h +4 h +1862 m +4 h +10 h +4 h +1863 m +1 h +4 h +1864 m +468 m +1 h +1865 m +1089 m +4 h +10 h +1 h +10 h +11 h +1866 m +10 h +114 h +1 h +1867 m +1409 m +10 h +4 h +1868 m +31 h +129 h +4 h +1869 m +1870 m +1 h +143 h +124 h +1871 m +1872 m +1873 m +908 m +1796 m +4 h +4 h +113 h +10 h +1874 m +1875 m +10 h +31 h +10 h +230 m +4 h +10 h +1876 m +1 h +124 h +1877 m +4 h +4 h +1 h +10 h +59 h +1205 m +935 m +1 h +143 h +10 h +10 h +1878 m +4 h +1879 m +1880 m +69 h +10 h +55 
h +1 h +1 h +4 h +1 h +10 h +4 h +1881 m +4 h +4 h +4 h +85 m +901 m +1882 m +1 h +1883 m +1 h +4 h +10 h +11 h +1 h +4 h +57 h +569 h +1884 m +4 h +3 h +4 h +1 h +1885 m +124 h +4 h +520 m +1886 m +1 h +1887 m +1888 m +3 h +82 h +31 h +4 h +10 h +10 h +4 h +4 h +1 h +109 h +1 h +1889 m +616 m +4 h +185 h +4 h +1 h +56 h +10 h +1 h +1890 m +10 h +860 m +332 h +4 h +4 h +297 h +1 h +57 h +1 h +156 h +649 m +109 h +10 h +1891 m +4 h +1892 m +31 h +11 h +10 h +36 h +1 h +10 h +4 h +1893 m +73 h +4 h +10 h +1 h +10 h +1894 m +1895 m +4 h +4 h +10 h +4 h +4 h +4 h +4 h +4 h +10 h +4 h +238 h +1896 m +1 h +1897 m +1898 m +10 h +3 h +13 h +4 h +1 h +1899 m +4 h +4 h +1 h +64 h +10 h +10 h +278 h +4 h +4 h +4 h +10 h +12 h +1900 m +79 h +11 h +4 h +1250 h +10 h +1901 m +1902 m +3 h +1903 m +92 h +4 h +307 h +626 m +11 h +1 h +1 h +1904 m +164 h +27 h +1905 m +4 h +1389 m +4 h +4 h +1 h +25 h +10 h +10 h +1 h +4 h +10 h +1 h +538 h +1 h +307 h +11 h +1906 m +1 h +1 h +13 h +82 h +4 h +1 h +69 h +1907 m +3 h +1908 m +4 h +4 h +94 h +1909 m +1910 m +83 h +10 h +371 h +1911 m +10 h +4 h +1912 m +1913 m +1914 m +278 h +10 h +371 h +1772 m +83 h +4 h +11 h +82 h +1 h +230 m +8 h +1 h +4 h +1915 m +10 h +10 h +1916 m +1917 m +1918 m +4 h +1919 m +4 h +1920 m +4 h +4 h +359 h +1321 m +4 h +1 h +11 h +1 h +146 h +4 h +11 h +25 h +4 h +1 h +1921 m +1 h +4 h +1922 m +10 h +46 m +1923 m +4 h +1924 m +1470 m +1925 m +241 h +1926 m +1 h +4 h +4 h +8 h +10 h +10 h +124 h +172 h +1927 m +1 h +4 h +10 h +4 h +55 h +1 h +1 h +4 h +4 h +4 h +1928 m +1 h +56 h +4 h +10 h +27 h +4 h +4 h +59 h +1 h +11 h +1 h +31 h +146 h +4 h +1929 m +109 h +1 h +4 h +250 h +10 h +4 h +11 h +143 h +10 h +10 h +1930 m +10 h +4 h +79 h +1619 m +4 h +31 h +569 h +4 h +124 h +4 h +11 h +169 h +4 h +167 h +1 h +4 h +1931 m +399 h +4 h +266 h +4 h +1 h +1932 m +1780 m +10 h +1933 m +196 h +1934 m +1 h +1935 m +4 h +10 h +1936 m +1937 m +1 h +1 h +1 h +10 h +1938 m +1939 m +25 h +41 h +4 h +1 h +10 h +146 h +31 h 
+59 h +1940 m +1941 m +10 h +4 h +1942 m +4 h +1 h +1 h +4 h +313 m +4 h +1 h +83 h +10 h +1943 m +114 h +278 h +1944 m +1 h +1945 m +12 h +57 h +4 h +143 h +4 h +196 h +10 h +10 h +4 h +82 h +10 h +1946 m +1947 m +10 h +1 h +10 h +10 h +1 h +92 h +190 h +11 h +1 h +11 h +4 h +258 h +1948 m +185 h +92 h +1949 m +299 h +1950 m +41 h +1 h +4 h +1 h +11 h +10 h +1951 m +125 h +1260 m +10 h +124 h +1 h +1 h +12 h +10 h +186 h +1 h +1 h +1952 m +10 h +7 m +4 h +4 h +4 h +1953 m +4 h +4 h +4 h +238 h +82 h +82 h +1 h +1954 m +10 h +4 h +1 h +4 h +266 h +10 h +1955 m +1 h +1956 m +173 h +1 h +1 h +1027 h +1957 m +443 h +10 h +11 h +1958 m +4 h +164 h +386 h +4 h +1 h +1959 m +57 h +4 h +4 h +82 h +27 h +1960 m +1961 m +4 h +40 h +10 h +1962 m +319 m +10 h +3 h +10 h +10 h +11 h +1963 m +1964 m +1 h +10 h +3 h +270 h +10 h +10 h +1 h +109 h +4 h +113 h +307 h +1780 h +1 h +4 h +1965 m +1966 m +986 m +10 h +4 h +4 h +1967 m +1968 m +124 h +3 h +1969 m +1 h +4 h +87 m +4 h +1 h +1 h +65 h +1970 m +146 h +104 h +1632 m +4 h +10 h +4 h +125 h +83 h +109 h +1 h +4 h +124 h +10 h +124 h +11 h +10 h +1 h +4 h +4 h +1971 m +10 h +4 h +59 h +3 h +1 h +1972 m +10 h +139 h +195 h +10 h +1 h +1973 m +1 h +10 h +135 h +1 h +1 h +1 h +119 h +146 h +4 h +1 h +1974 m +1281 m +10 h +4 h +1975 m +1976 m +1977 m +110 h +146 h +1978 m +1 h +204 h +1 h +4 h +1979 m +124 h +1980 m +10 h +888 m +1 h +1835 m +4 h +10 h +94 h +109 h +10 h +4 h +1 h +4 h +10 h +4 h +4 h +10 h +1 h +1 h +4 h +238 h +1981 m +3 h +4 h +1982 m +11 h +10 h +1 h +4 h +41 h +1 h +1 h +1983 m +1 h +1 h +31 h +4 h +10 h +4 h +1 h +1 h +1 h +10 h +266 h +10 h +1714 m +147 h +4 h +1984 m +140 h +11 h +1985 m +4 h +59 h +1986 m +10 h +1987 m +4 h +173 h +104 h +10 h +73 h +4 h +57 h +4 h +1 h +82 h +10 h +1 h +1988 m +40 h +1 h +4 h +25 h +1989 m +307 h +4 h +1 h +295 h +1137 h +1 h +77 h +3 h +124 h +295 h +1990 m +10 h +4 h +265 h +11 h +12 h +190 h +1991 m +1992 m +4 h +1 h +59 h +1362 h +10 h +57 h +190 h +1 h +1 h +1993 m 
+1994 m +1 h +536 h +386 h +10 h +10 h +10 h +1 h +1995 m +4 h +13 h +172 h +276 h +1 h +4 h +1 h +692 h +10 h +10 h +1996 m +4 h +1997 m +10 h +4 h +1998 m +1999 m +2000 m +4 h +10 h +82 h +1 h +2001 m +4 h +147 h +1 h +2002 m +109 h +10 h +4 h +11 h +104 h +238 h +10 h +1 h +1 h +125 h +195 h +36 h +4 h +547 m +4 h +4 h +10 h +164 h +59 h +278 h +2003 m +2004 m +1 h +45 h +170 h +79 h +125 h +10 h +10 h +1478 h +1 h +2005 m +1 h +185 h +1 h +36 h +10 h +10 h +4 h +10 h +10 h +2006 m +57 h +2007 m +41 h +1 h +2008 m +2009 m +1 h +79 h +1 h +2010 m +124 h +536 h +97 h +2011 m +11 h +1 h +4 h +1 h +250 h +74 h +1 h +4 h +36 h +140 h +204 h +10 h +4 h +1 h +1403 m +1 h +10 h +4 h +330 h +10 h +1 h +1316 m +4 h +4 h +1 h +4 h +195 h +82 h +25 h +2012 m +307 h +11 h +2013 m +196 h +59 h +31 h +4 h +25 h +10 h +2014 m +1016 h +2015 m +2016 m +935 m +4 h +4 h +110 h +104 h +692 h +56 h +10 h +109 h +4 h +10 h +2017 m +2018 m +4 h +1 h +4 h +2019 m +2020 m +4 h +258 h +1 h +1 h +2021 m +10 h +13 h +1 h +172 h +4 h +196 h +4 h +10 h +59 h +10 h +2022 m +164 h +79 h +4 h +2023 m +4 h +2024 m +10 h +4 h +31 h +55 h +10 h +2025 m +97 h +278 h +4 h +4 h +1 h +10 h +41 h +2026 m +2027 m +1409 m +83 h +4 h +4 h +1 h +59 h +4 h +2028 m +10 h +520 m +2029 m +4 h +2030 m +2031 m +966 m +25 h +45 h +2032 m +4 h +74 h +4 h +83 h +2033 m +2034 m +331 m +1 h +4 h +57 h +2035 m +4 h +97 h +104 h +2036 m +1 h +31 h +278 h +139 h +241 h +4 h +2037 m +170 h +2038 m +2039 m +1 h +1 h +986 h +2040 m +1 h +83 h +4 h +2041 m +1 h +4 h +4 h +2042 m +1 h +2043 m +65 h +1 h +109 h +2044 m +1 h +10 h +10 h +1 h +2045 m +1 h +83 h +10 h +4 h +1 h +157 h +4 h +2046 m +1 h +124 h +10 h +11 h +4 h +2047 m +4 h +57 h +1 h +1 h +2048 m +2049 m +1 h +2050 m +4 h +1 h +83 h +25 h +1650 m +2051 m +1096 h +4 h +1 h +2052 m +11 h +10 h +1 h +2053 m +83 h +1 h +2054 m +1 h +1 h +4 h +10 h +2055 m +2056 m +2057 m +2058 m +2059 m +1 h +45 h +2060 m +123 h +2061 m +1607 m +104 h +4 h +1 h +4 h +1 h +4 h +1 h 
+169 h +1 h +79 h +1 h +258 h +124 h +11 h +82 h +2062 m +2063 m +1 h +31 h +1 h +1 h +1 h +1 h +10 h +1 h +3 h +331 m +4 h +1321 h +1 h +10 h +4 h +4 h +1 h +41 h +3 h +1 h +2064 m +4 h +4 h +2065 m +25 h +10 h +1 h +2066 m +3 h +57 h +33 m +22 h +4 h +124 h +1 h +4 h +10 h +41 h +10 h +295 h +1 h +2067 m +10 h +11 h +92 h +1016 h +2068 m +83 h +25 h +1 h +2069 m +224 h +157 h +12 h +2070 m +986 h +656 m +10 h +4 h +41 h +1 h +2071 m +1 h +109 h +238 h +204 h +2072 m +1074 m +4 h +1 h +2073 m +1 h +4 h +2074 m +4 h +82 h +59 h +2075 m +2076 m +2077 m +11 h +1 h +82 h +4 h +1 h +10 h +358 h +4 h +83 h +10 h +4 h +4 h +45 h +4 h +110 h +2078 m +1 h +25 h +4 h +2079 m +125 h +11 h +1 h +10 h +83 h +94 h +25 h +4 h +124 h +2080 m +10 h +2081 m +1 h +4 h +1 h +4 h +59 h +109 h +1 h +4 h +10 h +4 h +2082 m +1 h +2083 m +1 h +157 h +2084 m +10 h +4 h +55 h +2085 m +1 h +4 h +10 h +2086 m +4 h +10 h +4 h +4 h +986 h +2087 m +65 h +2088 m +10 h +172 h +10 h +4 h +1 h +59 h +2089 m +79 h +4 h +109 h +2090 m +2091 m +1 h +173 h +4 h +4 h +11 h +1 h +4 h +10 h +8 h +4 h +2092 m +4 h +4 h +2093 m +1 h +164 h +4 h +4 h +4 h +4 h +1 h +146 h +57 h +57 h +4 h +4 h +10 h +1128 m +172 h +2094 m +2095 m +10 h +2096 m +276 h +10 h +266 h +124 h +56 h +4 h +4 h +1 h +704 m +4 h +1 h +124 h +2097 m +10 h +10 h +25 h +4 h +4 h +103 m +114 h +10 h +10 h +2098 m +4 h +2099 m +2100 m +10 h +359 h +1 h +2101 m +22 h +143 h +1 h +4 h +4 h +1293 m +1 h +1 h +4 h +4 h +2102 m +262 h +57 h +192 h +172 h +4 h +1 h +2103 m +172 h +258 h +4 h +10 h +2104 m +4 h +4 h +3 h +4 h +250 h +2105 m +1 h +1 h +1 h +2106 m +144 h +1 h +112 h +297 h +2107 m +196 h +4 h +1 h +2108 m +4 h +83 h +1016 h +4 h +4 h +1 h +1 h +1 h +1 h +4 h +1766 m +2109 m +1 h +4 h +541 h +1 h +55 h +2110 m +185 h +105 h +10 h +31 h +2111 m +83 h +1 h +4 h +4 h +1089 m +4 h +2112 m +2113 m +4 h +2114 m +4 h +10 h +2115 m +4 h +4 h +10 h +1 h +2116 m +119 h +2117 m +4 h +4 h +2118 m +10 h +10 h +1 h +11 h +2119 m +4 h +2120 m +4 h 
+10 h +4 h +4 h +2121 m +1 h +1 h +4 h +74 h +2122 m +2123 m +79 h +4 h +2124 m +4 h +4 h +172 h +264 m +4 h +1 h +10 h +1 h +109 h +2125 m +4 h +1 h +258 h +4 h +196 h +4 h +10 h +4 h +1 h +10 h +104 h +4 h +1 h +82 h +1 h +4 h +2126 m +4 h +125 h +2127 m +2128 m +2129 m +172 h +135 h +2130 m +224 h +4 h +1 h +113 h +1 h +4 h +4 h +10 h +4 h +2131 m +10 h +10 h +55 h +2132 m +1 h +2133 m +4 h +2134 m +1 h +4 h +2135 m +146 h +110 h +2136 m +25 h +266 h +92 h +4 h +10 h +59 h +2137 m +10 h +2138 m +4 h +2139 m +2140 m +79 h +2141 m +4 h +4 h +106 h +4 h +2142 m +109 h +10 h +939 h +10 h +2143 m +4 h +41 h +4 h +4 h +125 h +1957 m +4 h +4 h +383 h +2144 m +842 m +1 h +265 h +10 h +1 h +10 h +2145 m +2146 m +1 h +59 h +1 h +2147 m +4 h +4 h +4 h +1 h +74 h +4 h +4 h +1 h +156 h +4 h +55 h +4 h +83 h +2148 m +1 h +2149 m +2150 m +172 h +2151 m +10 h +4 h +4 h +2152 m +1 h +265 h +2153 m +2154 m +4 h +59 h +1 h +1 h +2155 m +2156 m +11 h +4 h +10 h +169 h +10 h +4 h +4 h +4 h +1137 h +1 h +2157 m +4 h +1137 h +181 h +4 h +10 h +59 h +1 h +4 h +4 h +10 h +1 h +4 h +1 h +770 m +4 h +2158 m +4 h +104 h +55 h +1 h +258 h +4 h +2159 m +2160 m +4 h +10 h +4 h +2161 m +185 h +10 h +4 h +4 h +1 h +28 h +371 h +224 h +4 h +4 h +1 h +119 h +358 h +10 h +74 h +2162 m +10 h +4 h +11 h +10 h +10 h +332 h +57 h +238 h +238 h +2163 m +4 h +4 h +4 h +4 h +1 h +10 h +1 h +4 h +2164 m +10 h +4 h +2165 m +938 m +2166 m +4 h +11 h +4 h +1 h +41 h +256 m +224 h +4 h +2167 m +4 h +2168 m +2169 m +1 h +1 h +1796 h +4 h +2170 m +2171 m +4 h +4 h +4 h +2172 m +1 h +1 h +4 h +2173 m +4 h +307 h +2174 m +1137 h +1642 m +2175 m +10 h +1 h +196 h +25 h +361 m +10 h +4 h +2176 m +2177 m +36 h +2178 m +11 h +2179 m +11 h +2180 m +4 h +4 h +94 h +447 h +4 h +4 h +2181 m +45 h +2182 m +11 h +2183 m +4 h +1 h +31 h +59 h +10 h +124 h +4 h +1 h +196 h +4 h +1635 m +146 h +1309 h +2184 m +1 h +1535 h +2185 m +174 h +11 h +1 h +4 h +1 h +1 h +270 h +2186 m +82 h +10 h +1 h +84 m +403 h +2187 m +2188 m 
+2189 m +1 h +10 h +704 m +4 h +4 h +11 h +2190 m +2191 m +238 h +10 h +4 h +195 h +4 h +3 h +258 h +109 h +2192 m +4 h +2193 m +1 h +2194 m +4 h +1 h +4 h +4 h +536 h +2195 m +2196 m +4 h +4 h +10 h +57 h +57 h +2197 m +2198 m +10 h +57 h +1 h +10 h +69 h +2199 m +31 h +10 h +2200 m +2201 m +1 h +2202 m +1 h +27 h +1 h +976 h +1 h +114 h +279 h +82 h +4 h +1 h +2203 m +10 h +2204 m +1 h +2205 m +10 h +109 h +74 h +4 h +57 h +4 h +104 h +10 h +1 h +4 h +1 h +1559 m +4 h +4 h +1 h +2206 m +1 h +332 h +2207 m +224 h +2208 m +4 h +1 h +358 h +3 h +123 h +185 h +2209 m +2210 m +1 h +174 h +2211 m +10 h +2212 m +1 h +1 h +4 h +2213 m +1137 h +1 h +4 h +41 h +4 h +4 h +4 h +2214 m +10 h +186 h +2215 m +1 h +10 h +2216 m +109 h +1 h +10 h +2217 m +4 h +59 h +13 h +2218 m +1 h +2219 m +1 h +57 h +109 h +10 h +119 h +10 h +1 h +941 m +2148 m +1 h +1 h +4 h +31 h +443 h +10 h +109 h +2220 m +10 h +10 h +2221 m +82 h +2222 m +4 h +4 h +258 h +2223 m +97 h +4 h +3 h +1 h +1 h +2224 m +4 h +10 h +1 h +10 h +1 h +1 h +1 h +31 h +258 h +2225 m +174 h +1 h +10 h +4 h +1 h +4 h +135 h +1 h +2226 m +2227 m +84 m +195 h +1 h +4 h +55 h +4 h +10 h +25 h +119 h +4 h +10 h +64 h +2228 m +2229 m +1 h +4 h +4 h +2172 m +757 h +4 h +4 h +4 h +10 h +2230 m +262 h +2231 m +11 h +46 m +10 h +2232 m +4 h +3 h +276 h +41 h +4 h +114 h +1 h +1 h +4 h +1 h +383 h +190 h +25 h +1 h +1 h +1 h +1981 m +587 m +97 h +110 h +109 h +10 h +2233 m +10 h +4 h +403 h +2234 m +278 h +4 h +4 h +2235 m +146 h +2236 m +1 h +1 h +1 h +10 h +397 m +11 h +4 h +41 h +10 h +367 h +1 h +4 h +2237 m +1 h +196 h +1 h +10 h +250 h +1 h +82 h +65 h +3 h +4 h +83 h +1 h +403 h +4 h +1 h +59 h +2238 m +10 h +25 h +1 h +4 h +4 h +1 h +1 h +2239 m +2240 m +267 m +10 h +1 h +4 h +297 h +8 h +1 h +4 h +65 h +229 h +64 h +692 h +1 h +2241 m +2242 m +10 h +4 h +4 h +2243 m +10 h +2244 m +1 h +2245 m +258 h +4 h +569 h +1 h +192 h +10 h +4 h +195 h +31 h +1 h +4 h +1 h +2246 m +11 h +10 h +1 h +4 h +2247 m +2248 m +4 h +4 h +2249 
m +1 h +2250 m +10 h +135 h +2251 m +4 h +4 h +11 h +2252 m +2253 m +10 h +4 h +4 h +143 h +2254 m +57 h +1 h +124 h +13 h +1 h +3 h +447 h +11 h +1 h +41 h +10 h +1 h +4 h +2255 m +1 h +1 h +2256 m +4 h +11 h +73 h +97 h +172 h +4 h +4 h +4 h +11 h +332 h +1403 h +2257 m +2258 m +1024 m +83 h +109 h +368 h +1 h +289 h +69 h +11 h +109 h +2259 m +113 h +2260 m +11 h +1 h +4 h +4 h +4 h +297 h +976 h +10 h +1 h +10 h +59 h +82 h +4 h +1 h +4 h +190 h +2261 m +4 h +4 h +4 h +1868 m +2262 m +4 h +1 h +8 h +4 h +258 h +2263 m +2264 m +10 h +4 h +4 h +2265 m +4 h +4 h +11 h +124 h +10 h +10 h +2266 m +2267 m +1 h +10 h +10 h +1 h +8 h +3 h +230 h +196 h +4 h +10 h +2268 m +2269 m +4 h +2270 m +4 h +4 h +4 h +10 h +2271 m +1 h +4 h +10 h +41 h +10 h +10 h +4 h +4 h +10 h +125 h +322 m +4 h +104 h +2272 m +2273 m +65 h +74 h +2274 m +1 h +4 h +2275 m +2276 m +2277 m +11 h +4 h +57 h +11 h +1 h +10 h +10 h +4 h +10 h +1406 h +4 h +28 h +1 h +10 h +2278 m +1938 m +109 h +2279 m +2280 m +2281 m +10 h +1 h +4 h +2282 m +1 h +4 h +36 h +2283 m +1 h +4 h +2284 m +91 h +4 h +2285 m +4 h +1 h +1 h +11 h +1 h +10 h +2286 m +4 h +359 h +2287 m +97 h +10 h +1 h +4 h +1 h +2288 m +582 m +1027 h +4 h +4 h +250 h +2289 m +164 h +4 h +250 h +2290 m +2291 m +1 h +2292 m +10 h +1 h +297 h +1 h +1 h +1 h +4 h +2293 m +109 h +4 h +4 h +4 h +2294 m +10 h +83 h +2295 m +2257 m +4 h +10 h +3 h +82 h +2296 m +4 h +2297 m +11 h +3 h +279 h +4 h +3 h +65 h +4 h +10 h +4 h +123 h +2298 m +1 h +4 h +4 h +167 h +4 h +4 h +1 h +4 h +157 h +2299 m +4 h +83 h +10 h +4 h +10 h +2300 m +74 h +11 h +295 h +1 h +4 h +1 h +10 h +2301 m +10 h +1 h +4 h +1 h +1 h +1 h +4 h +10 h +1 h +1 h +4 h +56 h +10 h +4 h +386 h +4 h +353 h +1 h +10 h +1 h +4 h +2302 m +1 h +307 h +4 h +2303 m +1 h +2124 m +10 h +1985 m +10 h +10 h +4 h +4 h +2304 m +124 h +359 h +2305 m +10 h +59 h +4 h +119 h +139 h +692 h +4 h +1 h +2306 m +10 h +4 h +4 h +4 h +2307 m +10 h +278 h +1 h +10 h +332 h +31 h +97 h +1 h +4 h +10 h +1016 h 
+82 h +114 h +4 h +10 h +276 h +4 h +307 h +1 h +2308 m +4 h +4 h +10 h +170 h +1 h +1 h +4 h +4 h +11 h +332 h +83 h +4 h +1 h +2309 m +10 h +59 h +74 h +2310 m +10 h +1220 m +2311 m +4 h +1 h +25 h +2312 m +4 h +2313 m +4 h +4 h +2314 m +11 h +1 h +4 h +2315 m +2316 m +4 h +241 h +4 h +1 h +2317 m +79 h +297 h +4 h +2318 m +2319 m +11 h +1 h +4 h +2320 m +10 h +4 h +10 h +2321 m +2322 m +1 h +2323 m +1 h +11 h +2324 m +1 h +124 h +4 h +10 h +79 h +4 h +2325 m +65 h +2326 m +4 h +2327 m +718 h +2328 m +2329 m +57 h +2330 m +4 h +10 h +2331 m +299 h +4 h +477 m +94 h +11 h +10 h +45 h +10 h +976 h +2332 m +109 h +2333 m +10 h +11 h +3 h +2334 m +4 h +4 h +1 h +147 h +109 h +41 h +11 h +10 h +4 h +2335 m +2336 m +4 h +4 h +11 h +2337 m +2338 m +94 h +13 h +1 h +1 h +1 h +4 h +4 h +195 h +1261 h +10 h +10 h +1 h +1 h +2339 m +11 h +4 h +4 h +10 h +4 h +1 h +4 h +1 h +169 h +2340 m +94 h +4 h +82 h +2341 m +536 h +41 h +274 h +184 m +11 h +1 h +10 h +2342 m +11 h +2343 m +10 h +93 m +3 h +4 h +10 h +83 h +2344 m +25 h +11 h +2345 m +1 h +2346 m +2347 m +4 h +4 h +135 h +2348 m +4 h +1 h +4 h +2349 m +4 h +10 h +4 h +10 h +802 m +10 h +1 h +12 h +10 h +65 h +4 h +2350 m +4 h +2351 m +10 h +11 h +278 h +10 h +2352 m +1 h +4 h +10 h +174 h +2353 m +2354 m +238 h +1766 m +64 h +1 h +4 h +2355 m +4 h +2356 m +1 h +2357 m +2358 m +857 h +368 h +434 h +10 h +11 h +65 h +1 h +241 h +4 h +1 h +3 h +2359 m +1 h +8 h +12 h +779 m +4 h +10 h +94 h +1 h +10 h +2360 m +25 h +1 h +1 h +2361 m +2362 m +2363 m +4 h +4 h +412 m +93 m +1 h +2364 m +1 h +2365 m +1 h +10 h +4 h +1 h +4 h +31 h +4 h +2366 m +82 h +10 h +2367 m +124 h +2368 m +22 h +238 h +185 h +1 h +10 h +4 h +1627 m +10 h +10 h +1 h +3 h +4 h +4 h +4 h +4 h +172 h +2369 m +4 h +4 h +2370 m +10 h +1 h +1 h +2371 m +10 h +4 h +4 h +2372 m +2373 m +4 h +1 h +2374 m +1 h +3 h +1 h +1 h +11 h +156 h +10 h +4 h +1 h +204 h +146 h +857 h +10 h +3 h +4 h +1 h +4 h +74 h +59 h +59 h +4 h +1 h +4 h +25 h +2375 m +196 h +1 h +4 h 
+4 h +41 h +2376 m +330 h +11 h +1030 h +1 h +2377 m +104 h +10 h +2378 m +4 h +10 h +4 h +4 h +1 h +2379 m +4 h +172 h +4 h +1 h +1 h +4 h +4 h +4 h +4 h +12 h +167 h +2380 m +2381 m +2382 m +22 h +4 h +2383 m +1 h +1 h +1 h +2384 m +4 h +1 h +125 h +2385 m +2386 m +185 h +1 h +195 h +10 h +2387 m +262 h +1 h +1 h +1 h +2388 m +2389 m +4 h +1 h +2390 m +10 h +4 h +10 h +1 h +10 h +1 h +2391 m +11 h +4 h +146 h +11 h +119 h +4 h +10 h +2392 m +4 h +94 h +146 h +1 h +2393 m +4 h +2394 m +2395 m +2396 m +2397 m +192 h +4 h +1 h +2398 m +1 h +1 h +4 h +4 h +41 h +65 h +730 m +2399 m +2400 m +1250 h +2401 m +135 h +403 h +1 h +1 h +1 h +10 h +3 h +2402 m +2403 m +1128 m +11 h +69 h +196 h +110 h +1952 m +4 h +1 h +1 h +1 h +2404 m +4 h +2405 m +10 h +10 h +1 h +164 h +110 h +2406 m +83 h +10 h +2407 m +276 h +1 h +1 h +11 h +351 m +125 h +192 h +196 h +996 m +2408 m +2409 m +4 h +3 h +278 h +434 h +2410 m +4 h +4 h +10 h +1 h +4 h +10 h +4 h +2411 m +2412 m +2413 m +4 h +1 h +1 h +2414 m +258 h +83 h +204 h +1 h +82 h +2415 m +74 h +124 h +10 h +1 h +97 h +2416 m +2417 m +4 h +520 h +583 m +1 h +4 h +1 h +1 h +4 h +2418 m +4 h +59 h +2419 m +4 h +2420 m +2421 m +1 h +1 h +97 h +2422 m +2172 h +2423 m +79 h +4 h +2424 m +82 h +2425 m +41 h +10 h +2426 m +1 h +1 h +1 h +2427 m +630 m +124 h +10 h +2428 m +11 h +2429 m +1 h +4 h +2430 m +181 h +45 h +10 h +2431 m +22 h +10 h +1 h +1 h +11 h +4 h +1 h +2432 m +83 h +10 h +2433 m +4 h +4 h +4 h +10 h +1083 m +1 h +195 h +2434 m +2435 m +4 h +147 h +192 h +4 h +4 h +10 h +10 h +1 h +2436 m +1 h +1 h +41 h +4 h +1 h +2437 m +83 h +2438 m +1089 m +4 h +2439 m +4 h +4 h +10 h +2440 m +4 h +3 h +11 h +2441 m +4 h +170 h +1 h +109 h +91 h +31 h +4 h +65 h +2442 m +87 m +285 m +2443 m +2444 m +4 h +4 h +146 h +10 h +46 m +737 h +170 h +1 h +146 h +10 h +124 h +10 h +10 h +3 h +1027 h +10 h +2445 m +74 h +135 h +3 h +2446 m +4 h +1 h +1 h +4 h +808 m +10 h +2041 m +1481 m +4 h +83 h +83 h +2447 m +4 h +1 h +1 h +4 h +1 h +2448 m 
+10 h +57 h +2449 m +10 h +1 h +282 h +3 h +4 h +74 h +2450 m +4 h +1 h +167 h +25 h +1880 m +604 m +2451 m +2452 m +4 h +41 h +2453 m +3 h +147 h +83 h +4 h +4 h +2454 m +169 h +10 h +2455 m +1 h +55 h +11 h +1884 m +1 h +4 h +190 h +4 h +4 h +82 h +1 h +2456 m +3 h +4 h +2457 m +10 h +295 h +1 h +65 h +4 h +2458 m +258 h +4 h +4 h +109 h +2459 m +1 h +45 h +2460 m +4 h +1 h +2461 m +195 h +10 h +1 h +4 h +4 h +1 h +2462 m +4 h +25 h +2463 m +1 h +2464 m +4 h +2465 m +1448 m +2466 m +4 h +1 h +2467 m +1 h +1 h +4 h +2468 m +4 h +195 h +1 h +1650 m +1 h +1137 h +1 h +10 h +1 h +10 h +10 h +4 h +1 h +4 h +4 h +1 h +4 h +4 h +10 h +1 h +2469 m +4 h +4 h +581 m +195 h +2470 m +4 h +10 h +2471 m +82 h +2472 m +4 h +278 h +25 h +911 m +36 h +1 h +2473 m +2474 m +4 h +4 h +757 h +1 h +25 h +4 h +1 h +184 m +41 h +1 h +11 h +11 h +129 h +1 h +1 h +4 h +794 m +10 h +2475 m +4 h +2476 m +83 h +4 h +805 m +1 h +359 h +2477 m +2478 m +10 h +2479 m +4 h +4 h +2480 m +10 h +2481 m +59 h +4 h +10 h +4 h +10 h +3 h +11 h +10 h +3 h +1 h +2482 m +1 h +4 h +195 h +4 h +10 h +2483 m +2484 m +10 h +41 h +109 h +10 h +4 h +1 h +13 h +97 h +94 h +4 h +13 h +4 h +10 h +4 h +976 h +4 h +4 h +1 h +1 h +4 h +4 h +4 h +1 h +538 h +2485 m +109 h +10 h +10 h +10 h +1 h +4 h +79 h +2486 m +1 h +204 h +144 h +3 h +2487 m +10 h +31 h +1 h +92 h +10 h +10 h +59 h +135 h +4 h +65 h +45 h +1 h +4 h +4 h +65 h +28 h +4 h +1 h +10 h +10 h +4 h +1 h +140 h +2488 m +1205 m +10 h +82 h +156 h +3 h +1646 m +935 h +1 h +1 h +10 h +481 m +1 h +2489 m +2490 m +10 h +1 h +1 h +4 h +4 h +4 h +1 h +2491 m +4 h +109 h +10 h +31 h +4 h +2492 m +1 h +1 h +805 m +4 h +4 h +2493 m +3 h +4 h +4 h +2494 m +4 h +2444 m +55 h +31 h +2495 m +2496 m +1 h +129 h +11 h +4 h +2497 m +2498 m +4 h +4 h +4 h +4 h +4 h +12 h +4 h +1 h +2499 m +258 h +4 h +4 h +156 h +1766 m +295 h +258 h +55 h +82 h +4 h +79 h +4 h +4 h +195 h +2500 m +2501 m +2502 m +258 h +4 h +2503 m +4 h +4 h +124 h +4 h +109 h +2504 m +1 h +1308 m +170 h 
+4 h +1 h +4 h +164 h +4 h +4 h +1 h +1250 h +2505 m +74 h +4 h +31 h +109 h +10 h +31 h +4 h +2506 m +10 h +2507 m +147 h +10 h +1 h +2508 m +10 h +10 h +1 h +2509 m +170 h +195 h +109 h +4 h +1 h +1 h +2510 m +4 h +4 h +4 h +172 h +2511 m +4 h +1 h +258 h +1030 h +270 h +79 h +41 h +2512 m +109 h +4 h +1 h +110 h +4 h +73 h +10 h +57 h +41 h +10 h +4 h +4 h +2513 m +2514 m +4 h +56 h +1 h +2515 m +4 h +11 h +4 h +1 h +1 h +82 h +1 h +10 h +2516 m +1327 m +4 h +2517 m +1 h +55 h +1 h +1 h +83 h +2518 m +4 h +11 h +4 h +2519 m +4 h +1 h +10 h +10 h +4 h +74 h +1 h +11 h +1 h +1 h +1 h +2520 m +25 h +1 h +2521 m +2522 m +2523 m +55 h +1 h +2524 m +10 h +1 h +506 m +2525 m +82 h +4 h +10 h +1 h +57 h +4 h +10 h +2526 m +4 h +1 h +2527 m +114 h +10 h +10 h +10 h +2528 m +1 h +41 h +10 h +2529 m +119 h +4 h +10 h +1 h +1 h +1504 m +1 h +1738 m +10 h +2530 m +2531 m +55 h +4 h +4 h +1 h +266 h +10 h +1 h +4 h +2532 m +4 h +4 h +4 h +1359 h +4 h +4 h +4 h +195 h +4 h +181 h +4 h +4 h +119 h +4 h +11 h +57 h +10 h +1 h +1 h +332 h +11 h +4 h +408 m +57 h +45 h +1 h +1 h +4 h +124 h +3 h +4 h +4 h +10 h +2533 m +22 h +4 h +1 h +1 h +13 h +11 h +1 h +10 h +4 h +319 m +1 h +10 h +135 h +4 h +1 h +10 h +2534 m +1184 m +11 h +4 h +10 h +1 h +1 h +250 h +4 h +2535 m +1 h +110 h +4 h +2536 m +2537 m +2538 m +170 h +4 h +2539 m +4 h +1 h +1 h +172 h +4 h +569 h +2540 m +4 h +4 h +319 m +4 h +1 h +368 h +1 h +82 h +2541 m +4 h +3 h +1 h +143 h +83 h +4 h +10 h +2542 m +1 h +2543 m +4 h +1 h +10 h +10 h +4 h +4 h +1137 h +2544 m +11 h +4 h +2545 m +2546 m +2547 m +4 h +2548 m +82 h +8 h +4 h +83 h +59 h +10 h +10 h +2549 m +1 h +4 h +1 h +10 h +45 h +31 h +2550 m +1 h +4 h +11 h +1 h +1 h +4 h +25 h +4 h +1 h +1454 m +22 h +266 h +2551 m +1 h +2552 m +2553 m +2554 m +10 h +135 h +1 h +278 h +97 h +1 h +4 h +2555 m +113 h +493 h +196 h +59 h +10 h +195 h +563 m +109 h +10 h +10 h +794 m +2556 m +2557 m +430 m +425 m +41 h +4 h +4 h +4 h +2558 m +10 h +258 h +4 h +4 h +2559 m +4 h +1 
h +4 h +11 h +2560 m +1 h +976 h +578 m +12 h +1780 h +2561 m +83 h +276 h +1 h +2562 m +10 h +79 h +28 h +4 h +2563 m +2564 m +4 h +10 h +10 h +94 h +143 h +1 h +2565 m +4 h +11 h +2566 m +4 h +56 h +11 h +1 h +2567 m +1016 h +2568 m +4 h +332 h +1 h +77 h +4 h +11 h +4 h +82 h +4 h +2569 m +238 h +4 h +1 h +4 h +4 h +4 h +278 h +11 h +4 h +1619 m +1 h +1 h +4 h +10 h +3 h +2570 m +2571 m +195 h +4 h +4 h +1 h +10 h +2572 m +119 h +1 h +1 h +1 h +31 h +12 h +4 h +2573 m +2574 m +10 h +10 h +4 h +65 h +276 h +2575 m +124 h +1 h +57 h +31 h +4 h +4 h +4 h +10 h +2576 m +41 h +2577 m +46 m +125 h +2578 m +1939 m +2579 m +2580 m +4 h +25 h +2581 m +2582 m +10 h +10 h +124 h +2583 m +3 h +164 h +10 h +4 h +2584 m +4 h +1 h +10 h +31 h +33 m +2585 m +97 h +4 h +4 h +4 h +808 m +4 h +2586 m +4 h +4 h +4 h +4 h +2587 m +4 h +1 h +2588 m +1 h +4 h +912 m +386 h +4 h +92 h +1 h +4 h +1 h +167 h +3 h +1 h +2589 m +1 h +2590 m +4 h +54 m +97 h +4 h +1 h +10 h +2591 m +74 h +10 h +1 h +2592 m +10 h +4 h +1 h +10 h +4 h +1 h +2593 m +704 h +4 h +57 h +2594 m +2265 m +2595 m +2596 m +109 h +104 h +2597 m +1 h +1 h +1 h +2598 m +31 h +1 h +1 h +4 h +1 h +4 h +146 h +57 h +4 h +2599 m +13 h +10 h +69 h +92 h +1 h +2600 m +4 h +139 h +4 h +4 h +4 h +57 h +10 h +4 h +279 h +2601 m +2602 m +1 h +1 h +11 h +10 h +4 h +2603 m +10 h +332 h +11 h +4 h +2604 m +2605 m +1 h +4 h +1 h +2606 m +4 h +2607 m +4 h +147 h +2608 m +295 h +1 h +2609 m +10 h +4 h +10 h +4 h +4 h +104 h +13 h +1 h +10 h +56 h +10 h +1 h +1 h +2610 m +1 h +2611 m +36 h +2612 m +1 h +59 h +4 h +4 h +4 h +4 h +10 h +195 h +297 h +4 h +125 h +10 h +1 h +2613 m +4 h +692 h +4 h +2614 m +1 h +114 h +1 h +265 h +1968 m +124 h +4 h +2615 m +358 h +124 h +4 h +2616 m +2617 m +2618 m +190 h +11 h +2619 m +316 m +1 h +2620 m +2621 m +10 h +1 h +10 h +10 h +913 m +11 h +262 h +25 h +2622 m +2623 m +25 h +139 h +10 h +83 h +169 h +109 h +1 h +1 h +4 h +1 h +1 h +4 h +146 h +377 m +143 h +10 h +2624 m +2625 m +2626 m +2627 m 
+1751 m +2628 m +103 m +140 h +4 h +31 h +74 h +10 h +83 h +1 h +196 h +1 h +4 h +1508 m +2629 m +10 h +10 h +238 h +10 h +229 h +11 h +1 h +57 h +4 h +57 h +4 h +2630 m +10 h +2631 m +4 h +4 h +13 h +2632 m +2633 m +386 h +41 h +10 h +104 h +125 h +1 h +2047 m +279 h +4 h +4 h +10 h +195 h +2634 m +1 h +4 h +4 h +11 h +109 h +173 h +10 h +4 h +4 h +10 h +12 h +2635 m +1 h +1 h +3 h +229 h +11 h +2636 m +1261 h +59 h +2637 m +2638 m +2639 m +92 h +1 h +4 h +4 h +4 h +258 h +1 h +1 h +4 h +10 h +146 h +2640 m +1 h +1 h +11 h +278 h +27 h +4 h +1089 m +41 h +14 m +10 h +1 h +230 h +74 h +2641 m +4 h +2642 m +4 h +10 h +2643 m +1 h +1 h +1 h +10 h +158 h +10 h +2644 m +1 h +25 h +4 h +10 h +10 h +1 h +73 h +1 h +113 h +2645 m +359 h +36 h +2646 m +10 h +1406 h +1 h +4 h +1 h +4 h +1 h +10 h +2647 m +2648 m +2649 m +250 h +2650 m +1 h +4 h +2651 m +2652 m +2653 m +297 h +10 h +13 h +1535 h +2654 m +10 h +4 h +1 h +10 h +4 h +2655 m +4 h +219 m +1 h +10 h +4 h +108 h +1 h +11 h +1 h +2656 m +1 h +140 h +2657 m +1 h +408 m +119 h +1 h +31 h +59 h +4 h +447 h +1 h +1 h +114 h +45 h +2658 m +82 h +31 h +4 h +4 h +2659 m +4 h +2660 m +94 h +1 h +4 h +4 h +2661 m +10 h +250 h +10 h +4 h +1003 h +2662 m +2663 m +28 h +79 h +4 h +368 h +10 h +601 h +1 h +4 h +10 h +1 h +2664 m +1 h +1322 m +169 h +4 h +4 h +1 h +83 h +4 h +170 h +10 h +10 h +10 h +65 h +10 h +4 h +1 h +2665 m +2666 m +10 h +10 h +4 h +1 h +4 h +2667 m +299 h +10 h +468 m +2668 m +10 h +250 h +2669 m +1 h +31 h +169 h +2670 m +4 h +11 h +2671 m +976 h +10 h +110 h +10 h +2672 m +4 h +2673 m +4 h +330 h +4 h +4 h +4 h +11 h +1 h +2674 m +2675 m +10 h +1493 m +92 h +10 h +11 h +1981 m +11 h +1 h +4 h +138 h +10 h +4 h +4 h +4 h +2676 m +10 h +109 h +1 h +118 h +83 h +4 h +258 h +4 h +4 h +10 h +1 h +59 h +1 h +1 h +2677 m +10 h +4 h +10 h +2678 m +4 h +114 h +1 h +2679 m +1045 m +2680 m +1 h +4 h +10 h +33 m +2681 m +2682 m +4 h +1482 m +2683 m +238 h +10 h +1120 m +10 h +1 h +2684 m +2685 m +195 h +22 h +4 h 
+124 h +36 h +4 h +10 h +2686 m +55 h +92 h +2687 m +4 h +125 h +4 h +2688 m +4 h +12 h +124 h +2689 m +4 h +57 h +4 h +4 h +3 h +1780 h +1 h +4 h +10 h +10 h +124 h +2690 m +10 h +2691 m +10 h +2692 m +4 h +4 h +488 m +119 h +322 m +2693 m +2694 m +3 h +10 h +144 h +123 h +4 h +1 h +10 h +2695 m +399 h +1403 h +3 h +10 h +1 h +4 h +4 h +1 h +1 h +10 h +692 h +1 h +31 h +1 h +4 h +1 h +2696 m +2697 m +55 h +1 h +195 h +2698 m +1 h +11 h +4 h +4 h +2699 m +477 m +2700 m +65 h +10 h +332 h +10 h +83 h +10 h +2701 m +976 h +238 h +4 h +1 h +2702 m +3 h +1 h +1 h +22 h +1 h +2703 m +2494 m +2704 m +4 h +2705 m +2706 m +2707 m +10 h +125 h +64 h +25 h +125 h +464 h +125 h +1 h +1 h +4 h +40 h +1 h +2708 m +2709 m +1 h +82 h +57 h +1 h +1 h +2710 m +182 m +1 h +386 h +377 m +1 h +2711 m +4 h +10 h +1 h +250 h +4 h +1 h +4 h +1 h +79 h +2712 m +2713 m +2714 m +4 h +4 h +2715 m +1685 m +10 h +4 h +10 h +10 h +2716 m +4 h +4 h +10 h +1 h +2717 m +4 h +3 h +1 h +82 h +10 h +1 h +1 h +41 h +10 h +41 h +10 h +2718 m +4 h +1 h +156 h +2719 m +4 h +1 h +27 h +1 h +2720 m +297 h +10 h +10 h +1184 m +11 h +10 h +2721 m +2722 m +4 h +4 h +2723 m +1 h +65 h +10 h +1 h +1 h +4 h +4 h +4 h +125 h +4 h +10 h +4 h +1 h +2724 m +383 h +4 h +10 h +2725 m +4 h +92 h +10 h +59 h +1 h +2726 m +4 h +11 h +10 h +4 h +1 h +4 h +4 h +4 h +10 h +692 h +1 h +4 h +2727 m +1 h +41 h +1 h +4 h +82 h +297 h +55 h +10 h +264 m +10 h +2728 m +10 h +4 h +1 h +4 h +10 h +4 h +359 h +2729 m +36 h +79 h +185 h +4 h +2730 m +4 h +266 h +94 h +2731 m +1 h +4 h +3 h +4 h +4 h +2732 m +2733 m +4 h +4 h +135 h +45 h +2734 m +1 h +2735 m +4 h +2736 m +123 h +488 m +4 h +10 h +1 h +4 h +27 h +1 h +258 h +2737 m +2738 m +10 h +55 h +83 h +4 h +10 h +2739 m +3 h +10 h +4 h +11 h +4 h +10 h +10 h +425 m +10 h +4 h +4 h +1016 h +687 h +2740 m +1249 m +4 h +4 h +2741 m +2742 m +1 h +2743 m +2744 m +146 h +2745 m +2746 m +4 h +1493 m +1 h +82 h +274 h +238 h +1 h +146 h +10 h +4 h +1 h +4 h +4 h +2747 m +4 h +265 h 
+2748 m +2749 m +2750 m +1 h +1 h +1780 h +4 h +2751 m +2752 m +4 h +147 h +4 h +2753 m +2754 m +966 m +10 h +2309 m +4 h +2755 m +10 h +2756 m +4 h +2757 m +1 h +123 h +2758 m +1 h +1 h +1 h +520 h +10 h +10 h +4 h +10 h +2759 m +230 h +1 h +4 h +4 h +10 h +1 h +4 h +4 h +4 h +11 h +4 h +167 h +1 h +10 h +158 h +2760 m +1 h +2688 m +4 h +57 h +2761 m +4 h +74 h +4 h +10 h +4 h +125 h +2762 m +468 m +1 h +2763 m +4 h +4 h +358 h +4 h +83 h +11 h +11 h +10 h +1 h +31 h +59 h +2764 m +2379 m +82 h +2765 m +1 h +2766 m +79 h +82 h +73 h +55 h +1 h +4 h +4 h +59 h +3 h +2767 m +31 h +2768 m +10 h +1 h +2769 m +83 h +10 h +10 h +1 h +2770 m +4 h +196 h +1370 m +185 h +238 h +2771 m +25 h +1 h +4 h +4 h +55 h +109 h +10 h +279 h +1470 h +4 h +10 h +4 h +10 h +1642 m +1 h +4 h +10 h +119 h +3 h +1406 h +114 h +2772 m +1 h +2773 m +10 h +4 h +996 m +124 h +601 h +4 h +4 h +45 h +59 h +11 h +4 h +10 h +4 h +123 h +110 h +2079 m +2774 m +1 h +1 h +1 h +2775 m +1 h +230 h +10 h +10 h +2776 m +2777 m +1 h +1 h +250 h +2778 m +1 h +10 h +2779 m +2780 m +55 h +4 h +2781 m +11 h +119 h +56 h +1 h +125 h +1 h +82 h +1 h +64 h +57 h +103 m +2782 m +169 h +4 h +167 h +108 h +59 h +2783 m +1 h +10 h +10 h +2784 m +10 h +4 h +82 h +2785 m +1 h +109 h +4 h +2786 m +79 h +2787 m +31 h +1 h +1 h +10 h +22 h +10 h +4 h +1 h +4 h +1409 h +4 h +2788 m +2789 m +4 h +1 h +1 h +1 h +10 h +2314 m +1 h +11 h +1 h +4 h +4 h +986 h +2790 m +1308 m +278 h +3 h +124 h +4 h +1 h +10 h +31 h +2791 m +10 h +109 h +1030 h +124 h +4 h +4 h +367 h +2792 m +25 h +135 h +4 h +82 h +2793 m +104 h +10 h +4 h +10 h +2794 m +2795 m +1 h +2374 m +4 h +10 h +1 h +4 h +2796 m +1 h +4 h +3 h +4 h +1 h +1409 h +1 h +1 h +112 h +124 h +4 h +1 h +383 h +2139 m +10 h +1 h +4 h +12 h +4 h +10 h +2797 m +1 h +358 h +1089 m +41 h +2798 m +55 h +2799 m +1299 m +1838 m +10 h +2800 m +1 h +4 h +2801 m +109 h +1 h +10 h +4 h +2802 m +2803 m +144 h +238 h +2804 m +2805 m +2806 m +570 m +10 h +2807 m +74 h +700 m +1 h +125 h +4 
h +10 h +104 h +157 h +10 h +4 h +1 h +2808 m +2809 m +4 h +1 h +12 h +1595 m +2810 m +2811 m +1 h +4 h +64 h +1 h +3 h +65 h +94 h +146 h +2812 m +2813 m +2814 m +4 h +4 h +1 h +10 h +2815 m +4 h +386 h +1 h +1 h +4 h +93 h +2816 m +2817 m +368 h +3 h +10 h +10 h +4 h +278 h +4 h +2818 m +69 h +10 h +11 h +4 h +10 h +2819 m +1 h +4 h +10 h +1 h +443 h +10 h +4 h +2820 m +1089 h +4 h +4 h +1 h +4 h +113 h +2821 m +91 h +2822 m +10 h +1 h +41 h +1 h +4 h +4 h +1 h +64 h +10 h +57 h +443 h +2823 m +2824 m +1 h +10 h +10 h +1 h +11 h +262 h +4 h +4 h +2825 m +2826 m +2827 m +1 h +1 h +1 h +2828 m +123 h +1 h +4 h +2829 m +2830 m +2831 m +1 h +1 h +2832 m +92 h +10 h +1822 m +4 h +10 h +2833 m +2834 m +4 h +538 h +1 h +1 h +1 h +25 h +10 h +656 m +4 h +2835 m +146 h +1 h +2836 m +4 h +1 h +536 h +10 h +10 h +443 h +59 h +4 h +59 h +114 h +92 h +4 h +172 h +4 h +1 h +83 h +2837 m +1 h +4 h +2838 m +11 h +11 h +2839 m +2840 m +2841 m +2842 m +4 h +157 h +1048 m +1 h +1261 h +209 m +258 h +2843 m +10 h +4 h +10 h +1 h +2844 m +4 h +2845 m +4 h +557 m +520 h +170 h +556 m +1 h +2846 m +1 h +31 h +4 h +4 h +1 h +57 h +2847 m +10 h +1 h +1 h +129 h +1 h +2848 m +125 h +4 h +279 h +4 h +2849 m +4 h +10 h +185 h +4 h +10 h +10 h +250 h +2850 m +173 h +11 h +4 h +64 h +2851 m +1261 h +12 h +10 h +509 m +2852 m +82 h +626 m +4 h +59 h +4 h +1 h +4 h +2853 m +2854 m +10 h +10 h +12 h +295 h +4 h +556 m +2855 m +10 h +4 h +10 h +10 h +119 h +2856 m +2857 m +403 h +4 h +1 h +2858 m +2859 m +2860 m +1 h +4 h +4 h +4 h +13 h +2861 m +181 h +10 h +4 h +1 h +57 h +10 h +31 h +4 h +3 h +2862 m +1 h +4 h +4 h +4 h +164 h +4 h +2863 m +2864 m +1 h +4 h +109 h +1 h +2374 m +1 h +10 h +10 h +4 h +4 h +1 h +307 h +25 h +4 h +2865 m +4 h +2866 m +976 h +2867 m +2868 m +195 h +313 m +10 h +4 h +11 h +4 h +4 h +2869 m +169 h +10 h +295 h +1 h +2870 m +10 h +195 h +2871 m +10 h +928 m +3 h +172 h +11 h +403 h +4 h +2872 m +1 h +1 h +1 h +10 h +258 h +74 h +4 h +135 h +2873 m +1 h +172 h +10 h +4 
h +22 h +190 h +2874 m +4 h +167 h +1 h +1 h +10 h +1 h +4 h +1 h +109 h +4 h +143 h +4 h +4 h +4 h +2875 m +190 h +2876 m +83 h +10 h +10 h +2877 m +4 h +13 h +1470 h +1 h +297 h +59 h +10 h +2878 m +25 h +57 h +82 h +359 h +10 h +219 m +1 h +1 h +2879 m +1 h +4 h +2025 m +4 h +4 h +2880 m +4 h +4 h +1 h +1 h +820 m +3 h +1 h +2022 m +2881 m +2882 m +2883 m +93 h +4 h +124 h +11 h +10 h +4 h +10 h +2884 m +250 h +278 h +2885 m +747 m +4 h +11 h +124 h +114 h +4 h +1 h +124 h +11 h +10 h +10 h +1127 m +1 h +10 h +10 h +1 h +4 h +2886 m +36 h +1 h +2887 m +2888 m +13 h +82 h +10 h +11 h +2889 m +1 h +10 h +1 h +10 h +196 h +1 h +4 h +2890 m +10 h +82 h +2891 m +195 h +2892 m +2893 m +1 h +1261 h +1 h +27 h +4 h +10 h +4 h +10 h +1 h +1 h +79 h +59 h +1 h +10 h +1 h +1 h +10 h +4 h +4 h +10 h +31 h +10 h +124 h +939 h +2625 m +1 h +1 h +82 h +4 h +1 h +1 h +4 h +2894 m +158 h +295 h +2895 m +2896 m +2897 m +2898 m +4 h +41 h +4 h +144 h +146 h +4 h +1 h +358 h +2899 m +10 h +11 h +4 h +2900 m +1 h +10 h +2901 m +2902 m +10 h +1 h +4 h +4 h +4 h +10 h +2903 m +4 h +2904 m +4 h +1 h +2905 m +2906 m +4 h +164 h +1 h +359 h +59 h +4 h +10 h +10 h +2907 m +2908 m +386 h +4 h +173 h +4 h +57 h +4 h +2909 m +4 h +4 h +45 h +1 h +22 h +11 h +4 h +41 h +1 h +4 h +1 h +1 h +4 h +4 h +124 h +4 h +1 h +82 h +1 h +1 h +124 h +4 h +2910 m +1 h +10 h +41 h +4 h +11 h +4 h +2911 m +2912 m +27 h +10 h +10 h +57 h +230 h +2913 m +10 h +4 h +10 h +2891 m +10 h +4 h +4 h +2914 m +2915 m +97 h +2916 m +36 h +2917 m +533 m +146 h +266 h +2918 m +123 h +4 h +2919 m +25 h +10 h +307 h +2920 m +4 h +1 h +83 h +2921 m +4 h +10 h +186 h +4 h +1 h +4 h +4 h +4 h +2025 m +2922 m +4 h +4 h +4 h +2923 m +11 h +11 h +4 h +2924 m +10 h +2925 m +2926 m +109 h +116 m +2927 m +1 h +2928 m +238 h +124 h +4 h +2929 m +10 h +41 h +4 h +1 h +4 h +82 h +10 h +2930 m +332 h +10 h +1 h +4 h +82 h +1 h +2931 m +1 h +4 h +4 h +1 h +1 h +4 h +1 h +10 h +169 h +4 h +1 h +2932 m +27 h +59 h +4 h +536 h +3 h +2933 
m +10 h +1 h +10 h +109 h +195 h +569 h +2934 m +59 h +2935 m +10 h +10 h +10 h +33 h +31 h +11 h +4 h +4 h +10 h +83 h +10 h +156 h +4 h +2936 m +888 m +10 h +10 h +10 h +12 h +10 h +1650 m +2937 m +1 h +1 h +258 h +3 h +1 h +2938 m +10 h +1 h +1089 h +10 h +4 h +4 h +1 h +2939 m +4 h +2940 m +79 h +4 h +332 h +10 h +2941 m +2942 m +2943 m +59 h +2944 m +1 h +1 h +4 h +190 h +10 h +10 h +97 h +2945 m +4 h +4 h +59 h +2946 m +150 m +408 h +27 h +1 h +2947 m +2948 m +4 h +97 h +2949 m +173 h +2950 m +2951 m +2794 m +2952 m +2953 m +3 h +4 h +1 h +144 h +10 h +135 h +4 h +1 h +10 h +1 h +2954 m +10 h +2955 m +1 h +2956 m +2957 m +1 h +3 h +1 h +1 h +4 h +92 h +570 m +2245 m +2958 m +10 h +4 h +1 h +109 h +2935 m +4 h +10 h +10 h +1 h +144 h +2004 m +1 h +358 h +124 h +41 h +2959 m +2960 m +4 h +2961 m +124 h +2054 m +4 h +1 h +2962 m +4 h +4 h +11 h +2963 m +2964 m +109 h +4 h +2965 m +4 h +4 h +57 h +2966 m +2967 m +2968 m +11 h +10 h +2969 m +2970 m +110 h +4 h +2971 m +4 h +536 h +2972 m +10 h +4 h +447 h +272 h +59 h +10 h +69 h +10 h +4 h +10 h +2973 m +241 h +4 h +4 h +195 h +4 h +1 h +10 h +1 h +10 h +332 h +1 h +2974 m +10 h +2975 m +2976 m +57 h +1 h +1 h +57 h +10 h +10 h +11 h +139 h +10 h +2977 m +10 h +22 h +11 h +55 h +2978 m +2979 m +2980 m +2981 m +10 h +954 m +1 h +164 h +2982 m +10 h +2983 m +2984 m +4 h +10 h +4 h +1 h +1 h +3 h +25 h +4 h +4 h +4 h +82 h +56 h +59 h +1163 m +2985 m +31 h +2986 m +1 h +56 h +4 h +4 h +11 h +97 h +1 h +10 h +4 h +10 h +57 h +450 m +2987 m +4 h +10 h +2988 m +13 h +4 h +82 h +1 h +181 h +4 h +109 h +10 h +1 h +1 h +59 h +2989 m +2990 m +135 h +1 h +74 h +1 h +125 h +1 h +139 h +4 h +2991 m +109 h +82 h +82 h +4 h +2992 m +1 h +1 h +4 h +10 h +124 h +10 h +1 h +11 h +1 h +11 h +73 h +4 h +4 h +10 h +4 h +2993 m +1 h +1 h +4 h +82 h +74 h +69 h +4 h +11 h +2994 m +4 h +109 h +2995 m +4 h +1 h +2379 m +4 h +10 h +79 h +238 h +10 h +1 h +4 h +10 h +1 h +1478 h +74 h +4 h +185 h +186 h +4 h +4 h +2996 m +4 h +10 h +204 h 
+3 h +10 h +2997 m +2998 m +2999 m +1 h +109 h +28 h +170 h +3000 m +1766 h +4 h +1 h +250 h +4 h +1 h +10 h +3001 m +13 h +367 h +10 h +295 h +1 h +4 h +3002 m +3003 m +11 h +4 h +4 h +250 h +4 h +575 h +10 h +10 h +3004 m +10 h +1 h +4 h +4 h +3005 m +4 h +3006 m +3007 m +1 h +1 h +4 h +1 h +4 h +4 h +170 h +4 h +195 h +857 h +31 h +4 h +3008 m +4 h +27 h +1 h +124 h +4 h +3009 m +10 h +3010 m +1030 h +10 h +57 h +41 h +1 h +10 h +911 m +104 h +10 h +3011 m +313 m +1 h +4 h +3012 m +10 h +4 h +1 h +3013 m +10 h +3014 m +4 h +104 h +3015 m +10 h +3016 m +91 h +4 h +1 h +10 h +195 h +1 h +3 h +4 h +10 h +538 h +12 h +10 h +1 h +3017 m +2272 m +4 h +3018 m +25 h +4 h +1 h +195 h +10 h +1 h +10 h +4 h +4 h +10 h +10 h +1 h +4 h +3019 m +3020 m +313 h +70 m +1 h +4 h +3021 m +12 h +601 h +1 h +1 h +13 h +3022 m +4 h +4 h +10 h +156 h +1 h +3023 m +4 h +4 h +1 h +10 h +10 h +3024 m +41 h +2928 m +295 h +10 h +3025 m +4 h +4 h +3026 m +3 h +4 h +1 h +3027 m +146 h +10 h +3028 m +3029 m +4 h +4 h +4 h +1 h +4 h +1 h +41 h +289 h +4 h +10 h +1 h +10 h +3030 m +10 h +4 h +3031 m +3032 m +4 h +1 h +59 h +4 h +4 h +3033 m +3034 m +41 h +3035 m +4 h +109 h +59 h +45 h +119 h +3036 m +3037 m +109 h +10 h +1 h +3038 m +4 h +10 h +3039 m +10 h +4 h +22 h +123 h +4 h +3040 m +4 h +3041 m +4 h +4 h +3042 m +241 h +10 h +295 h +1 h +1 h +3043 m +1 h +59 h +185 h +190 h +3044 m +1 h +1 h +1 h +10 h +1 h +94 h +1 h +3045 m +10 h +3046 m +59 h +3047 m +57 h +4 h +229 h +4 h +22 h +3048 m +4 h +10 h +3049 m +4 h +82 h +4 h +4 h +3050 m +3051 m +1 h +10 h +279 h +2038 m +3052 m +92 h +3053 m +10 h +59 h +83 h +41 h +4 h +4 h +1 h +4 h +1 h +64 h +147 h +4 h +4 h +4 h +10 h +4 h +4 h +2851 m +25 h +69 h +4 h +1 h +4 h +2607 m +4 h +1 h +278 h +1619 m +25 h +11 h +1 h +1 h +4 h +73 h +10 h +146 h +278 h +1 h +4 h +13 h +3054 m +966 m +56 h +36 h +4 h +1 h +1 h +1 h +3055 m +3056 m +10 h +4 h +1 h +1 h +10 h +1 h +11 h +1796 h +4 h +3057 m +41 h +3058 m +3059 m +4 h +1 h +3060 m +12 h +1 h 
+1 h +3061 m +1 h +55 h +3062 m +10 h +4 h +10 h +158 h +3063 m +82 h +3064 m +11 h +3065 m +1 h +4 h +83 h +3066 m +57 h +10 h +172 h +10 h +3067 m +1861 m +11 h +2582 m +4 h +74 h +3068 m +3069 m +976 h +1 h +4 h +4 h +332 h +4 h +4 h +224 h +10 h +276 h +1 h +4 h +10 h +11 h +479 m +57 h +10 h +4 h +3070 m +368 h +25 h +4 h +129 h +10 h +1 h +10 h +4 h +4 h +10 h +5 m +1 h +4 h +4 h +1 h +4 h +10 h +4 h +1 h +10 h +3071 m +10 h +3072 m +83 h +1116 m +3073 m +1 h +3074 m +11 h +4 h +463 m +10 h +4 h +195 h +59 h +11 h +250 h +3075 m +3076 m +1 h +4 h +170 h +2532 m +1 h +1089 h +3077 m +10 h +4 h +3078 m +3079 m +10 h +4 h +3080 m +11 h +10 h +1 h +4 h +857 h +3081 m +3082 m +4 h +1 h +1 h +1444 m +11 h +3 h +358 h +91 h +3083 m +10 h +3084 m +4 h +1 h +10 h +788 m +1 h +1261 h +4 h +1 h +4 h +10 h +488 h +3085 m +1 h +601 h +4 h +3086 m +10 h +10 h +10 h +1 h +3087 m +74 h +1 h +4 h +1 h +119 h +4 h +3088 m +10 h +69 h +4 h +10 h +125 h +147 h +11 h +114 h +31 h +3089 m +3090 m +368 h +4 h +4 h +4 h +986 h +3091 m +413 m +10 h +307 h +1 h +11 h +10 h +4 h +3092 m +1 h +104 h +11 h +266 h +8 h +4 h +1 h +4 h +1884 m +278 h +3093 m +1359 h +164 h +124 h +45 h +4 h +4 h +4 h +4 h +256 m +1 h +4 h +4 h +4 h +1 h +1 h +3094 m +1 h +1 h +800 m +3095 m +4 h +4 h +1817 m +11 h +1 h +77 h +4 h +59 h +2733 m +1 h +3096 m +3097 m +195 h +4 h +3098 m +536 h +3099 m +640 h +10 h +295 h +3100 m +3101 m +1 h +36 h +196 h +3102 m +109 h +91 h +4 h +4 h +1 h +146 h +4 h +125 h +4 h +4 h +3103 m +4 h +3104 m +11 h +3105 m +57 h +11 h +82 h +169 h +368 h +3106 m +4 h +1 h +4 h +4 h +82 h +82 h +10 h +3107 m +10 h +65 h +83 h +1881 m +358 h +1 h +10 h +83 h +1 h +408 h +57 h +4 h +4 h +3108 m +3109 m +4 h +1 h +583 m +4 h +3110 m +82 h +10 h +59 h +64 h +3111 m +135 h +31 h +146 h +10 h +1790 m +265 h +1 h +3112 m +4 h +3113 m +57 h +10 h +82 h +82 h +3114 m +1 h +109 h +65 h +4 h +10 h +109 h +1 h +3115 m +3116 m +94 h +10 h +82 h +45 h +4 h +640 h +10 h +11 h +3117 m +3118 m +65 
h +10 h +1 h +10 h +3 h +4 h +139 h +3119 m +4 h +10 h +41 h +3120 m +1 h +4 h +4 h +10 h +1822 m +332 h +1 h +3121 m +10 h +3122 m +4 h +4 h +4 h +4 h +4 h +3123 m +10 h +4 h +146 h +3124 m +3125 m +4 h +4 h +4 h +10 h +1 h +3126 m +3127 m +3128 m +4 h +11 h +10 h +146 h +41 h +45 h +1 h +10 h +1 h +3129 m +911 m +10 h +3130 m +1 h +125 h +3131 m +371 h +4 h +36 h +109 h +10 h +3132 m +1 h +1 h +4 h +3133 m +10 h +4 h +3 h +3134 m +10 h +332 h +1 h +4 h +1 h +3135 m +3136 m +10 h +276 h +3137 m +87 m +109 h +3138 m +4 h +1 h +10 h +25 h +167 h +4 h +3139 m +4 h +25 h +3140 m +1 h +25 h +22 h +270 h +3141 m +139 h +3142 m +10 h +27 h +779 m +10 h +4 h +1 h +55 h +1 h +4 h +4 h +27 h +25 h +57 h +274 h +4 h +3143 m +1 h +74 h +184 h +1 h +94 h +11 h +1 h +536 h +1 h +1 h +59 h +1 h +1 h +3144 m +3145 m +10 h +92 h +4 h +11 h +3146 m +1 h +3147 m +3148 m +4 h +1 h +124 h +40 h +1 h +3149 m +3150 m +307 h +10 h +465 m +3151 m +1 h +10 h +4 h +4 h +10 h +110 h +11 h +31 h +10 h +1 h +11 h +59 h +1 h +1 h +3152 m +10 h +4 h +2710 m +4 h +3153 m +10 h +4 h +3154 m +3155 m +4 h +3156 m +4 h +3157 m +10 h +1 h +4 h +10 h +1 h +3158 m +1 h +10 h +10 h +10 h +10 h +1184 m +4 h +3159 m +955 m +3160 m +10 h +4 h +976 h +2688 h +4 h +4 h +1 h +3161 m +109 h +1 h +147 h +4 h +3162 m +3163 m +83 h +434 h +1 h +11 h +4 h +1939 m +3164 m +41 h +59 h +12 h +184 h +143 h +4 h +4 h +1 h +82 h +1 h +4 h +3165 m +2964 m +10 h +4 h +4 h +3166 m +173 h +10 h +10 h +172 h +1 h +109 h +75 m +3167 m +3168 m +10 h +2865 m +4 h +3169 m +3170 m +358 h +368 h +4 h +1 h +1 h +4 h +3171 m +4 h +274 h +3172 m +57 h +1 h +3173 m +3174 m +3175 m +10 h +82 h +31 h +10 h +1 h +185 h +386 h +3176 m +4 h +46 h +2475 m +31 h +125 h +138 h +10 h +147 h +10 h +97 h +4 h +3 h +3177 m +3178 m +13 h +10 h +3179 m +4 h +1 h +266 h +110 h +10 h +123 h +4 h +109 h +4 h +83 h +3180 m +41 h +10 h +33 h +3181 m +10 h +3182 m +1218 m +4 h +4 h +1 h +3183 m +1 h +59 h +1 h +4 h +10 h +4 h +4 h +358 h +4 h +4 h +11 h 
+4 h +4 h +4 h +27 h +4 h +4 h +4 h +3184 m +10 h +1 h +3185 m +4 h +4 h +1 h +8 h +4 h +3186 m +3187 m +4 h +1 h +3188 m +3189 m +229 h +94 h +1880 m +3190 m +1 h +3191 m +3192 m +114 h +1 h +4 h +3193 m +3194 m +4 h +79 h +70 m +73 h +74 h +2851 m +74 h +3195 m +4 h +3196 m +4 h +10 h +1685 m +481 m +97 h +399 h +3197 m +56 h +41 h +1544 m +172 h +3198 m +97 h +94 h +181 h +11 h +1 h +295 h +116 m +4 h +104 h +4 h +3199 m +10 h +4 h +124 h +169 h +93 h +4 h +1 h +3200 m +25 h +3201 m +1 h +4 h +4 h +69 h +4 h +1306 m +1822 m +10 h +3202 m +1 h +172 h +3203 m +25 h +1 h +1 h +3204 m +1 h +4 h +256 m +1 h +4 h +3205 m +172 h +1 h +10 h +181 h +730 m +4 h +3206 m +11 h +2205 m +4 h +1953 m +4 h +4 h +3207 m +4 h +124 h +4 h +1 h +4 h +3208 m +190 h +425 m +1 h +10 h +146 h +4 h +41 h +4 h +147 h +10 h +10 h +3209 m +109 h +4 h +4 h +109 h +83 h +3210 m +3211 m +3212 m +3213 m +4 h +123 h +10 h +3214 m +3215 m +10 h +3216 m +1016 h +4 h +3217 m +1 h +4 h +1 h +1 h +79 h +8 h +4 h +3218 m +10 h +1 h +110 h +4 h +94 h +10 h +3219 m +1493 h +10 h +10 h +3 h +77 h +147 h +4 h +1 h +3220 m +276 h +434 h +3221 m +10 h +297 h +3222 m +11 h +10 h +104 h +11 h +10 h +83 h +3223 m +3224 m +1 h +169 h +4 h +1 h +299 h +3225 m +1642 m +1 h +11 h +4 h +3 h +12 h +4 h +1620 m +3226 m +1403 h +11 h +3 h +114 h +143 h +172 h +1 h +48 h +10 h +10 h +3227 m +10 h +468 h +3228 m +3229 m +3230 m +3231 m +1 h +3232 m +3233 m +4 h +10 h +3234 m +1 h +3235 m +1 h +3236 m +3237 m +10 h +195 h +3238 m +1 h +195 h +4 h +1 h +11 h +1 h +1504 m +4 h +3239 m +1 h +1 h +74 h +82 h +383 h +1 h +1 h +4 h +40 h +11 h +59 h +74 h +3240 m +10 h +1 h +1 h +4 h +4 h +4 h +3241 m +10 h +1 h +2558 m +10 h +4 h +57 h +10 h +1 h +447 h +196 h +3242 m +238 h +4 h +1 h +3 h +3243 m +386 h +11 h +174 h +656 m +569 h +4 h +3244 m +3245 m +10 h +3214 m +10 h +10 h +3246 m +3247 m +1 h +10 h +1 h +443 h +1 h +10 h +55 h +3248 m +1 h +156 h +10 h +4 h +104 h +1357 m +256 h +1 h +1 h +1 h +10 h +1 h +92 h +1 h +509 
m +10 h +808 h +83 h +12 h +13 h +8 h +4 h +4 h +1261 h +4 h +125 h +4 h +82 h +3249 m +4 h +10 h +3250 m +1 h +124 h +986 h +10 h +1 h +4 h +4 h +4 h +4 h +4 h +3251 m +4 h +10 h +4 h +3252 m +4 h +10 h +4 h +1 h +238 h +157 h +1 h +31 h +3253 m +4 h +4 h +4 h +1 h +3254 m +3255 m +3256 m +69 h +4 h +4 h +477 m +3 h +4 h +147 h +82 h +4 h +59 h +1 h +3257 m +1764 m +10 h +4 h +408 h +10 h +3258 m +25 h +196 h +3259 m +3260 m +1321 h +167 h +156 h +1 h +109 h +3261 m +10 h +4 h +3262 m +124 h +3263 m +11 h +11 h +3264 m +4 h +1 h +3265 m +25 h +12 h +94 h +3266 m +1 h +307 h +10 h +1 h +3267 m +3 h +8 h +41 h +10 h +4 h +3268 m +1089 h +3269 m +2522 m +1535 h +3270 m +3271 m +1 h +3272 m +4 h +1 h +3273 m +156 h +3274 m +3275 m +3276 m +3277 m +41 h +4 h +3278 m +1 h +4 h +386 h +10 h +3279 m +3274 m +3280 m +146 h +4 h +3 h +3281 m +10 h +1 h +97 h +59 h +1 h +3282 m +359 h +3 h +4 h +10 h +10 h +73 h +4 h +4 h +4 h +4 h +25 h +3283 m +1 h +109 h +1 h +10 h +31 h +119 h +4 h +278 h +10 h +3284 m +3285 m +1437 m +3286 m +1070 m +4 h +3287 m +10 h +4 h +124 h +4 h +3288 m +3289 m +3290 m +3291 m +3067 m +3292 m +4 h +181 h +3293 m +3294 m +1261 h +3295 m +164 h +4 h +4 h +1 h +27 h +1 h +83 h +64 h +1 h +10 h +1 h +1 h +1 h +238 h +1 h +4 h +1 h +238 h +1 h +4 h +3296 m +4 h +4 h +4 h +83 h +3297 m +11 h +59 h +97 h +258 h +3298 m +143 h +41 h +265 h +1 h +10 h +10 h +97 h +139 h +3299 m +11 h +1 h +59 h +1 h +64 h +1 h +4 h +10 h +11 h +10 h +13 h +10 h +1 h +3300 m +10 h +3301 m +4 h +295 h +10 h +3302 m +1 h +403 h +383 h +4 h +3303 m +1 h +124 h +4 h +4 h +3 h +196 h +3304 m +1 h +4 h +3305 m +25 h +10 h +4 h +4 h +3306 m +1 h +4 h +10 h +97 h +10 h +1 h +4 h +4 h +3307 m +3308 m +174 h +4 h +295 h +1 h +4 h +1 h +10 h +279 h +1030 h +11 h +3309 m +3310 m +4 h +195 h +4 h +27 h +1 h +1 h +10 h +143 h +1 h +3311 m +64 h +1 h +4 h +167 h +4 h +1 h +3312 m +258 h +4 h +3313 m +3314 m +4 h +196 h +3315 m +73 h +190 h +4 h +258 h +368 h +1250 h +276 h +110 h +1 h 
+156 h +4 h +1 h +143 h +129 h +1 h +4 h +3316 m +779 m +11 h +3317 m +10 h +1 h +57 h +4 h +4 h +3318 m +109 h +1 h +1650 m +4 h +124 h +4 h +12 h +2163 m +3319 m +124 h +1 h +3320 m +3321 m +10 h +3322 m +2920 m +25 h +10 h +276 h +4 h +3323 m +119 h +1981 m +3324 m +4 h +3325 m +59 h +262 h +3326 m +10 h +31 h +3327 m +4 h +83 h +3328 m +4 h +869 m +25 h +10 h +3329 m +4 h +10 h +4 h +4 h +4 h +4 h +1 h +1 h +169 h +3330 m +1 h +3331 m +4 h +45 h +4 h +4 h +4 h +143 h +135 h +4 h +3332 m +1 h +1 h +1 h +10 h +3333 m +1 h +4 h +190 h +4 h +4 h +3334 m +3335 m +11 h +10 h +3336 m +10 h +31 h +1 h +990 m +4 h +1 h +4 h +124 h +25 h +4 h +4 h +4 h +69 h +97 h +190 h +3337 m +10 h +195 h +995 m +1 h +10 h +11 h +3338 m +2733 m +3339 m +1 h +230 h +3340 m +57 h +31 h +10 h +1 h +45 h +10 h +278 h +40 h +4 h +3341 m +4 h +3342 m +4 h +1 h +3343 m +3344 m +4 h +22 h +4 h +3345 m +3346 m +114 h +4 h +109 h +1 h +1 h +12 h +4 h +25 h +3347 m +1 h +4 h +3348 m +3349 m +4 h +258 h +10 h +3350 m +4 h +3351 m +3352 m +1 h +1 h +3353 m +4 h +10 h +4 h +1 h +4 h +1 h +4 h +1 h +2025 h +3354 m +4 h +1 h +3355 m +4 h +4 h +4 h +4 h +11 h +1 h +3 h +838 m +1 h +10 h +10 h +74 h +4 h +3356 m +332 h +238 h +4 h +3357 m +1053 m +1250 h +3358 m +4 h +124 h +4 h +3359 m +135 h +285 m +59 h +4 h +4 h +11 h +1 h +1 h +31 h +97 h +3360 m +11 h +4 h +3361 m +3362 m +1 h +556 h +3363 m +172 h +3364 m +1 h +195 h +3365 m +1137 h +964 m +146 h +10 h +10 h +1 h +1 h +3366 m +3367 m +164 h +4 h +4 h +156 h +3368 m +383 h +3369 m +3370 m +1359 h +10 h +3371 m +1 h +10 h +10 h +4 h +3372 m +332 h +25 h +1 h +4 h +1 h +4 h +1 h +4 h +3373 m +10 h +135 h +4 h +10 h +83 h +1 h +1 h +4 h +4 h +10 h +1 h +4 h +59 h +4 h +4 h +169 h +4 h +10 h +4 h +10 h +1 h +109 h +1 h +1 h +83 h +3374 m +4 h +1772 m +10 h +10 h +3375 m +3376 m +1 h +270 h +3377 m +10 h +3378 m +1 h +4 h +4 h +4 h +1822 h +147 h +3379 m +4 h +1 h +4 h +10 h +3380 m +258 h +4 h +1 h +2928 h +11 h +3381 m +10 h +10 h +10 h +92 h +3382 
m +10 h +3383 m +4 h +1 h +1 h +104 h +1 h +22 h +82 h +1 h +1 h +10 h +10 h +3384 m +41 h +3209 m +3385 m +10 h +3386 m +1 h +319 h +1 h +158 h +4 h +82 h +196 h +4 h +1 h +1 h +97 h +1 h +4 h +56 h +10 h +3387 m +124 h +278 h +114 h +4 h +4 h +10 h +1 h +1 h +97 h +4 h +3388 m +1835 m +31 h +1 h +3389 m +3390 m +192 h +4 h +10 h +4 h +10 h +4 h +41 h +4 h +3391 m +278 h +4 h +4 h +4 h +4 h +10 h +3392 m +4 h +11 h +4 h +4 h +3393 m +4 h +1 h +3394 m +59 h +3395 m +4 h +1 h +3396 m +196 h +11 h +10 h +718 h +3397 m +3398 m +25 h +109 h +10 h +4 h +56 h +4 h +4 h +59 h +124 h +4 h +4 h +3399 m +3033 m +94 h +1 h +1 h +164 h +1770 m +3400 m +164 h +4 h +1 h +174 h +1 h +4 h +1 h +3401 m +170 h +4 h +3402 m +1 h +1 h +1 h +10 h +3403 m +1 h +4 h +1 h +4 h +10 h +82 h +203 m +3404 m +4 h +4 h +1 h +1 h +278 h +3405 m +125 h +4 h +307 h +1 h +3406 m +135 h +3407 m +276 h +1 h +10 h +1 h +1 h +1 h +278 h +1 h +3408 m +1 h +4 h +109 h +1 h +3409 m +97 h +3410 m +3411 m +3412 m +10 h +250 h +3413 m +40 h +36 h +4 h +27 h +10 h +3414 m +3415 m +3416 m +94 h +4 h +41 h +172 h +4 h +10 h +3417 m +3418 m +4 h +1 h +4 h +3419 m +119 h +3420 m +3 h +4 h +1 h +10 h +4 h +3421 m +10 h +4 h +272 h +3422 m +347 m +4 h +1 h +3423 m +4 h +4 h +1 h +359 h +1836 m +1723 m +10 h +25 h +332 h +4 h +92 h +4 h +397 m +4 h +129 h +4 h +195 h +10 h +1766 h +4 h +36 h +4 h +3424 m +4 h +10 h +698 m +3425 m +1 h +57 h +10 h +3426 m +1 h +3427 m +10 h +11 h +3428 m +146 h +79 h +1 h +1 h +74 h +109 h +55 h +10 h +64 h +10 h +3429 m +10 h +3430 m +3278 m +10 h +10 h +4 h +4 h +4 h +3431 m +10 h +82 h +1822 h +857 h +3432 m +4 h +238 h +11 h +1 h +4 h +279 h +1 h +1 h +3433 m +10 h +3434 m +1 h +10 h +322 m +4 h +4 h +1 h +64 h +167 h +10 h +1089 h +3435 m +276 h +10 h +56 h +196 h +10 h +10 h +1 h +4 h +4 h +3436 m +1 h +3437 m +3438 m +1 h +3439 m +125 h +1 h +1740 m +3440 m +1 h +28 h +4 h +3441 m +1 h +4 h +204 h +1571 m +3 h +1 h +1 h +583 m +1 h +4 h +1 h +4 h +316 m +4 h +4 h +4 h +4 h 
+1418 m +10 h +74 h +1 h +1 h +1 h +3442 m +10 h +10 h +3443 m +1 h +3444 m +4 h +11 h +109 h +10 h +36 h +3445 m +4 h +258 h +10 h +10 h +196 h +4 h +3446 m +258 h +164 h +3447 m +3448 m +3449 m +779 m +79 h +195 h +1074 m +3450 m +3451 m +10 h +10 h +4 h +1 h +3452 m +4 h +59 h +10 h +92 h +125 h +79 h +3453 m +11 h +10 h +1 h +3454 m +4 h +4 h +10 h +64 h +1 h +10 h +4 h +4 h +109 h +31 h +4 h +603 m +4 h +477 m +45 h +4 h +1 h +3455 m +1 h +123 h +1 h +4 h +368 h +4 h +3456 m +4 h +4 h +1127 m +4 h +4 h +10 h +109 h +1 h +1 h +1 h +64 h +704 h +4 h +4 h +1830 m +57 h +4 h +40 h +41 h +65 h +4 h +3457 m +41 h +1 h +3458 m +4 h +10 h +3459 m +4 h +1 h +3460 m +3461 m +3462 m +1403 h +1 h +1 h +4 h +4 h +10 h +3463 m +124 h +10 h +146 h +1 h +3464 m +56 h +4 h +1 h +4 h +10 h +4 h +3465 m +368 h +83 h +3466 m +124 h +11 h +11 h +4 h +4 h +10 h +4 h +4 h +1 h +65 h +1 h +74 h +3467 m +31 h +3468 m +4 h +1595 m +4 h +4 h +1 h +1 h +3469 m +109 h +730 m +57 h +1 h +82 h +10 h +258 h +3470 m +174 h +3471 m +10 h +4 h +3472 m +4 h +1 h +196 h +31 h +1 h +3473 m +4 h +10 h +11 h +1 h +4 h +3474 m +31 h +1 h +4 h +4 h +10 h +4 h +464 h +3475 m +1 h +2124 h +10 h +135 h +1 h +110 h +1 h +1 h +1 h +3476 m +4 h +10 h +1 h +1 h +3477 m +4 h +10 h +4 h +1 h +3478 m +1 h +2887 m +1016 h +4 h +119 h +1 h +3479 m +1 h +4 h +3480 m +10 h +13 h +4 h +3481 m +10 h +186 h +3482 m +3483 m +1 h +2614 m +3484 m +4 h +1 h +61 m +1 h +1 h +1 h +3485 m +59 h +3486 m +1 h +3487 m +10 h +169 h +1 h +10 h +3488 m +4 h +74 h +4 h +1070 m +4 h +1685 m +82 h +541 h +83 h +614 m +583 m +4 h +10 h +10 h +4 h +3489 m +10 h +4 h +114 h +3490 m +4 h +92 h +4 h +57 h +4 h +4 h +1 h +41 h +4 h +124 h +3491 m +4 h +181 h +3 h +4 h +1 h +4 h +36 h +3492 m +13 h +109 h +146 h +1 h +57 h +3493 m +238 h +4 h +10 h +3494 m +1 h +146 h +10 h +3495 m +82 h +4 h +1 h +3 h +4 h +11 h +4 h +230 h +3496 m +4 h +4 h +3497 m +10 h +4 h +3498 m +10 h +4 h +4 h +4 h +124 h +3499 m +4 h +3500 m +1 h +92 h +25 h +11 h 
+65 h +4 h +1710 m +1 h +4 h +1 h +4 h +1 h +10 h +3501 m +25 h +10 h +11 h +620 m +10 h +1886 m +4 h +4 h +10 h +1250 h +1 h +3502 m +1 h +4 h +109 h +3503 m +124 h +11 h +69 h +270 h +3504 m +3505 m +3506 m +536 h +10 h +10 h +4 h +3507 m +10 h +73 h +1 h +995 m +157 h +13 h +10 h +4 h +2002 m +2303 m +3508 m +10 h +4 h +1 h +65 h +10 h +3509 m +147 h +11 h +307 h +2436 m +10 h +57 h +10 h +3510 m +11 h +4 h +1 h +1 h +1 h +1 h +10 h +10 h +146 h +3511 m +3512 m +2028 m +114 h +4 h +570 h +25 h +4 h +10 h +1 h +801 m +147 h +4 h +4 h +4 h +92 h +10 h +10 h +195 h +4 h +10 h +1 h +11 h +3 h +1 h +1 h +3513 m +1914 m +124 h +79 h +4 h +10 h +56 h +4 h +11 h +167 h +3514 m +11 h +3396 m +1 h +3515 m +79 h +278 h +3516 m +434 h +4 h +241 h +4 h +13 h +447 h +57 h +108 h +173 h +4 h +4 h +57 h +94 h +3517 m +1 h +4 h +1 h +10 h +3518 m +3519 m +4 h +83 h +3520 m +1 h +258 h +79 h +264 m +3521 m +192 h +4 h +10 h +4 h +4 h +10 h +11 h +110 h +3522 m +1 h +124 h +27 h +3523 m +4 h +3524 m +41 h +4 h +3525 m +4 h +10 h +1470 h +4 h +1 h +4 h +82 h +83 h +4 h +4 h +4 h +1 h +79 h +11 h +4 h +4 h +1 h +10 h +104 h +10 h +4 h +3526 m +10 h +4 h +1260 m +1 h +1 h +3527 m +1 h +83 h +1 h +4 h +3528 m +3529 m +10 h +144 h +3530 m +2843 m +1 h +1 h +10 h +184 h +1 h +358 h +109 h +10 h +4 h +3531 m +4 h +41 h +1 h +83 h +185 h +3532 m +4 h +1 h +129 h +3533 m +3534 m +1 h +238 h +4 h +1 h +10 h +1 h +1 h +3535 m +124 h +10 h +10 h +1 h +4 h +10 h +4 h +3536 m +1 h +4 h +1 h +601 h +10 h +10 h +10 h +10 h +4 h +1 h +3537 m +12 h +2379 m +82 h +3538 m +4 h +4 h +1 h +48 h +4 h +1 h +3539 m +1 h +10 h +83 h +1 h +358 h +4 h +10 h +11 h +3540 m +3541 m +10 h +4 h +82 h +3542 m +65 h +25 h +4 h +4 h +4 h +1 h +4 h +4 h +3543 m +135 h +4 h +3544 m +3545 m +4 h +11 h +368 h +266 h +1 h +3546 m +119 h +4 h +1470 h +83 h +3547 m +1081 m +82 h +3115 m +4 h +1250 h +10 h +3548 m +4 h +3549 m +1 h +59 h +581 m +10 h +25 h +4 h +25 h +3550 m +186 h +332 h +403 h +4 h +109 h +10 h +109 h +92 
h +1 h +1 h +3551 m +4 h +10 h +10 h +4 h +84 m +10 h +204 h +97 h +10 h +10 h +3 h +4 h +4 h +3552 m +1 h +4 h +82 h +11 h +3553 m +4 h +4 h +276 h +3554 m +3555 m +1 h +124 h +173 h +10 h +10 h +59 h +1985 m +10 h +3556 m +4 h +1 h +3557 m +12 h +4 h +4 h +10 h +8 h +4 h +59 h +10 h +276 h +3558 m +1 h +3559 m +1 h +4 h +10 h +10 h +1 h +4 h +4 h +3560 m +1 h +4 h +3561 m +1218 m +4 h +27 h +114 h +112 h +1 h +79 h +2846 m +1 h +10 h +4 h +4 h +97 h +4 h +125 h +12 h +82 h +3278 h +332 h +4 h +10 h +10 h +1 h +11 h +3562 m +1 h +3563 m +1003 h +3564 m +91 h +1 h +1 h +3565 m +3 h +1 h +1 h +4 h +3566 m +4 h +10 h +3567 m +1027 h +987 m +10 h +3568 m +4 h +4 h +3569 m +1 h +1 h +1 h +1 h +4 h +103 h +41 h +10 h +3570 m +4 h +1564 m +4 h +3571 m +10 h +108 h +3572 m +3573 m +1020 m +4 h +3574 m +1 h +604 m +4 h +3575 m +125 h +170 h +4 h +3576 m +1128 m +1 h +1 h +3577 m +1 h +124 h +11 h +447 h +4 h +45 h +195 h +4 h +3578 m +3579 m +1 h +4 h +4 h +57 h +4 h +1 h +3580 m +3581 m +10 h +1 h +3582 m +4 h +10 h +278 h +4 h +3583 m +195 h +10 h +11 h +1 h +1 h +3584 m +4 h +156 h +11 h +4 h +59 h +1 h +1 h +1016 h +10 h +10 h +3585 m +109 h +4 h +1884 m +4 h +4 h +112 h +3586 m +1 h +4 h +10 h +1 h +3587 m +185 h +1 h +55 h +1 h +4 h +3588 m +4 h +4 h +4 h +3589 m +4 h +1 h +10 h +238 h +2367 m +65 h +3590 m +25 h +64 h +10 h +1 h +3591 m +10 h +3592 m +169 h +1 h +1 h +3593 m +4 h +10 h +3594 m +157 h +1 h +157 h +10 h +41 h +10 h +1 h +10 h +11 h +1 h +124 h +41 h +74 h +4 h +4 h +3595 m +299 h +195 h +3596 m +10 h +567 m +10 h +1 h +1 h +40 h +45 h +3597 m +10 h +129 h +4 h +1 h +272 h +3 h +4 h +4 h +4 h +10 h +131 m +74 h +112 h +1 h +10 h +119 h +99 m +3598 m +3 h +41 h +4 h +10 h +5 m +74 h +4 h +4 h +92 h +4 h +692 h +119 h +10 h +1 h +4 h +939 h +1409 h +3599 m +109 h +717 m +1 h +4 h +1 h +181 h +1 h +1646 m +170 h +1 h +4 h +4 h +1 h +687 h +74 h +3600 m +156 h +1 h +3601 m +1 h +3602 m +73 h +4 h +14 m +33 h +3603 m +3604 m +57 h +1 h +4 h +27 h +4 h +109 
h +1 h +10 h +10 h +1 h +578 m +10 h +146 h +4 h +12 h +3605 m +104 h +41 h +1 h +276 h +82 h +57 h +1 h +332 h +31 h +265 h +1 h +3606 m +3607 m +4 h +83 h +10 h +3608 m +41 h +10 h +2819 m +3609 m +3610 m +3 h +124 h +3611 m +642 m +11 h +3 h +1 h +4 h +3612 m +3613 m +3614 m +74 h +3615 m +10 h +4 h +4 h +83 h +123 h +3616 m +3617 m +4 h +185 h +3618 m +3619 m +11 h +83 h +3620 m +3621 m +4 h +143 h +4 h +4 h +10 h +190 h +10 h +1 h +4 h +4 h +3622 m +4 h +172 h +3623 m +55 h +3624 m +92 h +4 h +2124 h +22 h +1 h +4 h +358 h +4 h +1220 m +11 h +4 h +1 h +1 h +3625 m +1359 h +4 h +10 h +170 h +4 h +1089 h +10 h +11 h +25 h +1403 h +1 h +164 h +82 h +10 h +3381 m +1 h +10 h +4 h +36 h +377 h +3626 m +1 h +4 h +2362 m +3627 m +3 h +3628 m +3629 m +4 h +1 h +1 h +74 h +4 h +4 h +11 h +4 h +83 h +4 h +3630 m +3631 m +10 h +4 h +4 h +1 h +109 h +4 h +4 h +1 h +976 h +3632 m +4 h +1 h +4 h +69 h +10 h +1791 m +3633 m +10 h +338 m +10 h +1936 m +57 h +3634 m +10 h +1 h +489 m +4 h +4 h +1 h +112 h +1 h +1 h +31 h +4 h +3635 m +4 h +3636 m +4 h +4 h +4 h +83 h +4 h +114 h +4 h +3637 m +51 m +3638 m +3639 m +146 h +10 h +3640 m +1 h +1 h +3641 m +4 h +3642 m +10 h +4 h +10 h +10 h +10 h +92 h +1 h +1 h +59 h +258 h +794 h +2628 m +1 h +1 h +918 m +55 h +1 h +4 h +3643 m +3644 m +3645 m +4 h +4 h +195 h +4 h +3646 m +3647 m +82 h +4 h +3648 m +10 h +1 h +195 h +143 h +28 h +56 h +12 h +520 h +1 h +83 h +10 h +1 h +13 h +3 h +626 m +10 h +3649 m +1137 h +124 h +25 h +167 h +10 h +1 h +1 h +1 h +4 h +2379 m +10 h +4 h +10 h +4 h +83 h +114 h +1 h +3650 m +4 h +805 m +11 h +129 h +3651 m +97 h +3 h +4 h +124 h +3652 m +4 h +1 h +139 h +10 h +195 h +3653 m +307 h +4 h +48 h +3654 m +10 h +57 h +3655 m +1 h +1 h +1 h +109 h +41 h +4 h +4 h +4 h +1406 h +3656 m +3657 m +695 m +1 h +1835 h +11 h +3658 m +31 h +10 h +172 h +3 h +3659 m +146 h +124 h +4 h +3660 m +1 h +4 h +3661 m +4 h +4 h +1 h +4 h +10 h +801 m +31 h +10 h +4 h +45 h +4 h +1 h +1 h +195 h +3662 m +1 h +1 h +3663 
m +4 h +31 h +620 m +3664 m +48 h +1740 m +156 h +185 h +65 h +4 h +1796 h +3665 m +113 h +10 h +3666 m +10 h +4 h +4 h +1 h +10 h +4 h +3667 m +258 h +4 h +31 h +3668 m +25 h +158 h +2846 m +3669 m +158 h +1 h +129 h +1 h +158 h +10 h +1 h +3670 m +3671 m +3672 m +147 h +1250 h +25 h +1 h +25 h +74 h +3673 m +4 h +157 h +2442 m +169 h +3674 m +158 h +10 h +11 h +74 h +3675 m +1 h +31 h +1 h +41 h +1 h +3676 m +3677 m +1650 m +195 h +4 h +3678 m +57 h +10 h +3679 m +57 h +3680 m +195 h +3681 m +83 h +3682 m +59 h +10 h +3683 m +1 h +4 h +3684 m +1 h +3685 m +386 h +3686 m +3687 m +3688 m +4 h +10 h +1 h +65 h +157 h +3689 m +190 h +4 h +3690 m +766 m +3691 m +4 h +1 h +57 h +4 h +11 h +1 h +3692 m +146 h +4 h +1 h +4 h +3693 m +1 h +3694 m +4 h +4 h +4 h +1 h +1 h +4 h +4 h +3695 m +3696 m +3697 m +3698 m +3699 m +3700 m +10 h +4 h +3701 m +3702 m +4 h +3 h +147 h +3703 m +1 h +4 h +10 h +1 h +1 h +4 h +10 h +3 h +986 h +4 h +4 h +10 h +56 h +1030 h +3704 m +4 h +1 h +10 h +3705 m +4 h +1 h +3706 m +82 h +4 h +45 h +4 h +158 h +3707 m +3708 m +885 h +114 h +3709 m +4 h +1 h +3710 m +4 h +94 h +4 h +704 h +184 h +1105 m +125 h +3711 m +4 h +31 h +3712 m +1 h +4 h +10 h +1714 m +2688 h +358 h +1 h +3713 m +3714 m +25 h +109 h +11 h +1470 h +1 h +368 h +1 h +4 h +4 h +3715 m +10 h +119 h +289 h +4 h +4 h +3716 m +4 h +4 h +4 h +4 h +3717 m +0 m +4 h +10 h +1 h +4 h +1 h +57 h +170 h +3 h +10 h +601 h +1 h +1 h +569 h +22 h +4 h +113 h +1 h +10 h +3718 m +1 h +113 h +3719 m +1 h +4 h +4 h +1 h +10 h +83 h +109 h +4 h +57 h +1 h +109 h +601 h +79 h +1 h +169 h +4 h +4 h +1 h +4 h +1 h +1 h +1875 m +1 h +3720 m +4 h +2730 m +10 h +11 h +1 h +10 h +11 h +10 h +55 h +57 h +4 h +332 h +4 h +10 h +155 m +4 h +583 h +367 h +10 h +4 h +65 h +3721 m +10 h +4 h +3722 m +1372 m +4 h +4 h +630 m +4 h +57 h +1 h +4 h +10 h +4 h +3723 m +4 h +520 h +4 h +488 h +4 h +1 h +3724 m +3725 m +190 h +3726 m +10 h +536 h +1 h +10 h +3727 m +135 h +4 h +41 h +3728 m +3729 m +10 h +181 h +4 h 
+82 h +258 h +10 h +3730 m +10 h +976 h +55 h +1 h +4 h +4 h +1 h +1 h +1 h +265 h +143 h +4 h +82 h +3731 m +3732 m +4 h +4 h +1 h +82 h +170 h +801 h +2278 m +10 h +463 m +3733 m +10 h +4 h +109 h +3734 m +1 h +4 h +869 m +938 m +4 h +4 h +1 h +3735 m +1 h +65 h +4 h +123 h +1 h +11 h +4 h +3736 m +4 h +1137 h +1 h +97 h +4 h +1 h +3737 m +27 h +1 h +1 h +4 h +1 h +1 h +2625 m +45 h +3738 m +3739 m +57 h +147 h +3740 m +147 h +386 h +1100 m +3741 m +3 h +3742 m +3743 m +1 h +3744 m +1 h +104 h +138 h +10 h +3745 m +1 h +146 h +10 h +57 h +10 h +1 h +3746 m +4 h +3747 m +1083 m +59 h +10 h +1 h +4 h +1 h +1 h +10 h +3748 m +4 h +258 h +1 h +3749 m +1 h +146 h +3750 m +3751 m +3752 m +11 h +4 h +3753 m +3754 m +12 h +11 h +83 h +4 h +41 h +4 h +307 h +4 h +106 h +4 h +1403 h +4 h +10 h +4 h +1 h +10 h +4 h +447 h +4 h +55 h +4 h +10 h +4 h +4 h +11 h +109 h +135 h +4 h +1 h +3755 m +359 h +1 h +4 h +1 h +3756 m +3757 m +4 h +1 h +10 h +4 h +124 h +12 h +112 h +3758 m +4 h +1 h +857 h +3759 m +10 h +3760 m +97 h +3761 m +1 h +104 h +3762 m +41 h +4 h +1 h +10 h +1 h +1 h +4 h +10 h +1 h +1 h +10 h +3763 m +4 h +1 h +2139 m +55 h +4 h +3764 m +59 h +10 h +1 h +4 h +203 m +3765 m +146 h +64 h +4 h +4 h +1 h +3766 m +1 h +4 h +10 h +3767 m +5 m +10 h +3768 m +4 h +4 h +447 h +3769 m +10 h +4 h +92 h +3770 m +25 h +11 h +4 h +3771 m +10 h +190 h +82 h +4 h +1 h +41 h +186 h +1 h +1 h +1 h +59 h +3772 m +10 h +1 h +3773 m +3774 m +1 h +10 h +10 h +1 h +1 h +82 h +10 h +4 h +1 h +135 h +258 h +195 h +4 h +3775 m +4 h +3776 m +10 h +4 h +25 h +4 h +109 h +1 h +3777 m +1 h +3778 m +10 h +12 h +92 h +1 h +4 h +123 h +4 h +3779 m +4 h +4 h +97 h +1 h +1627 m +1 h +3780 m +3781 m +3782 m +4 h +56 h +3783 m +3784 m +82 h +3785 m +4 h +3 h +59 h +1 h +2163 m +250 h +4 h +3786 m +10 h +3787 m +4 h +3788 m +196 h +3789 m +10 h +258 h +3790 m +1714 m +10 h +1 h +4 h +36 h +4 h +2522 m +65 h +4 h +4 h +1 h +3791 m +4 h +25 h +4 h +65 h +10 h +4 h +4 h +77 h +4 h +10 h +4 h +1 h +1 h 
+190 h +1 h +4 h +3792 m +4 h +1 h +64 h +55 h +1 h +10 h +1981 m +4 h +3793 m +31 h +82 h +1003 h +278 h +278 h +125 h +4 h +464 h +3794 m +3795 m +10 h +1 h +1 h +27 h +3796 m +4 h +736 m +3797 m +1 h +12 h +897 m +3798 m +443 h +114 h +4 h +4 h +74 h +3799 m +3800 m +56 h +11 h +4 h +4 h +91 h +4 h +4 h +1 h +3801 m +4 h +4 h +74 h +125 h +3802 m +3803 m +12 h +10 h +10 h +10 h +4 h +1 h +3804 m +1024 m +1 h +10 h +1 h +10 h +3805 m +10 h +536 h +3806 m +3807 m +13 h +135 h +990 m +1 h +1 h +4 h +124 h +1 h +1 h +10 h +57 h +65 h +1 h +4 h +3808 m +1 h +8 h +3809 m +8 h +1 h +73 h +10 h +3810 m +4 h +3811 m +1 h +1 h +3812 m +10 h +3813 m +3814 m +147 h +10 h +3815 m +113 h +1 h +4 h +146 h +10 h +97 h +274 h +10 h +4 h +4 h +124 h +3816 m +11 h +3817 m +3818 m +10 h +25 h +1 h +1 h +1 h +3819 m +4 h +135 h +4 h +10 h +1 h +258 h +1470 h +4 h +1 h +1 h +1 h +3820 m +3821 m +4 h +93 h +1 h +4 h +10 h +11 h +167 h +1 h +1 h +4 h +3822 m +27 h +11 h +3823 m +3824 m +4 h +4 h +1 h +3825 m +4 h +399 h +10 h +83 h +146 h +3826 m +195 h +1 h +4 h +1 h +1 h +3827 m +10 h +10 h +1677 m +587 m +1 h +224 h +4 h +1 h +3828 m +3829 m +4 h +4 h +1 h +1 h +55 h +59 h +1 h +10 h +4 h +264 m +10 h +10 h +4 h +3830 m +3831 m +1 h +238 h +4 h +911 h +1 h +1 h +3832 m +4 h +1 h +11 h +55 h +11 h +57 h +4 h +3833 m +3834 m +2379 h +4 h +3835 m +4 h +467 m +3836 m +124 h +1 h +65 h +1 h +83 h +11 h +3837 m +4 h +250 h +31 h +1016 h +4 h +10 h +3838 m +94 h +313 h +4 h +1 h +1 h +1 h +10 h +4 h +4 h +173 h +4 h +1 h +3839 m +897 m +4 h +1 h +3840 m +1 h +4 h +4 h +718 h +3841 m +1 h +3842 m +83 h +195 h +3843 m +1 h +1 h +1 h +3844 m +3845 m +10 h +4 h +4 h +4 h +250 h +4 h +3846 m +4 h +359 h +1 h +1 h +109 h +1 h +4 h +8 h +2119 m +4 h +4 h +1 h +4 h +3847 m +4 h +1 h +4 h +82 h +4 h +3848 m +1 h +1 h +55 h +3849 m +3850 m +3851 m +82 h +1 h +10 h +1 h +3852 m +1 h +3853 m +25 h +10 h +4 h +3854 m +190 h +3855 m +3856 m +1 h +10 h +3857 m +265 h +1 h +3 h +10 h +31 h +1 h +2474 m 
+3858 m +3859 m +10 h +10 h +938 m +3860 m +3861 m +3862 m +104 h +3 h +2300 m +10 h +1 h +1 h +3863 m +4 h +169 h +4 h +181 h +808 h +3864 m +2101 m +3025 m +92 h +4 h +181 h +3865 m +1 h +69 h +4 h +23 m +125 h +57 h +3866 m +27 h +1 h +1 h +4 h +3867 m +1 h +3868 m +4 h +229 h +4 h +538 h +31 h +3869 m +10 h +4 h +3870 m +64 h +4 h +4 h +1 h +23 m +3558 m +10 h +4 h +4 h +1 h +10 h +297 h +4 h +1 h +109 h +4 h +4 h +33 h +1 h +4 h +4 h +1470 h +10 h +143 h +185 h +1 h +1 h +256 h +4 h +204 h +3871 m +3872 m +1 h +3873 m +125 h +885 h +11 h +3874 m +322 m +65 h +4 h +3875 m +258 h +3876 m +1835 h +10 h +4 h +4 h +238 h +1 h +4 h +11 h +4 h +4 h +4 h +1 h +4 h +190 h +4 h +41 h +1 h +769 m +4 h +3877 m +59 h +1 h +55 h +1 h +556 h +3878 m +77 h +104 h +3879 m +10 h +1 h +4 h +3880 m +3881 m +1 h +119 h +1 h +3882 m +1 h +1074 m +4 h +10 h +2300 m +3883 m +3884 m +278 h +3885 m +83 h +4 h +59 h +10 h +123 h +119 h +3886 m +1 h +4 h +313 h +4 h +10 h +3887 m +238 h +1 h +31 h +125 h +3888 m +10 h +10 h +4 h +717 m +13 h +4 h +57 h +3067 m +129 h +319 h +423 m +3889 m +3890 m +1 h +447 h +4 h +1 h +630 m +4 h +4 h +4 h +938 h +79 h +3891 m +1 h +1 h +97 h +10 h +59 h +10 h +4 h +3892 m +157 h +83 h +3893 m +3 h +11 h +3894 m +214 m +1 h +57 h +3895 m +4 h +41 h +83 h +11 h +3896 m +11 h +3897 m +258 h +59 h +10 h +3898 m +146 h +4 h +3 h +4 h +3899 m +4 h +4 h +4 h +3900 m +1016 h +40 h +520 h +1 h +3901 m +3902 m +1 h +687 h +3903 m +3904 m +10 h +2954 m +1 h +4 h +73 h +147 h +3905 m +3906 m +10 h +10 h +3907 m +10 h +1 h +73 h +124 h +1 h +10 h +3908 m +3089 m +73 h +11 h +4 h +1772 h +61 m +4 h +1 h +278 h +1 h +139 h +1 h +3909 m +10 h +1646 m +3910 m +3911 m +147 h +10 h +4 h +11 h +4 h +11 h +57 h +65 h +4 h +3912 m +119 h +4 h +3913 m +3914 m +4 h +1 h +1 h +31 h +3915 m +1359 h +195 h +10 h +4 h +10 h +10 h +3916 m +1089 h +10 h +278 h +3917 m +3918 m +11 h +3919 m +10 h +1 h +224 h +3920 m +10 h +3921 m +114 h +4 h +3922 m +3923 m +4 h +10 h +1 h +3924 m 
+3925 m +806 m +10 h +94 h +1 h +3926 m +10 h +736 m +11 h +581 m +4 h +10 h +104 h +3927 m +3 h +3928 m +4 h +3929 m +4 h +3930 m +124 h +4 h +10 h +36 h +1 h +125 h +1 h +4 h +13 h +114 h +1 h +82 h +3931 m +1 h +4 h +109 h +4 h +3932 m +3933 m +1 h +3934 m +1 h +11 h +477 m +3935 m +3 h +1 h +170 h +11 h +3936 m +1137 h +10 h +3937 m +36 h +31 h +82 h +3 h +10 h +1 h +1 h +1 h +41 h +10 h +3938 m +3939 m +11 h +3940 m +195 h +4 h +4 h +11 h +4 h +56 h +3941 m +11 h +1 h +4 h +4 h +10 h +1 h +3942 m +1344 m +10 h +4 h +97 h +13 h +4 h +10 h +4 h +1 h +83 h +74 h +236 m +10 h +1 h +1 h +3943 m +3944 m +10 h +4 h +1 h +10 h +1 h +74 h +82 h +4 h +3945 m +1114 m +3946 m +3947 m +10 h +3948 m +3949 m +4 h +3950 m +3951 m +386 h +36 h +3952 m +1 h +1 h +3953 m +10 h +10 h +1 h +1 h +3 h +10 h +1 h +4 h +4 h +1 h +74 h +4 h +83 h +10 h +3954 m +36 h +1 h +10 h +10 h +3955 m +1 h +10 h +704 h +3956 m +3957 m +1 h +27 h +195 h +124 h +1 h +10 h +1 h +3958 m +4 h +4 h +3959 m +1 h +3960 m +10 h +1 h +4 h +109 h +4 h +3961 m +10 h +2887 m +36 h +3962 m +4 h +1 h +57 h +4 h +83 h +10 h +3622 m +1 h +1650 m +195 h +1 h +4 h +57 h +25 h +3 h +3963 m +196 h +4 h +10 h +1 h +4 h +4 h +4 h +4 h +265 h +4 h +11 h +74 h +10 h +41 h +3964 m +3965 m +4 h +4 h +4 h +4 h +4 h +11 h +10 h +3966 m +11 h +10 h +241 h +1 h +3967 m +4 h +601 h +3968 m +10 h +10 h +3969 m +65 h +56 h +2205 m +1780 h +4 h +164 h +3970 m +4 h +3971 m +64 h +4 h +3972 m +104 h +289 h +3973 m +3974 m +146 h +1 h +10 h +1861 m +4 h +262 h +4 h +10 h +4 h +173 h +3975 m +3976 m +109 h +10 h +258 h +3977 m +4 h +4 h +10 h +4 h +1 h +1 h +4 h +125 h +146 h +124 h +57 h +10 h +97 h +3978 m +4 h +82 h +4 h +1 h +10 h +3979 m +1 h +123 h +1 h +3980 m +4 h +1 h +1 h +82 h +3981 m +3982 m +10 h +3983 m +8 h +4 h +10 h +1 h +10 h +4 h +3984 m +82 h +1 h +10 h +3985 m +4 h +1 h +1 h +3986 m +10 h +1 h +1822 h +4 h +4 h +4 h +4 h +3987 m +536 h +1 h +10 h +10 h +124 h +1642 m +23 h +3988 m +1 h +3989 m +48 h +3990 m +3991 
m +135 h +57 h +3992 m +3993 m +1 h +1 h +11 h +3994 m +3 h +83 h +1218 m +3995 m +264 m +1 h +11 h +1822 h +1 h +10 h +1 h +3996 m +10 h +3997 m +10 h +40 h +3998 m +1 h +1 h +3999 m +12 h +3177 m +4 h +1 h +118 m +276 h +104 h +4 h +11 h +83 h +139 h +4000 m +10 h +4001 m +1137 h +4 h +173 h +4 h +4 h +27 h +976 h +4002 m +109 h +10 h +10 h +278 h +800 m +64 h +4 h +10 h +4 h +4003 m +1 h +59 h +1 h +4 h +4004 m +195 h +4 h +1 h +10 h +4 h +1685 m +4005 m +4 h +4 h +1 h +4006 m +1 h +5 m +4 h +4007 m +4 h +4008 m +59 h +10 h +158 h +109 h +1 h +4 h +10 h +763 m +4 h +4 h +1444 m +4 h +110 h +4 h +4 h +4 h +4 h +3 h +10 h +4 h +10 h +135 h +10 h +27 h +1 h +4 h +190 h +3 h +4009 m +1 h +1722 m +4010 m +147 h +4011 m +1 h +1 h +4012 m +4 h +10 h +10 h +4013 m +4014 m +12 h +4 h +23 h +4 h +83 h +4015 m +1 h +520 h +83 h +114 h +1 h +4016 m +59 h +692 h +1 h +83 h +114 h +1 h +4 h +79 h +12 h +114 h +1 h +4017 m +4 h +146 h +41 h +4 h +10 h +4018 m +1 h +4 h +10 h +4 h +94 h +10 h +124 h +747 m +4019 m +4020 m +1 h +10 h +4021 m +164 h +4 h +110 h +146 h +4022 m +4023 m +4 h +1 h +4024 m +82 h +1 h +620 m +1 h +4025 m +1 h +4026 m +4027 m +2002 m +10 h +4 h +4028 m +578 m +4 h +11 h +17 m +125 h +4029 m +4 h +59 h +4 h +10 h +1 h +169 h +4 h +4030 m +4 h +11 h +4 h +124 h +4 h +1 h +1 h +1 h +4 h +4031 m +4032 m +4 h +4033 m +83 h +1642 m +238 h +25 h +4 h +1 h +4034 m +4035 m +10 h +4036 m +10 h +4 h +1635 m +91 h +4037 m +355 m +4038 m +109 h +1 h +113 h +1 h +872 m +4 h +358 h +1 h +169 h +1 h +4 h +1 h +104 h +1 h +4 h +11 h +2347 m +4 h +4039 m +114 h +1 h +125 h +10 h +4040 m +10 h +4 h +190 h +4041 m +4042 m +4 h +4043 m +4 h +10 h +353 m +1 h +4044 m +10 h +1 h +4 h +1 h +4045 m +4046 m +83 h +1 h +4047 m +10 h +4 h +4 h +256 h +4048 m +1 h +1 h +11 h +10 h +65 h +10 h +297 h +10 h +10 h +10 h +97 h +4049 m +59 h +11 h +4050 m +4 h +10 h +4 h +74 h +1 h +4051 m +1 h +10 h +1 h +4 h +4 h +4052 m +4 h +1 h +4 h +4053 m +4054 m +1 h +4 h +1 h +976 h +119 h +4 h 
+11 h +4055 m +82 h +10 h +124 h +10 h +190 h +4056 m +1 h +4057 m +4058 m +1 h +10 h +59 h +3 h +140 m +65 h +221 m +1 h +4 h +10 h +10 h +4 h +59 h +10 h +4 h +4 h +4 h +258 h +10 h +4059 m +104 h +4060 m +4 h +4 h +4061 m +1 h +1 h +4062 m +73 h +82 h +4 h +4063 m +13 h +4 h +4 h +4064 m +4 h +82 h +4 h +1 h +4065 m +4066 m +10 h +779 h +4067 m +4 h +82 h +4 h +74 h +10 h +4 h +1 h +4068 m +4069 m +1 h +1 h +125 h +11 h +399 h +114 h +4 h +4070 m +4071 m +10 h +1655 m +4 h +11 h +4 h +278 h +1 h +1 h +27 h +4 h +65 h +4072 m +10 h +4 h +10 h +185 h +4 h +4073 m +41 h +4 h +1 h +1 h +4074 m +358 h +1 h +4 h +4075 m +10 h +1 h +170 h +4 h +4076 m +25 h +3 h +238 h +5 m +3 h +332 h +1 h +640 h +4 h +986 h +1 h +1 h +10 h +83 h +4 h +25 h +270 h +82 h +10 h +11 h +22 h +4 h +4 h +4 h +4 h +41 h +59 h +64 h +4 h +10 h +10 h +55 h +3342 m +1 h +3 h +4077 m +4078 m +82 h +4079 m +4 h +97 h +10 h +4 h +264 m +10 h +3 h +4 h +4 h +10 h +1 h +59 h +4 h +381 m +4 h +10 h +4 h +1 h +10 h +1454 m +4080 m +1 h +4081 m +92 h +1 h +118 m +57 h +4082 m +399 h +4083 m +1 h +10 h +4084 m +1 h +4 h +4 h +11 h +4 h +4 h +10 h +3048 m +4085 m +4 h +1 h +939 h +4086 m +4 h +1 h +2418 m +124 h +31 h +110 h +266 h +82 h +10 h +74 h +10 h +57 h +4087 m +4 h +4088 m +1 h +4 h +4 h +911 h +4089 m +4 h +4 h +10 h +172 h +1 h +4090 m +4 h +4 h +4 h +83 h +41 h +11 h +4091 m +4 h +4 h +4092 m +10 h +10 h +520 h +1 h +4093 m +146 h +112 h +4094 m +4095 m +10 h +10 h +4096 m +109 h +4097 m +1 h +10 h +1751 m +4 h +10 h +359 h +156 h +4 h +4098 m +4099 m +41 h +4100 m +1 h +57 h +4101 m +4 h +4102 m +1 h +1 h +36 h +10 h +1 h +1 h +10 h +1 h +125 h +55 h +4103 m +1 h +4104 m +4 h +358 h +12 h +10 h +4 h +139 h +4105 m +4106 m +4107 m +3 h +4108 m +1 h +4 h +82 h +10 h +4 h +4109 m +4110 m +4111 m +4 h +3 h +547 m +4112 m +4 h +11 h +278 h +4113 m +4 h +45 h +10 h +10 h +4 h +1 h +4 h +57 h +2606 m +4 h +4 h +10 h +185 h +4114 m +4115 m +4116 m +4117 m +27 h +4118 m +4119 m +3025 m +56 h +10 h 
+82 h +4 h +114 h +1 h +10 h +4120 m +59 h +10 h +1 h +11 h +104 h +10 h +10 h +124 h +146 h +167 h +12 h +4 h +195 h +4 h +10 h +4121 m +10 h +65 h +4 h +4122 m +4123 m +10 h +125 h +1 h +4124 m +79 h +4 h +1 h +1 h +4 h +113 h +124 h +4 h +4 h +12 h +124 h +1 h +57 h +4 h +4125 m +1308 m +10 h +1 h +4126 m +10 h +1 h +1 h +4127 m +1 h +4 h +4128 m +860 m +270 h +4 h +41 h +1564 m +4 h +10 h +1 h +4129 m +4 h +10 h +55 h +1 h +4 h +297 h +4130 m +10 h +4 h +1 h +1790 m +4 h +10 h +10 h +4131 m +1 h +1 h +22 h +31 h +4 h +10 h +4132 m +1 h +11 h +4 h +4133 m +4 h +1 h +109 h +1374 m +368 h +11 h +1 h +4134 m +59 h +4 h +10 h +1 h +4 h +114 h +4 h +4 h +10 h +147 h +4 h +2379 h +4 h +4135 m +4 h +4136 m +10 h +1 h +1 h +1403 h +488 h +4137 m +4 h +4138 m +10 h +4139 m +1 h +4 h +4140 m +10 h +3 h +493 m +4141 m +10 h +1 h +172 h +1 h +4142 m +4 h +10 h +4143 m +4144 m +4145 m +4 h +2087 m +368 h +1 h +73 h +1 h +4146 m +125 h +10 h +10 h +4 h +4147 m +1 h +4 h +4 h +1 h +1261 h +1 h +4148 m +11 h +4 h +1 h +1 h +10 h +4149 m +10 h +1 h +4150 m +757 m +949 m +4151 m +104 h +109 h +1 h +10 h +82 h +569 h +4 h +57 h +74 h +10 h +123 h +4152 m +10 h +4 h +4 h +4 h +4 h +4153 m +1 h +11 h +4154 m +2733 m +4155 m +1 h +10 h +57 h +97 h +4 h +4156 m +1914 m +224 h +4157 m +4158 m +1 h +1 h +4 h +4 h +316 m +4159 m +123 h +31 h +1261 h +31 h +10 h +104 h +1 h +4160 m +94 h +430 m +25 h +1 h +31 h +1835 h +10 h +170 h +1 h +125 h +57 h +1914 m +297 h +4 h +4161 m +11 h +1 h +10 h +4 h +1 h +3177 m +4 h +2215 m +1 h +4162 m +10 h +31 h +1 h +10 h +4163 m +64 h +1 h +4164 m +4 h +4165 m +10 h +4166 m +4 h +4167 m +83 h +1 h +73 h +1 h +27 h +11 h +4 h +11 h +195 h +104 h +843 m +10 h +57 h +147 h +278 h +195 h +3 h +4168 m +857 h +4169 m +10 h +4 h +4170 m +4171 m +1260 m +31 h +1 h +1 h +1 h +258 h +1 h +361 m +4 h +12 h +10 h +1 h +4 h +1 h +104 h +4 h +307 h +1 h +195 h +4172 m +1607 m +4173 m +27 h +4 h +27 h +692 h +447 h +4 h +4174 m +41 h +338 m +4 h +4175 m +1 h +687 h 
+4 h +4 h +3112 m +1 h +10 h +3272 m +4176 m +65 h +10 h +4177 m +8 h +164 h +4178 m +10 h +4 h +4 h +536 h +64 h +4179 m +13 h +1 h +10 h +12 h +4 h +10 h +10 h +262 h +4 h +125 h +1122 m +1 h +1470 h +45 h +4180 m +125 h +4 h +4 h +157 h +4181 m +10 h +1 h +4182 m +10 h +1 h +4183 m +4 h +1 h +1953 m +4184 m +4185 m +4186 m +4 h +976 h +1 h +3 h +185 h +1 h +4187 m +4188 m +79 h +4 h +82 h +12 h +339 m +4189 m +1 h +4190 m +10 h +1 h +11 h +4191 m +4192 m +4 h +4 h +4193 m +11 h +11 h +4194 m +10 h +4 h +4 h +56 h +4 h +158 h +10 h +4 h +110 h +1 h +10 h +4 h +1 h +10 h +1 h +82 h +4195 m +143 h +4196 m +4197 m +11 h +10 h +4198 m +1 h +1 h +338 m +371 h +10 h +57 h +4199 m +69 h +4 h +4200 m +10 h +10 h +13 h +1 h +4 h +1 h +195 h +353 m +109 h +4201 m +10 h +4 h +4 h +4 h +4 h +10 h +1 h +1 h +4202 m +4 h +10 h +1 h +10 h +994 m +4 h +4203 m +386 h +4 h +1 h +4204 m +135 h +4205 m +10 h +4 h +4206 m +31 h +4 h +1261 h +2964 m +383 h +12 h +4 h +1321 h +4207 m +10 h +4 h +4208 m +36 h +4209 m +4 h +4210 m +129 h +33 h +1 h +1 h +1 h +10 h +1 h +4211 m +82 h +4212 m +1250 h +4 h +4213 m +10 h +135 h +4214 m +4 h +13 h +4215 m +10 h +1 h +110 h +1822 h +10 h +184 h +4 h +4216 m +4 h +10 h +31 h +276 h +4217 m +10 h +1296 m +4218 m +4219 m +4220 m +10 h +4 h +41 h +1 h +10 h +770 m +167 h +4 h +1 h +1 h +4 h +4 h +4221 m +79 h +10 h +4 h +1 h +4 h +4 h +10 h +4222 m +265 h +4 h +1 h +104 h +4 h +1835 h +4223 m +1142 m +1 h +4 h +1 h +1 h +10 h +4224 m +124 h +4225 m +1 h +10 h +4 h +11 h +10 h +4226 m +1642 h +4227 m +11 h +4228 m +4 h +10 h +57 h +74 h +10 h +1 h +4229 m +4230 m +4 h +4 h +135 h +4 h +82 h +57 h +1 h +4 h +1 h +10 h +4 h +12 h +4 h +4231 m +4232 m +4 h +10 h +4233 m +4234 m +73 h +4235 m +4236 m +4237 m +656 m +4238 m +4239 m +1 h +4 h +4240 m +10 h +3221 m +4241 m +1 h +338 h +4 h +1 h +1 h +10 h +11 h +65 h +4242 m +4 h +258 h +3257 m +1 h +79 h +1 h +1 h +4 h +4243 m +10 h +4 h +1 h +10 h +4244 m +10 h +10 h +57 h +4 h +190 h +1 h +109 h +83 h 
+1 h +4245 m +11 h +1 h +4 h +386 h +4 h +83 h +124 h +4246 m +57 h +4247 m +4 h +238 h +10 h +897 h +82 h +10 h +4 h +1 h +1822 h +10 h +4 h +4248 m +12 h +1 h +4249 m +3 h +4 h +169 h +4250 m +4 h +65 h +4251 m +10 h +10 h +10 h +10 h +4 h +4 h +4252 m +1 h +11 h +10 h +1 h +1 h +4253 m +4 h +10 h +1 h +11 h +4 h +83 h +1 h +1137 h +139 h +83 h +4 h +4 h +10 h +1796 h +83 h +4254 m +10 h +4255 m +4256 m +10 h +10 h +11 h +1 h +4257 m +10 h +4 h +10 h +10 h +10 h +10 h +4258 m +4 h +4259 m +4260 m +4 h +278 h +138 h +1 h +4 h +4261 m +4 h +10 h +1 h +65 h +4 h +4 h +4 h +64 h +1 h +79 h +10 h +1 h +4 h +1 h +1 h +4 h +4 h +4262 m +1 h +4263 m +4264 m +10 h +92 h +1 h +1470 h +4265 m +4266 m +4 h +1 h +22 h +10 h +4267 m +4 h +4268 m +4 h +4269 m +1 h +10 h +4270 m +368 h +65 h +4 h +4 h +238 h +4 h +1 h +57 h +56 h +1 h +4 h +4 h +4271 m +124 h +1 h +11 h +4 h +4272 m +82 h +31 h +13 h +1 h +4 h +1 h +74 h +164 h +10 h +181 h +4273 m +4 h +123 h +11 h +1 h +4274 m +4275 m +82 h +4276 m +12 h +10 h +11 h +45 h +4277 m +11 h +4 h +59 h +45 h +1 h +4 h +1 h +4 h +10 h +1838 m +59 h +10 h +4 h +124 h +11 h +10 h +4278 m +10 h +4 h +1 h +74 h +1 h +11 h +4279 m +10 h +1 h +4280 m +4281 m +10 h +4282 m +4283 m +4 h +4284 m +4 h +13 h +266 h +4285 m +1 h +57 h +4286 m +4 h +10 h +4287 m +1 h +4 h +110 h +1 h +1 h +10 h +4288 m +1016 h +4289 m +169 h +1 h +13 h +1 h +82 h +4 h +10 h +1 h +1 h +3 h +1 h +83 h +136 m +1137 h +258 h +1619 m +267 m +25 h +11 h +10 h +4 h +4290 m +1766 h +4291 m +1 h +4292 m +22 h +4293 m +4 h +4 h +2733 m +74 h +1 h +2054 m +10 h +1 h +1 h +4 h +1 h +4294 m +1 h +4295 m +129 h +3 h +10 h +10 h +110 h +1 h +1 h +124 h +1 h +36 h +4296 m +4297 m +4 h +10 h +4298 m +1677 m +11 h +10 h +10 h +4299 m +10 h +56 h +4300 m +4301 m +1 h +1780 h +4302 m +10 h +1 h +31 h +31 h +114 h +10 h +4 h +4 h +4 h +4303 m +3 h +3 h +1 h +4 h +10 h +4304 m +4305 m +10 h +55 h +4 h +181 h +1201 m +274 h +4306 m +10 h +10 h +10 h +1 h +4 h +3 h +1 h +10 h +4 h +4307 
m +1 h +110 h +4 h +55 h +79 h +278 h +157 h +4308 m +276 h +297 h +124 h +4 h +4 h +1 h +10 h +4309 m +10 h +82 h +4 h +1 h +65 h +59 h +25 h +184 h +129 h +196 h +1218 m +10 h +4310 m +4311 m +13 h +1 h +307 h +4312 m +4 h +4313 m +1 h +1 h +4314 m +2028 m +1 h +112 h +4315 m +3025 m +10 h +219 m +125 h +146 h +41 h +3 h +4316 m +1 h +146 h +1 h +11 h +4 h +4317 m +10 h +4318 m +1650 m +4319 m +10 h +4320 m +1 h +195 h +10 h +4 h +10 h +109 h +10 h +10 h +10 h +56 h +4321 m +10 h +1 h +4 h +195 h +1 h +11 h +10 h +4 h +4 h +65 h +10 h +170 h +4 h +4 h +4322 m +4323 m +4 h +4324 m +4 h +138 h +195 h +4325 m +1 h +135 h +4 h +59 h +79 h +10 h +195 h +4326 m +4 h +10 h +4 h +10 h +4327 m +1 h +4 h +139 h +4 h +4 h +3396 h +4 h +1 h +1 h +4 h +4328 m +64 h +10 h +295 h +10 h +278 h +358 h +15 m +4329 m +4330 m +1975 m +1 h +1607 m +1 h +82 h +11 h +4 h +4 h +1406 h +4331 m +10 h +4 h +4332 m +1 h +4333 m +4 h +1 h +4 h +4334 m +4 h +4335 m +45 h +4336 m +10 h +4337 m +45 h +538 h +4338 m +278 h +11 h +1 h +104 h +570 h +4339 m +1 h +4340 m +114 h +10 h +3 h +4341 m +1089 h +10 h +4 h +1 h +1 h +82 h +1642 h +195 h +4 h +1 h +1 h +59 h +129 h +297 h +4 h +4342 m +1 h +388 m +164 h +1 h +571 m +276 h +4343 m +4 h +4344 m +73 h +4 h +400 m +65 h +4 h +4 h +4345 m +22 h +4346 m +4 h +1 h +4 h +1 h +698 m +1 h +4347 m +4348 m +4 h +59 h +77 h +1 h +27 h +1 h +4 h +4 h +1 h +4 h +279 h +10 h +4 h +4 h +170 h +11 h +4349 m +2374 m +1196 m +1 h +4 h +4350 m +1 h +4 h +4 h +157 h +4 h +10 h +1 h +4 h +4 h +10 h +4256 m +25 h +1016 h +1 h +4351 m +4 h +125 h +1403 h +4352 m +181 h +4 h +4 h +4353 m +230 h +1796 h +443 h +4 h +195 h +297 h +1 h +41 h +1 h +4354 m +1220 m +10 h +4355 m +1 h +196 h +10 h +4356 m +1 h +757 m +4357 m +4 h +4 h +4 h +10 h +25 h +186 h +196 h +4358 m +4 h +4359 m +124 h +1198 m +4360 m +1 h +1 h +990 m +1 h +1 h +4 h +4361 m +125 h +359 h +4 h +278 h +4 h +4362 m +4363 m +4364 m +10 h +4 h +195 h +10 h +4365 m +4366 m +104 h +4367 m +10 h +10 h +10 h 
+147 h +1 h +1 h +4368 m +4 h +4 h +4369 m +4 h +82 h +22 h +57 h +5 h +367 h +164 h +4370 m +569 h +4 h +1 h +1 h +4371 m +4372 m +1 h +869 m +4 h +359 h +4373 m +1 h +91 h +4374 m +1 h +1 h +59 h +1 h +31 h +4 h +59 h +4375 m +4 h +1 h +1137 h +520 h +11 h +10 h +4 h +79 h +11 h +139 h +4 h +91 h +4376 m +488 h +307 h +10 h +4 h +4 h +1 h +4377 m +10 h +569 h +10 h +1 h +1 h +4378 m +82 h +1 h +4 h +4379 m +4 h +45 h +4 h +3209 m +1 h +3 h +10 h +4380 m +10 h +4 h +10 h +4 h +1 h +82 h +4381 m +258 h +109 h +147 h +2022 m +4382 m +4 h +4383 m +4384 m +330 m +57 h +4 h +11 h +4385 m +4386 m +10 h +1 h +4387 m +11 h +4388 m +3 h +82 h +4389 m +4 h +10 h +10 h +10 h +1 h +4390 m +173 h +1 h +4391 m +4 h +1 h +4392 m +1 h +4 h +1 h +1 h +1 h +83 h +4 h +1 h +192 h +109 h +4393 m +31 h +4394 m +1 h +1 h +10 h +4395 m +10 h +10 h +139 h +10 h +4 h +1 h +83 h +4396 m +10 h +1316 m +1 h +4397 m +10 h +4398 m +114 h +31 h +4 h +4 h +174 h +4399 m +10 h +146 h +4 h +4292 m +1 h +1137 h +4 h +4400 m +10 h +4 h +1 h +4 h +55 h +4401 m +1 h +10 h +10 h +10 h +10 h +10 h +10 h +10 h +59 h +11 h +4 h +4402 m +4 h +4403 m +4 h +83 h +1 h +1 h +4404 m +383 h +41 h +4 h +3398 m +1 h +82 h +4 h +10 h +4 h +4 h +1 h +3 h +83 h +10 h +1 h +1 h +4 h +4 h +1 h +4 h +173 h +332 h +4405 m +1 h +4 h +443 h +1 h +4406 m +4 h +4407 m +1 h +4408 m +4409 m +258 h +4 h +4410 m +4 h +820 m +1 h +146 h +4 h +1 h +10 h +10 h +4411 m +41 h +2002 m +4 h +1 h +4 h +1 h +10 h +4412 m +1 h +4 h +1 h +1 h +185 h +1 h +4 h +170 h +4 h +4 h +10 h +4 h +114 h +4 h +10 h +4 h +1 h +59 h +4413 m +1 h +2564 m +10 h +4 h +1 h +1 h +4 h +1659 m +11 h +1 h +4414 m +1 h +1 h +4 h +11 h +4415 m +447 h +4 h +4 h +4416 m +289 h +4 h +10 h +4 h +125 h +4 h +4 h +4 h +295 h +4417 m +1 h +114 h +1 h +172 h +4 h +4418 m +41 h +4419 m +4420 m +11 h +1201 m +4 h +4 h +4421 m +12 h +10 h +1 h +82 h +4 h +185 h +4 h +258 h +65 h +1643 m +1 h +4422 m +4423 m +842 m +1362 m +4424 m +4425 m +36 h +74 h +4 h +125 h +56 h +1 h 
+1 h +1650 m +10 h +10 h +45 h +4426 m +40 h +4427 m +4 h +4428 m +4429 m +1 h +307 h +10 h +59 h +4 h +4430 m +1 h +4431 m +83 h +4 h +4432 m +4433 m +125 h +1 h +1 h +3 h +1261 h +1 h +4 h +1 h +10 h +25 h +4434 m +92 h +10 h +995 m +10 h +4 h +1 h +31 h +4 h +124 h +82 h +1 h +4 h +10 h +4435 m +4 h +1 h +82 h +1 h +1 h +1 h +4 h +229 h +27 h +158 h +4 h +4 h +106 h +266 h +1 h +4436 m +4437 m +4 h +4 h +22 h +170 h +82 h +10 h +4 h +92 h +1 h +4 h +190 h +2788 m +897 h +4438 m +4 h +4 h +10 h +4 h +4 h +4 h +4 h +4439 m +10 h +1 h +11 h +4440 m +4 h +10 h +1 h +1 h +31 h +278 h +1 h +124 h +1 h +4 h +4441 m +1 h +4 h +10 h +4 h +10 h +1 h +4 h +92 h +31 h +10 h +4442 m +307 h +11 h +110 h +10 h +1 h +25 h +4 h +124 h +820 m +4 h +4 h +119 h +4 h +1 h +10 h +4 h +692 h +4 h +4 h +4 h +4443 m +3 h +4 h +1 h +4 h +31 h +4444 m +1 h +59 h +1 h +4445 m +1 h +4446 m +1 h +4 h +1027 h +2186 m +4 h +83 h +4447 m +4448 m +4 h +46 h +4 h +368 h +10 h +4 h +10 h +4 h +4 h +4 h +10 h +4449 m +74 h +4 h +25 h +4 h +104 h +4450 m +601 h +167 h +1 h +4297 m +1 h +4451 m +4452 m +4453 m +4454 m +65 h +4455 m +4456 m +4 h +4457 m +4 h +4 h +184 h +1261 h +4458 m +4459 m +147 h +4 h +4 h +125 h +4 h +1 h +124 h +10 h +4460 m +4461 m +10 h +1 h +4462 m +1 h +4463 m +11 h +65 h +169 h +4464 m +82 h +4 h +383 h +3376 m +10 h +8 h +10 h +435 m +1 h +4 h +4 h +1 h +1 h +10 h +4 h +4465 m +403 h +4466 m +4 h +4 h +4 h +4467 m +10 h +488 h +4 h +10 h +238 h +3 h +10 h +4468 m +28 h +10 h +295 h +1 h +4469 m +158 h +8 h +4470 m +4471 m +1766 h +1 h +1 h +4472 m +10 h +114 h +4 h +4 h +4 h +1 h +1 h +4 h +1 h +10 h +1 h +4473 m +4 h +332 h +92 h +1 h +1137 h +36 h +10 h +2794 m +10 h +10 h +4 h +4 h +4474 m +4475 m +4 h +4476 m +4477 m +1 h +13 h +104 h +109 h +10 h +109 h +1 h +4478 m +820 h +4 h +4 h +4479 m +4 h +4 h +79 h +146 h +4 h +578 m +125 h +266 h +10 h +4480 m +4 h +11 h +4481 m +2780 m +4 h +10 h +10 h +1 h +4482 m +4483 m +4484 m +94 h +4485 m +4486 m +41 h +167 h +146 h +4 
h +10 h +10 h +1 h +1083 m +4 h +4487 m +1 h +1 h +1 h +10 h +1 h +82 h +4488 m +10 h +12 h +10 h +4 h +4 h +82 h +1780 h +4489 m +11 h +447 h +4 h +83 h +124 h +10 h +13 h +4490 m +1 h +4 h +10 h +4 h +10 h +146 h +4491 m +4492 m +4493 m +1 h +4 h +4494 m +10 h +10 h +125 h +4495 m +10 h +10 h +125 h +4 h +82 h +425 m +4 h +56 h +10 h +1128 m +46 h +986 h +146 h +11 h +266 h +3 h +4496 m +4497 m +4498 m +4499 m +108 h +10 h +4500 m +83 h +2931 m +4 h +4501 m +36 h +10 h +4 h +10 h +4 h +27 h +1 h +4 h +70 m +1 h +25 h +10 h +332 h +10 h +4502 m +4503 m +10 h +11 h +1 h +1 h +1 h +313 h +109 h +1 h +4504 m +10 h +94 h +4 h +4505 m +10 h +1650 h +3 h +4506 m +4507 m +1 h +112 h +4508 m +83 h +258 h +10 h +3 h +1 h +1 h +11 h +1 h +4 h +1 h +4 h +124 h +4 h +4509 m +12 h +48 h +1 h +11 h +10 h +1 h +4 h +4510 m +4511 m +65 h +4 h +4 h +1 h +4 h +4512 m +4 h +4513 m +4 h +1250 h +124 h +1 h +4 h +55 h +4 h +4 h +1 h +4514 m +4 h +108 h +57 h +10 h +4515 m +4516 m +4517 m +4 h +10 h +156 h +1 h +164 h +4518 m +4 h +10 h +4 h +4519 m +82 h +4 h +1 h +4520 m +82 h +4 h +4 h +170 h +4521 m +1030 h +157 h +1 h +11 h +4522 m +4523 m +4524 m +4 h +4 h +4 h +10 h +56 h +65 h +10 h +1 h +359 h +1 h +4 h +4 h +195 h +59 h +65 h +4525 m +108 h +114 h +10 h +4526 m +4527 m +1 h +1 h +4 h +1362 h +1 h +1 h +1 h +4 h +1309 m +4528 m +2733 h +10 h +174 h +1003 h +4 h +1027 h +4529 m +276 h +4530 m +4 h +4531 m +4532 m +4 h +1089 h +1 h +4 h +109 h +4533 m +4534 m +520 h +10 h +4535 m +10 h +4 h +1260 m +1 h +11 h +626 m +4 h +4 h +1375 m +601 h +4 h +1 h +146 h +10 h +4536 m +79 h +170 h +4537 m +4538 m +4539 m +10 h +139 h +124 h +4540 m +25 h +10 h +3 h +4 h +4541 m +82 h +146 h +4542 m +4 h +4 h +25 h +185 h +4543 m +4 h +10 h +443 h +1 h +359 h +8 h +278 h +10 h +83 h +4544 m +4 h +10 h +10 h +4545 m +57 h +4546 m +11 h +1 h +557 m +1 h +4547 m +954 m +1 h +1 h +4 h +1548 m +112 h +4548 m +2494 m +4549 m +4550 m +4 h +4 h +10 h +57 h +857 h +4551 m +73 h +358 h +1 h +10 h +1 h 
+297 h +4552 m +4 h +4 h +307 h +4 h +4553 m +1 h +124 h +1 h +59 h +10 h +4554 m +10 h +4 h +4555 m +4 h +1 h +4 h +332 h +138 h +4 h +1 h +1 h +1 h +1 h +4556 m +79 h +355 m +10 h +1418 m +11 h +4 h +939 h +1137 h +118 h +12 h +575 h +10 h +172 h +1 h +4 h +4 h +10 h +10 h +4 h +4557 m +1016 h +186 h +4 h +10 h +10 h +11 h +4558 m +4559 m +8 h +10 h +10 h +4560 m +10 h +4 h +1 h +4 h +238 h +1 h +4 h +4561 m +4562 m +4 h +57 h +1 h +1 h +108 h +1 h +135 h +11 h +1 h +1 h +10 h +4563 m +1 h +97 h +4564 m +4 h +4565 m +4 h +1 h +4566 m +10 h +1 h +10 h +4567 m +4 h +4568 m +4569 m +4570 m +82 h +4571 m +1 h +1 h +10 h +113 h +4 h +109 h +83 h +4 h +5 h +4572 m +986 h +1 h +114 h +1 h +1 h +4573 m +4574 m +4 h +11 h +185 h +11 h +169 h +4 h +1 h +4575 m +332 h +11 h +4576 m +104 h +1 h +4577 m +4 h +4 h +4 h +1564 m +1 h +4 h +4 h +10 h +10 h +4 h +4 h +125 h +1359 h +59 h +73 h +4578 m +11 h +10 h +25 h +4 h +1 h +73 h +40 h +307 h +1 h +138 h +4579 m +104 h +10 h +64 h +1 h +1 h +4580 m +82 h +4 h +4 h +4581 m +1083 m +4 h +4582 m +109 h +125 h +79 h +10 h +45 h +10 h +1 h +10 h +119 h +4583 m +10 h +82 h +4584 m +203 m +4585 m +27 h +857 h +4586 m +65 h +1 h +4587 m +566 m +11 h +4 h +4588 m +4589 m +65 h +11 h +4590 m +4 h +1 h +4 h +4 h +4591 m +97 h +4 h +4 h +10 h +4592 m +109 h +4 h +4593 m +57 h +10 h +4594 m +77 h +1 h +10 h +1 h +4 h +157 h +1 h +1 h +4 h +4 h +1201 h +10 h +4 h +1 h +4 h +4 h +1105 m +10 h +1 h +57 h +185 h +4595 m +4596 m +4 h +143 h +147 h +4597 m +3 h +83 h +4598 m +195 h +4 h +1 h +143 h +172 h +4 h +195 h +4 h +4 h +1 h +4 h +1 h +10 h +146 h +1 h +279 h +3 h +270 h +4599 m +97 h +4600 m +4 h +10 h +4 h +4 h +4 h +4601 m +4 h +110 h +4602 m +10 h +1 h +10 h +4 h +4 h +4603 m +4 h +4604 m +1 h +57 h +4605 m +109 h +139 h +4292 h +10 h +258 h +4606 m +1 h +10 h +4607 m +4608 m +41 h +82 h +274 h +266 h +4609 m +10 h +10 h +11 h +1 h +4 h +4 h +687 h +4 h +1 h +10 h +4 h +4 h +190 h +135 h +56 h +4610 m +351 m +82 h +4 h +4 h +4 h 
+4611 m +4 h +10 h +4 h +12 h +59 h +1 h +13 h +1 h +31 h +1 h +4612 m +97 h +1 h +4 h +48 h +4613 m +986 h +1 h +94 h +10 h +4614 m +10 h +281 m +4615 m +1 h +79 h +673 m +4616 m +109 h +10 h +4 h +64 h +4617 m +192 h +10 h +4618 m +10 h +1868 m +1261 h +4619 m +4620 m +114 h +4 h +10 h +4 h +11 h +1 h +4621 m +170 h +1 h +83 h +4 h +4622 m +351 m +4623 m +79 h +1083 h +4 h +538 h +4624 m +4 h +4625 m +10 h +59 h +4626 m +10 h +4627 m +1 h +4628 m +1220 m +1 h +1 h +4629 m +4630 m +4631 m +4 h +8 h +4632 m +4 h +4633 m +173 h +4634 m +4635 m +4636 m +716 m +10 h +4637 m +41 h +4638 m +146 h +1261 h +10 h +3 h +307 h +4 h +4 h +10 h +1 h +45 h +3742 m +10 h +383 h +10 h +4 h +4 h +1 h +11 h +8 h +4639 m +10 h +4 h +1 h +4640 m +10 h +4641 m +135 h +10 h +4642 m +1 h +4 h +1185 m +10 h +4 h +4 h +4 h +4 h +4643 m +4644 m +4 h +4 h +1 h +1 h +4 h +4645 m +124 h +4 h +10 h +4646 m +4 h +10 h +1 h +4 h +41 h +541 h +1 h +4647 m +1 h +4 h +11 h +4648 m +4649 m +1 h +4650 m +1 h +4651 m +1 h +4652 m +4653 m +22 h +4 h +41 h +1 h +4 h +465 m +10 h +1 h +4 h +10 h +258 h +10 h +4 h +4654 m +4 h +1 h +4655 m +82 h +1 h +4656 m +1 h +4657 m +10 h +4 h +4658 m +4 h +4 h +55 h +4659 m +1 h +4 h +4660 m +109 h +59 h +31 h +4608 m +2925 m +11 h +1 h +1 h +4 h +4 h +4661 m +4 h +10 h +276 h +4662 m +59 h +4663 m +1 h +10 h +4664 m +82 h +1 h +4 h +266 h +1 h +4665 m +4666 m +83 h +4667 m +1 h +22 h +4668 m +181 h +57 h +10 h +1 h +1045 m +1 h +1571 m +1759 m +10 h +3 h +4 h +125 h +4 h +10 h +4669 m +4 h +1 h +4670 m +4 h +1861 m +65 h +4 h +124 h +4 h +1 h +4671 m +489 m +1 h +31 h +4 h +135 h +4 h +10 h +489 h +1650 h +4 h +1 h +1 h +4672 m +48 h +1 h +4673 m +1 h +4674 m +278 h +4 h +10 h +40 h +4675 m +1 h +278 h +1 h +4676 m +4677 m +1 h +82 h +332 h +12 h +4678 m +4679 m +4680 m +4681 m +4 h +4682 m +4 h +4683 m +59 h +10 h +4684 m +82 h +1 h +4 h +10 h +4 h +10 h +1 h +1 h +368 h +4685 m +195 h +10 h +4 h +4 h +1403 h +22 h +1261 h +1 h +11 h +4 h +92 h +4686 m +10 h +185 
h +4 h +10 h +195 h +1430 m +1 h +1 h +4687 m +986 h +4688 m +11 h +463 m +1 h +297 h +4689 m +4 h +1 h +1 h +1851 m +4 h +10 h +4 h +601 h +4690 m +59 h +10 h +3177 m +1403 h +4 h +4691 m +65 h +10 h +4692 m +4693 m +65 h +124 h +1 h +82 h +4694 m +4695 m +4696 m +4 h +4 h +11 h +4 h +65 h +4 h +911 h +1 h +196 h +10 h +4697 m +4698 m +4699 m +10 h +4 h +1 h +11 h +59 h +10 h +1 h +1 h +4 h +4 h +1 h +10 h +74 h +4 h +1 h +4 h +4700 m +114 h +41 h +139 h +4701 m +4 h +258 h +10 h +11 h +4 h +4 h +4 h +4702 m +10 h +57 h +4 h +1 h +4 h +10 h +1 h +1 h +238 h +4703 m +59 h +4 h +1 h +1 h +10 h +1 h +10 h +4704 m +4 h +400 m +1 h +94 h +1 h +4705 m +1 h +4706 m +1 h +4707 m +12 h +10 h +4708 m +97 h +976 h +4 h +3702 m +4 h +1 h +10 h +386 h +4709 m +4710 m +4711 m +104 h +10 h +493 m +11 h +10 h +4712 m +4 h +83 h +322 m +186 h +1 h +1 h +1 h +10 h +1691 m +4713 m +1 h +4714 m +10 h +84 m +536 h +823 m +125 h +11 h +10 h +10 h +1 h +1 h +97 h +1 h +4 h +10 h +1751 m +124 h +4 h +4 h +3679 m +8 h +1449 m +146 h +4 h +4 h +1 h +123 h +125 h +10 h +10 h +1 h +10 h +4715 m +4716 m +27 h +1 h +10 h +1 h +4 h +4717 m +1 h +31 h +83 h +97 h +1 h +10 h +1 h +1 h +976 h +68 m +1 h +4 h +4718 m +83 h +164 h +4719 m +1556 m +4720 m +4 h +4 h +481 m +119 h +4721 m +4722 m +11 h +10 h +48 h +125 h +4723 m +10 h +4724 m +12 h +25 h +10 h +1 h +10 h +204 h +1 h +2719 m +4 h +11 h +4 h +4 h +1 h +94 h +1 h +11 h +4725 m +4726 m +4727 m +10 h +4728 m +1 h +4256 h +172 h +4 h +4729 m +1 h +4730 m +10 h +11 h +10 h +1 h +4731 m +140 m +4 h +4732 m +4733 m +4734 m +10 h +1 h +1838 m +4735 m +1 h +4736 m +113 h +4 h +4 h +386 h +55 h +1045 m +4737 m +10 h +10 h +4 h +4738 m +27 h +45 h +10 h +4739 m +1 h +1 h +4740 m +4741 m +2617 m +170 h +4 h +4742 m +3 h +64 h +4 h +911 h +2041 m +1 h +4 h +57 h +10 h +45 h +4 h +4743 m +11 h +1 h +4 h +36 h +1 h +11 h +258 h +74 h +1884 m +4744 m +4 h +8 h +1 h +1 h +4745 m +304 m +144 h +4 h +1 h +4746 m +83 h +4 h +4 h +109 h +1 h +41 h +4 h +601 
h +1 h +4747 m +10 h +83 h +36 h +4 h +4 h +4 h +4 h +3 h +3396 h +4748 m +4 h +4 h +4 h +4749 m +4750 m +10 h +4 h +4 h +10 h +359 h +84 h +4 h +72 m +11 h +4 h +4751 m +4752 m +1 h +97 h +4 h +322 m +10 h +1 h +10 h +2923 m +123 h +4753 m +4 h +10 h +124 h +1074 m +1 h +1 h +4754 m +4 h +10 h +1 h +10 h +4755 m +4756 m +1 h +55 h +1770 m +4757 m +11 h +73 h +1 h +10 h +73 h +59 h +4256 h +1 h +4758 m +4759 m +4 h +4760 m +1 h +77 h +1 h +1 h +4761 m +1 h +1 h +4 h +124 h +1 h +83 h +1 h +27 h +2447 m +1 h +4 h +83 h +94 h +10 h +4 h +1 h +1 h +10 h +4111 m +10 h +1 h +8 h +170 h +4 h +4762 m +61 m +10 h +1861 m +4 h +10 h +11 h +4763 m +10 h +4 h +3 h +1 h +82 h +1 h +4764 m +1 h +1 h +4765 m +4 h +10 h +238 h +10 h +10 h +1 h +1 h +10 h +258 h +4 h +10 h +4766 m +10 h +10 h +4 h +55 h +4767 m +146 h +27 h +4768 m +1 h +10 h +295 h +4769 m +4770 m +297 h +57 h +10 h +10 h +12 h +4 h +1 h +10 h +172 h +4771 m +10 h +4772 m +386 h +4 h +601 h +278 h +104 h +1 h +1 h +4 h +195 h +569 h +109 h +41 h +10 h +1 h +10 h +4 h +4 h +4773 m +230 h +195 h +31 h +4774 m +4775 m +4776 m +10 h +10 h +4777 m +4778 m +56 h +1 h +4202 m +3 h +10 h +4779 m +4 h +4780 m +1 h +11 h +1 h +184 h +4781 m +41 h +4 h +1 h +1 h +626 m +1 h +1 h +4 h +82 h +1 h +28 h +4 h +1 h +4 h +1 h +270 h +1 h +4 h +1 h +1 h +4 h +4 h +297 h +27 h +4782 m +10 h +339 m +156 h +10 h +4 h +1650 h +1 h +4783 m +4 h +1 h +196 h +1 h +10 h +4 h +1454 m +4 h +185 h +4297 h +4 h +1 h +4784 m +4 h +4 h +1 h +11 h +4 h +4 h +196 h +281 m +4 h +10 h +4 h +57 h +4 h +10 h +266 h +10 h +4 h +184 h +41 h +1 h +69 h +169 h +11 h +82 h +41 h +4785 m +4 h +4 h +65 h +4786 m +10 h +976 h +4 h +4 h +4 h +4 h +27 h +4787 m +10 h +4 h +4788 m +4789 m +11 h +4 h +4 h +4790 m +4 h +10 h +4 h +3904 m +1 h +2887 m +1 h +11 h +4 h +82 h +1 h +4791 m +110 h +4792 m +4 h +4793 m +10 h +4 h +4 h +143 h +54 m +4 h +10 h +4794 m +4 h +4 h +1 h +4 h +1 h +443 h +11 h +10 h +83 h +147 h +4 h +4795 m +65 h +4 h +279 h +10 h +1 h +4 h 
+1 h +65 h +4796 m +1 h +55 h +41 h +4 h +4 h +104 h +4 h +3 h +4797 m +195 h +1 h +4798 m +1508 m +1 h +9 m +4799 m +13 h +4800 m +10 h +1 h +4 h +25 h +4801 m +1 h +1089 h +4802 m +1 h +4 h +1 h +4 h +4803 m +11 h +4804 m +4 h +10 h +4805 m +108 h +4806 m +4807 m +124 h +1 h +82 h +258 h +82 h +1 h +57 h +83 h +1 h +10 h +172 h +3768 m +4 h +10 h +1 h +73 h +4 h +4808 m +4809 m +4 h +4 h +4 h +92 h +4 h +45 h +4 h +11 h +4 h +10 h +1379 m +1 h +4810 m +4 h +4811 m +10 h +4812 m +4 h +4 h +109 h +4813 m +10 h +10 h +4 h +1 h +4 h +11 h +10 h +4 h +109 h +4 h +4814 m +443 h +36 h +258 h +1 h +1 h +1 h +10 h +10 h +4815 m +1 h +4816 m +1 h +4817 m +687 h +10 h +1 h +1 h +601 h +4 h +4818 m +4 h +11 h +4 h +4 h +4819 m +1 h +4 h +4 h +4820 m +4 h +10 h +1024 m +1 h +10 h +4821 m +230 h +4 h +4 h +10 h +1 h +4822 m +31 h +11 h +1 h +4 h +1 h +1 h +59 h +4 h +279 h +1 h +31 h +1 h +1116 m +1 h +31 h +61 m +146 h +1 h +4 h +1403 h +1 h +1 h +4 h +2002 m +4823 m +74 h +4824 m +4 h +4 h +170 h +939 h +4 h +1 h +4825 m +1309 m +338 h +10 h +4 h +41 h +10 h +4 h +4826 m +1 h +1 h +1016 h +4827 m +4828 m +4 h +4829 m +1 h +4830 m +1030 h +4 h +195 h +123 h +11 h +4831 m +10 h +123 h +10 h +10 h +40 h +4832 m +4 h +4833 m +4 h +1 h +4 h +1316 m +124 h +1 h +358 h +184 h +1 h +4 h +57 h +319 h +4834 m +1 h +258 h +4 h +964 m +976 h +4 h +4 h +10 h +4 h +4 h +4 h +4 h +4 h +4835 m +4836 m +4 h +1 h +386 h +4 h +4837 m +79 h +10 h +4 h +4 h +4 h +4 h +10 h +4838 m +10 h +4 h +10 h +4839 m +2558 m +4 h +4724 m +4840 m +10 h +976 h +4 h +1 h +371 h +4514 m +4 h +10 h +4 h +65 h +124 h +94 h +5 h +119 h +4 h +4 h +110 h +4841 m +4 h +10 h +10 h +4 h +10 h +10 h +195 h +10 h +4842 m +1 h +4 h +13 h +4843 m +4 h +55 h +10 h +4844 m +265 h +10 h +278 h +4845 m +999 m +74 h +493 m +1 h +1 h +4846 m +1 h +4847 m +4 h +1 h +2379 h +4848 m +10 h +4849 m +10 h +10 h +4 h +4850 m +1016 h +83 h +10 h +4851 m +1 h +266 h +10 h +4852 m +11 h +1 h +4853 m +1 h +1 h +4854 m +17 m +119 h +965 m 
+4 h +1 h +4 h +1 h +4 h +1 h +10 h +59 h +97 h +4855 m +65 h +11 h +124 h +4856 m +1 h +57 h +575 h +4 h +1027 h +1 h +1 h +4857 m +601 h +1508 m +10 h +4858 m +4 h +4859 m +74 h +4 h +4860 m +10 h +125 h +757 m +4 h +79 h +4 h +4861 m +4862 m +443 h +41 h +4 h +1 h +4 h +10 h +4 h +1 h +4 h +4863 m +1 h +4 h +230 h +73 h +1 h +10 h +11 h +4 h +4864 m +4865 m +82 h +10 h +4 h +4866 m +1 h +332 h +1 h +4 h +4 h +307 h +4 h +57 h +4867 m +4868 m +4869 m +10 h +10 h +1 h +57 h +57 h +1 h +4870 m +1 h +1 h +2887 m +1 h +4 h +4 h +3050 m +1 h +10 h +4871 m +4872 m +4873 m +1344 m +1 h +4874 m +4 h +4 h +1 h +4875 m +41 h +181 h +4876 m +10 h +11 h +1 h +718 m +10 h +4 h +536 h +59 h +1 h +4 h +1 h +4877 m +4 h +4 h +4 h +4 h +1 h +10 h +92 h +97 h +4878 m +4879 m +10 h +1 h +4 h +656 m +1128 m +4 h +4880 m +1 h +10 h +97 h +11 h +124 h +1 h +1 h +11 h +3 h +4881 m +4 h +4 h +4 h +4882 m +4883 m +4 h +10 h +4 h +10 h +4 h +10 h +4 h +307 h +1 h +172 h +1 h +1 h +1 h +4 h +13 h +25 h +224 h +109 h +4 h +4884 m +109 h +10 h +11 h +1 h +4 h +12 h +4 h +1 h +147 h +4 h +10 h +3 h +74 h +4885 m +119 h +1470 h +332 h +4886 m +1359 h +57 h +4887 m +1 h +4 h +4888 m +147 h +1 h +10 h +10 h +4889 m +4 h +4890 m +506 m +726 m +238 h +83 h +36 h +1 h +82 h +10 h +4891 m +55 h +104 h +10 h +4 h +4 h +57 h +119 h +1 h +10 h +10 h +4892 m +238 h +57 h +4 h +4893 m +10 h +4 h +4894 m +1 h +97 h +109 h +11 h +13 h +4 h +4895 m +1 h +4 h +1 h +4896 m +1 h +4897 m +170 h +156 h +139 h +4 h +4898 m +3 h +4 h +10 h +65 h +10 h +4 h +1 h +4899 m +1105 m +57 h +25 h +1 h +73 h +3435 m +4900 m +4901 m +1 h +4 h +1 h +4902 m +143 h +10 h +1 h +1 h +4903 m +97 h +4 h +82 h +1016 h +4 h +4 h +258 h +425 m +114 h +4904 m +4905 m +4906 m +4 h +4907 m +12 h +1 h +4908 m +229 h +1 h +569 h +10 h +10 h +4 h +10 h +4909 m +10 h +4910 m +4 h +1 h +4 h +4 h +1 h +11 h +4 h +4 h +143 h +1 h +73 h +113 h +4911 m +10 h +996 m +4912 m +57 h +4 h +4 h +4913 m +181 h +1 h +4 h +10 h +4 h +124 h +11 h +1 h 
+1261 h +4 h +4914 m +4 h +92 h +1 h +1 h +4915 m +167 h +59 h +4 h +57 h +10 h +25 h +45 h +4916 m +10 h +10 h +10 h +371 h +4 h +1 h +82 h +1 h +4 h +4917 m +1 h +4 h +83 h +4 h +4918 m +4919 m +10 h +97 h +1 h +4 h +12 h +1 h +307 h +1 h +4920 m +10 h +4 h +10 h +4921 m +1 h +124 h +4 h +4922 m +124 h +4 h +83 h +23 h +1 h +4 h +22 h +1 h +11 h +82 h +195 h +4 h +4923 m +4 h +146 h +4924 m +258 h +4 h +3 h +3025 m +4 h +4 h +146 h +4 h +10 h +4 h +4925 m +144 h +1 h +10 h +4 h +4 h +4 h +4 h +1772 h +4 h +4 h +69 h +4 h +4926 m +2887 h +147 h +10 h +1 h +4927 m +399 h +4 h +57 h +4928 m +1027 h +1030 h +4 h +238 h +4 h +4 h +1 h +1089 h +10 h +1 h +4929 m +4930 m +1 h +10 h +4931 m +1 h +1 h +4932 m +12 h +994 m +104 h +1 h +4933 m +4 h +4934 m +4 h +57 h +4 h +4935 m +4 h +4 h +4936 m +196 h +4937 m +10 h +1 h +1 h +4 h +4938 m +10 h +83 h +4939 m +13 h +1 h +4940 m +4 h +169 h +4941 m +190 h +27 h +4929 m +4 h +4 h +347 m +4942 m +1650 h +10 h +4943 m +124 h +57 h +1 h +4944 m +169 h +4 h +4 h +4945 m +10 h +167 h +1 h +4 h +109 h +1 h +10 h +4 h +4 h +10 h +1 h +27 h +135 h +1 h +4946 m +4947 m +1 h +4297 h +1 h +104 h +4948 m +4949 m +22 h +1 h +4950 m +4 h +172 h +10 h +4 h +976 h +11 h +1499 m +1 h +11 h +1 h +4 h +10 h +59 h +119 h +74 h +4520 m +13 h +425 m +82 h +10 h +1309 m +146 h +4951 m +4952 m +125 h +10 h +1 h +186 h +57 h +4953 m +4954 m +1737 m +4 h +1576 m +10 h +4955 m +11 h +1 h +4 h +104 h +1105 m +4 h +1 h +4956 m +57 h +477 m +2148 m +1 h +4 h +1780 h +566 m +2625 m +1 h +204 h +4 h +4 h +10 h +4957 m +4 h +1 h +125 h +4958 m +1293 m +4 h +10 h +4959 m +73 h +4 h +104 h +1 h +10 h +147 h +1 h +10 h +181 h +258 h +4960 m +10 h +4 h +82 h +1137 h +4961 m +4962 m +4963 m +1 h +238 h +1714 m +1975 m +10 h +4 h +64 h +256 h +4 h +4964 m +1 h +4 h +170 h +82 h +195 h +1 h +92 h +4 h +97 h +4965 m +4 h +1105 h +717 m +4966 m +135 h +1 h +4 h +4967 m +4968 m +74 h +1 h +74 h +156 h +4969 m +31 h +1 h +41 h +4970 m +1 h +4 h +4 h +307 h +82 h +11 h 
+4971 m +10 h +4 h +4972 m +347 m +10 h +8 h +4 h +3 h +4973 m +10 h +70 m +359 h +1 h +10 h +493 h +1 h +4 h +10 h +1137 h +1105 h +4 h +4 h +56 h +4974 m +195 h +10 h +41 h +1 h +4975 m +4 h +1 h +4 h +10 h +1 h +4 h +4 h +1 h +1 h +1 h +4 h +1 h +1470 h +4 h +1 h +4 h +4 h +625 m +4976 m +172 h +64 h +27 h +1 h +196 h +4 h +4 h +4977 m +1 h +4978 m +4 h +4 h +124 h +4979 m +1 h +4 h +10 h +10 h +4 h +124 h +1337 m +4 h +1 h +31 h +1 h +186 h +1 h +12 h +10 h +92 h +4980 m +4 h +4 h +41 h +11 h +4441 m +10 h +4 h +1 h +112 h +4981 m +297 h +4982 m +4 h +4 h +4 h +79 h +83 h +1 h +3558 m +1379 m +4 h +10 h +57 h +1 h +4983 m +4984 m +4 h +83 h +4985 m +869 m +10 h +1 h +1 h +1 h +4986 m +4 h +41 h +4987 m +10 h +10 h +4988 m +4989 m +717 m +1650 h +10 h +46 h +13 h +4 h +1089 h +10 h +265 h +11 h +4990 m +109 h +4 h +1 h +10 h +4 h +4991 m +1 h +1 h +186 h +91 h +4992 m +10 h +1 h +2148 m +36 h +195 h +4993 m +4994 m +11 h +4995 m +25 h +1 h +4 h +1 h +82 h +1 h +4 h +4996 m +4997 m +1 h +4998 m +4999 m +10 h +10 h +5000 m +4 h +1 h +5001 m +5002 m +4 h +4 h +272 m +5003 m +1 h +1 h +5004 m +1 h +10 h +4 h +83 h +4 h +5005 m +92 h +3 h +4 h +11 h +4 h +4 h +4 h +1 h +57 h +4 h +1 h +434 m +5006 m +4 h +4 h +4 h +1 h +1 h +4 h +169 h +5007 m +4 h +4 h +5008 m +104 h +4 h +1 h +1 h +10 h +1 h +25 h +5009 m +10 h +10 h +109 h +4 h +5010 m +1898 m +10 h +5011 m +5012 m +4 h +10 h +1 h +509 m +4 h +1 h +1 h +5013 m +5014 m +74 h +1 h +114 h +3 h +1027 h +1337 m +5015 m +10 h +4 h +4 h +5016 m +307 h +4 h +1 h +1016 h +144 h +11 h +4 h +5017 m +190 h +5018 m +2379 h +10 h +1 h +5019 m +57 h +10 h +135 h +1 h +5020 m +1 h +167 h +4 h +4 h +5021 m +10 h +41 h +143 h +10 h +10 h +147 h +1250 h +1 h +5022 m +5023 m +1 h +10 h +1 h +5024 m +5025 m +338 h +74 h +4 h +1 h +10 h +10 h +61 h +307 h +135 h +10 h +10 h +10 h +1 h +125 h +41 h +5026 m +167 h +5027 m +11 h +4 h +276 h +5028 m +25 h +5029 m +4 h +5030 m +195 h +4 h +10 h +124 h +716 m +224 h +10 h +4 h +5031 m +10 h 
+4 h +195 h +276 h +5032 m +5033 m +5034 m +1261 h +1 h +5035 m +10 h +1 h +5036 m +276 h +25 h +1 h +278 h +5037 m +10 h +4 h +359 h +3 h +4 h +4 h +109 h +5038 m +556 h +692 h +1 h +4 h +1 h +4 h +1 h +10 h +196 h +5039 m +3 h +4 h +4 h +55 h +1 h +1 h +4 h +5040 m +4 h +10 h +190 h +5041 m +1 h +359 h +5042 m +11 h +170 h +4 h +11 h +143 h +276 h +1 h +4 h +74 h +10 h +31 h +1 h +10 h +10 h +57 h +4 h +4 h +5043 m +4 h +5044 m +4 h +5045 m +10 h +5046 m +4 h +167 h +5047 m +147 h +65 h +5048 m +443 h +4 h +3 h +4 h +10 h +4 h +1 h +10 h +5049 m +10 h +5050 m +4 h +129 h +464 m +10 h +869 m +1 h +3338 m +1 h +570 h +57 h +4 h +5051 m +143 h +5052 m +109 h +31 h +1 h +1 h +4 h +4 h +5053 m +170 h +4 h +4 h +4 h +10 h +114 h +10 h +4464 m +45 h +10 h +10 h +4 h +4 h +5054 m +4 h +5055 m +5056 m +1 h +1 h +5057 m +5058 m +83 h +5059 m +5060 m +3 h +737 m +4 h +5061 m +11 h +4 h +10 h +77 h +1281 m +4 h +4 h +687 h +5062 m +123 h +4 h +11 h +4 h +4 h +41 h +1 h +10 h +83 h +4 h +10 h +4 h +1790 m +112 h +10 h +10 h +82 h +5063 m +4 h +13 h +4 h +1089 h +307 h +83 h +4 h +1 h +4 h +82 h +124 h +5064 m +5065 m +4 h +1 h +1 h +73 h +125 h +10 h +338 h +10 h +2459 m +5066 m +5067 m +1 h +1 h +5068 m +4 h +1 h +10 h +11 h +4 h +195 h +5069 m +1 h +119 h +5070 m +139 h +10 h +802 m +1017 m +322 h +1835 h +4 h +687 h +1 h +1 h +146 h +5071 m +82 h +1 h +59 h +10 h +164 h +114 h +4 h +1 h +1137 h +1 h +1 h +1 h +425 h +1835 h +3 h +156 h +538 h +5072 m +5073 m +5074 m +5075 m +4 h +1081 m +238 h +5076 m +4 h +1 h +5077 m +10 h +203 m +319 h +10 h +124 h +1 h +5078 m +83 h +4 h +1 h +10 h +10 h +1 h +4 h +45 h +10 h +10 h +4 h +57 h +5079 m +92 h +1634 m +5080 m +10 h +5081 m +64 h +995 m +41 h +1 h +5082 m +22 h +25 h +4 h +4 h +4 h +5083 m +1 h +1 h +4 h +45 h +5084 m +10 h +5085 m +92 h +5086 m +4 h +5087 m +4 h +83 h +190 h +4 h +4 h +45 h +156 h +11 h +4 h +4 h +10 h +10 h +4 h +1 h +1 h +4 h +4 h +1362 h +4 h +1 h +1 h +94 h +1 h +5088 m +464 h +11 h +1 h +4 h +986 h +4 
h +4 h +5089 m +2418 m +5090 m +83 h +1 h +5091 m +4695 m +1 h +4 h +10 h +1191 m +82 h +4 h +5092 m +185 h +10 h +10 h +1 h +1 h +10 h +10 h +1710 m +10 h +1 h +173 h +4 h +124 h +2520 m +570 h +250 h +10 h +8 h +4 h +4 h +1 h +5093 m +169 h +1 h +10 h +64 h +5094 m +3 h +4 h +4 h +1 h +5095 m +1 h +5096 m +5097 m +569 h +170 h +4 h +83 h +57 h +83 h +169 h +4 h +4 h +110 h +31 h +5098 m +538 h +4 h +1 h +4 h +5099 m +124 h +10 h +5100 m +10 h +5101 m +3025 m +10 h +1 h +5102 m +1 h +5103 m +1 h +4 h +1 h +4 h +1470 h +4 h +59 h +10 h +4 h +12 h +10 h +1 h +135 h +5104 m +10 h +4 h +113 h +4 h +5105 m +278 h +5106 m +4 h +1089 h +94 h +4 h +5107 m +1 h +36 h +4 h +1308 m +10 h +5108 m +1 h +2028 m +5109 m +5110 m +1 h +25 h +4 h +763 m +25 h +1 h +1 h +2558 m +4 h +1780 h +79 h +1 h +82 h +109 h +4 h +347 h +25 h +3 h +5111 m +11 h +1 h +5112 m +1 h +1 h +25 h +10 h +229 h +1 h +1 h +41 h +536 h +5113 m +5114 m +4 h +4 h +5115 m +5116 m +1 h +64 h +1083 h +5117 m +4 h +13 h +5118 m +4 h +10 h +10 h +2710 m +4 h +10 h +1 h +1 h +97 h +211 m +1 h +181 h +172 h +4 h +4 h +1 h +1 h +1574 m +170 h +124 h +3028 m +1 h +103 m +5119 m +11 h +10 h +4 h +10 h +1 h +5120 m +59 h +4 h +83 h +4 h +5121 m +1 h +4 h +1 h +5122 m +4 h +196 h +1 h +2688 m +31 h +82 h +10 h +4 h +10 h +10 h +25 h +69 h +4 h +195 h +5123 m +79 h +1 h +258 h +10 h +5124 m +4 h +5125 m +562 m +1 h +5126 m +4 h +1 h +353 m +4 h +4 h +1 h +4 h +4 h +4 h +59 h +5127 m +4 h +4 h +125 h +4 h +5128 m +5129 m +1 h +4 h +172 h +5130 m +4 h +11 h +4 h +5131 m +10 h +10 h +4 h +4 h +1 h +41 h +10 h +278 h +4 h +1 h +10 h +1 h +4 h +124 h +1 h +1 h +1 h +10 h +1403 h +5132 m +125 h +5133 m +4 h +5134 m +307 h +74 h +5135 m +1 h +5136 m +5137 m +10 h +4 h +11 h +1 h +4 h +1 h +10 h +25 h +4 h +1 h +1 h +4 h +4 h +1 h +113 h +1 h +109 h +4 h +10 h +64 h +297 h +4 h +4 h +118 h +4 h +4 h +10 h +10 h +4 h +172 h +5138 m +5139 m +74 h +4 h +140 m +1053 m +1027 h +4 h +1 h +5140 m +4 h +1122 m +45 h +5141 m +1 h +1499 
m +5142 m +4 h +4 h +4 h +1 h +5143 m +1650 h +2251 m +10 h +4 h +1403 h +4 h +5144 m +123 h +4 h +4 h +478 m +4 h +4 h +55 h +4 h +174 h +1 h +4 h +10 h +4 h +4 h +1 h +11 h +1 h +4 h +5145 m +4 h +4 h +1 h +4 h +4 h +11 h +10 h +412 m +4 h +11 h +1379 m +4 h +108 h +1 h +5146 m +1 h +11 h +116 m +10 h +123 h +10 h +10 h +4 h +124 h +489 h +10 h +4 h +185 h +10 h +146 h +10 h +276 h +1 h +13 h +1 h +4 h +5147 m +285 m +4 h +109 h +190 h +4 h +170 h +388 m +11 h +10 h +10 h +185 h +5148 m +5149 m +10 h +1 h +10 h +1278 m +5150 m +10 h +1261 h +4 h +5151 m +423 m +4 h +4 h +1 h +4 h +1 h +10 h +4 h +10 h +10 h +10 h +114 h +10 h +966 m +190 h +41 h +278 h +11 h +129 h +65 h +443 h +83 h +1 h +5152 m +119 h +5153 m +1 h +4 h +10 h +4 h +1 h +82 h +5154 m +299 m +1 h +5155 m +4 h +10 h +4 h +10 h +10 h +11 h +4 h +1 h +69 h +1 h +1 h +5156 m +172 h +57 h +1 h +5157 m +4 h +5158 m +10 h +74 h +4 h +1 h +10 h +5159 m +230 h +935 m +10 h +5160 m +1 h +4 h +4 h +4 h +718 h +986 h +1 h +5161 m +840 m +11 h +5162 m +4 h +4 h +11 h +1 h +5163 m +31 h +1 h +1 h +5164 m +1 h +976 h +10 h +4 h +4 h +10 h +1 h +1 h +10 h +173 h +195 h +5165 m +10 h +125 h +135 h +5166 m +1 h +5167 m +1 h +5168 m +10 h +4 h +4 h +57 h +1 h +82 h +1 h +4 h +4 h +3546 m +1 h +4 h +307 h +1 h +1 h +64 h +4 h +11 h +3 h +10 h +4 h +5169 m +3 h +4 h +4 h +1 h +10 h +1647 m +1470 h +169 h +3396 h +5170 m +1 h +258 h +4 h +59 h +1 h +358 h +3 h +10 h +1 h +124 h +74 h +114 h +1710 m +5171 m +4 h +1 h +1 h +10 h +60 m +10 h +5172 m +5173 m +4 h +25 h +109 h +4 h +5174 m +4 h +10 h +5175 m +10 h +10 h +83 h +5176 m +10 h +5177 m +11 h +5178 m +5179 m +10 h +1 h +12 h +10 h +5180 m +5181 m +1 h +5182 m +238 h +74 h +11 h +173 h +5183 m +4 h +10 h +64 h +1 h +4 h +5184 m +5185 m +1 h +5186 m +4 h +109 h +10 h +55 h +1 h +10 h +1201 h +146 h +4 h +10 h +4 h +1 h +5187 m +1 h +4 h +1 h +11 h +1 h +2418 m +10 h +10 h +4 h +11 h +4 h +4 h +1 h +5188 m +1 h +1 h +1 h +5189 m +4 h +4 h +1 h +109 h +5190 m +25 h 
+5191 m +57 h +4 h +57 h +11 h +4 h +258 h +1 h +5192 m +1 h +10 h +10 h +109 h +4 h +10 h +1 h +3 h +1 h +258 h +36 h +4 h +4 h +1 h +5193 m +25 h +5194 m +935 m +25 h +10 h +601 h +1 h +1 h +4 h +83 h +5195 m +109 h +10 h +109 h +2971 m +4 h +4 h +4 h +1 h +5196 m +65 h +5197 m +11 h +1 h +266 h +172 h +1766 h +5198 m +1 h +1 h +5199 m +10 h +92 h +10 h +4 h +1 h +5200 m +181 h +5201 m +135 h +10 h +10 h +1 h +4 h +4 h +4 h +1 h +361 m +4 h +5202 m +5203 m +1 h +5204 m +1 h +4 h +1 h +10 h +4 h +5205 m +124 h +1893 m +12 h +46 h +509 m +5206 m +1 h +4 h +4 h +1 h +5207 m +5208 m +1 h +10 h +601 h +4 h +1 h +5209 m +5210 m +4 h +48 h +169 h +10 h +59 h +172 h +10 h +1 h +65 h +5211 m +371 h +4 h +14 m +1053 m +10 h +4 h +5212 m +5213 m +82 h +10 h +1 h +10 h +5214 m +11 h +4 h +1 h +5215 m +1 h +779 h +1955 m +5216 m +1370 m +575 h +59 h +5217 m +5218 m +1 h +4 h +10 h +10 h +5219 m +10 h +4 h +195 h +770 m +295 h +195 h +10 h +1 h +4 h +8 h +10 h +5220 m +10 h +59 h +1 h +84 h +4 h +4 h +4 h +4 h +4 h +97 h +266 h +11 h +5221 m +1 h +1 h +10 h +73 h +10 h +4 h +5222 m +1 h +109 h +4 h +4 h +4 h +4 h +5223 m +464 h +5224 m +5225 m +11 h +11 h +4 h +4 h +1470 h +114 h +4 h +83 h +139 h +129 h +190 h +1 h +5226 m +10 h +289 h +45 h +64 h +4 h +1 h +11 h +1 h +5227 m +82 h +5228 m +5229 m +3 h +83 h +250 h +3 h +10 h +4 h +5230 m +1 h +97 h +299 m +3555 m +4 h +5231 m +97 h +4 h +5232 m +10 h +147 h +4 h +5233 m +41 h +4 h +1 h +4 h +129 h +1 h +1 h +57 h +10 h +5234 m +4 h +57 h +56 h +5235 m +118 h +135 h +4 h +1 h +4 h +1822 m +606 m +124 h +25 h +5236 m +1 h +601 h +4 h +4 h +90 m +92 h +59 h +332 h +4 h +1 h +11 h +1 h +4 h +4 h +4 h +92 h +1 h +4 h +1 h +3 h +4 h +5237 m +164 h +10 h +4 h +593 m +4 h +125 h +10 h +1 h +5238 m +124 h +1 h +4 h +10 h +1 h +5239 m +55 h +4 h +4 h +125 h +1 h +1 h +1 h +4 h +11 h +10 h +4 h +10 h +10 h +5240 m +674 m +4 h +1 h +65 h +97 h +41 h +687 h +4 h +10 h +172 h +1 h +4 h +885 m +1261 h +10 h +1 h +11 h +2928 m +147 h +5241 
m +114 h +266 h +5242 m +170 h +2769 m +4 h +140 m +1 h +10 h +25 h +1 h +10 h +1 h +57 h +1 h +59 h +4 h +4 h +4 h +4 h +5243 m +5244 m +4 h +5245 m +345 m +3630 m +5246 m +5247 m +10 h +104 h +10 h +79 h +55 h +93 h +4 h +687 h +5248 m +339 m +5249 m +1 h +79 h +4 h +279 h +104 h +12 h +10 h +5250 m +10 h +1 h +5251 m +536 h +4 h +25 h +1 h +1 h +10 h +4 h +5252 m +4 h +4 h +1 h +11 h +10 h +1 h +1 h +10 h +124 h +1 h +1 h +4 h +5253 m +147 h +2550 m +4 h +143 h +4 h +4 h +4 h +4 h +4 h +8 h +5254 m +4 h +4 h +11 h +258 h +10 h +4 h +1828 m +4 h +5255 m +10 h +4 h +31 h +73 h +10 h +195 h +10 h +1 h +1 h +1108 m +8 h +10 h +4 h +276 h +110 h +82 h +5256 m +10 h +79 h +5257 m +10 h +5258 m +31 h +570 h +5259 m +4 h +1 h +170 h +1 h +447 h +11 h +297 h +135 h +773 m +1 h +4 h +4 h +79 h +4 h +13 h +4 h +4 h +74 h +167 h +1 h +4 h +10 h +28 h +332 h +124 h +5260 m +1 h +10 h +4 h +1 h +5261 m +1 h +538 h +4 h +1 h +4 h +77 h +1 h +1697 m +1 h +59 h +5262 m +5263 m +4 h +386 h +4 h +5264 m +36 h +4 h +4 h +92 h +124 h +4 h +10 h +12 h +3 h +3 h +4 h +1 h +195 h +25 h +4 h +10 h +10 h +3 h +2788 m +10 h +5265 m +10 h +358 h +5266 m +4 h +55 h +1 h +147 h +1 h +83 h +1 h +10 h +1 h +935 h +5267 m +5268 m +5269 m +4 h +1 h +5270 m +5271 m +5272 m +1 h +59 h +4 h +1 h +4 h +15 m +5273 m +10 h +10 h +5274 m +82 h +1116 m +59 h +5275 m +10 h +83 h +31 h +1 h +1 h +1 h +1 h +5276 m +4 h +297 h +4 h +5277 m +79 h +10 h +4 h +5278 m +1 h +1 h +4 h +5279 m +5280 m +64 h +10 h +113 h +5281 m +5282 m +4 h +5283 m +4 h +31 h +3112 m +4 h +195 h +1 h +10 h +104 h +181 h +1 h +1 h +4 h +28 h +10 h +146 h +83 h +10 h +1 h +1 h +4 h +5284 m +4 h +57 h +4 h +5285 m +1 h +1 h +4 h +79 h +5286 m +779 h +11 h +1 h +4 h +1 h +41 h +1 h +2625 m +10 h +258 h +65 h +11 h +1 h +5287 m +1 h +59 h +615 m +8 h +169 h +92 h +5288 m +83 h +28 h +4 h +1138 m +5289 m +31 h +4 h +55 h +167 h +5290 m +2928 m +125 h +4 h +4 h +1 h +1 h +5291 m +1 h +10 h +10 h +5292 m +5293 m +1 h +10 h +185 h +4 h +1 
h +10 h +4 h +146 h +10 h +4 h +5294 m +4 h +4 h +25 h +5295 m +5296 m +10 h +5297 m +59 h +620 m +1 h +4 h +94 h +4 h +74 h +5298 m +73 h +92 h +135 h +181 h +1 h +57 h +170 h +1 h +371 h +10 h +4 h +5299 m +10 h +4 h +1 h +5300 m +10 h +5301 m +41 h +450 m +4 h +5302 m +5303 m +10 h +10 h +4 h +4 h +11 h +82 h +4 h +4 h +4 h +1 h +1 h +4 h +109 h +10 h +11 h +124 h +10 h +125 h +1 h +5304 m +5305 m +10 h +4 h +10 h +4 h +1 h +935 h +10 h +1981 m +10 h +2116 m +41 h +10 h +4 h +5306 m +167 h +1 h +5307 m +2002 m +36 h +5308 m +4 h +10 h +4 h +10 h +4 h +10 h +1955 m +5309 m +4 h +322 h +4 h +11 h +57 h +4 h +4 h +25 h +184 h +5310 m +1 h +59 h +1 h +4 h +1 h +124 h +1 h +4 h +4 h +1 h +82 h +757 m +332 h +1938 m +5311 m +4 h +1 h +1 h +1 h +5312 m +5313 m +10 h +5314 m +4 h +4 h +5315 m +41 h +5316 m +1 h +4 h +10 h +4 h +1 h +5317 m +1 h +10 h +4 h +83 h +4 h +11 h +4528 m +4 h +10 h +4 h +359 h +5318 m +4 h +448 m +10 h +10 h +10 h +4 h +1685 m +3033 m +146 h +4 h +10 h +4 h +601 h +83 h +195 h +1 h +41 h +1 h +4 h +10 h +11 h +4 h +5319 m +5320 m +185 h +124 h +4 h +278 h +147 h +3111 m +173 h +5321 m +1 h +5322 m +3558 m +59 h +4 h +1 h +119 h +578 m +1 h +1 h +4 h +10 h +4 h +10 h +1 h +143 h +11 h +5323 m +4 h +4 h +5324 m +190 h +250 h +11 h +146 h +1 h +5325 m +1 h +319 h +109 h +11 h +1 h +36 h +1 h +11 h +104 h +3188 m +692 h +1 h +4 h +4 h +10 h +3 h +4 h +57 h +11 h +1 h +10 h +146 h +4 h +4 h +4 h +434 h +190 h +4 h +4 h +5326 m +10 h +92 h +4301 m +4 h +1 h +5327 m +1 h +1 h +4 h +83 h +5328 m +83 h +4 h +10 h +57 h +79 h +5329 m +5330 m +10 h +65 h +4 h +190 h +4 h +1 h +10 h +5331 m +4 h +1738 m +112 h +10 h +4 h +91 h +1 h +11 h +109 h +1 h +1 h +4 h +4 h +5332 m +5333 m +10 h +69 h +4 h +5334 m +195 h +10 h +1 h +31 h +4 h +109 h +10 h +219 m +5335 m +386 h +4 h +83 h +4 h +4 h +5336 m +82 h +3025 m +11 h +4 h +10 h +5337 m +5338 m +59 h +4 h +5339 m +1 h +10 h +1642 h +5340 m +22 h +1 h +10 h +4 h +104 h +10 h +4 h +5341 m +1 h +138 h +5342 m 
+620 m +5343 m +27 h +1122 m +10 h +2418 h +4 h +31 h +185 h +4 h +10 h +4 h +4 h +97 h +5344 m +135 h +1 h +1 h +5345 m +104 h +1 h +4 h +808 h +5346 m +1 h +1105 h +299 h +4 h +65 h +10 h +649 m +13 h +4 h +5347 m +83 h +1 h +104 h +1 h +929 m +59 h +170 h +10 h +144 h +1 h +5348 m +5349 m +10 h +5350 m +4 h +5351 m +8 h +4 h +4 h +4 h +5352 m +4 h +5353 m +10 h +56 h +332 h +4 h +11 h +5354 m +5355 m +10 h +4240 m +185 h +4 h +4 h +104 h +4 h +5356 m +5357 m +5358 m +4 h +10 h +5359 m +1096 m +5360 m +25 h +4 h +5361 m +10 h +1 h +1 h +4 h +4 h +4 h +11 h +4 h +1 h +10 h +4 h +279 h +1454 m +4 h +4 h +4 h +164 h +4 h +1 h +10 h +4 h +1 h +4 h +1 h +5362 m +5363 m +11 h +5364 m +4 h +10 h +4 h +83 h +4 h +1 h +5365 m +10 h +10 h +104 h +4 h +274 h +5366 m +65 h +5367 m +3 h +1 h +1 h +45 h +4 h +1 h +4 h +25 h +4 h +5368 m +4 h +4 h +83 h +110 h +4 h +5369 m +5370 m +10 h +4 h +11 h +10 h +4 h +83 h +1 h +5371 m +4 h +4 h +1 h +4 h +4 h +5372 m +4 h +5373 m +4 h +10 h +383 h +4 h +4 h +1 h +195 h +4 h +10 h +1 h +10 h +109 h +266 h +5374 m +36 h +4029 m +4 h +4 h +1 h +5375 m +2794 m +8 h +31 h +4 h +1003 h +557 m +185 h +11 h +4 h +229 h +10 h +5376 m +5377 m +59 h +1796 m +164 h +10 h +1 h +158 h +5378 m +124 h +1 h +5379 m +147 h +4 h +99 m +4 h +10 h +31 h +83 h +1 h +10 h +57 h +4 h +279 h +73 h +358 h +4 h +5380 m +31 h +4 h +5381 m +1 h +5382 m +27 h +1 h +124 h +4240 m +4 h +5383 m +10 h +5384 m +1952 m +4 h +4 h +143 h +185 h +4 h +1685 m +10 h +5385 m +10 h +4 h +57 h +5386 m +10 h +4 h +4 h +3025 m +820 h +10 h +5387 m +5388 m +1 h +3 h +4 h +10 h +196 h +1 h +11 h +4 h +4 h +4 h +4 h +779 h +5389 m +10 h +10 h +1 h +11 h +5390 m +1 h +4 h +4 h +1 h +1 h +167 h +10 h +4 h +109 h +4 h +262 m +3089 m +203 m +4 h +4 h +274 h +1 h +1 h +4 h +147 h +83 h +820 h +73 h +109 h +4 h +41 h +5391 m +258 h +172 h +140 h +4 h +10 h +10 h +5392 m +4 h +278 h +4 h +10 h +4 h +55 h +4 h +692 h +265 h +181 h +358 h +1 h +5393 m +1 h +4 h +5394 m +10 h +97 h +181 h +10 
h +4 h +82 h +5395 m +93 h +569 h +45 h +5396 m +1 h +4 h +5397 m +266 h +338 h +2124 m +2308 m +10 h +4 h +4 h +10 h +10 h +4 h +4 h +4 h +114 h +45 h +97 h +10 h +74 h +4 h +1 h +4 h +1771 m +4 h +5398 m +146 h +4 h +4 h +59 h +167 h +5399 m +150 m +386 h +1 h +113 h +10 h +1 h +4 h +10 h +135 h +4 h +1 h +1 h +4 h +5400 m +307 h +4 h +10 h +73 h +97 h +3668 m +4 h +97 h +1 h +4 h +10 h +1 h +5401 m +57 h +65 h +5402 m +1 h +1 h +1 h +195 h +1 h +79 h +1 h +1 h +4 h +143 h +5403 m +57 h +140 h +124 h +83 h +5404 m +3 h +10 h +1 h +2494 m +10 h +1 h +5405 m +4 h +1 h +70 m +1 h +4 h +5406 m +112 h +11 h +1772 h +5407 m +1 h +10 h +10 h +2595 m +10 h +3 h +1725 m +5408 m +5409 m +1 h +538 h +5410 m +5411 m +5412 m +13 h +1 h +10 h +5413 m +5414 m +97 h +10 h +10 h +4 h +5415 m +1 h +5416 m +1 h +4 h +4 h +11 h +10 h +4 h +10 h +10 h +976 h +82 h +10 h +5417 m +1 h +1 h +1642 h +5418 m +4 h +5419 m +169 h +4 h +5420 m +4 h +4 h +25 h +5421 m +4 h +533 m +82 h +5422 m +5423 m +4 h +5424 m +1250 h +4 h +4 h +4 h +4101 m +4 h +1 h +4 h +5425 m +258 h +1 h +10 h +4 h +4 h +1 h +4 h +1 h +5426 m +4 h +83 h +1 h +1393 m +11 h +556 h +57 h +4 h +4 h +1 h +383 h +4 h +5427 m +5428 m +10 h +56 h +10 h +5429 m +31 h +92 h +4 h +10 h +11 h +4 h +4 h +5430 m +11 h +5431 m +5432 m +105 m +10 h +5433 m +4 h +5434 m +2359 m +125 h +10 h +82 h +109 h +10 h +45 h +1 h +10 h +57 h +5435 m +4 h +4 h +57 h +642 m +123 h +1372 m +1 h +59 h +4 h +83 h +1835 h +5436 m +1 h +2436 m +59 h +4 h +4 h +4 h +1 h +144 h +83 h +83 h +11 h +338 h +4 h +109 h +10 h +28 h +10 h +4 h +1 h +10 h +1 h +1 h +13 h +4 h +4 h +77 h +1281 m +4 h +5437 m +5438 m +1 h +25 h +10 h +4 h +82 h +2582 m +5439 m +4 h +10 h +1780 h +10 h +5440 m +5441 m +5442 m +5443 m +4 h +124 h +157 h +10 h +1 h +4 h +520 m +8 h +4 h +1 h +10 h +885 m +935 h +10 h +164 h +10 h +4 h +5444 m +10 h +10 h +10 h +4 h +1 h +4 h +13 h +4 h +10 h +10 h +4 h +5445 m +5446 m +4 h +113 h +5447 m +73 h +65 h +1 h +83 h +1 h +1 h +31 h +5448 
m +4 h +10 h +11 h +4 h +1685 h +164 h +1 h +2281 m +10 h +297 h +110 h +10 h +12 h +5449 m +5450 m +4 h +1 h +144 h +4 h +332 h +5451 m +10 h +65 h +10 h +195 h +12 h +10 h +28 h +2769 m +124 h +5452 m +57 h +36 h +5453 m +1016 h +125 h +5454 m +1 h +11 h +3768 m +4 h +5455 m +278 h +125 h +4 h +41 h +1 h +5456 m +5457 m +1 h +4 h +4 h +4057 m +4 h +5458 m +1 h +2459 m +114 h +224 h +11 h +10 h +1 h +5459 m +5460 m +10 h +8 h +4 h +10 h +3 h +1 h +4 h +5461 m +1 h +4 h +5462 m +5463 m +10 h +3837 m +4 h +1 h +4 h +25 h +4 h +1 h +5464 m +4 h +1685 h +4 h +4 h +4218 m +5465 m +10 h +25 h +10 h +146 h +4 h +25 h +1 h +157 h +4 h +10 h +10 h +358 h +1 h +4 h +5466 m +10 h +4 h +10 h +172 h +386 h +5467 m +181 h +10 h +4306 m +41 h +5468 m +4 h +10 h +83 h +1 h +5469 m +4 h +5470 m +36 h +1 h +5471 m +41 h +10 h +4 h +10 h +10 h +5472 m +4 h +4 h +1089 h +5473 m +256 h +10 h +4 h +4 h +4 h +4 h +1 h +425 h +4 h +5474 m +4 h +5475 m +10 h +4 h +4 h +10 h +386 h +1 h +5476 m +1309 h +1 h +1472 m +135 h +4 h +10 h +124 h +31 h +147 h +4 h +5477 m +4 h +45 h +10 h +1 h +97 h +5478 m +10 h +82 h +4 h +4 h +4 h +5479 m +5480 m +10 h +1 h +4 h +5481 m +10 h +12 h +4 h +10 h +10 h +4 h +276 h +4 h +77 h +147 h +4 h +10 h +1 h +10 h +4 h +4 h +2846 m +1 h +82 h +1 h +4 h +4 h +10 h +5482 m +12 h +11 h +4 h +10 h +10 h +12 h +1 h +1 h +4 h +5483 m +10 h +169 h +4 h +358 h +5484 m +4 h +5485 m +5348 m +140 h +5486 m +1 h +5487 m +964 m +2172 m +82 h +1 h +5488 m +307 h +4 h +4 h +10 h +5489 m +332 h +57 h +1 h +1027 h +1 h +92 h +1 h +4 h +4 h +5490 m +5491 m +4 h +4 h +1 h +31 h +1 h +4 h +5492 m +4 h +319 h +5493 m +4 h +4 h +5494 m +5495 m +10 h +1 h +12 h +10 h +1 h +1 h +1 h +4 h +5496 m +4 h +1 h +1 h +5497 m +10 h +4 h +4 h +4 h +74 h +5498 m +5499 m +4 h +5500 m +1201 h +1 h +5501 m +97 h +4 h +5502 m +125 h +4 h +12 h +25 h +82 h +368 h +4 h +4 h +94 h +157 h +4 h +125 h +4 h +1 h +4 h +109 h +4 h +1 h +383 h +57 h +10 h +1780 h +65 h +716 m +368 h +1 h +5503 m +1 h +55 
h +4 h +4 h +1137 h +4 h +41 h +4 h +10 h +11 h +104 h +5504 m +10 h +45 h +181 h +5505 m +10 h +4 h +129 h +10 h +28 h +5506 m +692 h +83 h +332 h +4 h +1 h +4 h +5507 m +4 h +5508 m +10 h +124 h +4 h +1 h +4 h +1553 m +5509 m +4 h +5510 m +10 h +5511 m +1553 m +10 h +109 h +5512 m +5513 m +10 h +1650 h +196 h +219 m +4 h +10 h +104 h +4 h +36 h +10 h +266 h +10 h +57 h +1 h +1822 h +104 h +195 h +5514 m +4 h +5515 m +1 h +4 h +4 h +82 h +5516 m +1 h +1403 h +4 h +1403 h +124 h +1 h +1 h +1 h +5517 m +2961 m +10 h +4 h +10 h +5518 m +25 h +1 h +4 h +1 h +5519 m +4 h +10 h +4 h +5520 m +56 h +5521 m +10 h +1 h +156 h +2245 m +1 h +1 h +1 h +10 h +4 h +57 h +82 h +1 h +1548 m +4 h +1 h +23 h +185 h +295 h +5522 m +4 h +5523 m +4 h +4 h +1 h +536 h +10 h +104 h +4 h +4 h +5524 m +10 h +10 h +10 h +10 h +1 h +4 h +4 h +615 m +5525 m +5526 m +1 h +4 h +1 h +1 h +10 h +1 h +4 h +976 h +1 h +4 h +4 h +258 h +1772 h +5527 m +5528 m +4 h +5529 m +1 h +10 h +4 h +10 h +4 h +4 h +143 h +27 h +976 h +10 h +57 h +83 h +13 h +4 h +2111 m +5530 m +5531 m +2418 h +1 h +5532 m +184 h +383 h +1 h +4 h +10 h +4 h +10 h +170 h +4 h +4 h +5533 m +10 h +4 h +10 h +10 h +4 h +59 h +10 h +5534 m +82 h +4 h +172 h +1 h +12 h +1 h +83 h +4 h +4 h +266 h +4 h +5505 m +3 h +59 h +109 h +5535 m +3216 m +5536 m +11 h +447 h +129 h +1 h +5537 m +582 m +4 h +41 h +124 h +4 h +1 h +5538 m +4576 m +4 h +94 h +3 h +4 h +1835 h +238 h +383 h +5539 m +285 m +31 h +5540 m +57 h +1 h +10 h +5541 m +5542 m +718 h +143 h +5543 m +4 h +4 h +5544 m +1 h +718 h +4 h +4 h +5545 m +332 h +97 h +5546 m +10 h +5547 m +10 h +4 h +10 h +10 h +228 m +10 h +1 h +25 h +5548 m +5549 m +4 h +5550 m +59 h +976 h +4 h +5551 m +5552 m +5553 m +4 h +10 h +4 h +41 h +1 h +57 h +5554 m +172 h +1 h +3 h +4 h +3499 m +2110 m +1 h +587 m +1 h +10 h +1 h +10 h +11 h +1 h +1116 m +59 h +5555 m +1 h +83 h +4 h +5556 m +10 h +1 h +45 h +10 h +1 h +1948 m +1 h +143 h +5557 m +4 h +10 h +297 h +74 h +195 h +297 h +112 h +143 h +5558 
m +146 h +10 h +5559 m +5560 m +1 h +4 h +1 h +258 h +5561 m +10 h +11 h +10 h +4 h +91 h +1 h +208 m +119 h +4 h +1 h +1 h +1 h +10 h +1642 h +1 h +65 h +181 h +4 h +1780 h +4 h +82 h +4 h +4 h +59 h +1 h +4 h +1 h +5562 m +5563 m +185 h +1 h +1 h +1 h +2257 m +5564 m +5565 m +4 h +146 h +4 h +10 h +2846 m +3 h +2719 m +2124 h +399 h +5566 m +119 h +5567 m +5568 m +1 h +1 h +56 h +5569 m +4 h +3209 m +10 h +5570 m +1 h +238 h +1 h +5571 m +11 h +5572 m +1 h +4 h +5573 m +124 h +1 h +173 h +359 h +4 h +57 h +4 h +4 h +4 h +1 h +4 h +1737 m +92 h +10 h +4 h +10 h +4 h +59 h +5574 m +1 h +108 h +57 h +4 h +11 h +1 h +4 h +1 h +10 h +5575 m +4 h +10 h +82 h +1 h +4 h +4 h +5576 m +5577 m +124 h +1 h +1 h +5578 m +2374 m +1 h +4 h +5579 m +4 h +5580 m +10 h +25 h +1 h +5581 m +1 h +11 h +4 h +11 h +10 h +4 h +25 h +10 h +4 h +5582 m +4 h +1 h +10 h +4 h +443 h +4 h +4 h +1 h +4 h +5583 m +10 h +5584 m +276 h +10 h +4 h +109 h +3 h +124 h +28 h +4 h +10 h +10 h +69 h +1 h +10 h +4 h +1 h +1 h +10 h +4 h +97 h +4 h +10 h +4 h +289 h +1 h +10 h +4 h +5585 m +10 h +5586 m +1 h +4 h +1 h +5587 m +91 h +1766 h +158 h +1 h +5588 m +5589 m +4 h +5590 m +4 h +4 h +1470 h +4 h +4 h +1 h +5591 m +1 h +74 h +57 h +4 h +10 h +5592 m +1 h +4039 m +56 h +10 h +10 h +83 h +5593 m +4 h +11 h +13 h +4 h +2475 m +1 h +1 h +10 h +167 h +4 h +1 h +5594 m +1 h +10 h +1 h +13 h +10 h +139 h +5595 m +22 h +57 h +5596 m +563 m +11 h +4 h +1 h +139 h +5597 m +10 h +4 h +262 h +169 h +1790 m +3 h +779 h +4 h +1 h +4 h +425 h +569 h +123 h +5598 m +1 h +1 h +4 h +1 h +10 h +57 h +57 h +11 h +10 h +146 h +1 h +10 h +5599 m +1 h +11 h +59 h +4 h +10 h +1 h +1 h +5600 m +1 h +4 h +5601 m +31 h +4 h +11 h +5602 m +4 h +211 m +4645 m +11 h +11 h +11 h +11 h +57 h +238 h +4 h +1 h +5603 m +164 h +1 h +4 h +10 h +181 h +299 h +4 h +358 h +105 m +1 h +10 h +109 h +10 h +10 h +1 h +5604 m +1 h +5605 m +5606 m +295 h +5607 m +5608 m +5609 m +5610 m +278 h +272 m +10 h +1 h +5611 m +4 h +5612 m +4 h +5613 m 
+4 h +5614 m +4 h +5615 m +10 h +124 h +31 h +5616 m +5617 m +5618 m +5619 m +1 h +4 h +4 h +1 h +4 h +5620 m +5621 m +4 h +433 m +1 h +4 h +1 h +158 h +3 h +73 h +124 h +4 h +5622 m +4 h +118 h +1 h +109 h +82 h +1016 h +4 h +4 h +1 h +73 h +278 h +5623 m +31 h +5624 m +10 h +5625 m +114 h +64 h +4 h +1 h +4 h +4 h +10 h +4 h +4 h +4 h +4 h +124 h +97 h +10 h +1 h +10 h +10 h +4 h +1 h +5626 m +4 h +5627 m +3558 m +1389 m +4 h +4 h +229 h +10 h +5628 m +4 h +10 h +368 h +170 h +45 h +4 h +10 h +332 h +4 h +1 h +4 h +4 h +1 h +1 h +4 h +11 h +4 h +5629 m +1 h +224 h +4 h +4 h +5630 m +1 h +1 h +11 h +4 h +10 h +4 h +5631 m +73 h +124 h +83 h +649 m +146 h +97 h +119 h +297 h +5632 m +1 h +4 h +5633 m +12 h +5634 m +10 h +570 h +4 h +109 h +41 h +1737 m +55 h +1 h +82 h +4 h +5635 m +5636 m +143 h +41 h +5637 m +4 h +4 h +5638 m +1 h +10 h +1 h +1 h +10 h +282 m +10 h +1 h +59 h +1 h +2423 m +1 h +649 m +1 h +1 h +57 h +1 h +5639 m +4 h +3 h +4 h +41 h +124 h +371 h +10 h +5640 m +4 h +82 h +1 h +4 h +4 h +3 h +4 h +5641 m +59 h +1 h +4 h +57 h +265 h +10 h +5642 m +5643 m +1 h +1 h +5644 m +125 h +5645 m +10 h +4 h +556 h +4 h +5646 m +488 m +1 h +1 h +4 h +4 h +1030 h +1 h +1 h +4 h +4 h +10 h +170 h +4 h +5647 m +65 h +478 m +4 h +10 h +74 h +433 m +69 h +4 h +4 h +4 h +5648 m +82 h +4 h +403 h +4 h +5649 m +10 h +1 h +4 h +1 h +1 h +82 h +1 h +5650 m +4 h +1 h +1 h +1 h +4 h +4 h +11 h +1 h +1 h +13 h +73 h +1 h +4 h +114 h +10 h +3 h +1 h +4 h +5651 m +82 h +124 h +5652 m +276 h +1564 m +11 h +464 h +4 h +69 h +1 h +5653 m +4 h +1 h +59 h +2688 m +5654 m +1 h +1 h +4 h +5655 m +276 h +1 h +4 h +4 h +4 h +4 h +181 h +4 h +170 h +4 h +124 h +129 h +5656 m +22 h +59 h +1 h +4 h +109 h +5657 m +10 h +5658 m +230 h +593 m +10 h +238 h +4 h +4 h +3 h +4 h +3 h +1 h +4 h +10 h +104 h +5659 m +10 h +1 h +5660 m +1 h +1 h +1 h +5661 m +72 m +4 h +4 h +4 h +10 h +1 h +170 h +97 h +1 h +4 h +1 h +109 h +94 h +139 h +4 h +536 h +4 h +10 h +10 h +5662 m +4 h +135 h +4 h +4 
h +4 h +4 h +12 h +5650 m +1 h +4 h +4 h +31 h +270 h +5663 m +4 h +5664 m +25 h +649 h +1 h +4 h +4 h +10 h +10 h +5665 m +5666 m +11 h +1120 m +23 h +1 h +10 h +258 h +4 h +185 h +4 h +57 h +1 h +4 h +1 h +4 h +11 h +1 h +4 h +4 h +976 h +307 h +10 h +10 h +4 h +4 h +4 h +5667 m +112 h +4 h +157 h +5668 m +5669 m +2625 m +31 h +4 h +1 h +173 h +5670 m +147 h +114 h +4 h +4 h +1 h +83 h +1 h +1 h +601 h +5671 m +4 h +601 h +83 h +1 h +4 h +1137 h +4 h +10 h +5672 m +4 h +31 h +4 h +11 h +1 h +44 m +1 h +104 h +109 h +10 h +4 h +4 h +110 h +5673 m +5674 m +10 h +1 h +4 h +4 h +5675 m +4 h +3 h +5225 m +386 h +5676 m +4 h +5141 m +238 h +1 h +11 h +1 h +146 h +5677 m +1 h +4 h +192 h +10 h +4 h +4 h +10 h +4 h +5678 m +5679 m +2591 m +5680 m +41 h +4 h +4 h +5681 m +4 h +5682 m +10 h +8 h +10 h +5683 m +1 h +5684 m +1 h +4 h +4 h +1 h +1 h +5685 m +143 h +4 h +1 h +5686 m +1 h +4 h +4 h +278 h +4 h +4 h +13 h +104 h +10 h +25 h +5687 m +83 h +4 h +1 h +64 h +5688 m +1 h +1 h +4 h +1470 h +10 h +4 h +1790 m +1 h +4 h +10 h +5689 m +5690 m +1 h +4 h +1914 m +10 h +5691 m +4 h +1218 m +1359 h +5692 m +4 h +4 h +5693 m +1627 m +56 h +109 h +65 h +4 h +1737 h +4 h +10 h +4 h +464 h +238 h +3 h +104 h +109 h +630 m +167 h +4 h +1 h +5694 m +4 h +4 h +11 h +1 h +4 h +114 h +5695 m +4 h +1751 m +41 h +94 h +4 h +1 h +4 h +4 h +97 h +5696 m +5697 m +1 h +4 h +911 h +41 h +40 h +10 h +1 h +1 h +113 h +10 h +1 h +4 h +5698 m +4 h +1 h +1 h +5699 m +10 h +1 h +73 h +1 h +4 h +25 h +250 h +10 h +10 h +79 h +4 h +5230 m +5700 m +1 h +10 h +10 h +10 h +11 h +4 h +1 h +1 h +73 h +1 h +186 h +109 h +4 h +4 h +4 h +125 h +5701 m +1714 m +5702 m +11 h +5703 m +229 h +31 h +10 h +5704 m +10 h +11 h +4 h +1 h +10 h +4 h +10 h +4 h +1 h +22 h +5705 m +8 h +5706 m +1 h +4 h +1 h +1 h +381 m +4 h +5707 m +10 h +83 h +4 h +4 h +270 h +4 h +124 h +5708 m +4 h +156 h +1 h +125 h +4 h +1 h +5709 m +11 h +4 h +5710 m +4 h +10 h +5711 m +4 h +4 h +278 h +74 h +4 h +4 h +4 h +1 h +31 h +10 h +4 h 
+1 h +1 h +1 h +5712 m +10 h +4 h +4 h +4 h +5713 m +1 h +1948 m +5714 m +4 h +1027 h +1 h +10 h +97 h +1269 m +4 h +4 h +4 h +10 h +4 h +4 h +57 h +1191 m +185 h +109 h +90 m +3 h +10 h +83 h +4 h +5715 m +10 h +4 h +10 h +5716 m +1 h +4 h +5717 m +124 h +135 h +5718 m +1 h +4 h +10 h +10 h +25 h +10 h +5719 m +4 h +1 h +11 h +82 h +1 h +4 h +10 h +1 h +10 h +204 h +1 h +4 h +158 h +10 h +143 h +4 h +5720 m +5721 m +5722 m +4 h +10 h +5723 m +1 h +4 h +25 h +5724 m +11 h +45 h +928 m +10 h +10 h +112 h +10 h +109 h +4 h +10 h +5725 m +5726 m +5727 m +5728 m +114 h +1 h +124 h +1389 m +135 h +5729 m +4 h +5730 m +4 h +158 h +10 h +10 h +1 h +83 h +1 h +5731 m +5732 m +1 h +4 h +1 h +10 h +2625 m +5733 m +4 h +4 h +4 h +5734 m +11 h +4 h +31 h +1 h +1 h +10 h +10 h +11 h +10 h +5735 m +1 h +10 h +94 h +4 h +4 h +4 h +246 m +74 h +169 h +4 h +4 h +181 h +10 h +4 h +65 h +82 h +3 h +5736 m +1 h +10 h +1442 m +31 h +11 h +4 h +5737 m +5738 m +4 h +10 h +2851 m +73 h +1 h +1 h +3477 m +41 h +4 h +10 h +196 h +1 h +10 h +5739 m +4 h +1 h +5740 m +4 h +2998 m +1 h +4 h +104 h +4 h +5741 m +5742 m +5743 m +10 h +5744 m +4 h +5745 m +10 h +4 h +5746 m +5747 m +5748 m +109 h +97 h +10 h +129 h +1 h +1 h +10 h +10 h +10 h +10 h +4 h +4 h +5749 m +5750 m +83 h +238 h +4 h +13 h +5751 m +4 h +5752 m +10 h +10 h +10 h +4 h +5753 m +5754 m +4 h +430 m +4 h +4 h +1 h +4 h +11 h +83 h +55 h +4 h +1785 m +4 h +4 h +10 h +4 h +5755 m +10 h +4 h +4 h +601 h +4 h +5756 m +59 h +4 h +1 h +1 h +1 h +94 h +10 h +10 h +8 h +4 h +11 h +1 h +1 h +319 h +4 h +11 h +4 h +113 h +41 h +1 h +11 h +5757 m +1 h +3150 m +4 h +10 h +56 h +4 h +10 h +4 h +1 h +2775 m +109 h +5758 m +1 h +156 h +73 h +1 h +1 h +31 h +1 h +1 h +5759 m +1 h +5760 m +4 h +124 h +1 h +10 h +11 h +1 h +4 h +4 h +4 h +1 h +82 h +146 h +4 h +10 h +5761 m +289 h +1 h +5762 m +5763 m +4 h +4 h +4 h +185 h +4 h +4 h +185 h +109 h +124 h +156 h +1 h +59 h +1 h +5764 m +5765 m +1138 m +5766 m +5767 m +31 h +109 h +4 h +92 h +5768 
m +4 h +4 h +83 h +10 h +3 h +74 h +36 h +4 h +31 h +64 h +59 h +1 h +4 h +757 m +1 h +4 h +11 h +4 h +4 h +10 h +1 h +5769 m +1 h +4 h +238 h +5770 m +986 h +4 h +5771 m +12 h +10 h +5772 m +4 h +1650 h +4 h +1 h +1 h +109 h +4 h +10 h +4 h +1127 m +4 h +4 h +110 h +5773 m +5774 m +10 h +65 h +1 h +954 m +1 h +5775 m +5206 m +447 h +4549 m +640 m +4 h +1 h +83 h +123 h +717 h +11 h +1 h +195 h +4 h +82 h +4 h +170 h +1 h +1 h +5776 m +5777 m +4 h +928 m +170 h +13 h +1 h +4 h +124 h +4 h +238 h +2617 m +5778 m +1 h +1 h +10 h +4 h +4 h +1 h +124 h +5779 m +383 h +10 h +10 h +4 h +1 h +4 h +1362 h +5780 m +10 h +869 h +1 h +5781 m +1 h +5542 m +4 h +1 h +1 h +10 h +10 h +4 h +10 h +124 h +10 h +4 h +45 h +1838 m +109 h +1 h +11 h +10 h +1 h +10 h +1 h +4 h +79 h +4 h +10 h +10 h +278 h +4 h +295 h +5782 m +5783 m +1 h +4906 m +4 h +4 h +1 h +1655 m +1 h +11 h +4 h +4 h +1 h +10 h +185 h +272 m +1 h +10 h +109 h +10 h +11 h +1 h +5784 m +1 h +4 h +104 h +1 h +5785 m +5786 m +5787 m +250 h +3212 m +4 h +5788 m +616 m +5789 m +1 h +11 h +10 h +986 h +4 h +10 h +5790 m +5791 m +626 m +36 h +4 h +10 h +10 h +196 h +10 h +5792 m +25 h +1 h +27 h +1470 h +5793 m +56 h +1 h +5794 m +10 h +83 h +4 h +4 h +297 h +10 h +4 h +196 h +5795 m +5796 m +5797 m +82 h +4 h +10 h +4 h +10 h +5798 m +10 h +143 h +1 h +5799 m +1 h +3 h +5800 m +2887 h +31 h +1089 h +59 h +1 h +1 h +1 h +601 h +1 h +4 h +4 h +4 h +10 h +5801 m +1 h +5802 m +10 h +4 h +57 h +4 h +4 h +164 h +59 h +41 h +5803 m +124 h +5804 m +5059 m +403 h +104 h +167 h +4 h +5805 m +1 h +1 h +10 h +119 h +1 h +10 h +4 h +1 h +10 h +4 h +2769 m +10 h +1 h +1 h +1 h +5806 m +5807 m +92 h +10 h +10 h +4 h +4 h +4 h +569 h +13 h +10 h +4 h +2958 m +5808 m +266 h +1 h +1 h +4 h +4 h +10 h +4 h +1 h +5809 m +56 h +1 h +169 h +4 h +4 h +1 h +10 h +5810 m +1 h +4 h +31 h +4 h +1 h +4 h +109 h +1 h +5811 m +1 h +4 h +4106 m +1 h +4 h +5812 m +888 m +4 h +1 h +5813 m +11 h +108 h +4 h +4 h +103 m +10 h +5814 m +464 h +4 h +295 h 
+3 h +5815 m +10 h +170 h +5816 m +5817 m +10 h +386 h +1 h +4 h +83 h +1 h +56 h +1 h +1 h +11 h +4 h +295 h +10 h +4 h +10 h +4 h +1 h +4 h +109 h +1 h +124 h +10 h +1 h +10 h +4 h +1 h +22 h +10 h +5818 m +4780 m +4 h +1 h +10 h +83 h +274 h +5819 m +10 h +10 h +4 h +5820 m +5821 m +5822 m +10 h +10 h +11 h +266 h +4 h +10 h +265 h +158 h +1 h +114 h +55 h +4 h +4 h +10 h +4 h +56 h +65 h +5823 m +1 h +4 h +5824 m +10 h +11 h +190 h +463 m +4 h +1714 m +10 h +1250 h +41 h +4 h +147 h +1 h +5825 m +4 h +4 h +5826 m +4 h +83 h +83 h +10 h +170 h +1 h +25 h +10 h +57 h +10 h +1 h +4 h +5827 m +11 h +5828 m +1 h +5829 m +5830 m +4 h +5831 m +4 h +2883 m +4 h +1 h +59 h +91 h +4 h +10 h +56 h +57 h +5832 m +4 h +4 h +5833 m +1 h +10 h +5834 m +4 h +1 h +4 h +10 h +4 h +82 h +1 h +330 m +4 h +31 h +4 h +4 h +4 h +1 h +1 h +12 h +4 h +5835 m +3 h +11 h +1 h +4 h +4 h +5836 m +173 h +5837 m +1 h +10 h +5838 m +65 h +4 h +5839 m +5840 m +1724 m +4 h +10 h +77 h +4 h +65 h +5841 m +65 h +1 h +1 h +338 h +1886 m +31 h +10 h +1 h +41 h +170 h +10 h +10 h +113 h +278 h +64 h +5842 m +79 h +4 h +1 h +5584 m +4 h +1 h +10 h +74 h +41 h +84 h +5843 m +1 h +1 h +5844 m +1 h +4 h +10 h +2733 h +4 h +939 m +10 h +1 h +83 h +48 h +4 h +692 h +4 h +40 h +1 h +10 h +4 h +25 h +1 h +10 h +1 h +5845 m +10 h +4 h +10 h +4 h +1 h +11 h +266 h +11 h +5846 m +1 h +447 h +1 h +5847 m +464 h +289 h +109 h +59 h +10 h +1 h +3799 m +4 h +10 h +4 h +83 h +11 h +4 h +447 h +4 h +4 h +5848 m +5849 m +1 h +10 h +4 h +79 h +83 h +1 h +33 m +5850 m +11 h +4 h +1 h +4 h +124 h +25 h +10 h +5851 m +4 h +1 h +4 h +278 h +204 h +10 h +4 h +4 h +59 h +911 h +10 h +2172 m +40 h +5852 m +3 h +4 h +5853 m +5854 m +4 h +1 h +10 h +79 h +250 h +164 h +1261 h +5855 m +4 h +4 h +1409 m +4 h +5856 m +147 h +1 h +10 h +5857 m +1 h +1 h +5858 m +368 h +1105 h +1 h +10 h +10 h +1 h +278 h +4 h +79 h +3 h +125 h +10 h +1 h +5859 m +5860 m +1 h +4 h +4933 m +1 h +5861 m +10 h +1 h +10 h +4 h +5862 m +265 h +185 h 
+332 h +556 h +10 h +1 h +82 h +219 m +196 h +1 h +11 h +135 h +4 h +4 h +1 h +1 h +1 h +1 h +172 h +4 h +5863 m +195 h +4 h +10 h +5864 m +4 h +41 h +4 h +1 h +4 h +630 m +125 h +4 h +65 h +4 h +4 h +1 h +1 h +135 h +1 h +1 h +5865 m +1 h +1 h +5141 m +4 h +1 h +5866 m +1 h +10 h +5867 m +10 h +10 h +5868 m +10 h +4 h +5869 m +5870 m +316 m +10 h +1 h +83 h +4 h +123 h +5871 m +10 h +1 h +4564 m +5872 m +146 h +4 h +5873 m +4 h +4 h +4 h +5874 m +1 h +4 h +4 h +5875 m +10 h +1 h +5876 m +4 h +82 h +1 h +4 h +5877 m +4 h +5878 m +4 h +1 h +4 h +4 h +265 h +10 h +82 h +1 h +5879 m +1 h +3 h +1 h +1 h +265 h +74 h +5880 m +4 h +10 h +11 h +10 h +10 h +289 h +4 h +4 h +5881 m +5882 m +10 h +4 h +4 h +4 h +41 h +113 h +156 h +1 h +4 h +5883 m +10 h +5884 m +57 h +4 h +5885 m +258 h +10 h +1 h +5886 m +27 h +1 h +2116 m +4 h +5887 m +5888 m +1 h +1 h +5889 m +434 h +4 h +4 h +1 h +4 h +196 h +5890 m +4 h +794 m +1 h +4 h +5891 m +5892 m +4 h +108 h +5893 m +82 h +2308 m +1 h +1 h +1 h +10 h +1725 m +112 h +5894 m +31 h +196 h +4 h +1894 m +4 h +4 h +5895 m +5896 m +150 m +4 h +146 h +10 h +10 h +4 h +10 h +5897 m +125 h +1 h +5898 m +57 h +192 h +1 h +57 h +5899 m +45 h +1 h +10 h +307 h +5900 m +125 h +258 h +31 h +124 h +10 h +1 h +10 h +1 h +1 h +4 h +368 h +83 h +4 h +692 h +10 h +83 h +10 h +83 h +5901 m +10 h +4 h +10 h +5902 m +195 h +2459 m +4 h +4 h +104 h +1 h +4 h +5903 m +146 h +129 h +10 h +59 h +1790 h +986 h +4 h +5904 m +5905 m +146 h +5906 m +368 h +4 h +1 h +3 h +307 h +110 h +1201 h +3469 m +10 h +10 h +4 h +911 h +1 h +538 h +4 h +4 h +403 h +5907 m +601 h +4 h +4 h +4 h +55 h +5908 m +104 h +1 h +83 h +1 h +13 h +83 h +25 h +4 h +447 h +5909 m +1 h +190 h +135 h +59 h +4 h +124 h +1 h +5910 m +5911 m +143 h +4 h +4 h +4 h +5912 m +1639 m +5913 m +109 h +1 h +143 h +4 h +4 h +1 h +4 h +196 h +5914 m +5915 m +5916 m +1 h +5917 m +5918 m +10 h +10 h +5919 m +2459 m +10 h +4 h +3555 m +11 h +5920 m +1 h +82 h +13 h +4 h +1 h +10 h +1 h +4 h +12 h +5921 
m +5922 m +1955 h +1 h +124 h +1 h +297 h +295 h +13 h +56 h +10 h +11 h +135 h +56 h +5923 m +338 h +1 h +125 h +41 h +46 h +3534 m +11 h +10 h +692 h +5924 m +1 h +5925 m +4 h +82 h +5926 m +10 h +48 h +4 h +4 h +4 h +4 h +10 h +45 h +219 m +5927 m +172 h +164 h +10 h +1 h +4 h +4 h +10 h +10 h +4780 m +4 h +5928 m +10 h +1016 h +4 h +4 h +1 h +10 h +64 h +11 h +59 h +164 h +4 h +601 h +5929 m +10 h +4 h +4441 m +1 h +4 h +1 h +5930 m +79 h +258 h +5931 m +185 h +25 h +1316 m +4 h +1 h +31 h +1796 m +4 h +4 h +94 h +45 h +5932 m +5933 m +4 h +1 h +279 h +1 h +10 h +258 h +29 m +1 h +167 h +1 h +5934 m +55 h +4 h +146 h +10 h +1 h +10 h +5935 m +4 h +687 h +57 h +1 h +5936 m +59 h +10 h +92 h +109 h +5937 m +10 h +11 h +425 h +809 m +1 h +4 h +5938 m +5939 m +83 h +167 h +1016 h +4 h +1 h +4 h +5940 m +5941 m +4 h +185 h +2475 m +4 h +10 h +262 h +4 h +1 h +4 h +147 h +10 h +1650 h +5942 m +5943 m +4 h +358 h +196 h +5944 m +5945 m +13 h +1 h +195 h +109 h +124 h +73 h +1 h +1 h +1 h +4 h +109 h +83 h +4941 m +4 h +11 h +119 h +114 h +4 h +10 h +157 h +538 h +5946 m +5947 m +1 h +1 h +1 h +5948 m +4 h +5949 m +4 h +2844 m +1780 h +10 h +1 h +4 h +106 h +5950 m +10 h +4 h +5951 m +185 h +1 h +5952 m +4 h +4 h +4 h +5953 m +36 h +4805 m +172 h +4 h +330 m +238 h +4 h +4 h +4 h +626 m +5954 m +10 h +4 h +5955 m +2719 m +4 h +10 h +4 h +5956 m +5944 m +4 h +3 h +4 h +4 h +10 h +4 h +4 h +238 h +4 h +4 h +109 h +164 h +383 h +4 h +5957 m +1722 m +5958 m +5959 m +12 h +383 h +97 h +31 h +11 h +5960 m +1 h +986 h +109 h +10 h +649 h +912 m +4 h +4 h +4 h +10 h +11 h +4 h +4503 m +1 h +4 h +1 h +164 h +140 h +55 h +5961 m +1 h +11 h +4 h +757 m +82 h +10 h +1039 m +5962 m +4 h +4 h +5963 m +1 h +10 h +5964 m +13 h +10 h +1 h +11 h +1 h +1 h +10 h +10 h +5965 m +5966 m +4 h +59 h +10 h +10 h +4 h +5967 m +4 h +25 h +1595 m +4 h +278 h +4 h +1619 m +124 h +4 h +10 h +4 h +10 h +11 h +434 h +4 h +1053 h +276 h +5968 m +10 h +74 h +59 h +4 h +2022 m +4 h +10 h +1 h +5969 m 
+31 h +10 h +4 h +1 h +11 h +4 h +614 m +1 h +4 h +4 h +83 h +4 h +278 h +83 h +5970 m +4 h +13 h +124 h +41 h +4 h +4 h +10 h +1 h +59 h +4 h +4 h +640 h +1 h +124 h +4 h +1 h +10 h +5971 m +2887 h +459 m +5972 m +5973 m +4 h +5974 m +65 h +1 h +4 h +5975 m +10 h +11 h +1 h +10 h +4 h +157 h +5976 m +10 h +5977 m +5978 m +59 h +279 h +156 h +359 h +144 h +4 h +1 h +8 h +77 h +1 h +1 h +1 h +1 h +1 h +4 h +5979 m +1105 h +1 h +11 h +1 h +59 h +1 h +1 h +208 m +1 h +112 h +1 h +94 h +10 h +1 h +41 h +4 h +5980 m +5981 m +10 h +1 h +195 h +4 h +5982 m +4 h +4 h +1 h +1137 h +5983 m +10 h +5984 m +185 h +1 h +4 h +5985 m +5986 m +5987 m +5988 m +82 h +114 h +7 m +169 h +1 h +10 h +1 h +5989 m +55 h +73 h +5990 m +250 h +10 h +1197 m +10 h +36 h +383 h +119 h +4 h +167 h +4 h +10 h +55 h +4 h +124 h +83 h +65 h +4 h +10 h +4 h +4 h +4 h +4 h +8 h +4810 m +109 h +1 h +4 h +1137 h +1 h +5991 m +1 h +91 h +10 h +170 h +5992 m +5993 m +169 h +1 h +10 h +4 h +368 h +3 h +5994 m +4 h +5464 m +1 h +1 h +5995 m +1 h +56 h +104 h +5996 m +4 h +1 h +73 h +4 h +258 h +4 h +1 h +1 h +10 h +10 h +1 h +1 h +258 h +10 h +41 h +83 h +1 h +1 h +10 h +274 h +4 h +718 h +11 h +10 h +224 h +1 h +83 h +10 h +4 h +190 h +5997 m +143 h +5980 m +2148 m +1 h +4 h +5998 m +4 h +4 h +5999 m +238 h +2794 m +6000 m +219 h +1 h +79 h +368 h +10 h +1 h +4 h +22 h +4 h +11 h +6001 m +1822 h +109 h +108 h +12 h +939 h +6002 m +4 h +167 h +6003 m +4 h +250 h +2719 m +256 h +1 h +1 h +57 h +1260 m +140 h +307 h +6004 m +1 h +4 h +11 h +195 h +1 h +83 h +6005 m +5422 m +4 h +1 h +1 h +6006 m +4 h +6007 m +59 h +6008 m +4 h +4 h +6009 m +190 h +4 h +79 h +4 h +11 h +195 h +10 h +6010 m +4 h +4 h +4 h +4 h +4297 m +2475 m +1 h +4 h +4 h +11 h +31 h +4 h +1 h +1 h +109 h +195 h +2788 m +10 h +1 h +6011 m +11 h +4 h +6012 m +1 h +1 h +4 h +4 h +6013 m +4 h +1 h +4 h +6014 m +6015 m +1 h +4 h +48 h +6016 m +10 h +6017 m +4 h +6018 m +4 h +158 h +1 h +10 h +4 h +4 h +11 h +1 h +4 h +40 h +195 h +4 h +6019 m +1 
h +800 m +4 h +1 h +4 h +1 h +1 h +6020 m +6021 m +112 h +1 h +10 h +181 h +6022 m +11 h +10 h +11 h +1403 h +4 h +156 h +1 h +6023 m +204 h +1 h +4 h +10 h +6024 m +4 h +6025 m +12 h +57 h +6026 m +1 h +10 h +82 h +45 h +4 h +6027 m +109 h +10 h +4 h +44 m +10 h +1 h +2038 m +11 h +6028 m +1454 m +4 h +10 h +4 h +1123 m +4 h +4 h +1470 h +1 h +1 h +6029 m +1790 h +1 h +6030 m +10 h +109 h +443 h +1 h +1027 h +3847 m +31 h +185 h +4 h +4 h +6031 m +113 h +4 h +4 h +11 h +6032 m +435 m +1 h +6033 m +10 h +1 h +46 h +1 h +1 h +4 h +1 h +295 h +3558 m +10 h +6034 m +1 h +6035 m +4 h +1 h +307 h +1127 m +4 h +1 h +1 h +1 h +4 h +45 h +4 h +83 h +278 h +383 h +6036 m +4 h +6037 m +6038 m +6039 m +10 h +10 h +6040 m +1 h +1 h +279 h +4 h +1 h +156 h +4 h +4 h +11 h +1 h +6041 m +3214 m +97 h +119 h +1105 h +4 h +4 h +1 h +4 h +6042 m +4 h +10 h +4 h +59 h +4 h +147 h +22 h +13 h +10 h +4 h +4 h +10 h +6043 m +119 h +4 h +1 h +6044 m +381 m +4 h +10 h +1 h +1 h +6045 m +4 h +6046 m +109 h +10 h +4 h +289 h +6047 m +11 h +4 h +6048 m +82 h +4 h +1 h +6049 m +40 h +6050 m +307 h +266 h +6051 m +10 h +1 h +10 h +6052 m +6053 m +169 h +1780 h +2520 m +694 m +4 h +6054 m +6055 m +10 h +1 h +4 h +278 h +4 h +10 h +13 h +143 h +2041 m +6056 m +31 h +4 h +4498 m +55 h +1403 h +10 h +1 h +2484 m +6057 m +6058 m +4 h +10 h +104 h +1 h +1 h +1 h +4 h +1 h +10 h +2840 m +6059 m +6060 m +11 h +4 h +1 h +2887 h +6061 m +1 h +124 h +10 h +4 h +4 h +4 h +10 h +1677 m +4 h +4 h +10 h +10 h +6062 m +10 h +6063 m +6064 m +10 h +4 h +6065 m +1 h +6066 m +802 m +6067 m +4 h +104 h +6068 m +6069 m +6070 m +1105 h +56 h +6071 m +6072 m +6073 m +4 h +371 h +6074 m +4 h +6075 m +4 h +4 h +6076 m +1 h +10 h +6077 m +6078 m +6079 m +1 h +6080 m +1261 h +6081 m +64 h +4 h +6082 m +12 h +4 h +4 h +109 h +75 m +224 h +1 h +10 h +12 h +4 h +278 h +82 h +278 h +4 h +108 h +4 h +6083 m +59 h +332 h +6084 m +4 h +82 h +10 h +11 h +4 h +10 h +6085 m +857 m +4 h +6086 m +4 h +4 h +6087 m +82 h +4 h +1 h +1 
h +4 h +4596 m +265 h +4 h +184 h +75 m +1 h +4 h +10 h +11 h +1 h +6088 m +6089 m +10 h +10 h +4 h +4 h +6090 m +10 h +104 h +41 h +6091 m +10 h +13 h +57 h +4 h +4 h +10 h +258 h +82 h +6092 m +4 h +4 h +6093 m +4 h +1359 h +1 h +4 h +6094 m +4 h +6095 m +1 h +1619 m +57 h +1 h +6096 m +4 h +6097 m +1024 m +4 h +569 h +4 h +1 h +10 h +6098 m +4 h +6099 m +757 m +36 h +4 h +258 h +4 h +1 h +386 h +570 h +110 h +4 h +1 h +4 h +1 h +170 h +124 h +1 h +4 h +6100 m +79 h +10 h +169 h +4 h +6101 m +8 h +57 h +4 h +1 h +4 h +1766 h +6102 m +12 h +6103 m +6104 m +10 h +4 h +1 h +114 h +6105 m +5809 m +6106 m +4 h +4 h +1250 h +4 h +97 h +4 h +4 h +4 h +4 h +124 h +11 h +57 h +174 h +10 h +1 h +601 h +12 h +1 h +4 h +10 h +6107 m +135 h +10 h +6108 m +258 h +6109 m +10 h +976 h +41 h +250 h +4 h +41 h +10 h +4 h +4 h +1 h +10 h +6110 m +73 h +6111 m +4 h +10 h +6112 m +83 h +4 h +109 h +1 h +1 h +4 h +1 h +278 h +82 h +10 h +4 h +6113 m +109 h +6114 m +4 h +1 h +1 h +25 h +6115 m +238 h +1 h +10 h +10 h +6116 m +986 h +25 h +4 h +11 h +1 h +116 m +109 h +1 h +1 h +1 h +22 h +4 h +64 h +6117 m +1 h +4 h +6118 m +10 h +25 h +10 h +4 h +10 h +11 h +4 h +10 h +124 h +1952 m +4 h +41 h +3115 m +1 h +6119 m +430 m +4 h +272 m +4 h +6120 m +6121 m +1 h +79 h +4 h +147 h +10 h +1 h +6122 m +1030 h +6123 m +31 h +1 h +1 h +10 h +6124 m +11 h +1 h +10 h +143 h +146 h +41 h +1 h +10 h +4 h +4 h +1 h +6125 m +4 h +238 h +11 h +4 h +124 h +4 h +4 h +1 h +10 h +1 h +4 h +170 h +6126 m +1 h +6127 m +1 h +4 h +10 h +4 h +77 h +4 h +8 h +4 h +6128 m +4 h +10 h +10 h +4 h +307 h +23 h +172 h +448 m +2158 m +146 h +1389 h +10 h +10 h +6129 m +4 h +125 h +6130 m +4 h +1 h +700 m +11 h +4 h +2591 m +10 h +6131 m +6132 m +4 h +1 h +10 h +6133 m +4 h +10 h +4 h +10 h +57 h +1 h +4 h +6134 m +57 h +10 h +6135 m +6136 m +1 h +3 h +10 h +94 h +266 h +4 h +4 h +10 h +1 h +1 h +11 h +6137 m +6138 m +1 h +10 h +10 h +10 h +4 h +1 h +1 h +4 h +4 h +10 h +6139 m +6140 m +146 h +6141 m +1137 h +10 h +4 
h +1089 h +4 h +4 h +1 h +6142 m +1 h +6143 m +1822 h +10 h +203 m +435 m +4 h +6144 m +11 h +112 h +4 h +124 h +6145 m +1 h +986 h +10 h +1 h +10 h +6146 m +1 h +238 h +31 h +146 h +10 h +10 h +31 h +4 h +41 h +10 h +65 h +1 h +6147 m +73 h +4 h +6148 m +109 h +82 h +109 h +6149 m +41 h +4 h +4 h +1 h +10 h +6150 m +1 h +10 h +11 h +6151 m +4 h +6152 m +109 h +4 h +6153 m +185 h +1 h +25 h +4 h +4 h +10 h +1 h +108 h +104 h +278 h +6154 m +6155 m +1 h +1 h +4 h +6156 m +1403 h +6157 m +3 h +1 h +10 h +6132 m +1 h +1 h +169 h +4 h +4 h +297 h +31 h +1 h +1 h +4 h +614 m +6158 m +295 h +41 h +1 h +124 h +12 h +2887 h +4 h +10 h +1 h +6159 m +4 h +6160 m +6161 m +4359 m +1 h +6162 m +10 h +6163 m +332 h +10 h +10 h +4 h +6164 m +4 h +4 h +6165 m +109 h +4 h +10 h +10 h +195 h +6166 m +4 h +1 h +538 h +1 h +4 h +4 h +1089 h +10 h +10 h +6167 m +1 h +3 h +4 h +1128 m +1 h +4 h +1 h +12 h +79 h +1 h +6168 m +94 h +10 h +4520 m +6169 m +6170 m +1 h +1357 m +1100 m +4 h +6171 m +4 h +6172 m +11 h +196 h +1 h +11 h +4 h +10 h +4 h +359 h +6173 m +6174 m +6175 m +332 h +1 h +1 h +4 h +10 h +6176 m +1 h +55 h +1 h +6177 m +6178 m +1 h +82 h +10 h +6179 m +6180 m +1 h +1 h +1 h +371 h +4 h +4 h +6181 m +8 h +10 h +6182 m +1 h +4 h +6183 m +4 h +6184 m +4 h +6185 m +4 h +57 h +4 h +2374 m +6186 m +1 h +10 h +6187 m +3321 m +10 h +10 h +6188 m +1 h +1 h +6189 m +368 h +10 h +272 m +6190 m +97 h +307 h +6191 m +10 h +4 h +4 h +10 h +25 h +11 h +4 h +4 h +4 h +403 h +4 h +6192 m +10 h +538 h +1 h +6193 m +1 h +4 h +10 h +10 h +181 h +82 h +6194 m +4 h +4 h +338 h +112 h +31 h +4 h +10 h +6195 m +1 h +1 h +104 h +4 h +359 h +1 h +4 h +11 h +1 h +6196 m +10 h +266 h +3 h +57 h +1 h +1 h +6197 m +4 h +4 h +135 h +10 h +4 h +4 h +41 h +31 h +6198 m +6199 m +1 h +4 h +1 h +6200 m +10 h +1 h +10 h +1524 m +6201 m +935 h +6202 m +79 h +139 h +10 h +10 h +186 h +1 h +4 h +92 h +1 h +1 h +97 h +1218 m +6203 m +1 h +1 h +59 h +4 h +4 h +10 h +1 h +4 h +1 h +48 h +57 h +138 h +4 h +83 h +1 
h +6204 m +6205 m +4 h +6206 m +575 h +11 h +1 h +4 h +4 h +6207 m +270 h +4 h +11 h +6208 m +4 h +10 h +4 h +11 h +4 h +83 h +109 h +258 h +4 h +6209 m +1 h +41 h +65 h +6210 m +146 h +4 h +11 h +31 h +1 h +6211 m +4 h +6212 m +1 h +1 h +820 h +10 h +388 m +4 h +4 h +4 h +276 h +1070 m +6213 m +4 h +250 h +1 h +1 h +4 h +10 h +124 h +155 m +6214 m +1 h +4 h +36 h +6215 m +6216 m +4 h +146 h +6217 m +6218 m +10 h +10 h +569 h +1 h +185 h +6219 m +10 h +1 h +104 h +59 h +6220 m +1016 h +6221 m +489 m +65 h +1 h +125 h +4 h +10 h +4 h +4 h +82 h +77 h +4 h +1 h +192 h +10 h +266 h +6222 m +124 h +1 h +1772 h +4 h +11 h +4 h +6223 m +4 h +36 h +1 h +59 h +1 h +6224 m +1 h +1 h +3 h +172 h +6225 m +4 h +4 h +2374 m +124 h +10 h +224 h +60 m +4 h +10 h +25 h +195 h +196 h +677 m +1 h +1 h +6226 m +6227 m +1642 h +1642 h +1 h +109 h +6228 m +6229 m +1 h +11 h +4 h +6230 m +12 h +6231 m +6232 m +146 h +1 h +10 h +4 h +1 h +1 h +10 h +25 h +10 h +1337 m +10 h +56 h +1 h +1 h +1 h +3 h +5348 m +11 h +6233 m +3 h +123 h +11 h +10 h +478 m +403 h +1 h +6234 m +6235 m +4 h +4 h +6236 m +4 h +11 h +4 h +10 h +57 h +6237 m +12 h +6238 m +4 h +4 h +1 h +6239 m +4 h +295 h +196 h +4 h +74 h +1 h +4 h +1 h +10 h +56 h +1 h +138 h +4 h +4 h +976 h +10 h +6240 m +6241 m +6242 m +1 h +1 h +10 h +1 h +59 h +4 h +45 h +1 h +368 h +4 h +10 h +25 h +4 h +6243 m +172 h +79 h +6244 m +649 h +10 h +1 h +204 h +4 h +928 h +10 h +83 h +45 h +6245 m +1 h +4 h +94 h +4 h +10 h +3 h +45 h +83 h +59 h +4 h +6246 m +3341 m +10 h +6247 m +4 h +124 h +31 h +6248 m +307 h +1 h +6249 m +6250 m +4 h +4 h +6251 m +1 h +4 h +4 h +6252 m +6253 m +10 h +113 h +238 h +6254 m +186 h +74 h +74 h +4 h +6255 m +1 h +82 h +155 m +2733 h +10 h +6256 m +6257 m +4 h +1 h +1 h +10 h +22 h +146 h +1 h +1893 m +124 h +4 h +1 h +1 h +10 h +104 h +6258 m +10 h +4 h +464 h +1619 h +6259 m +316 m +10 h +1 h +4 h +55 h +6260 m +6261 m +1 h +1 h +6262 m +4 h +6263 m +4 h +4 h +4 h +4 h +2069 m +4 h +11 h +1 h +4 h +1 h +6264 
m +92 h +4 h +10 h +258 h +57 h +4 h +10 h +4 h +4 h +4 h +124 h +4 h +109 h +79 h +46 h +6265 m +4 h +368 h +614 m +6266 m +4 h +6267 m +4 h +575 h +1 h +1 h +3 h +11 h +56 h +1 h +31 h +6268 m +10 h +6269 m +10 h +1772 h +6270 m +4 h +22 h +1 h +1105 h +4576 m +6271 m +1 h +1 h +123 h +73 h +1 h +1 h +65 h +6272 m +4 h +1 h +351 m +4 h +6273 m +4 h +4 h +1453 m +6274 m +6275 m +4 h +4 h +190 h +358 h +28 h +94 h +1 h +1 h +48 h +4 h +22 h +6276 m +23 h +10 h +4 h +214 m +73 h +6277 m +4 h +1 h +1 h +10 h +4 h +4111 m +6278 m +10 h +4 h +6279 m +1 h +4 h +1 h +1 h +6280 m +10 h +4538 m +1 h +6281 m +1 h +135 h +6282 m +11 h +266 h +10 h +4 h +10 h +6283 m +4 h +11 h +10 h +147 h +935 h +4 h +4 h +1 h +1 h +4 h +4 h +6284 m +1 h +1 h +1 h +316 m +4 h +11 h +6285 m +3 h +4 h +4 h +1 h +6286 m +1359 h +6287 m +83 h +181 h +4 h +4 h +10 h +4 h +10 h +4 h +1 h +6288 m +4 h +6289 m +2308 m +2769 m +1 h +10 h +31 h +59 h +1 h +8 h +265 h +6290 m +10 h +1 h +6291 m +146 h +1 h +97 h +1 h +1 h +6292 m +97 h +1250 h +57 h +1 h +135 h +113 h +6293 m +4 h +119 h +1 h +124 h +158 h +6294 m +4 h +11 h +10 h +4 h +2719 h +1 h +196 h +4 h +10 h +1074 m +6295 m +6296 m +4 h +4 h +190 h +1 h +1 h +3 h +4626 m +41 h +172 h +1 h +1 h +82 h +6297 m +1 h +73 h +1 h +6298 m +1 h +4 h +4 h +108 h +1 h +25 h +10 h +184 h +4 h +167 h +11 h +10 h +109 h +4 h +488 m +1 h +97 h +57 h +238 h +6299 m +939 h +4 h +1 h +10 h +11 h +4 h +1 h +4 h +1 h +6300 m +1 h +1 h +10 h +3 h +6301 m +74 h +6302 m +1 h +1 h +10 h +6303 m +6304 m +1454 m +3 h +6305 m +488 h +4 h +6306 m +400 m +4 h +114 h +73 h +73 h +83 h +4 h +6307 m +1 h +1 h +1 h +229 h +4 h +196 h +57 h +4 h +4 h +1 h +6308 m +1 h +1 h +3 h +536 h +6309 m +626 m +10 h +10 h +196 h +1 h +4 h +57 h +4 h +1 h +41 h +11 h +6310 m +4 h +1 h +11 h +59 h +4 h +601 h +1 h +4 h +6311 m +10 h +6312 m +146 h +1 h +6313 m +1 h +119 h +82 h +6314 m +10 h +4 h +4 h +1 h +6315 m +1 h +1 h +167 h +6316 m +146 h +3558 m +25 h +3 h +124 h +10 h +4 h +1 h 
+3111 m +563 m +4 h +6317 m +4 h +10 h +591 m +91 h +12 h +1 h +31 h +4 h +1 h +4 h +4 h +4 h +55 h +1 h +4 h +41 h +11 h +109 h +1 h +1 h +4 h +6318 m +1 h +4 h +10 h +4 h +77 h +4 h +4 h +4 h +1 h +6319 m +1 h +4 h +4 h +3834 m +5765 m +57 h +4 h +157 h +399 h +91 h +10 h +169 h +10 h +57 h +124 h +10 h +4 h +4 h +73 h +172 h +124 h +93 h +104 h +2172 m +6320 m +1074 m +1 h +25 h +109 h +1 h +10 h +6321 m +897 m +1 h +6322 m +1096 m +4 h +322 m +40 h +82 h +6323 m +6324 m +4 h +4 h +4 h +135 h +56 h +4 h +4 h +536 h +10 h +1 h +6325 m +125 h +1 h +6326 m +1 h +124 h +1 h +11 h +1 h +4 h +64 h +6327 m +109 h +1 h +11 h +10 h +10 h +6328 m +4 h +4 h +1 h +1 h +4 h +4 h +25 h +140 h +4 h +1 h +10 h +109 h +109 h +82 h +6329 m +10 h +10 h +6330 m +1089 h +1 h +6331 m +4 h +6332 m +4151 m +6333 m +1 h +6334 m +4 h +56 h +40 h +10 h +10 h +4 h +185 h +1 h +104 h +10 h +4 h +1 h +1548 m +109 h +332 h +83 h +4 h +368 h +59 h +6335 m +1 h +104 h +1 h +4 h +278 h +5917 m +6336 m +12 h +10 h +119 h +1 h +10 h +10 h +6337 m +135 h +4 h +70 m +4 h +10 h +412 m +1 h +6338 m +10 h +6339 m +6340 m +4 h +6341 m +4 h +82 h +6342 m +3 h +10 h +146 h +6343 m +4 h +575 h +10 h +6344 m +1655 m +57 h +195 h +4 h +4 h +1 h +64 h +601 h +6345 m +1 h +97 h +1 h +1 h +55 h +1 h +10 h +6346 m +31 h +6347 m +8 h +1 h +11 h +10 h +10 h +94 h +156 h +6348 m +459 m +4 h +4 h +1677 m +6349 m +11 h +1 h +1 h +1 h +6350 m +4 h +6351 m +11 h +57 h +4 h +4 h +1 h +41 h +4 h +1 h +1 h +10 h +1 h +10 h +6352 m +1 h +10 h +57 h +10 h +4 h +1 h +10 h +10 h +13 h +55 h +3562 m +4 h +1 h +10 h +6353 m +6354 m +91 h +3 h +258 h +10 h +4 h +59 h +10 h +10 h +4 h +124 h +147 h +368 h +1 h +10 h +6355 m +6356 m +4 h +25 h +1 h +4 h +4 h +4 h +3068 m +1 h +4 h +4 h +4 h +4 h +4 h +6357 m +104 h +718 h +1 h +10 h +3 h +4 h +4 h +1 h +6358 m +4 h +6359 m +10 h +6360 m +113 h +1 h +6361 m +6362 m +25 h +1 h +1 h +1 h +10 h +4 h +10 h +6363 m +575 h +4 h +4 h +48 h +6364 m +1 h +3 h +10 h +6365 m +1 h +124 h +59 h 
+692 h +4 h +443 h +6366 m +4 h +91 h +6367 m +41 h +195 h +2261 m +4 h +4 h +1 h +10 h +6368 m +1 h +4 h +4 h +4 h +104 h +83 h +6369 m +10 h +3779 m +4 h +1362 h +1 h +82 h +4 h +3 h +6370 m +91 h +6371 m +1822 h +6372 m +125 h +6373 m +4 h +1 h +25 h +6374 m +238 h +123 h +57 h +2887 h +6375 m +104 h +10 h +10 h +10 h +1 h +4 h +1 h +2379 m +82 h +10 h +1 h +6376 m +603 m +31 h +4 h +4 h +4 h +3025 m +3141 m +3 h +10 h +10 h +157 h +6377 m +6378 m +1 h +13 h +27 h +1 h +41 h +10 h +6379 m +6380 m +1 h +270 h +11 h +4 h +4 h +4 h +6381 m +73 h +10 h +10 h +10 h +6382 m +10 h +307 h +4 h +6383 m +10 h +1 h +6384 m +65 h +918 m +4 h +383 h +125 h +10 h +6385 m +808 m +25 h +4 h +4 h +4 h +939 h +6386 m +83 h +184 h +4 h +10 h +1296 m +1 h +1261 h +1 h +10 h +97 h +10 h +6387 m +687 h +4 h +135 h +4 h +5475 m +65 h +6388 m +1185 m +4 h +6389 m +10 h +1 h +538 h +1 h +5070 m +10 h +4 h +4 h +1 h +4 h +6390 m +1 h +4 h +6391 m +10 h +4 h +57 h +6392 m +10 h +10 h +2146 m +1 h +65 h +371 h +10 h +195 h +1 h +59 h +4 h +3558 m +170 h +4 h +6393 m +4 h +4 h +31 h +1 h +6394 m +22 h +1 h +4 h +4 h +4 h +109 h +10 h +4 h +41 h +4 h +3 h +4 h +297 h +4 h +990 m +25 h +4 h +4 h +4 h +359 h +83 h +4 h +45 h +41 h +1 h +10 h +59 h +1 h +4 h +11 h +6395 m +6396 m +4 h +147 h +4 h +6397 m +4 h +104 h +4 h +83 h +4 h +3383 m +6398 m +10 h +124 h +1370 m +4 h +276 h +97 h +1 h +10 h +238 h +4 h +6399 m +4 h +4 h +6400 m +687 h +4 h +1 h +4 h +82 h +55 h +31 h +6401 m +10 h +4 h +6402 m +64 h +65 h +6403 m +6404 m +4 h +11 h +6405 m +6406 m +6407 m +10 h +4 h +4 h +4 h +10 h +1 h +6408 m +1 h +1 h +4 h +1 h +4 h +6409 m +6410 m +10 h +10 h +1 h +1 h +31 h +82 h +3278 m +486 m +6411 m +4 h +10 h +4 h +11 h +55 h +1 h +10 h +295 h +6412 m +6413 m +4 h +443 h +4 h +1 h +4 h +6414 m +276 h +10 h +31 h +190 h +1 h +4 h +1 h +6415 m +10 h +4 h +1 h +640 h +4 h +4 h +57 h +1 h +6416 m +4 h +185 h +6417 m +4 h +82 h +1 h +2288 m +1 h +82 h +6418 m +4 h +125 h +6419 m +4 h +57 h +125 h +4 h 
+443 h +146 h +5 m +4 h +12 h +10 h +6420 m +6421 m +1027 h +4 h +6422 m +10 h +4 h +1 h +31 h +4 h +1 h +1 h +6423 m +10 h +4 h +4 h +112 h +6424 m +1 h +6425 m +11 h +1 h +656 m +10 h +1 h +230 h +4 h +6426 m +6427 m +1 h +125 h +10 h +65 h +56 h +1 h +10 h +1 h +109 h +6428 m +1 h +10 h +59 h +1 h +1 h +6429 m +4 h +1 h +13 h +4 h +91 h +119 h +10 h +41 h +41 h +13 h +1685 h +6430 m +2339 m +1 h +4 h +10 h +6431 m +386 h +1 h +79 h +135 h +59 h +6432 m +3435 m +6433 m +6434 m +1 h +4 h +6435 m +6436 m +6437 m +6438 m +1 h +6439 m +6440 m +57 h +4 h +1 h +1 h +57 h +57 h +4 h +332 h +6441 m +4 h +64 h +119 h +493 m +6442 m +6443 m +4 h +82 h +1 h +4 h +12 h +1 h +10 h +156 h +805 m +569 h +4 h +104 h +10 h +6444 m +6445 m +4 h +1 h +10 h +6446 m +10 h +11 h +10 h +4 h +87 m +6447 m +6448 m +258 h +4 h +11 h +330 h +4 h +10 h +1 h +82 h +4 h +11 h +1 h +10 h +4 h +4 h +4 h +4 h +4 h +4 h +6449 m +4 h +6450 m +6451 m +6452 m +6399 m +6453 m +11 h +109 h +4 h +10 h +1685 h +4 h +1 h +4 h +1 h +4 h +4 h +4 h +6454 m +6455 m +10 h +1 h +4 h +41 h +4 h +6456 m +1 h +10 h +488 h +10 h +4 h +10 h +73 h +4 h +4914 m +1 h +10 h +11 h +6457 m +737 m +11 h +69 m +4 h +6458 m +1 h +12 h +6459 m +12 h +1 h +4 h +1 h +4 h +6460 m +1 h +10 h +1 h +6461 m +976 h +146 h +10 h +57 h +10 h +11 h +4 h +170 h +1 h +184 h +6462 m +10 h +4 h +4 h +4 h +10 h +114 h +10 h +477 m +4 h +4 h +11 h +6463 m +74 h +64 h +322 h +3561 m +1 h +27 h +403 h +93 h +10 h +83 h +4 h +4 h +6464 m +11 h +82 h +4 h +6465 m +6466 m +4 h +4229 m +1 h +6467 m +10 h +124 h +55 h +224 h +10 h +79 h +6468 m +1 h +36 h +1 h +6469 m +1 h +10 h +4 h +10 h +124 h +6470 m +1 h +6471 m +4 h +1 h +4 h +6472 m +368 h +297 h +4 h +4 h +150 m +1 h +1541 m +6473 m +6474 m +4 h +4 h +1 h +229 h +6475 m +114 h +1886 m +4 h +10 h +4 h +31 h +10 h +82 h +6476 m +6477 m +94 h +4858 m +1 h +11 h +1 h +13 h +6105 m +6478 m +6479 m +158 h +4 h +10 h +4 h +250 h +1 h +4 h +1 h +4 h +4 h +4 h +1 h +358 h +4 h +4 h +82 h +83 h +41 h 
+83 h +4 h +4 h +25 h +124 h +138 h +4 h +448 m +1 h +575 h +1 h +6480 m +1 h +104 h +10 h +1 h +4 h +55 h +6481 m +6482 m +6483 m +10 h +25 h +4 h +307 h +6484 m +4 h +1761 m +6485 m +6486 m +1 h +57 h +443 h +10 h +4 h +172 h +10 h +10 h +143 h +10 h +10 h +4 h +4 h +6487 m +10 h +124 h +1 h +6488 m +4 h +1 h +258 h +10 h +10 h +6489 m +1 h +6490 m +3 h +17 m +97 h +4 h +6491 m +1 h +1 h +1 h +258 h +4 h +1 h +857 m +55 h +6492 m +4 h +124 h +1 h +93 h +104 h +6493 m +6494 m +229 h +5478 m +4 h +31 h +104 h +10 h +156 h +1 h +10 h +1 h +181 h +83 h +4 h +4 h +3 h +6495 m +41 h +146 h +601 h +6496 m +1 h +4 h +6497 m +6498 m +6499 m +6500 m +6501 m +82 h +10 h +4 h +4 h +1 h +6502 m +10 h +11 h +10 h +6503 m +181 h +4 h +4 h +109 h +4 h +41 h +6504 m +4 h +4 h +94 h +59 h +6505 m +1619 h +167 h +228 m +1 h +6506 m +1 h +4 h +6507 m +4 h +6508 m +4 h +1751 m +4 h +6509 m +1 h +6423 m +4 h +4 h +1260 m +11 h +6510 m +10 h +4 h +6511 m +124 h +4 h +10 h +10 h +4 h +6512 m +1 h +3170 m +4 h +12 h +1 h +112 h +4 h +41 h +1 h +6513 m +412 m +73 h +6514 m +1 h +1 h +1 h +6515 m +10 h +10 h +1 h +92 h +10 h +4 h +6516 m +125 h +11 h +6517 m +10 h +4 h +4 h +114 h +82 h +4 h +4 h +4 h +10 h +4 h +3396 m +10 h +1 h +2163 m +6518 m +1 h +4 h +10 h +4 h +6519 m +10 h +1 h +4 h +4 h +1 h +1 h +4 h +3680 m +1 h +4 h +6520 m +114 h +4 h +65 h +10 h +82 h +10 h +82 h +1 h +1 h +6521 m +4 h +59 h +536 h +10 h +6522 m +737 m +1 h +230 h +4 h +1 h +4 h +6523 m +10 h +10 h +1089 h +109 h +6524 m +6525 m +10 h +1548 m +802 m +1 h +4 h +4 h +36 h +4 h +10 h +1 h +4 h +6526 m +146 h +10 h +6527 m +124 h +1 h +4 h +1 h +299 h +1 h +6528 m +4 h +1 h +10 h +82 h +83 h +31 h +11 h +6529 m +1 h +386 h +6530 m +1 h +1322 m +10 h +4 h +6531 m +83 h +6532 m +6533 m +359 h +1 h +4 h +82 h +10 h +6534 m +41 h +10 h +4 h +10 h +4 h +1 h +83 h +2442 m +59 h +4 h +6535 m +6536 m +914 m +82 h +1 h +6537 m +1 h +4 h +109 h +1 h +6538 m +59 h +4 h +4 h +1 h +146 h +1788 m +6539 m +6540 m +10 h +156 h 
+1 h +1 h +77 h +10 h +22 h +1 h +4 h +4 h +6541 m +1 h +1 h +6542 m +1 h +6543 m +10 h +69 h +6544 m +147 h +1027 h +4 h +1 h +92 h +4 h +6545 m +4 h +6546 m +1 h +10 h +6547 m +195 h +1 h +4 h +6548 m +297 h +1 h +59 h +124 h +4 h +10 h +10 h +4 h +1 h +10 h +6549 m +1 h +11 h +6550 m +4 h +4 h +59 h +6551 m +829 m +59 h +1 h +6552 m +185 h +10 h +10 h +757 h +1 h +6553 m +4 h +3 h +4 h +640 h +59 h +73 h +4 h +83 h +1 h +10 h +147 h +6554 m +12 h +1 h +1 h +1 h +143 h +4 h +4 h +4 h +10 h +358 h +6555 m +4 h +1 h +6556 m +1 h +125 h +147 h +6557 m +1 h +447 h +124 h +1 h +6558 m +10 h +10 h +6559 m +6560 m +92 h +6561 m +5944 h +170 h +4 h +4 h +629 m +1 h +1 h +10 h +74 h +4 h +104 h +104 h +10 h +6562 m +57 h +4 h +28 h +6563 m +265 h +4 h +6564 m +41 h +6565 m +4 h +124 h +265 h +184 h +125 h +1 h +6566 m +6567 m +1 h +82 h +1 h +4 h +4 h +129 h +1137 h +4 h +5581 m +6568 m +1 h +4 h +6569 m +6570 m +56 h +1 h +6571 m +4 h +4 h +10 h +10 h +13 h +4 h +129 h +6572 m +109 h +4 h +1201 h +10 h +4 h +195 h +173 h +10 h +10 h +1137 h +164 h +27 h +125 h +10 h +1 h +5125 m +1 h +10 h +4 h +1 h +4 h +6573 m +4 h +4 h +185 h +1 h +1 h +1650 h +1137 h +83 h +6574 m +118 h +1 h +4 h +278 h +6575 m +124 h +1 h +45 h +4 h +1 h +1 h +10 h +6576 m +4 h +25 h +4 h +1137 h +4 h +1 h +82 h +656 m +6577 m +10 h +6578 m +1 h +12 h +2116 m +109 h +4 h +3 h +4 h +4 h +4 h +6579 m +6580 m +4 h +10 h +4 h +6581 m +169 h +6582 m +1632 m +276 h +1 h +1 h +192 h +3 h +6583 m +1 h +1 h +2184 m +1 h +170 h +6584 m +4 h +6585 m +1 h +4 h +1 h +4 h +31 h +2891 m +41 h +94 h +97 h +4 h +4 h +4 h +10 h +124 h +6586 m +4 h +4 h +1 h +4 h +10 h +4 h +57 h +692 h +4 h +10 h +10 h +6587 m +45 h +4 h +358 h +10 h +124 h +4 h +6588 m +1 h +4 h +74 h +6589 m +6590 m +6591 m +11 h +129 h +4 h +1250 h +4 h +82 h +6592 m +229 h +4 h +6593 m +4 h +1 h +10 h +6594 m +25 h +10 h +4 h +10 h +104 h +1 h +6595 m +10 h +6596 m +6597 m +10 h +4 h +10 h +368 h +109 h +125 h +1 h +6598 m +4 h +6599 m +4 h +1 h 
+6600 m +4 h +4 h +170 h +4 h +4 h +129 h +6601 m +57 h +10 h +6602 m +10 h +6603 m +10 h +10 h +41 h +1 h +10 h +83 h +4 h +1 h +1 h +4 h +4 h +4 h +12 h +10 h +11 h +1595 m +10 h +11 h +6604 m +6605 m +6606 m +488 h +109 h +10 h +6607 m +1122 m +4 h +94 h +31 h +4 h +1 h +10 h +4 h +6608 m +170 h +6609 m +10 h +10 h +4 h +4 h +1 h +1 h +6610 m +4 h +4 h +170 h +6611 m +1 h +6612 m +4 h +82 h +109 h +4 h +10 h +8 h +6613 m +307 h +1 h +4 h +11 h +4 h +3 h +229 h +4 h +4 h +6614 m +10 h +192 h +6615 m +10 h +4 h +4 h +124 h +1 h +6616 m +6617 m +1 h +1 h +104 h +1 h +13 h +119 h +468 m +1 h +114 h +73 h +1 h +4 h +74 h +6618 m +601 h +4 h +25 h +10 h +4 h +1 h +4 h +270 h +41 h +6619 m +6620 m +4 h +477 m +1 h +4 h +6621 m +196 h +4 h +10 h +1 h +1403 h +4 h +4 h +4 h +4 h +6622 m +10 h +10 h +11 h +83 h +2303 m +1 h +10 h +11 h +1 h +25 h +11 h +2268 m +10 h +83 h +1 h +1 h +6623 m +6624 m +6625 m +1 h +4 h +12 h +1 h +31 h +10 h +6626 m +73 h +181 h +1 h +4 h +6627 m +6628 m +10 h +6629 m +4 h +4 h +4 h +184 h +82 h +10 h +262 h +6630 m +10 h +4 h +10 h +4 h +10 h +184 h +45 h +443 h +1 h +4 h +6631 m +5341 m +4932 m +10 h +6200 m +12 h +59 h +6632 m +170 h +6633 m +6634 m +167 h +4 h +4 h +1 h +91 h +857 m +12 h +4 h +6635 m +6636 m +13 h +10 h +4 h +94 h +1 h +31 h +55 h +1 h +6637 m +601 h +5590 m +1 h +104 h +1 h +6638 m +4 h +358 h +11 h +6639 m +11 h +1 h +4 h +4 h +56 h +11 h +12 h +6640 m +4 h +10 h +6641 m +6642 m +238 h +10 h +6643 m +4 h +403 h +6644 m +6645 m +1 h +4 h +4 h +6646 m +1 h +1576 m +25 h +1 h +4 h +1 h +4 h +6647 m +4 h +31 h +10 h +82 h +6648 m +1 h +2591 m +79 h +965 m +1 h +1 h +10 h +1 h +25 h +10 h +1304 m +64 h +11 h +10 h +83 h +4 h +167 h +4 h +4 h +1 h +6649 m +570 h +784 m +4 h +146 h +10 h +4 h +1 h +57 h +29 m +4 h +1 h +10 h +1 h +4 h +297 h +4 h +109 h +109 h +6650 m +1 h +184 h +266 h +11 h +443 h +1 h +6651 m +10 h +1769 m +11 h +123 h +603 m +10 h +1 h +4 h +4 h +1 h +250 h +1 h +1 h +6652 m +109 h +4 h +4 h +10 h +4 h 
+10 h +64 h +10 h +82 h +125 h +1218 m +146 h +575 h +6653 m +135 h +6654 m +4 h +4 h +82 h +124 h +10 h +79 h +4 h +1504 m +1 h +4 h +6655 m +4 h +6656 m +1 h +10 h +196 h +65 h +10 h +10 h +186 h +114 h +1 h +4 h +1321 m +10 h +1 h +1 h +36 h +6657 m +10 h +1 h +4 h +4 h +1 h +1 h +6658 m +1 h +1 h +135 h +57 h +13 h +1 h +4 h +6659 m +4 h +649 h +4 h +4 h +4 h +79 h +11 h +83 h +6660 m +10 h +4 h +6661 m +10 h +4 h +1 h +6031 m +4 h +733 m +6662 m +6663 m +3 h +10 h +4 h +1003 m +4 h +4 h +6664 m +4 h +1 h +6665 m +5505 m +192 h +4 h +83 h +10 h +5762 m +59 h +4 h +13 h +13 h +4 h +1 h +1 h +1 h +6666 m +6667 m +41 h +6668 m +4 h +1 h +6669 m +1 h +4 h +4 h +1 h +2418 h +4 h +6670 m +10 h +6671 m +1 h +1322 m +1 h +1 h +4 h +4 h +1646 m +195 h +3188 m +113 h +4 h +8 h +4 h +1 h +4 h +6672 m +10 h +10 h +6673 m +6674 m +10 h +6675 m +4 h +124 h +110 h +10 h +83 h +1 h +147 h +1 h +139 h +6676 m +1 h +12 h +4 h +135 h +40 h +4 h +4 h +4723 m +83 h +10 h +4 h +6677 m +6678 m +1 h +195 h +4 h +10 h +10 h +4 h +4 h +10 h +6679 m +10 h +1 h +10 h +1576 m +97 h +1650 h +4 h +6680 m +1 h +167 h +1 h +4 h +1948 m +4 h +10 h +6681 m +10 h +119 h +172 h +1 h +4 h +4 h +4 h +57 h +575 h +6682 m +124 h +11 h +1 h +2256 m +4359 m +6683 m +3680 m +4 h +4 h +55 h +82 h +1 h +10 h +11 h +1 h +135 h +4 h +8 h +4 h +109 h +82 h +25 h +125 h +4 h +10 h +4 h +1 h +11 h +4 h +79 h +10 h +4 h +601 h +74 h +4 h +10 h +3837 m +10 h +4 h +1 h +6684 m +59 h +6685 m +6686 m +6687 m +83 h +4 h +10 h +6688 m +1 h +820 h +4 h +6689 m +6690 m +4 h +104 h +109 h +6691 m +6692 m +601 h +1 h +10 h +10 h +195 h +8 h +10 h +911 h +4520 m +443 h +6693 m +4 h +4 h +1 h +147 h +3177 m +10 h +6694 m +82 h +6695 m +6696 m +55 h +12 h +1 h +6697 m +6698 m +4966 m +10 h +1 h +6699 m +124 h +6700 m +6701 m +4 h +74 h +4 h +4 h +6702 m +1 h +4 h +4 h +124 h +146 h +297 h +1 h +184 h +1 h +6703 m +12 h +4 h +123 h +10 h +1 h +77 h +27 h +1 h +10 h +1 h +1 h +6704 m +92 h +4 h +1 h +6705 m +1 h +4 h +6706 m 
+6707 m +1650 h +10 h +3 h +1 h +10 h +1 h +118 h +10 h +1 h +6708 m +1 h +4 h +97 h +10 h +1 h +6709 m +801 m +169 h +10 h +41 h +146 h +4 h +4 h +4 h +1 h +6710 m +4 h +4 h +1 h +4 h +10 h +6711 m +1 h +10 h +1 h +241 m +4 h +1 h +4 h +1 h +1 h +238 h +10 h +4 h +3150 m +4 h +10 h +10 h +10 h +82 h +820 h +1 h +6712 m +4 h +6713 m +4 h +10 h +1 h +125 h +10 h +10 h +4 h +6714 m +48 h +1 h +6715 m +1 h +57 h +4 h +6716 m +64 h +238 h +6717 m +195 h +1 h +6718 m +4 h +368 h +493 h +6719 m +1 h +57 h +104 h +4 h +124 h +25 h +575 h +10 h +1 h +11 h +4 h +10 h +4 h +1 h +1 h +10 h +57 h +173 h +1 h +143 h +4 h +4 h +6720 m +11 h +1764 m +6721 m +4 h +10 h +4 h +6722 m +10 h +1 h +1 h +1 h +6723 m +6724 m +6725 m +124 h +6726 m +6727 m +1089 h +4 h +6728 m +4 h +4 h +224 h +4 h +12 h +6729 m +386 h +4 h +10 h +4 h +31 h +4 h +6730 m +11 h +82 h +83 h +6731 m +4 h +73 h +986 h +1 h +4 h +10 h +4 h +1 h +10 h +41 h +2914 m +4 h +1197 m +4 h +1 h +10 h +10 h +82 h +4 h +1 h +4 h +4 h +13 h +1 h +4 h +4 h +10 h +640 h +10 h +1 h +6732 m +1 h +11 h +10 h +11 h +10 h +4 h +6733 m +1 h +11 h +6734 m +1 h +97 h +4 h +6735 m +4 h +10 h +1 h +94 h +4 h +10 h +11 h +1 h +119 h +1 h +4 h +3 h +25 h +1 h +97 h +185 h +4 h +4 h +601 h +6736 m +1725 m +6737 m +1185 m +1 h +97 h +10 h +11 h +190 h +4 h +4 h +1 h +6738 m +4 h +4 h +258 h +4 h +4 h +104 h +1 h +4 h +4 h +4 h +31 h +1 h +6739 m +4 h +10 h +10 h +6740 m +1 h +10 h +195 h +10 h +6741 m +2266 m +10 h +8 h +57 h +10 h +57 h +6742 m +169 h +1 h +10 h +6743 m +4 h +6744 m +6745 m +31 h +258 h +10 h +1 h +1 h +1 h +4 h +25 h +10 h +6746 m +4 h +6747 m +444 m +3 h +4 h +1 h +65 h +10 h +97 h +10 h +10 h +10 h +8 h +6748 m +10 h +1 h +4 h +4 h +270 h +1 h +10 h +4 h +73 h +31 h +4 h +11 h +11 h +6749 m +6750 m +10 h +2319 m +229 h +25 h +1 h +6751 m +4 h +4 h +1 h +4 h +41 h +109 h +10 h +82 h +1 h +6752 m +3 h +73 h +190 h +4 h +11 h +10 h +4 h +10 h +4 h +6753 m +1 h +1 h +10 h +1 h +4 h +1 h +6754 m +1 h +10 h +4 h +1 h +6755 
m +4 h +4 h +10 h +4 h +83 h +4 h +169 h +332 h +4 h +307 h +10 h +4 h +4 h +307 h +1 h +6756 m +109 h +297 h +3 h +4 h +1 h +13 h +1 h +25 h +1 h +4 h +1 h +1 h +4 h +4 h +59 h +1 h +4 h +6757 m +55 h +10 h +124 h +143 h +6758 m +31 h +10 h +4 h +10 h +1 h +65 h +74 h +82 h +6759 m +10 h +104 h +104 h +1 h +1 h +6760 m +6761 m +1 h +1 h +1 h +4 h +10 h +6762 m +3373 m +1 h +556 h +6763 m +3025 m +6764 m +4 h +4 h +3 h +784 m +629 m +10 h +4 h +4 h +10 h +6765 m +4 h +56 h +6766 m +31 h +4 h +10 h +10 h +93 h +10 h +82 h +10 h +1137 h +272 h +79 h +147 h +4 h +6767 m +74 h +6768 m +10 h +10 h +10 h +104 h +1 h +10 h +6769 m +97 h +4 h +6770 m +4 h +1 h +6771 m +1 h +69 h +4 h +718 h +31 h +6772 m +10 h +4 h +6773 m +11 h +289 h +6774 m +4 h +195 h +125 h +1 h +143 h +10 h +332 h +10 h +125 h +4 h +83 h +185 h +3199 m +1 h +31 h +4 h +109 h +10 h +10 h +82 h +10 h +4 h +6775 m +10 h +6776 m +1 h +82 h +10 h +1 h +1 h +6777 m +1 h +6778 m +40 h +1 h +6779 m +1 h +4 h +4 h +10 h +1 h +1 h +6780 m +10 h +1 h +1 h +10 h +1406 m +146 h +447 h +1 h +4 h +6781 m +123 h +4 h +4 h +4 h +4 h +4 h +11 h +4 h +11 h +278 h +10 h +976 h +11 h +73 h +11 h +4 h +2532 m +1 h +4 h +6782 m +6783 m +6784 m +6785 m +11 h +112 h +297 h +4 h +6786 m +4 h +6787 m +6788 m +4 h +10 h +2433 m +41 h +6789 m +6790 m +11 h +4 h +12 h +1 h +1409 m +238 h +65 h +11 h +6791 m +1 h +73 h +25 h +6792 m +4 h +10 h +6793 m +6794 m +6795 m +10 h +3 h +6796 m +1 h +203 m +124 h +10 h +10 h +55 h +1650 h +59 h +1 h +6270 m +10 h +185 h +25 h +10 h +1 h +1 h +73 h +110 h +10 h +1642 h +123 h +92 h +1499 m +1 h +4 h +1535 m +1 h +10 h +4 h +1406 m +1 h +575 h +6797 m +10 h +6798 m +6799 m +22 h +4 h +1 h +10 h +3177 m +4 h +4 h +4 h +119 h +493 h +1 h +10 h +10 h +83 h +11 h +6800 m +1 h +4 h +10 h +125 h +11 h +1 h +6801 m +10 h +3 h +59 h +10 h +11 h +8 h +6802 m +1 h +1 h +1 h +1470 h +10 h +77 h +1 h +6803 m +4 h +6804 m +4 h +6805 m +4 h +4 h +1 h +351 m +1 h +4 h +1 h +4 h +6806 m +4 h +4 h +135 h +4 
h +10 h +11 h +4 h +687 h +359 h +6807 m +11 h +6808 m +1030 h +6809 m +6810 m +4 h +82 h +692 h +1 h +6811 m +581 m +11 h +118 h +4 h +4 h +6812 m +4 h +6813 m +1304 m +4 h +10 h +6814 m +295 h +157 h +1 h +1 h +109 h +10 h +10 h +104 h +4 h +1953 m +1 h +4 h +195 h +12 h +109 h +114 h +6815 m +31 h +6816 m +170 h +104 h +6817 m +4 h +10 h +10 h +94 h +135 h +4 h +10 h +41 h +82 h +114 h +13 h +10 h +4 h +10 h +4 h +368 h +97 h +10 h +4 h +82 h +25 h +10 h +83 h +1359 h +1 h +4 h +11 h +6818 m +10 h +1279 m +4689 m +10 h +10 h +6819 m +4 h +4 h +1 h +4 h +184 h +4 h +172 h +4 h +10 h +10 h +1 h +6820 m +69 h +10 h +1 h +25 h +620 m +6821 m +143 h +4 h +1 h +1 h +229 h +6822 m +4 h +10 h +6823 m +124 h +146 h +3 h +1 h +11 h +12 h +1 h +4 h +371 h +1 h +468 m +1 h +3161 m +6824 m +443 h +4 h +4 h +10 h +258 h +4 h +109 h +10 h +6825 m +6826 m +11 h +6827 m +4 h +1 h +6828 m +4574 m +124 h +6829 m +4 h +6830 m +1083 m +6831 m +6832 m +4 h +10 h +1 h +4 h +10 h +6833 m +10 h +57 h +45 h +124 h +4 h +5348 h +109 h +4 h +59 h +5225 m +6834 m +4 h +25 h +10 h +1 h +3240 m +1 h +10 h +10 h +6835 m +6836 m +4 h +4 h +4 h +3 h +1 h +6837 m +10 h +10 h +10 h +1642 h +4 h +112 h +6838 m +1 h +41 h +359 h +4 h +10 h +6839 m +192 h +1 h +90 m +6840 m +625 m +258 h +27 h +1 h +6841 m +4 h +4 h +48 h +692 h +10 h +358 h +10 h +4 h +104 h +3422 m +185 h +1 h +1 h +72 m +10 h +1 h +2410 m +256 h +4 h +12 h +6842 m +4 h +4 h +6843 m +4 h +4 h +358 h +575 h +10 h +77 h +4 h +12 h +4 h +10 h +25 h +11 h +6844 m +6845 m +4 h +1 h +4 h +10 h +1 h +10 h +1 h +1 h +6846 m +8 h +79 h +124 h +10 h +6847 m +4 h +3 h +185 h +6848 m +11 h +1 h +1 h +25 h +1 h +6849 m +6850 m +4 h +4 h +6851 m +45 h +1 h +443 h +185 h +11 h +6852 m +412 m +3 h +4 h +124 h +1 h +908 m +12 h +1 h +40 h +10 h +6853 m +57 h +10 h +6854 m +10 h +4 h +1 h +4 h +11 h +10 h +767 m +124 h +192 h +110 h +4 h +4 h +1 h +6855 m +4 h +230 h +1 h +6856 m +6857 m +801 m +4 h +10 h +1 h +536 h +4 h +1 h +1 h +10 h +1 h +1 h 
+4 h +1 h +4 h +4 h +1 h +11 h +4 h +3 h +6858 m +4 h +2096 m +6859 m +10 h +91 h +1 h +4 h +1470 h +6860 m +4 h +10 h +3 h +6861 m +1 h +104 h +10 h +10 h +6862 m +4 h +4 h +10 h +13 h +185 h +10 h +4 h +97 h +1 h +4 h +56 h +116 m +3398 m +59 h +1454 m +1 h +4 h +25 h +23 h +10 h +10 h +169 h +4 h +4 h +1 h +4 h +73 h +123 h +1 h +3 h +13 h +6863 m +10 h +10 h +4 h +1 h +195 h +82 h +2285 m +4 h +10 h +6864 m +10 h +687 h +4 h +92 h +10 h +1403 h +4 h +6865 m +4 h +6866 m +1 h +4 h +5958 m +4 h +4 h +4863 m +1 h +4 h +109 h +2984 m +4 h +6867 m +4 h +10 h +6868 m +83 h +6869 m +1 h +4 h +6870 m +1 h +6871 m +6872 m +1 h +4 h +278 h +4 h +10 h +1 h +36 h +4 h +4 h +4 h +138 h +11 h +57 h +109 h +10 h +1 h +4 h +1 h +4 h +4 h +3 h +10 h +1 h +6873 m +6874 m +1470 h +25 h +4 h +230 h +1 h +25 h +4 h +167 h +6875 m +219 h +91 h +258 h +279 h +270 h +278 h +79 h +41 h +1 h +1 h +4 h +146 h +1 h +869 m +6876 m +4 h +6877 m +1 h +3 h +4 h +41 h +31 h +6878 m +4 h +10 h +1880 m +186 h +6879 m +124 h +13 h +6880 m +4 h +4 h +1 h +10 h +6881 m +10 h +6882 m +6883 m +4 h +1 h +10 h +10 h +190 h +1 h +6884 m +4 h +31 h +1 h +4 h +4 h +10 h +112 h +1 h +1 h +4 h +4 h +1 h +10 h +6885 m +41 h +6886 m +25 h +6887 m +4 h +5505 m +83 h +1 h +6888 m +1 h +1 h +104 h +4 h +1 h +399 h +73 h +6889 m +6890 m +4 h +10 h +6891 m +74 h +3539 m +10 h +615 m +4 h +1137 h +10 h +10 h +1 h +6892 m +6893 m +4 h +6894 m +172 h +4 h +3825 m +1 h +10 h +4 h +1 h +6895 m +6896 m +1089 h +4 h +6897 m +31 h +1 h +630 m +181 h +6898 m +6899 m +10 h +1 h +25 h +4 h +4 h +1 h +1 h +3 h +6900 m +4 h +94 h +4 h +1 h +6901 m +1359 h +6902 m +1 h +6903 m +1406 h +6904 m +91 h +6905 m +6906 m +3657 m +10 h +6907 m +10 h +1697 m +1968 m +6908 m +119 h +966 m +6909 m +10 h +104 h +5863 m +109 h +6910 m +6911 m +10 h +1 h +6912 m +6913 m +265 h +93 h +433 m +11 h +10 h +1 h +4 h +1 h +6914 m +4 h +82 h +125 h +57 h +6915 m +4 h +1 h +74 h +1 h +6916 m +4 h +4 h +603 m +181 h +59 h +4 h +41 h +601 h +123 h 
+124 h +6917 m +6918 m +4 h +4 h +36 h +1 h +279 h +2887 h +6919 m +1 h +10 h +4 h +1 h +6920 m +10 h +1137 h +4240 m +4 h +6921 m +1 h +4 h +6922 m +109 h +4 h +538 h +6923 m +6924 m +57 h +976 h +10 h +1 h +1 h +10 h +6925 m +124 h +6926 m +1 h +195 h +4 h +857 h +10 h +6927 m +6928 m +1 h +4 h +1 h +10 h +6929 m +1 h +1 h +11 h +6930 m +2733 h +6931 m +6932 m +3513 m +6933 m +6934 m +4 h +270 h +1 h +1 h +31 h +6935 m +97 h +4 h +3 h +3 h +4 h +6936 m +40 h +4 h +64 h +6937 m +6938 m +1 h +74 h +4089 m +1 h +4 h +3 h +6939 m +65 h +73 h +6940 m +1 h +4 h +1 h +4 h +4 h +1105 h +4 h +10 h +4 h +10 h +1 h +1 h +4 h +6941 m +1 h +97 h +4 h +4 h +97 h +4 h +97 h +1 h +1 h +1 h +70 m +10 h +10 h +6942 m +2379 m +11 h +6943 m +4 h +6944 m +82 h +6945 m +1 h +10 h +10 h +10 h +4 h +10 h +6946 m +10 h +1 h +6947 m +6948 m +172 h +4 h +1 h +1685 h +6949 m +1 h +146 h +1 h +4 h +4 h +4 h +5757 m +97 h +256 h +74 h +10 h +3 h +10 h +4 h +91 h +1 h +1 h +10 h +6950 m +10 h +1 h +6951 m +104 h +10 h +1 h +6952 m +1 h +170 h +10 h +6953 m +368 h +4 h +82 h +10 h +10 h +6954 m +10 h +10 h +36 h +4 h +83 h +386 h +1 h +6955 m +11 h +4 h +83 h +1 h +124 h +6956 m +6957 m +307 h +125 h +4 h +6448 m +57 h +1 h +1 h +10 h +4 h +92 h +112 h +6958 m +806 m +6959 m +1 h +6960 m +45 h +1 h +25 h +1 h +1 h +6961 m +10 h +57 h +4 h +1 h +6962 m +41 h +3 h +4 h +6963 m +11 h +4 h +12 h +74 h +10 h +10 h +4 h +196 h +146 h +1 h +6964 m +6965 m +1 h +4 h +6966 m +4 h +10 h +10 h +1 h +84 h +22 h +6967 m +4 h +6968 m +6969 m +4 h +1 h +1 h +6970 m +1 h +4 h +10 h +4 h +3025 m +459 m +94 h +82 h +1 h +97 h +10 h +10 h +1 h +79 h +6971 m +1 h +10 h +110 h +174 h +10 h +11 h +135 h +4 h +4 h +195 h +6972 m +1 h +1 h +119 h +6973 m +1 h +6974 m +6975 m +139 h +6976 m +196 h +172 h +10 h +6977 m +6978 m +1 h +1 h +11 h +36 h +1 h +10 h +10 h +4 h +4 h +6979 m +4 h +6980 m +1 h +6981 m +1 h +4 h +64 h +250 h +1 h +4 h +4 h +143 h +276 h +4 h +1122 m +10 h +12 h +31 h +124 h +1 h +6982 m +6983 m 
+129 h +1 h +3607 m +6984 m +4 h +10 h +1 h +4 h +4 h +6985 m +6986 m +1 h +1220 m +6987 m +4 h +94 h +1 h +74 h +4 h +82 h +6988 m +4 h +185 h +1345 m +1 h +10 h +1 h +1 h +6989 m +4 h +1 h +8 h +25 h +1379 m +1 h +4 h +55 h +1 h +6990 m +8 h +4 h +1 h +4 h +1 h +6991 m +2339 m +65 h +4 h +55 h +6992 m +1 h +6993 m +6994 m +4 h +196 h +1 h +10 h +11 h +10 h +1 h +31 h +4 h +6995 m +97 h +10 h +57 h +1 h +1 h +1 h +6996 m +4 h +4 h +1 h +11 h +6997 m +82 h +147 h +1 h +3 h +1 h +1 h +1 h +4 h +170 h +6998 m +1 h +4 h +59 h +4 h +1 h +10 h +1 h +4 h +123 h +4 h +6999 m +138 h +4 h +1 h +1 h +124 h +4 h +1 h +4 h +4 h +4 h +4 h +4 h +7000 m +4 h +10 h +7001 m +4 h +7002 m +4 h +7003 m +1 h +4 h +1 h +1 h +1 h +7004 m +7005 m +10 h +10 h +4 h +4 h +4 h +7006 m +7007 m +258 h +10 h +241 m +114 h +4 h +10 h +1 h +250 h +4 h +10 h +359 h +11 h +4 h +538 h +59 h +7008 m +2607 m +82 h +1 h +10 h +4 h +7009 m +1 h +7010 m +82 h +1 h +124 h +10 h +1 h +468 m +203 m +4 h +1 h +97 h +7011 m +4 h +22 h +7012 m +4 h +7013 m +1 h +266 h +1 h +7014 m +7015 m +7016 m +4 h +4 h +7017 m +4 h +82 h +10 h +1 h +10 h +3845 m +1 h +7018 m +295 h +1 h +4 h +7019 m +7020 m +10 h +1 h +1 h +94 h +97 h +41 h +83 h +4702 m +109 h +3089 m +10 h +1 h +3 h +7021 m +4 h +7022 m +10 h +1 h +3 h +4 h +332 h +698 m +10 h +4 h +3 h +7023 m +173 h +7024 m +31 h +7025 m +1016 h +109 h +1 h +7026 m +4 h +10 h +332 h +7027 m +10 h +1 h +276 h +7028 m +4 h +1 h +1 h +7029 m +4 h +7030 m +7031 m +10 h +4 h +4 h +25 h +10 h +1 h +757 h +1 h +7032 m +7033 m +258 h +1 h +4 h +4 h +4 h +10 h +1 h +125 h +110 h +7034 m +1 h +169 h +4 h +1 h +4 h +307 h +7035 m +4 h +3 h +319 h +1 h +129 h +56 h +124 h +1 h +10 h +1 h +7036 m +1 h +25 h +22 h +10 h +1 h +129 h +270 h +10 h +1 h +7037 m +1 h +4 h +10 h +1975 m +10 h +1304 m +7038 m +45 h +10 h +10 h +7039 m +4177 m +307 h +4 h +173 h +4 h +1 h +65 h +7040 m +1 h +10 h +7041 m +7042 m +1 h +4 h +7043 m +4 h +4 h +1 h +7044 m +7045 m +4 h +146 h +10 h +7046 m +7047 
m +4 h +93 h +104 h +4 h +4 h +7048 m +59 h +10 h +4 h +7049 m +10 h +4 h +1 h +7050 m +1 h +297 h +4 h +10 h +1 h +4 h +10 h +27 h +7051 m +1 h +11 h +7052 m +10 h +386 h +3 h +7053 m +208 m +41 h +7054 m +10 h +2163 m +7055 m +1 h +93 h +11 h +10 h +170 h +4 h +4 h +4 h +11 h +7056 m +10 h +4 h +10 h +10 h +986 h +10 h +276 h +158 h +1 h +4 h +1 h +319 h +1 h +4 h +1 h +7057 m +10 h +986 h +4 h +1 h +1 h +124 h +7058 m +7059 m +4 h +1697 m +7060 m +4 h +7061 m +11 h +4 h +4 h +7062 m +7063 m +4 h +1 h +124 h +7064 m +3 h +1 h +8 h +1 h +10 h +10 h +31 h +4 h +11 h +7065 m +10 h +4 h +7066 m +1619 h +1 h +7067 m +7068 m +1 h +264 m +1 h +4 h +119 h +4 h +10 h +1 h +2788 m +1 h +7069 m +172 h +10 h +278 h +73 h +7070 m +110 h +10 h +4 h +4 h +1 h +1868 m +59 h +4 h +1 h +10 h +1 h +4 h +1 h +195 h +10 h +1 h +7071 m +4 h +386 h +82 h +7072 m +4 h +10 h +10 h +11 h +7073 m +7074 m +10 h +7075 m +11 h +167 h +4 h +278 h +7076 m +7077 m +82 h +3 h +1 h +4 h +1 h +4 h +1 h +1 h +97 h +27 h +10 h +10 h +46 h +7078 m +11 h +4 h +73 h +4 h +59 h +4 h +4 h +10 h +1 h +7079 m +7080 m +4 h +10 h +1993 m +10 h +25 h +7081 m +10 h +1 h +10 h +10 h +1 h +4 h +10 h +1 h +4 h +57 h +25 h +10 h +10 h +1 h +92 h +1 h +4 h +3 h +31 h +1 h +7082 m +1 h +4 h +7083 m +692 h +1 h +25 h +11 h +4 h +7084 m +1 h +10 h +7085 m +10 h +10 h +13 h +1 h +737 h +7086 m +4 h +138 h +7087 m +7088 m +601 h +976 h +2494 m +1 h +4 h +109 h +7089 m +4 h +10 h +4 h +7090 m +4 h +278 h +230 h +3 h +7091 m +4 h +140 h +1 h +4 h +7092 m +1 h +4 h +7093 m +1 h +124 h +7094 m +28 h +10 h +4 h +7095 m +307 h +7096 m +41 h +1 h +11 h +7097 m +2931 m +11 h +8 h +7098 m +73 h +702 m +10 h +124 h +238 h +7099 m +1478 m +7100 m +4 h +1 h +10 h +10 h +190 h +1 h +307 h +1 h +4 h +4 h +276 h +10 h +1 h +1 h +82 h +27 h +1 h +10 h +10 h +7101 m +10 h +109 h +1 h +7102 m +157 h +45 h +3229 m +1 h +4 h +1 h +11 h +4 h +4 h +4 h +7103 m +4 h +118 h +7104 m +10 h +110 h +4 h +73 h +1 h +10 h +4 h +109 h +435 h +1 h +1 h 
+10 h +7105 m +1016 h +578 m +184 h +7106 m +4 h +1 h +7107 m +7108 m +4 h +4 h +7109 m +13 h +4 h +1 h +31 h +7110 m +4 h +2116 m +10 h +1 h +7111 m +4 h +7112 m +10 h +82 h +2788 m +332 h +1 h +59 h +4 h +59 h +541 m +4 h +56 h +83 h +1 h +74 h +73 h +10 h +7113 m +258 h +10 h +7114 m +1 h +4 h +7115 m +57 h +7116 m +512 m +4 h +125 h +1 h +493 h +4 h +10 h +1 h +4 h +7117 m +1 h +4 h +7118 m +59 h +1 h +56 h +7119 m +4 h +10 h +4 h +276 h +1 h +157 h +10 h +64 h +7120 m +82 h +4 h +124 h +186 h +1 h +4 h +82 h +1 h +4 h +1 h +10 h +1 h +1 h +7121 m +7122 m +7123 m +125 h +4 h +10 h +4 h +10 h +10 h +1 h +10 h +1 h +11 h +36 h +27 h +146 h +10 h +10 h +10 h +1 h +83 h +7124 m +1 h +4 h +11 h +82 h +4 h +4 h +4 h +129 h +4 h +4 h +1 h +479 m +7125 m +1 h +7126 m +7127 m +5813 m +4 h +4 h +147 h +10 h +4 h +124 h +4 h +4 h +4 h +1 h +7128 m +4 h +7129 m +4 h +4 h +40 h +6391 m +7130 m +10 h +1 h +13 h +27 h +7131 m +65 h +10 h +250 h +10 h +1 h +56 h +7132 m +4 h +4 h +7133 m +4 h +7134 m +11 h +7135 m +1016 h +7136 m +367 m +4 h +4 h +7137 m +25 h +7138 m +1 h +4 h +7139 m +83 h +7140 m +1 h +4 h +113 h +4 h +1006 m +10 h +1 h +70 m +7141 m +10 h +4 h +307 h +110 h +11 h +4 h +1 h +41 h +1 h +4 h +1 h +1 h +4 h +33 m +10 h +10 h +4292 m +12 h +11 h +4 h +57 h +4 h +1 h +36 h +4 h +1 h +4 h +4 h +7142 m +1 h +4 h +276 h +10 h +41 h +4 h +986 h +195 h +4 h +10 h +10 h +2769 m +181 h +7143 m +4 h +2540 m +7144 m +4 h +1 h +5673 m +1 h +4 h +73 h +7145 m +7146 m +2788 h +10 h +83 h +10 h +1 h +1 h +3 h +4 h +307 h +4 h +1 h +381 m +4 h +1 h +45 h +10 h +119 h +4 h +4 h +1 h +7147 m +147 h +1 h +4 h +4 h +158 h +57 h +7148 m +119 h +11 h +10 h +7149 m +10 h +7150 m +10 h +4 h +57 h +1 h +4 h +185 h +4 h +1 h +1 h +1 h +4 h +4 h +1 h +4 h +11 h +125 h +1 h +1 h +4 h +4 h +4 h +12 h +79 h +258 h +7151 m +7152 m +7153 m +4 h +10 h +4 h +4 h +83 h +4 h +4 h +10 h +1359 h +10 h +7154 m +1 h +7155 m +4 h +190 h +4 h +383 h +4 h +990 m +4 h +27 h +10 h +730 m +7156 m +1 h +4 
h +4 h +31 h +1 h +10 h +1 h +4 h +7157 m +7158 m +92 h +10 h +10 h +59 h +12 h +7159 m +12 h +7160 m +146 h +4 h +4 h +41 h +4 h +25 h +79 h +4 h +25 h +104 h +2379 m +125 h +10 h +7161 m +358 h +164 h +10 h +10 h +25 h +1 h +4 h +1142 m +4 h +4 h +4 h +4 h +1 h +1 h +4 h +7162 m +10 h +12 h +1 h +139 h +7163 m +10 h +7164 m +1 h +41 h +297 h +1 h +1 h +59 h +1 h +31 h +4 h +1322 h +1 h +7165 m +10 h +692 h +147 h +55 h +276 h +7166 m +10 h +10 h +10 h +3 h +1 h +10 h +4 h +4 h +1074 h +10 h +1 h +4 h +7167 m +1 h +4 h +1 h +10 h +1 h +7168 m +4 h +258 h +3558 h +7169 m +7170 m +7171 m +1 h +214 m +2625 m +276 h +7172 m +10 h +1 h +4 h +1 h +7173 m +11 h +4 h +1 h +1697 h +3 h +7174 m +4 h +83 h +1 h +1 h +4 h +10 h +7175 m +11 h +114 h +7176 m +4 h +94 h +4 h +79 h +4 h +11 h +4276 m +13 h +4 h +4 h +123 h +114 h +3396 m +196 h +57 h +125 h +147 h +10 h +7177 m +1 h +1 h +1 h +7178 m +57 h +10 h +1 h +170 h +10 h +10 h +1 h +4 h +7179 m +83 h +258 h +10 h +7180 m +7181 m +59 h +238 h +10 h +1 h +7182 m +79 h +7183 m +7184 m +7185 m +1 h +4 h +4 h +4 h +278 h +447 h +7186 m +7187 m +59 h +167 h +7188 m +10 h +4 h +1 h +1 h +463 m +36 h +7189 m +238 h +41 h +3 h +125 h +219 h +82 h +7190 m +74 h +1 h +1 h +10 h +10 h +7191 m +11 h +4 h +7192 m +10 h +7193 m +4 h +10 h +82 h +1 h +4 h +1 h +2172 m +10 h +119 h +7194 m +7195 m +10 h +1379 m +1 h +10 h +3847 m +4 h +4 h +1 h +1 h +10 h +11 h +4 h +10 h +4 h +124 h +196 h +4 h +4 h +7196 m +7197 m +4 h +10 h +4 h +11 h +1403 h +55 h +4 h +4 h +7198 m +4 h +857 h +10 h +69 h +104 h +104 h +4 h +4 h +7199 m +10 h +11 h +4 h +1 h +4 h +204 h +4 h +7200 m +11 h +10 h +7201 m +4 h +4 h +1 h +4 h +353 m +7202 m +4 h +1 h +4 h +1 h +59 h +4 h +7203 m +195 h +4 h +195 h +7204 m +10 h +4 h +10 h +1074 h +1 h +10 h +976 h +10 h +7205 m +59 h +7206 m +7207 m +1 h +10 h +1 h +1137 h +10 h +763 m +7208 m +27 h +10 h +10 h +11 h +4 h +1 h +7209 m +7210 m +1 h +104 h +124 h +190 h +4 h +7211 m +258 h +1 h +7212 m +83 h +36 h +7213 m 
+10 h +221 m +10 h +10 h +1 h +1 h +10 h +4 h +7214 m +10 h +7215 m +4 h +569 h +7216 m +125 h +4 h +7217 m +1 h +10 h +7218 m +4 h +25 h +7219 m +7220 m +12 h +7221 m +7222 m +7223 m +3357 m +4 h +11 h +79 h +7224 m +7225 m +82 h +56 h +74 h +4 h +10 h +7226 m +1 h +41 h +10 h +1122 m +1 h +10 h +97 h +31 h +1 h +7227 m +1 h +11 h +186 h +7228 m +4 h +1 h +1 h +4 h +181 h +7229 m +7230 m +13 h +299 h +4 h +10 h +65 h +4 h +113 h +289 h +6747 m +4 h +10 h +7231 m +169 h +238 h +4 h +4 h +92 h +45 h +1 h +4 h +113 h +6197 m +1 h +7232 m +7233 m +10 h +12 h +10 h +692 h +10 h +7234 m +7235 m +258 h +7236 m +7237 m +10 h +31 h +1 h +7238 m +1 h +359 h +10 h +7239 m +169 h +10 h +5923 m +4 h +123 h +97 h +1 h +4 h +4 h +447 h +7240 m +82 h +91 h +65 h +7241 m +4 h +7242 m +4 h +7243 m +1 h +1 h +55 h +2172 m +4858 m +7244 m +82 h +4 h +4 h +7245 m +4 h +7246 m +1 h +10 h +3435 m +7247 m +7248 m +7249 m +114 h +1137 h +4 h +74 h +1 h +79 h +1 h +7250 m +4 h +7251 m +4 h +7252 m +123 h +10 h +1 h +1 h +4 h +4 h +129 h +7253 m +57 h +258 h +10 h +4 h +181 h +10 h +4 h +124 h +4 h +7254 m +4089 m +1 h +56 h +4 h +10 h +258 h +4 h +1 h +11 h +10 h +229 h +195 h +4 h +4 h +4 h +167 h +4 h +185 h +196 h +1 h +7255 m +4 h +1 h +5525 m +359 h +7256 m +7257 m +10 h +4 h +83 h +11 h +238 h +4 h +4 h +7258 m +8 h +4 h +1 h +4 h +59 h +7259 m +5 m +7260 m +4 h +2374 h +4 h +10 h +3 h +1 h +4 h +4 h +1 h +4 h +1 h +1607 m +6731 m +10 h +83 h +7261 m +82 h +167 h +4 h +4 h +110 h +10 h +10 h +7262 m +270 h +7263 m +147 h +10 h +7264 m +7265 m +10 h +4 h +368 h +4 h +1220 m +1 h +4 h +4 h +56 h +82 h +1 h +4 h +83 h +109 h +172 h +7266 m +10 h +1 h +10 h +4 h +4 h +949 m +7267 m +4 h +83 h +1 h +4 h +358 h +10 h +3227 m +10 h +10 h +4 h +4 h +1 h +1 h +1 h +1 h +1 h +1083 m +7268 m +1 h +1 h +443 h +129 h +10 h +74 h +7269 m +3 h +1 h +1 h +1 h +59 h +230 h +7270 m +10 h +7271 m +7272 m +11 h +10 h +7273 m +4 h +74 h +1 h +4 h +4 h +156 h +1 h +164 h +4 h +1 h +888 m +1766 h +4 h +83 
h +1 h +124 h +1 h +4 h +10 h +7274 m +11 h +7275 m +4 h +10 h +83 h +146 h +10 h +7276 m +25 h +7277 m +10 h +4 h +74 h +1 h +270 h +1 h +4 h +7278 m +1 h +1 h +146 h +4 h +276 h +4 h +10 h +10 h +135 h +4 h +195 h +158 h +4 h +1619 h +4 h +1 h +4 h +4 h +4 h +10 h +82 h +31 h +10 h +109 h +83 h +169 h +1791 m +5 m +7279 m +4 h +4 h +794 m +7280 m +7281 m +11 h +2607 m +1 h +4 h +7282 m +1646 m +4 h +7283 m +11 h +10 h +195 h +10 h +7284 m +74 h +11 h +4 h +195 h +10 h +1 h +4 h +4 h +10 h +1 h +4 h +4 h +1083 m +4 h +4 h +176 m +64 h +1868 m +7285 m +4 h +4 h +45 h +3 h +1074 h +11 h +147 h +7286 m +4 h +125 h +4 h +1406 h +10 h +1 h +7287 m +7288 m +11 h +4 h +1 h +1 h +7289 m +7290 m +11 h +1 h +4 h +10 h +1 h +10 h +4 h +7291 m +7292 m +1 h +1535 m +10 h +11 h +7293 m +1 h +282 m +4 h +7294 m +307 h +4 h +7295 m +172 h +10 h +7296 m +124 h +10 h +7297 m +4 h +10 h +4 h +57 h +11 h +1 h +4 h +4 h +4 h +10 h +7298 m +4 h +94 h +7299 m +10 h +4 h +7300 m +468 m +7301 m +11 h +4 h +4 h +4 h +4 h +1 h +1 h +25 h +10 h +7302 m +4 h +109 h +1 h +1 h +25 h +1 h +10 h +79 h +10 h +4 h +203 m +4 h +7303 m +4 h +31 h +4 h +7304 m +4 h +4 h +1 h +3 h +83 h +1 h +146 h +7305 m +92 h +7306 m +10 h +4 h +1 h +3 h +7307 m +7308 m +10 h +4 h +7309 m +143 h +4 h +11 h +11 h +4 h +196 h +94 h +156 h +4 h +4 h +7310 m +124 h +1 h +223 m +5557 m +1619 h +4 h +7311 m +4 h +7312 m +1 h +10 h +10 h +327 m +8 h +4 h +110 h +10 h +4 h +5526 m +10 h +1 h +7313 m +1 h +279 h +1564 m +1 h +7314 m +1 h +4 h +7315 m +83 h +4 h +7316 m +4 h +11 h +1 h +7317 m +4 h +230 h +1 h +7318 m +1359 h +4 h +4 h +4 h +10 h +10 h +73 h +1 h +1 h +307 h +1 h +10 h +119 h +10 h +7319 m +1 h +147 h +65 h +1 h +4 h +6095 m +4 h +10 h +4 h +7320 m +59 h +170 h +7321 m +13 h +4 h +7322 m +1 h +7323 m +82 h +4 h +843 m +1 h +258 h +4 h +7324 m +4 h +10 h +7325 m +109 h +10 h +114 h +10 h +135 h +5325 m +4 h +262 h +7326 m +10 h +1 h +113 h +7327 m +114 h +1 h +7328 m +41 h +41 h +124 h +123 h +4 h +83 h +1 h 
+11 h +1 h +4 h +4 h +55 h +59 h +4 h +12 h +1564 m +4 h +1 h +1 h +143 h +4 h +10 h +1 h +11 h +4033 m +1 h +533 m +7329 m +1 h +7330 m +11 h +10 h +7331 m +109 h +118 h +7332 m +7333 m +1089 h +11 h +10 h +10 h +4 h +7334 m +7335 m +4 h +313 m +4 h +1 h +7336 m +358 h +1 h +5544 m +3324 m +74 h +4 h +1532 m +737 h +1 h +13 h +7337 m +7338 m +10 h +181 h +258 h +3 h +109 h +45 h +4 h +10 h +10 h +4 h +1 h +7339 m +124 h +7340 m +1 h +4 h +7341 m +125 h +1 h +4 h +4 h +7342 m +4 h +55 h +10 h +1 h +7343 m +195 h +138 h +74 h +1 h +83 h +3293 m +7344 m +7345 m +7346 m +10 h +1 h +7347 m +430 m +74 h +10 h +109 h +7348 m +10 h +11 h +1260 m +110 h +10 h +1 h +4 h +57 h +7349 m +258 h +7350 m +104 h +4 h +7351 m +7352 m +41 h +4 h +4 h +73 h +299 h +93 h +4 h +278 h +1 h +41 h +8 h +4 h +4 h +1 h +7353 m +83 h +7354 m +7355 m +4 h +250 h +10 h +5 h +1817 m +104 h +10 h +4 h +11 h +7356 m +7357 m +13 h +1 h +82 h +94 h +4 h +1 h +1 h +59 h +10 h +10 h +1 h +10 h +266 h +7358 m +1 h +4 h +11 h +4 h +1 h +110 h +4 h +10 h +10 h +1 h +1 h +4 h +4 h +10 h +1 h +7359 m +276 h +7360 m +10 h +10 h +3669 m +82 h +4 h +7361 m +167 h +386 h +4608 m +1 h +297 h +7362 m +4 h +190 h +114 h +4 h +1 h +4 h +770 m +10 h +4 h +13 h +278 h +4 h +10 h +7363 m +1 h +10 h +800 m +4 h +4 h +7364 m +10 h +1 h +135 h +4 h +57 h +83 h +139 h +1 h +82 h +10 h +4 h +11 h +195 h +10 h +1470 h +4 h +1 h +4 h +10 h +10 h +10 h +73 h +97 h +4 h +976 h +4 h +1835 m +83 h +11 h +10 h +4 h +4 h +7365 m +7366 m +164 h +986 h +10 h +31 h +11 h +1 h +10 h +7367 m +330 h +4 h +1 h +7368 m +82 h +10 h +125 h +986 h +7369 m +28 h +10 h +1 h +1016 h +4 h +229 h +2883 m +270 h +167 h +10 h +1 h +10 h +7370 m +4 h +986 h +7371 m +1 h +4 h +10 h +1 h +25 h +266 h +7372 m +74 h +3150 m +10 h +106 m +1 h +4 h +1 h +7373 m +11 h +7374 m +7375 m +204 h +4 h +4 h +7376 m +7377 m +1092 m +41 h +4 h +109 h +186 h +4 h +1 h +57 h +1 h +1 h +2909 m +297 h +1 h +4 h +7378 m +1016 h +4 h +4 h +7379 m +93 h +138 h +4 h +164 
h +25 h +4 h +7380 m +10 h +4 h +4 h +4 h +4 h +7381 m +125 h +10 h +4 h +7382 m +1 h +7383 m +4 h +258 h +181 h +196 h +10 h +1 h +4 h +736 m +4 h +59 h +7384 m +10 h +1 h +4 h +4 h +170 h +7385 m +7386 m +1 h +4 h +1 h +1 h +7387 m +82 h +4 h +7388 m +4 h +4 h +7389 m +57 h +4 h +4 h +7214 m +10 h +56 h +7390 m +7391 m +10 h +31 h +1 h +4 h +1 h +4 h +332 h +4 h +10 h +4 h +4 h +10 h +4 h +4 h +40 h +7392 m +7393 m +1308 m +4 h +10 h +1 h +7394 m +10 h +45 h +4 h +1 h +4 h +104 h +7395 m +45 h +8 h +7396 m +104 h +57 h +4 h +1 h +10 h +10 h +146 h +1 h +4 h +478 m +7397 m +7398 m +7399 m +4 h +10 h +4 h +4 h +7400 m +1 h +4 h +10 h +4 h +10 h +4 h +4 h +36 h +10 h +692 h +135 h +10 h +7401 m +5225 m +4 h +2607 m +1 h +74 h +10 h +4 h +25 h +1 h +4 h +4 h +4 h +4 h +7402 m +1 h +10 h +1016 h +4 h +10 h +4 h +4 h +7403 m +11 h +146 h +7404 m +4 h +31 h +11 h +1 h +307 h +7405 m +11 h +184 h +7406 m +359 h +25 h +4 h +7407 m +11 h +13 h +10 h +10 h +4 h +7408 m +4 h +1620 m +114 h +4 h +1 h +1 h +1 h +7409 m +1 h +36 h +4564 m +36 h +4 h +11 h +4 h +124 h +7410 m +4905 m +7411 m +3 h +1 h +353 m +1772 h +10 h +4 h +157 h +10 h +4 h +278 h +1035 m +7412 m +10 h +82 h +4 h +4 h +1539 m +1 h +109 h +84 h +109 h +4 h +4 h +7413 m +125 h +139 h +109 h +1 h +56 h +5145 m +4 h +1 h +119 h +7414 m +1 h +1 h +4 h +4 h +7415 m +935 h +4 h +4 h +119 h +276 h +169 h +4 h +1 h +123 h +1 h +1 h +7416 m +7417 m +4 h +4 h +4 h +224 h +7418 m +7419 m +7420 m +7421 m +262 h +4 h +1780 h +1 h +10 h +108 h +83 h +1 h +56 h +1 h +1403 h +1 h +185 h +11 h +4 h +4 h +10 h +4 h +186 h +3025 m +7422 m +4 h +7423 m +4 h +4 h +48 h +4 h +2720 m +687 h +5254 m +5653 m +7424 m +4 h +433 m +1 h +65 h +359 h +1137 h +4 h +7425 m +4 h +87 m +4 h +1 h +169 h +11 h +4 h +4 h +4 h +4 h +4 h +7426 m +7427 m +4 h +10 h +10 h +4 h +4 h +1 h +65 h +4 h +295 h +276 h +7428 m +4 h +1 h +2961 m +1 h +59 h +7429 m +10 h +64 h +4 h +4 h +10 h +4 h +1 h +1 h +7430 m +12 h +48 h +4 h +1 h +109 h +1 h +1309 m +4 
h +11 h +4 h +156 h +10 h +4 h +4 h +4 h +1 h +10 h +1 h +7431 m +488 h +59 h +190 h +1006 m +57 h +10 h +7432 m +4 h +1 h +7433 m +2002 m +59 h +7434 m +5 h +4 h +45 h +7435 m +447 h +1 h +10 h +7436 m +7437 m +4 h +7438 m +4 h +10 h +1 h +7439 m +12 h +938 m +4 h +10 h +4 h +4 h +7440 m +7441 m +4 h +7442 m +12 h +4 h +1 h +4 h +10 h +278 h +1 h +1 h +1 h +4 h +4 h +57 h +1 h +7443 m +4 h +266 h +4 h +1 h +4 h +7444 m +10 h +10 h +4 h +138 h +4 h +1 h +41 h +4 h +77 h +41 h +59 h +10 h +10 h +4 h +1 h +3561 m +75 m +124 h +7445 m +676 m +7446 m +1016 h +1 h +181 h +139 h +1 h +464 h +97 h +10 h +7447 m +1 h +186 h +7448 m +10 h +10 h +1 h +1 h +10 h +7449 m +332 h +48 h +11 h +172 h +10 h +1 h +4 h +4 h +1 h +170 h +59 h +158 h +7450 m +4 h +7451 m +10 h +274 h +7452 m +4 h +7453 m +1 h +7454 m +56 h +1 h +10 h +106 m +4 h +25 h +4 h +295 h +4 h +2475 m +7455 m +7456 m +4 h +1 h +31 h +11 h +7457 m +10 h +4 h +295 h +4 h +7458 m +11 h +1 h +79 h +181 h +4 h +7459 m +4 h +83 h +2623 m +57 h +538 h +4 h +4 h +109 h +97 h +214 m +4 h +5869 m +5917 m +12 h +1374 m +59 h +135 h +4 h +7460 m +4 h +109 h +4 h +147 h +7461 m +7462 m +10 h +4 h +10 h +4 h +10 h +25 h +25 h +7463 m +114 h +1 h +10 h +1 h +195 h +4 h +56 h +83 h +1 h +10 h +4 h +91 h +74 h +1 h +10 h +276 h +4 h +1 h +41 h +7464 m +70 m +4 h +31 h +7465 m +1 h +1 h +73 h +10 h +4 h +7466 m +1 h +4 h +454 m +3 h +69 h +4 h +13 h +40 h +1 h +7467 m +109 h +10 h +10 h +7468 m +10 h +1 h +1 h +7469 m +41 h +4 h +28 h +10 h +7470 m +55 h +1 h +10 h +4 h +4 h +11 h +4 h +4 h +10 h +3477 m +195 h +7471 m +10 h +7472 m +7473 m +1 h +7474 m +46 h +4 h +1697 h +1 h +94 h +4 h +1 h +7475 m +1 h +7476 m +114 h +7477 m +7478 m +1 h +73 h +109 h +7479 m +11 h +45 h +4 h +4 h +7480 m +4 h +1 h +4 h +4 h +59 h +10 h +104 h +7481 m +11 h +4 h +270 h +7482 m +79 h +1 h +74 h +1 h +1 h +7483 m +1504 m +1 h +147 h +146 h +124 h +7484 m +4 h +97 h +4 h +3 h +4 h +135 h +10 h +4 h +1 h +10 h +7485 m +4 h +10 h +4 h +1 h +1 h +4 
h +4 h +11 h +7486 m +7487 m +7488 m +4 h +4 h +41 h +481 m +4 h +1 h +1 h +8 h +4 h +10 h +7489 m +109 h +4 h +1 h +1 h +478 m +10 h +1 h +1 h +4 h +4 h +1 h +536 h +10 h +266 h +857 h +10 h +4 h +7490 m +4 h +1 h +7491 m +190 h +10 h +359 h +10 h +82 h +4 h +2300 m +7492 m +7493 m +7494 m +7495 m +7496 m +4 h +10 h +10 h +3 h +1 h +140 h +1 h +64 h +304 m +41 h +3 h +97 h +1 h +7497 m +10 h +10 h +1 h +46 h +7498 m +146 h +82 h +4 h +4 h +1 h +4 h +7499 m +10 h +1847 m +10 h +7500 m +386 h +4 h +1 h +59 h +4 h +31 h +11 h +146 h +1 h +4 h +3 h +1 h +146 h +4 h +4 h +36 h +1 h +94 h +1 h +10 h +7501 m +7502 m +114 h +2840 m +4 h +1 h +13 h +4 h +31 h +7503 m +10 h +10 h +7504 m +1 h +7505 m +2002 m +4 h +11 h +1 h +258 h +5505 m +7506 m +4 h +25 h +114 h +4 h +28 h +4 h +238 h +307 h +2666 m +10 h +10 h +4 h +1 h +7507 m +4 h +1 h +10 h +4 h +1 h +155 m +4 h +4 h +7508 m +10 h +11 h +4 h +264 m +4 h +7509 m +4 h +1 h +10 h +4 h +332 h +7510 m +4 h +1116 m +7511 m +4 h +4 h +7512 m +1 h +10 h +4 h +241 m +1 h +1 h +123 h +172 h +4390 m +4 h +4 h +4 h +4 h +4 h +3 h +4 h +10 h +1 h +7513 m +11 h +56 h +129 h +7514 m +55 h +41 h +59 h +7515 m +4 h +11 h +11 h +109 h +10 h +4 h +108 h +7516 m +124 h +10 h +10 h +146 h +7517 m +278 h +7518 m +31 h +83 h +1 h +7519 m +83 h +7520 m +4 h +1 h +59 h +109 h +10 h +1 h +443 h +172 h +11 h +10 h +1 h +10 h +7521 m +82 h +7522 m +4723 m +7523 m +1 h +1 h +7524 m +266 h +7525 m +4 h +1 h +4 h +79 h +104 h +1 h +297 h +56 h +12 h +1 h +4 h +4 h +347 m +1 h +10 h +7526 m +1 h +7527 m +7528 m +10 h +10 h +4 h +1 h +313 m +3 h +4 h +4 h +10 h +1 h +7529 m +1 h +295 h +10 h +4 h +1 h +7530 m +184 h +65 h +124 h +4 h +10 h +4 h +7531 m +1 h +185 h +7532 m +1249 m +10 h +173 h +4 h +7533 m +7534 m +55 h +57 h +104 h +25 h +4 h +10 h +7535 m +1 h +1 h +4 h +4 h +135 h +4 h +1772 h +4 h +4 h +1 h +7536 m +4 h +4 h +11 h +6461 m +65 h +4 h +7537 m +4 h +125 h +4 h +10 h +297 h +7538 m +4 h +123 h +1 h +83 h +10 h +536 h +3028 m +5048 m 
+7539 m +1 h +10 h +10 h +125 h +7540 m +7541 m +1 h +10 h +7542 m +10 h +195 h +3 h +386 h +1 h +278 h +10 h +1 h +4 h +7543 m +4 h +4 h +1 h +4 h +1137 h +10 h +1 h +10 h +1835 m +935 h +4 h +82 h +7544 m +4489 m +4 h +4 h +4 h +11 h +1619 h +4 h +265 h +4 h +7545 m +10 h +7546 m +10 h +109 h +7547 m +4 h +10 h +135 h +4 h +1 h +2054 m +10 h +13 h +4 h +289 h +7548 m +1 h +4 h +2931 m +7549 m +112 h +4 h +1 h +7550 m +3112 m +1 h +10 h +10 h +7551 m +195 h +10 h +4 h +41 h +64 h +1 h +1 h +4 h +25 h +4 h +1 h +13 h +4 h +1 h +1 h +7552 m +4 h +1454 m +4 h +4 h +779 m +5897 m +46 h +7553 m +4 h +4 h +1 h +1 h +4 h +4 h +4 h +266 h +10 h +4 h +885 m +1 h +7554 m +266 h +10 h +10 h +4 h +1 h +7555 m +8 h +195 h +146 h +7556 m +4 h +258 h +4 h +64 h +7557 m +4 h +7558 m +332 h +4 h +1 h +7559 m +1 h +4 h +1 h +262 h +7560 m +1685 h +10 h +1697 h +7561 m +4 h +7562 m +7563 m +45 h +25 h +4 h +4 h +55 h +1 h +7564 m +1 h +1 h +104 h +7565 m +10 h +74 h +1 h +447 h +7566 m +1 h +10 h +1 h +7567 m +7568 m +56 h +1 h +10 h +4 h +4 h +7569 m +6491 m +4 h +57 h +10 h +7570 m +1 h +7571 m +11 h +10 h +1 h +10 h +7572 m +7573 m +1 h +1 h +7574 m +656 h +10 h +10 h +4 h +1 h +7575 m +4 h +7576 m +10 h +10 h +7577 m +65 h +114 h +56 h +7578 m +4 h +7579 m +1 h +25 h +4 h +4 h +4 h +25 h +7580 m +7581 m +7582 m +164 h +1 h +7583 m +1389 h +4 h +82 h +10 h +1 h +10 h +7584 m +4 h +7585 m +4 h +11 h +4 h +94 h +1 h +10 h +4 h +7586 m +1 h +7587 m +109 h +4 h +986 h +4 h +4 h +1 h +7588 m +1 h +74 h +954 m +1 h +7589 m +1 h +55 h +1772 h +97 h +4 h +10 h +4 h +7590 m +11 h +146 h +10 h +4 h +1403 h +124 h +11 h +1 h +692 h +10 h +7591 m +3 h +1 h +4858 m +7592 m +10 h +1 h +64 h +1 h +4 h +1 h +11 h +104 h +4 h +1 h +4 h +10 h +250 h +10 h +4 h +4 h +22 h +642 m +6381 m +10 h +7593 m +4 h +4 h +4 h +386 h +4 h +11 h +4 h +4 h +196 h +7594 m +41 h +486 m +7595 m +4 h +7596 m +7597 m +69 h +241 m +119 h +7598 m +7599 m +2002 h +12 h +7600 m +31 h +4 h +1 h +4 h +4 h +41 h +1 h +125 h 
+157 h +7601 m +7602 m +250 h +7603 m +7604 m +4 h +10 h +1822 h +4 h +7605 m +92 h +109 h +7606 m +464 h +3398 m +1 h +4 h +10 h +146 h +1 h +2962 m +123 h +444 m +7607 m +1886 m +4 h +1 h +1 h +1508 m +4 h +2733 h +10 h +10 h +1 h +11 h +1089 h +10 h +1771 m +7608 m +41 h +4 h +1790 h +1 h +41 h +295 h +7609 m +64 h +4 h +1 h +7610 m +4 h +7611 m +7612 m +184 h +10 h +4 h +7613 m +1 h +4 h +7614 m +4 h +185 h +857 h +4 h +7615 m +11 h +5783 m +10 h +1 h +10 h +4 h +124 h +2072 m +7616 m +1 h +4 h +109 h +7617 m +10 h +97 h +10 h +1 h +138 h +1 h +10 h +7618 m +10 h +4 h +74 h +10 h +1 h +1 h +25 h +4 h +563 m +1 h +10 h +13 h +4 h +7619 m +4 h +4 h +7620 m +83 h +1 h +4 h +4 h +4 h +757 h +10 h +4 h +1 h +10 h +1 h +4 h +181 h +278 h +4 h +4 h +7621 m +3 h +4 h +41 h +10 h +383 h +4 h +4 h +7622 m +4 h +45 h +7623 m +4 h +733 m +1 h +1790 h +4 h +7624 m +258 h +4 h +7625 m +1 h +4 h +4 h +1 h +1 h +10 h +7626 m +7627 m +4 h +7628 m +229 h +146 h +4 h +7629 m +1 h +10 h +4 h +1 h +7630 m +1 h +10 h +4 h +10 h +4 h +10 h +10 h +1 h +11 h +83 h +1 h +276 h +12 h +7631 m +36 h +10 h +7632 m +10 h +190 h +3 h +1822 h +7633 m +1 h +1 h +1 h +7634 m +7635 m +4 h +7636 m +1 h +4 h +114 h +4 h +4 h +10 h +1 h +464 h +4 h +143 h +1 h +7637 m +4 h +11 h +4 h +13 h +4 h +4 h +1 h +10 h +649 h +1 h +7638 m +4 h +7639 m +7640 m +7641 m +10 h +7642 m +7643 m +4 h +4 h +4 h +1 h +1 h +4 h +10 h +448 m +94 h +4 h +1 h +10 h +7644 m +1 h +5 h +2928 m +82 h +5822 m +1 h +258 h +4 h +1 h +4 h +94 h +12 h +7645 m +4 h +7646 m +1445 m +4 h +3025 m +5944 h +4 h +1 h +4 h +590 m +4 h +7647 m +25 h +113 h +1 h +4 h +1 h +1 h +7648 m +1 h +79 h +10 h +10 h +1 h +3 h +1 h +7649 m +7650 m +4 h +1 h +1 h +1 h +11 h +1 h +1 h +1 h +278 h +3 h +97 h +1 h +1 h +83 h +75 m +59 h +7651 m +57 h +1 h +82 h +7652 m +4 h +11 h +7653 m +4 h +7654 m +10 h +4 h +7655 m +4 h +1 h +109 h +10 h +7656 m +1 h +59 h +4 h +4 h +4 h +10 h +8 h +146 h +10 h +11 h +4 h +620 m +17 m +7657 m +7658 m +7659 m +4 h +4 
h +10 h +73 h +10 h +1 h +1 h +4 h +7660 m +13 h +4 h +7661 m +1 h +1 h +10 h +4 h +4 h +7662 m +4 h +4 h +4 h +97 h +7663 m +7447 m +4 h +4 h +7664 m +1 h +4 h +10 h +13 h +77 h +65 h +7665 m +307 h +4 h +7666 m +4 h +7253 m +104 h +1 h +4 h +55 h +4 h +157 h +184 h +7667 m +10 h +10 h +59 h +297 h +10 h +36 h +59 h +4 h +7668 m +55 h +814 m +82 h +7669 m +3396 m +7670 m +468 m +10 h +1 h +10 h +82 h +4 h +1 h +10 h +10 h +1 h +1359 h +7671 m +7672 m +4 h +1 h +7673 m +7674 m +11 h +7675 m +7676 m +7677 m +4 h +10 h +31 h +1 h +1 h +97 h +1 h +10 h +97 h +520 m +64 h +4 h +110 h +4 h +4 h +4 h +1 h +83 h +92 h +1 h +109 h +1 h +1 h +7678 m +4 h +169 h +10 h +11 h +4 h +4 h +7679 m +7680 m +4 h +10 h +2522 m +84 h +10 h +4 h +581 m +7681 m +4 h +1 h +94 h +4 h +83 h +97 h +48 h +92 h +7682 m +4 h +10 h +4 h +7683 m +64 h +4 h +11 h +4 h +10 h +1 h +4 h +4 h +258 h +10 h +36 h +48 h +22 h +1 h +1 h +12 h +10 h +1 h +65 h +10 h +7684 m +4 h +12 h +1 h +65 h +7685 m +7686 m +11 h +1 h +1 h +10 h +10 h +109 h +10 h +7687 m +4 h +1 h +181 h +3 h +4 h +7688 m +10 h +55 h +7689 m +4 h +10 h +4 h +7690 m +1 h +83 h +4 h +147 h +74 h +4 h +196 h +4 h +986 h +7691 m +10 h +4 h +7692 m +7693 m +167 h +10 h +1 h +3 h +1 h +2887 h +994 m +256 h +10 h +4 h +97 h +10 h +7694 m +10 h +7695 m +156 h +10 h +4 h +195 h +144 h +10 h +11 h +4 h +640 h +7696 m +4 h +173 h +4 h +1 h +208 m +7697 m +82 h +10 h +1722 m +10 h +59 h +1 h +12 h +12 h +4 h +4 h +10 h +169 h +36 h +443 h +124 h +97 h +10 h +4 h +7698 m +7699 m +83 h +7700 m +41 h +1 h +5379 m +4 h +7701 m +1 h +1 h +7702 m +4 h +1 h +25 h +1 h +258 h +7703 m +1 h +1 h +11 h +3979 m +1 h +238 h +7704 m +1083 h +7705 m +11 h +74 h +173 h +488 h +7706 m +1 h +1 h +4 h +83 h +1 h +1 h +908 m +10 h +45 h +1 h +4 h +1027 h +4 h +10 h +1 h +4 h +4 h +447 h +195 h +146 h +10 h +4 h +1 h +4 h +7707 m +2172 h +124 h +4 h +138 h +4 h +1 h +74 h +7708 m +2733 h +3 h +1 h +4 h +10 h +7709 m +7710 m +64 h +1 h +7711 m +7712 m +10 h +7713 m 
+10 h +4 h +7714 m +10 h +7715 m +7716 m +1445 m +57 h +4 h +7717 m +10 h +17 m +7718 m +1 h +91 h +4 h +108 h +7719 m +10 h +1 h +10 h +1 h +4 h +295 h +104 h +10 h +4 h +2111 m +4 h +1 h +4 h +4 h +11 h +1 h +7720 m +110 h +4 h +110 h +1 h +4 h +272 h +1 h +1 h +119 h +4 h +1 h +1 h +4 h +104 h +1137 h +7721 m +10 h +10 h +4 h +7722 m +10 h +92 h +10 h +138 h +4 h +10 h +10 h +1 h +986 h +4 h +3 h +10 h +12 h +278 h +4 h +590 m +1 h +11 h +7723 m +7724 m +1 h +1 h +4 h +1 h +48 h +7725 m +1 h +1 h +278 h +10 h +7726 m +7727 m +477 m +10 h +7728 m +7729 m +1 h +1 h +10 h +10 h +104 h +13 h +4 h +4 h +4 h +1 h +4 h +1 h +1 h +4 h +4 h +4 h +10 h +59 h +4 h +10 h +4 h +10 h +7730 m +976 h +1 h +7731 m +4 h +1 h +56 h +181 h +4 h +7732 m +1 h +7733 m +4 h +10 h +4 h +10 h +10 h +10 h +10 h +7734 m +146 h +1 h +147 h +7735 m +74 h +7736 m +4 h +1 h +10 h +7737 m +31 h +10 h +7738 m +433 m +7739 m +4 h +4 h +82 h +1 h +7740 m +1 h +7741 m +4 h +4 h +10 h +4 h +57 h +4 h +31 h +556 h +7742 m +4 h +1 h +56 h +7743 m +4 h +7744 m +4 h +4 h +7745 m +10 h +11 h +4 h +4 h +1 h +7746 m +1 h +4 h +3702 m +11 h +124 h +1122 m +4 h +1 h +169 h +7747 m +368 h +1 h +11 h +1 h +1 h +113 h +7661 m +1 h +146 h +4 h +1666 m +1 h +65 h +1 h +285 m +1 h +4 h +7748 m +7749 m +4 h +1 h +140 h +69 h +1880 m +4 h +7750 m +1 h +1 h +1 h +82 h +1 h +4 h +10 h +114 h +2374 h +10 h +538 h +4 h +55 h +109 h +7751 m +2314 m +1 h +266 h +1 h +92 h +83 h +3 h +737 h +5 h +11 h +124 h +7752 m +7753 m +520 m +41 h +41 h +4 h +4 h +4 h +4 h +27 h +4 h +4 h +4 h +4 h +110 h +784 h +1 h +7754 m +1 h +59 h +7755 m +1 h +7756 m +97 h +97 h +1822 h +31 h +7757 m +7758 m +1 h +4 h +10 h +4 h +7759 m +1 h +125 h +7760 m +1650 h +7761 m +4 h +92 h +1 h +5093 m +11 h +157 h +11 h +1 h +11 h +1 h +4 h +190 h +4 h +716 m +278 h +1835 m +4 h +7762 m +1 h +1 h +1 h +74 h +1137 h +1 h +4 h +7763 m +10 h +1 h +1 h +11 h +4 h +7764 m +520 h +4 h +10 h +11 h +10 h +383 h +1 h +7765 m +31 h +4 h +10 h +7766 m +7767 m 
+119 h +7768 m +4 h +10 h +7769 m +1470 h +986 h +56 h +593 m +10 h +7770 m +10 h +10 h +6370 m +82 h +7771 m +4 h +4 h +82 h +185 h +4 h +1 h +3 h +10 h +7772 m +93 h +7773 m +1 h +125 h +10 h +7774 m +59 h +55 h +7775 m +1 h +7776 m +270 h +94 h +2079 m +92 h +7777 m +258 h +4 h +208 m +1 h +4 h +10 h +1 h +1260 m +40 h +4 h +4 h +4 h +146 h +4 h +7778 m +10 h +25 h +74 h +10 h +4 h +11 h +4 h +4 h +4 h +83 h +94 h +124 h +276 h +1595 m +7779 m +4 h +10 h +7780 m +10 h +4 h +1 h +7781 m +4 h +4 h +10 h +10 h +7782 m +7783 m +10 h +114 h +7784 m +447 h +4 h +10 h +3 h +1 h +7785 m +57 h +4 h +1780 h +1 h +7786 m +10 h +157 h +181 h +4 h +10 h +4 h +4 h +7787 m +1 h +4 h +4 h +73 h +57 h +10 h +1 h +7788 m +181 h +1 h +41 h +1650 h +4 h +2788 h +112 h +1 h +4 h +11 h +10 h +4 h +7789 m +4 h +4 h +139 h +10 h +25 h +10 h +5 h +4 h +1 h +69 h +7790 m +7791 m +1185 m +7792 m +1445 h +1123 m +7793 m +1 h +124 h +1 h +74 h +4 h +7794 m +7795 m +7796 m +8 h +11 h +4 h +172 h +10 h +1 h +4 h +10 h +65 h +41 h +7797 m +4 h +10 h +4 h +8 h +692 h +83 h +10 h +204 h +4 h +1198 m +7798 m +1 h +1 h +4 h +7799 m +10 h +139 h +10 h +11 h +4 h +4 h +6726 m +41 h +114 h +7800 m +11 h +92 h +7801 m +143 h +10 h +368 h +1 h +124 h +1 h +1 h +4975 m +7802 m +601 h +7803 m +7804 m +1016 h +7805 m +278 h +1 h +7806 m +12 h +7807 m +1 h +7808 m +10 h +1105 h +7809 m +174 h +4 h +1 h +4 h +1 h +3 h +5 h +109 h +4 h +124 h +4 h +228 m +7810 m +104 h +1 h +10 h +1 h +41 h +265 h +10 h +74 h +7811 m +520 h +10 h +2625 m +10 h +4 h +10 h +7812 m +1 h +7813 m +73 h +1 h +10 h +4 h +1105 h +31 h +7814 m +1 h +4 h +3 h +7815 m +4 h +7816 m +7817 m +59 h +10 h +4 h +4 h +7818 m +6221 m +4 h +167 h +443 h +7819 m +4 h +1 h +27 h +7820 m +104 h +1 h +4 h +4 h +1 h +1470 h +1 h +92 h +83 h +10 h +1 h +4 h +79 h +7821 m +1 h +1 h +4 h +7822 m +1 h +7823 m +4 h +7824 m +262 h +65 h +7825 m +1 h +125 h +4 h +11 h +4 h +2625 m +73 h +10 h +4 h +22 h +7826 m +4 h +4 h +7827 m +2851 m +4 h +1 h +1 h +83 
h +195 h +59 h +4 h +57 h +4 h +1 h +1646 m +1 h +12 h +4 h +10 h +1619 h +10 h +4 h +7828 m +289 h +7829 m +278 h +10 h +4 h +11 h +4 h +7830 m +7831 m +4 h +7832 m +41 h +4 h +536 h +10 h +1 h +7833 m +1 h +10 h +4 h +11 h +97 h +10 h +692 h +7834 m +4 h +1 h +83 h +4 h +97 h +92 h +297 h +4 h +1016 h +4 h +4 h +1 h +10 h +4 h +7835 m +4 h +104 h +10 h +7836 m +25 h +7837 m +7838 m +4 h +447 h +10 h +10 h +59 h +1 h +10 h +10 h +7839 m +7840 m +7841 m +4 h +10 h +7842 m +4 h +4 h +56 h +11 h +10 h +97 h +10 h +11 h +169 h +7843 m +1 h +10 h +41 h +1083 h +1 h +10 h +1089 h +25 h +11 h +7844 m +4 h +10 h +1169 m +4 h +7845 m +10 h +7846 m +41 h +22 h +4 h +7847 m +36 h +158 h +7848 m +7849 m +4 h +109 h +7850 m +1 h +185 h +399 h +7851 m +246 m +82 h +104 h +7852 m +4 h +79 h +219 h +1 h +123 h +1 h +7853 m +4 h +25 h +307 h +7854 m +7855 m +170 h +1 h +4 h +1 h +172 h +10 h +6144 m +109 h +3 h +1016 h +7856 m +7857 m +4 h +7858 m +258 h +4 h +196 h +1751 m +1260 m +4 h +7859 m +7860 m +83 h +7861 m +307 h +1 h +1 h +1 h +4 h +276 h +4 h +10 h +7862 m +1 h +119 h +1 h +4 h +3307 m +181 h +4535 m +10 h +1 h +4 h +4 h +7863 m +1 h +2719 h +297 h +10 h +167 h +10 h +124 h +4 h +264 m +358 h +10 h +83 h +55 h +1 h +13 h +1 h +4 h +4 h +124 h +4 h +358 h +4 h +7864 m +10 h +1 h +10 h +4 h +1 h +7865 m +4 h +1 h +97 h +124 h +195 h +10 h +4 h +7866 m +73 h +124 h +250 h +371 h +59 h +1796 m +73 h +4 h +1 h +7867 m +4 h +4 h +125 h +630 m +2591 m +1 h +7868 m +7869 m +4 h +2623 m +1 h +112 h +7870 m +4 h +4 h +22 h +1 h +4 h +4 h +57 h +463 m +1 h +7871 m +3 h +4 h +10 h +1 h +1250 h +1 h +1137 h +7872 m +447 h +10 h +10 h +7873 m +7874 m +1 h +1 h +138 h +7875 m +10 h +1089 h +10 h +185 h +4 h +10 h +4 h +7876 m +538 h +4 h +31 h +4 h +2172 h +10 h +1 h +10 h +7126 m +443 h +7877 m +167 h +4 h +4 h +1 h +7878 m +57 h +4 h +4 h +1362 h +1 h +1 h +4 h +4 h +11 h +1 h +10 h +7879 m +31 h +4 h +1020 m +4 h +4 h +124 h +3 h +124 h +10 h +1 h +4 h +8 h +4 h +7880 m +4 h +158 
h +4 h +4 h +1 h +114 h +278 h +83 h +5933 m +10 h +181 h +4 h +7881 m +73 h +56 h +1 h +3680 m +1309 m +10 h +112 h +3299 m +172 h +630 m +10 h +92 h +10 h +1 h +74 h +10 h +1 h +109 h +57 h +167 h +4 h +1 h +10 h +129 h +7882 m +10 h +181 h +7883 m +10 h +1 h +1 h +4 h +7884 m +27 h +7885 m +11 h +4 h +110 h +1 h +8 h +7886 m +10 h +196 h +10 h +7887 m +4 h +10 h +10 h +1 h +45 h +4 h +7888 m +7889 m +258 h +10 h +2379 m +7890 m +13 h +10 h +4 h +278 h +8 h +10 h +3479 m +371 h +1 h +4 h +1 h +1 h +59 h +4 h +2447 m +10 h +1 h +7891 m +4 h +164 h +7892 m +1 h +10 h +7893 m +4 h +295 h +10 h +1 h +1 h +4 h +7894 m +4 h +4 h +4 h +125 h +10 h +1 h +1 h +10 h +10 h +358 h +4 h +31 h +36 h +196 h +4 h +4 h +7895 m +4 h +7896 m +1 h +139 h +83 h +10 h +4 h +7897 m +7898 m +1 h +10 h +123 h +7899 m +258 h +11 h +4 h +25 h +1030 h +31 h +10 h +1 h +4 h +1 h +97 h +4 h +123 h +172 h +1 h +4 h +4 h +7900 m +27 h +4 h +1 h +4 h +7901 m +124 h +1 h +229 h +11 h +139 h +10 h +7902 m +7903 m +4 h +4 h +4 h +41 h +7904 m +7905 m +10 h +4 h +4 h +146 h +10 h +10 h +4 h +4 h +7906 m +1 h +4240 m +4350 m +10 h +10 h +7907 m +7908 m +1508 m +1 h +4 h +1 h +1 h +10 h +3 h +7909 m +25 h +97 h +10 h +1 h +4 h +45 h +7910 m +82 h +4 h +7911 m +4 h +4 h +6954 m +7912 m +196 h +4 h +7913 m +4 h +74 h +4 h +4 h +1 h +4 h +4 h +7914 m +7915 m +156 h +4 h +106 h +10 h +83 h +1 h +7916 m +7917 m +1 h +4 h +4 h +1 h +1 h +250 h +57 h +167 h +82 h +4 h +10 h +10 h +4 h +4 h +4 h +109 h +1 h +1 h +4 h +10 h +4 h +10 h +4292 m +4 h +7918 m +4 h +59 h +1 h +1 h +265 h +4 h +10 h +7919 m +4 h +64 h +7920 m +488 h +10 h +1 h +73 h +7921 m +3 h +4 h +10 h +4 h +388 m +167 h +7922 m +386 h +1 h +3 h +4 h +1 h +4 h +4 h +2887 h +7923 m +25 h +1 h +1 h +4 h +7924 m +7925 m +5141 m +8 h +146 h +7926 m +7927 m +1 h +11 h +4 h +7839 m +41 h +91 h +7928 m +4 h +4 h +1 h +114 h +1 h +7929 m +112 h +40 h +196 h +7930 m +10 h +10 h +10 h +7931 m +146 h +4 h +1 h +3 h +1 h +83 h +4 h +4 h +1337 m +11 h +4 h 
+7932 m +1 h +4 h +64 h +7933 m +7661 h +124 h +3 h +7934 m +10 h +7935 m +173 h +7936 m +2530 m +10 h +1957 m +10 h +7937 m +1 h +7938 m +190 h +10 h +108 h +4 h +7939 m +1 h +11 h +7940 m +4 h +1 h +1 h +4 h +1249 m +10 h +82 h +146 h +1 h +59 h +1 h +4 h +7941 m +4 h +10 h +4 h +146 h +7942 m +1 h +1 h +4 h +109 h +1 h +2813 m +55 h +368 h +4 h +1 h +4 h +4 h +7943 m +7944 m +1 h +4 h +7945 m +3342 m +83 h +1 h +1 h +5653 m +10 h +307 h +276 h +7946 m +7947 m +7948 m +4 h +45 h +1027 h +4 h +1116 m +7949 m +124 h +1 h +31 h +10 h +4 h +7950 m +10 h +1685 h +124 h +1 h +7951 m +10 h +1 h +7952 m +83 h +135 h +65 h +12 h +135 h +4 h +25 h +3422 m +1 h +124 h +92 h +139 h +82 h +109 h +1 h +1 h +4 h +10 h +124 h +4 h +4 h +4 h +92 h +146 h +10 h +10 h +4 h +4 h +11 h +109 h +10 h +4 h +1 h +10 h +1249 m +1 h +10 h +1 h +7953 m +77 h +7954 m +1403 h +4 h +4 h +7955 m +4 h +965 m +4 h +238 h +1 h +83 h +65 h +556 h +4 h +4 h +403 h +7956 m +4 h +4 h +1 h +4 h +11 h +7957 m +4 h +692 h +1064 m +172 h +1 h +1 h +1 h +319 h +371 h +31 h +1470 h +7958 m +41 h +7959 m +10 h +1 h +11 h +1 h +1 h +3341 m +104 h +82 h +45 h +4 h +4 h +7960 m +123 h +10 h +4 h +1 h +4 h +10 h +4 h +10 h +36 h +82 h +266 h +147 h +10 h +56 h +1 h +4 h +4 h +45 h +7961 m +103 m +1 h +1892 m +73 h +7962 m +4 h +4 h +1 h +583 m +4 h +10 h +1 h +1 h +7963 m +4 h +10 h +4 h +4 h +7964 m +94 h +4 h +2004 m +1 h +1 h +10 h +4 h +56 h +1 h +10 h +4 h +4 h +143 h +169 h +7965 m +976 h +1 h +1 h +82 h +7966 m +1 h +25 h +7967 m +11 h +4 h +1 h +1 h +7968 m +114 h +4 h +119 h +1 h +7969 m +7970 m +1 h +478 h +1 h +4 h +82 h +4 h +10 h +64 h +10 h +4 h +7971 m +7972 m +4 h +4 h +1 h +1261 m +718 h +1 h +1 h +65 h +7973 m +124 h +45 h +4 h +4 h +4 h +41 h +92 h +1 h +7974 m +4 h +4 h +270 h +17 h +1 h +12 h +1 h +10 h +7975 m +25 h +4 h +124 h +1 h +10 h +4 h +1 h +97 h +4 h +1 h +7976 m +10 h +7977 m +1089 h +7978 m +1 h +7979 m +4 h +4 h +4 h +1 h +7980 m +7981 m +7982 m +10 h +1 h +41 h +125 h +1535 m 
+10 h +601 h +1 h +10 h +1 h +7983 m +7984 m +4 h +73 h +104 h +1619 h +109 h +82 h +4 h +7985 m +11 h +7986 m +1 h +1137 h +1 h +7987 m +4 h +4 h +698 m +918 m +4 h +1 h +1 h +2733 h +383 h +1 h +4 h +10 h +7988 m +4 h +1766 h +45 h +36 h +7989 m +7990 m +144 h +7991 m +4 h +1785 m +4 h +7992 m +56 h +4 h +10 h +7993 m +4 h +4 h +10 h +7994 m +7995 m +56 h +696 m +4 h +4 h +4 h +7996 m +10 h +41 h +1 h +104 h +36 h +1 h +4 h +7997 m +1 h +144 h +1 h +172 h +7998 m +4 h +7999 m +82 h +8000 m +1 h +640 h +8001 m +27 h +8002 m +1 h +4 h +2941 m +8003 m +4 h +1053 m +8004 m +976 h +11 h +82 h +10 h +4 h +55 h +1 h +31 h +4 h +4 h +4 h +82 h +157 h +10 h +10 h +10 h +1 h +1202 m +4 h +8005 m +4 h +4 h +1 h +1 h +4 h +4 h +4 h +10 h +4 h +8006 m +3555 m +4 h +224 h +10 h +4 h +11 h +45 h +1 h +1 h +8007 m +8008 m +59 h +10 h +1 h +4 h +4 h +1 h +8009 m +1122 m +1 h +1 h +412 m +4 h +10 h +1 h +10 h +1 h +10 h +10 h +1 h +10 h +4 h +4 h +97 h +3 h +8010 m +8011 m +10 h +1796 m +10 h +8012 m +57 h +229 h +4 h +1 h +8013 m +8014 m +4 h +10 h +8015 m +3 h +184 h +185 h +4 h +1016 h +976 h +5590 m +190 h +8 h +25 h +1 h +8016 m +10 h +10 h +3 h +8017 m +4 h +4 h +4 h +10 h +8018 m +8 h +8019 m +1 h +737 h +520 h +11 h +4 h +8020 m +185 h +1 h +8021 m +8022 m +1 h +478 h +10 h +8023 m +41 h +57 h +10 h +10 h +4229 m +82 h +10 h +1114 m +4 h +447 h +1 h +11 h +3112 m +1 h +4 h +4 h +10 h +4 h +4 h +10 h +4 h +299 h +1 h +4 h +10 h +1 h +10 h +4 h +297 h +10 h +4 h +1 h +4 h +10 h +10 h +8024 m +8025 m +1 h +4 h +10 h +4 h +8026 m +1201 m +285 m +181 h +1 h +8027 m +11 h +10 h +433 m +8028 m +56 h +4 h +8029 m +4 h +8030 m +104 h +4 h +74 h +1 h +8031 m +185 h +1 h +97 h +278 h +1 h +8032 m +4 h +1016 h +11 h +1 h +1 h +1 h +4 h +10 h +4 h +59 h +4 h +11 h +8033 m +4 h +1646 m +1 h +4 h +8034 m +4 h +1 h +1 h +4 h +8035 m +56 h +4 h +4 h +8036 m +1 h +10 h +443 h +4 h +4 h +1 h +8037 m +8038 m +4 h +332 h +1 h +4 h +8039 m +10 h +123 h +1 h +4 h +8040 m +307 h +31 h +25 h +8041 
m +82 h +8042 m +1 h +1 h +8043 m +8044 m +169 h +8045 m +265 h +27 h +91 h +10 h +6438 m +8046 m +4 h +124 h +10 h +1 h +8047 m +4 h +4 h +1 h +4 h +1 h +307 h +1 h +1 h +64 h +57 h +41 h +4 h +4 h +10 h +8048 m +11 h +1 h +4 h +1 h +1 h +8049 m +10 h +8050 m +8051 m +10 h +4 h +1 h +4 h +10 h +114 h +2314 m +1 h +41 h +10 h +4 h +8052 m +10 h +83 h +10 h +59 h +1 h +11 h +1406 h +1 h +687 h +25 h +11 h +4 h +10 h +447 h +4 h +36 h +41 h +1 h +10 h +10 h +82 h +4 h +57 h +4 h +1 h +11 h +173 h +265 h +170 h +11 h +1454 m +11 h +119 h +97 h +1 h +4 h +8053 m +1 h +8054 m +1 h +79 h +8055 m +10 h +8056 m +27 h +70 m +6731 m +4 h +1 h +4 h +8057 m +22 h +8058 m +10 h +10 h +5917 m +8059 m +10 h +4 h +238 h +4590 m +27 h +10 h +656 h +8060 m +8061 m +10 h +8062 m +2002 h +4 h +10 h +8063 m +1 h +2625 h +11 h +1 h +1957 m +172 h +4 h +1 h +4 h +4 h +8064 m +4 h +27 h +10 h +8065 m +4 h +8066 m +124 h +4 h +4 h +1790 h +97 h +1 h +28 h +8067 m +4 h +4 h +8068 m +4 h +8069 m +11 h +143 h +1 h +4 h +8070 m +10 h +1 h +10 h +4 h +359 h +289 h +114 h +10 h +1 h +79 h +4 h +4 h +3 h +1 h +8071 m +347 m +1 h +10 h +13 h +1 h +10 h +82 h +73 h +1 h +125 h +1 h +8072 m +8073 m +8074 m +8075 m +8076 m +11 h +238 h +3679 m +4 h +4 h +45 h +8077 m +1 h +1 h +143 h +8078 m +12 h +4 h +8079 m +1 h +8080 m +1 h +11 h +4 h +2265 m +146 h +10 h +4 h +297 h +1 h +4 h +1620 m +642 m +5917 m +97 h +4 h +8081 m +8082 m +4 h +1 h +10 h +8083 m +11 h +1 h +114 h +1 h +10 h +22 h +279 h +4 h +1 h +1 h +10 h +8084 m +1067 m +8085 m +8086 m +4 h +8087 m +8088 m +10 h +359 h +94 h +4 h +10 h +4 h +114 h +8089 m +8090 m +1 h +1 h +1 h +1 h +8091 m +4 h +10 h +4 h +4 h +31 h +8092 m +1 h +94 h +4 h +4 h +124 h +31 h +1 h +124 h +8093 m +4 h +10 h +1074 h +238 h +10 h +57 h +59 h +1 h +1 h +1 h +1105 h +1 h +8094 m +8095 m +4 h +4 h +276 h +192 h +10 h +4 h +8096 m +1 h +935 h +4 h +109 h +10 h +4 h +4 h +8097 m +8098 m +8099 m +10 h +2379 m +4 h +1 h +10 h +4 h +4 h +8100 m +1 h +10 h +1 h +8101 m 
+10 h +8102 m +73 h +704 m +8103 m +307 h +4 h +109 h +1 h +4 h +4 h +1 h +4 h +8104 m +4 h +190 h +10 h +1092 m +113 h +1 h +109 h +10 h +4 h +10 h +10 h +459 m +2418 m +8 h +1650 h +4 h +64 h +8105 m +4 h +1 h +1 h +41 h +8106 m +10 h +8107 m +57 h +8108 m +129 h +4 h +10 h +106 h +11 h +59 h +8109 m +10 h +8110 m +4 h +170 h +4 h +8111 m +8112 m +8113 m +4 h +10 h +10 h +1 h +347 m +57 h +1 h +8114 m +1027 h +1751 m +83 h +10 h +1 h +4 h +172 h +56 h +2616 m +1 h +443 h +10 h +10 h +1 h +8115 m +1 h +8116 m +8117 m +4 h +10 h +1 h +8118 m +10 h +1 h +11 h +8119 m +8120 m +8121 m +1 h +1 h +4 h +4 h +55 h +857 h +620 m +1 h +65 h +1 h +59 h +258 h +11 h +1 h +1 h +4 h +8122 m +4 h +8123 m +195 h +1 h +172 h +1 h +687 h +10 h +8124 m +8125 m +10 h +1 h +10 h +8126 m +10 h +8127 m +4 h +156 h +8128 m +1886 m +8129 m +4 h +10 h +4 h +57 h +4 h +94 h +1 h +1 h +8130 m +10 h +4 h +4 h +143 h +4 h +1 h +8131 m +82 h +8132 m +1 h +1 h +1 h +10 h +83 h +1 h +8133 m +125 h +25 h +10 h +8134 m +11 h +371 h +4 h +4 h +5917 h +92 h +258 h +4 h +10 h +10 h +167 h +10 h +1 h +8135 m +4 h +4 h +1 h +10 h +4 h +1 h +8136 m +146 h +10 h +4378 m +4 h +1 h +10 h +10 h +4 h +22 h +4 h +10 h +4 h +4 h +57 h +965 m +5387 m +4 h +1 h +4 h +506 m +195 h +124 h +1 h +41 h +109 h +8137 m +509 m +4 h +4 h +10 h +114 h +10 h +1 h +4 h +1 h +4 h +278 h +4 h +5229 m +1403 h +4 h +1137 h +124 h +4 h +447 h +10 h +186 h +13 h +10 h +1 h +4 h +279 h +12 h +4 h +4 h +1 h +114 h +8138 m +1 h +8139 m +10 h +8140 m +250 h +10 h +8141 m +4 h +10 h +11 h +109 h +1 h +1 h +1 h +4 h +4 h +1 h +10 h +143 h +1 h +520 h +4 h +170 h +278 h +4 h +1 h +82 h +4 h +10 h +1 h +83 h +4 h +10 h +4 h +601 h +1 h +8142 m +8143 m +4 h +8144 m +185 h +7787 m +8 h +8145 m +135 h +10 h +1 h +5929 m +1 h +272 h +4 h +8146 m +8147 m +157 h +11 h +4 h +4 h +4 h +8148 m +7348 m +4 h +1 h +31 h +4 h +4 h +1 h +196 h +75 m +1 h +10 h +4 h +10 h +56 h +1 h +2508 m +1 h +1 h +4 h +8149 m +1 h +4 h +4 h +109 h +1445 h +4 h +10 h 
+124 h +73 h +10 h +4 h +4 h +1403 h +1 h +10 h +4 h +8150 m +123 h +444 m +4 h +4 h +11 h +3303 m +10 h +10 h +59 h +82 h +4 h +1 h +82 h +4 h +913 m +1 h +12 h +123 h +13 h +82 h +4 h +4 h +1 h +278 h +10 h +8151 m +10 h +1 h +4 h +2951 m +1 h +1 h +10 h +1 h +135 h +11 h +64 h +10 h +10 h +4 h +8152 m +601 h +4 h +520 h +57 h +1 h +266 h +1 h +82 h +4 h +8153 m +1975 m +8154 m +1953 m +10 h +65 h +8155 m +124 h +794 m +8156 m +570 h +1261 h +578 m +4 h +4 h +10 h +4 h +8157 m +1822 h +8158 m +10 h +1 h +1 h +4 h +986 h +1642 h +4 h +1 h +1 h +8159 m +1 h +8160 m +10 h +8161 m +82 h +4 h +1 h +83 h +114 h +10 h +119 h +4 h +8162 m +10 h +1 h +11 h +601 h +144 h +1 h +83 h +1 h +8163 m +8164 m +3509 m +4 h +10 h +230 h +73 h +1 h +10 h +1016 h +10 h +4810 m +45 h +1 h +1016 h +8165 m +8166 m +10 h +11 h +1 h +10 h +5621 m +262 h +74 h +1766 h +1 h +4 h +4 h +1 h +4 h +8167 m +140 h +4 h +10 h +41 h +1 h +146 h +4 h +1 h +8168 m +4 h +4 h +4 h +4574 m +57 h +4 h +4 h +8169 m +150 m +10 h +3909 m +1445 h +4 h +10 h +10 h +10 h +1 h +8170 m +3111 m +55 h +36 h +4 h +4 h +13 h +11 h +8171 m +1 h +57 h +4 h +4 h +4 h +4 h +10 h +8172 m +4 h +1 h +1 h +358 h +1 h +83 h +1 h +82 h +4 h +8173 m +10 h +4 h +4 h +59 h +8174 m +4 h +8175 m +4 h +10 h +124 h +4 h +4 h +3 h +1 h +307 h +4 h +1 h +82 h +139 h +8176 m +10 h +10 h +4 h +4 h +4 h +4 h +4 h +8177 m +1 h +4 h +1 h +1 h +8178 m +11 h +1 h +10 h +4 h +195 h +8179 m +4 h +8180 m +601 h +8181 m +1 h +8182 m +964 m +124 h +869 m +1 h +8183 m +8184 m +4 h +8185 m +1359 h +8186 m +10 h +266 h +1 h +1822 h +8187 m +1 h +1 h +25 h +9 m +8188 m +1 h +8189 m +11 h +10 h +8190 m +8191 m +8192 m +124 h +3 h +8193 m +1 h +8194 m +83 h +1 h +13 h +25 h +1 h +8195 m +10 h +57 h +82 h +8196 m +10 h +59 h +4 h +4 h +4 h +4 h +8197 m +1 h +1 h +8198 m +143 h +8199 m +124 h +1 h +10 h +1 h +601 h +8200 m +512 m +8201 m +229 h +8202 m +1 h +3 h +10 h +12 h +8203 m +4 h +170 h +12 h +124 h +1 h +10 h +4 h +8204 m +1 h +11 h +4 h +25 h +4 
h +1070 m +104 h +4 h +4 h +172 h +8205 m +3 h +8206 m +1 h +1 h +262 h +10 h +10 h +11 h +8207 m +8208 m +1 h +1556 m +1 h +1 h +4 h +1 h +4 h +36 h +5917 h +869 m +1 h +48 h +1 h +1685 h +13 h +124 h +4 h +8209 m +1650 h +1 h +8210 m +10 h +10 h +278 h +4 h +4 h +1 h +8211 m +8212 m +8213 m +31 h +297 h +4 h +4 h +1362 h +1 h +1 h +1606 m +190 h +3 h +31 h +57 h +10 h +10 h +8214 m +1 h +10 h +172 h +278 h +8215 m +1016 h +1 h +8216 m +8217 m +1 h +8218 m +5929 m +4 h +79 h +1 h +10 h +1 h +41 h +124 h +10 h +4 h +1 h +278 h +4 h +536 h +4 h +1691 m +110 h +8219 m +8220 m +138 h +10 h +74 h +8221 m +124 h +8222 m +4 h +8223 m +4 h +114 h +1 h +1 h +10 h +4986 m +4 h +4 h +4 h +1250 h +4 h +8224 m +1 h +307 h +1 h +1 h +1 h +8225 m +8226 m +170 h +1504 m +4 h +3 h +1 h +1105 h +4 h +4 h +8227 m +11 h +196 h +10 h +4 h +57 h +4 h +443 h +2746 m +4 h +911 m +4 h +10 h +125 h +45 h +41 h +4 h +11 h +109 h +4 h +1 h +4 h +5 h +8228 m +10 h +2379 m +1796 h +8229 m +1 h +8230 m +90 m +108 h +8231 m +12 h +8232 m +4 h +8233 m +1 h +1478 m +4 h +10 h +4 h +74 h +143 h +4 h +94 h +11 h +3 h +10 h +10 h +8234 m +8235 m +4 h +4 h +13 h +13 h +8236 m +4000 m +1 h +1 h +45 h +2272 m +8237 m +8238 m +4 h +4 h +1 h +8239 m +8240 m +8241 m +1 h +1714 m +10 h +1 h +8242 m +4 h +1 h +8243 m +10 h +4 h +1 h +399 h +8244 m +10 h +11 h +8245 m +10 h +4 h +125 h +4 h +10 h +4 h +109 h +57 h +3 h +4 h +4 h +4 h +10 h +1 h +8246 m +143 h +4 h +1 h +12 h +31 h +4 h +10 h +4 h +55 h +1 h +8017 m +4 h +8247 m +8248 m +10 h +8249 m +41 h +8250 m +4 h +8251 m +181 h +10 h +8252 m +31 h +84 h +8253 m +4 h +8254 m +4 h +8255 m +4 h +1 h +3 h +12 h +1 h +1 h +4 h +4 h +41 h +8256 m +4 h +4 h +4 h +4 h +4 h +4 h +112 h +8257 m +1 h +332 h +6558 m +10 h +270 h +11 h +4 h +4 h +4 h +8258 m +8259 m +109 h +266 h +353 m +4 h +4 h +1 h +278 h +1 h +1 h +1 h +10 h +1 h +8260 m +112 h +1137 h +59 h +8261 m +1 h +10 h +113 h +1 h +2887 h +8262 m +4 h +10 h +4 h +8263 m +8264 m +8265 m +125 h +4 h +4 h +4 
h +569 h +10 h +10 h +1470 h +8266 m +10 h +3 h +10 h +74 h +1 h +1 h +11 h +10 h +10 h +92 h +4 h +8267 m +11 h +10 h +124 h +125 h +104 h +976 h +57 h +258 h +114 h +230 h +4 h +8268 m +41 h +8269 m +167 h +464 h +10 h +8270 m +190 h +4 h +4 h +8271 m +10 h +25 h +10 h +1 h +10 h +358 h +1 h +8272 m +4 h +1 h +55 h +4 h +157 h +8273 m +966 m +8274 m +59 h +1 h +4 h +8275 m +157 h +1 h +4 h +31 h +4 h +1 h +79 h +1 h +4 h +4 h +4 h +69 h +10 h +11 h +4 h +57 h +8276 m +97 h +124 h +3025 m +125 h +1 h +12 h +4 h +1 h +109 h +718 h +82 h +41 h +4 h +4 h +196 h +4 h +64 h +1714 m +45 h +4 h +4 h +4 h +114 h +1 h +196 h +8277 m +4 h +8278 m +8279 m +1 h +10 h +92 h +64 h +11 h +8280 m +1 h +45 h +1 h +109 h +4 h +8281 m +1 h +8282 m +1 h +2625 h +3675 m +1 h +368 h +1685 h +119 h +8283 m +164 h +1 h +4 h +1 h +8284 m +91 h +1 h +1 h +10 h +4 h +64 h +8285 m +8286 m +8287 m +3 h +8 h +8288 m +8289 m +1 h +4 h +4 h +82 h +11 h +10 h +104 h +4 h +359 h +12 h +274 h +10 h +8290 m +272 h +185 h +8291 m +8292 m +3 h +8293 m +913 m +10 h +57 h +4 h +8294 m +64 h +4 h +8295 m +10 h +11 h +1 h +8296 m +8297 m +8298 m +8299 m +1 h +8300 m +3 h +10 h +4 h +10 h +4 h +4 h +8301 m +1 h +1 h +8302 m +92 h +4 h +124 h +11 h +10 h +124 h +1 h +3 h +1 h +147 h +8303 m +4 h +4 h +8304 m +4 h +1 h +8305 m +10 h +1 h +4 h +4 h +1 h +1 h +125 h +10 h +105 m +4 h +4 h +8306 m +4 h +1 h +4 h +158 h +25 h +8307 m +1 h +1 h +10 h +8308 m +4218 m +4 h +1 h +8309 m +1 h +81 m +4 h +250 h +1 h +172 h +55 h +1 h +129 h +1 h +41 h +1 h +4 h +1 h +146 h +4 h +8310 m +4 h +4 h +59 h +135 h +10 h +4 h +4 h +8311 m +7064 m +332 h +4 h +4 h +8312 m +4 h +8313 m +4 h +10 h +167 h +8314 m +10 h +1 h +82 h +55 h +25 h +10 h +1 h +1 h +10 h +1 h +10 h +569 h +1 h +10 h +4 h +8315 m +195 h +779 m +109 h +912 m +779 h +1 h +238 h +25 h +10 h +10 h +4 h +8316 m +274 h +1772 h +4 h +41 h +10 h +195 h +8317 m +8318 m +3 h +1189 m +8319 m +10 h +31 h +1 h +59 h +8320 m +4 h +10 h +238 h +8321 m +1027 h +1 h +10 
h +1 h +10 h +4 h +25 h +10 h +10 h +1 h +1 h +757 h +10 h +1 h +536 h +4 h +4 h +4 h +8322 m +8323 m +1 h +1 h +4 h +1 h +190 h +36 h +8324 m +10 h +10 h +4 h +186 h +8325 m +4 h +1 h +4 h +8326 m +170 h +1 h +1 h +332 h +1 h +10 h +8327 m +4 h +4 h +10 h +8328 m +8329 m +1 h +278 h +109 h +4292 m +4 h +1 h +8330 m +10 h +8331 m +8332 m +250 h +4 h +8333 m +10 h +1 h +265 h +1769 m +1 h +4 h +4 h +1 h +10 h +1 h +11 h +4 h +1 h +4 h +83 h +10 h +10 h +10 h +1 h +139 h +10 h +3646 m +4 h +55 h +3025 m +1 h +1 h +1 h +1 h +10 h +1 h +1 h +1 h +8334 m +8335 m +125 h +4 h +8336 m +73 h +1 h +330 h +8337 m +463 m +3 h +104 h +97 h +4 h +1 h +4 h +4 h +11 h +307 h +4 h +1 h +278 h +1 h +1955 m +57 h +45 h +8338 m +1 h +59 h +190 h +1 h +8339 m +4 h +1454 m +10 h +65 h +4 h +8340 m +169 h +1 h +258 h +1 h +4 h +5 h +1 h +4240 m +10 h +10 h +92 h +7870 m +10 h +8341 m +1 h +8342 m +4 h +1 h +4 h +94 h +10 h +73 h +583 m +1 h +1 h +1 h +8206 m +170 h +4 h +692 h +4 h +4 h +1 h +4 h +4 h +1968 m +8343 m +4 h +1 h +4 h +289 h +4 h +10 h +8344 m +2172 h +4 h +55 h +4 h +4 h +1975 m +1 h +146 h +8345 m +13 h +4 h +57 h +4 h +82 h +8346 m +1691 m +10 h +10 h +195 h +11 h +1 h +4 h +31 h +4 h +13 h +10 h +8347 m +10 h +1 h +8348 m +195 h +8349 m +10 h +8350 m +1 h +4 h +1038 m +8351 m +8352 m +1619 h +1 h +59 h +1 h +28 h +885 m +10 h +1 h +147 h +143 h +4 h +10 h +10 h +192 h +368 h +192 h +8353 m +1 h +1 h +4 h +4 h +4 h +10 h +8354 m +4 h +258 h +11 h +83 h +8355 m +1 h +10 h +538 h +10 h +10 h +4 h +4 h +27 h +8 h +11 h +10 h +143 h +139 h +1 h +4 h +8356 m +1122 m +8357 m +8358 m +4 h +114 h +8359 m +170 h +8360 m +10 h +8361 m +4 h +569 h +41 h +4 h +8362 m +10 h +990 m +11 h +4 h +4333 m +8363 m +4 h +1 h +4 h +31 h +4 h +1 h +4 h +4 h +45 h +1542 m +996 m +8364 m +4 h +1 h +4 h +1 h +10 h +1 h +403 h +10 h +146 h +114 h +10 h +1 h +4 h +13 h +8365 m +1 h +114 h +4 h +1 h +4 h +4 h +10 h +8366 m +3177 m +8367 m +124 h +4 h +4 h +4 h +8368 m +8369 m +4858 m +1 h +8370 m 
+143 h +8371 m +10 h +4 h +4 h +1 h +4 h +4 h +10 h +1914 m +1 h +164 h +82 h +10 h +1 h +8372 m +10 h +1 h +10 h +4 h +8373 m +1 h +109 h +109 h +4 h +1 h +56 h +1 h +4 h +8374 m +8375 m +10 h +3555 m +994 m +8376 m +4 h +1 h +1 h +10 h +1939 m +8377 m +64 h +8378 m +170 h +4 h +11 h +8379 m +10 h +4 h +94 h +4 h +109 h +8380 m +8381 m +8382 m +297 h +1 h +3 h +41 h +4 h +4 h +241 m +258 h +10 h +1 h +4 h +434 m +10 h +4 h +10 h +10 h +692 h +10 h +109 h +8383 m +1 h +8384 m +1 h +733 m +10 h +276 h +1 h +687 h +857 h +8385 m +4 h +443 h +10 h +56 h +4 h +1024 m +4 h +10 h +1 h +1 h +1 h +3768 m +4 h +4 h +4 h +1 h +124 h +1449 m +4 h +59 h +1 h +1 h +4 h +4 h +190 h +8386 m +1 h +4 h +1 h +1 h +147 h +10 h +4 h +4 h +59 h +4 h +10 h +1 h +172 h +4 h +82 h +11 h +27 h +1 h +258 h +8387 m +8388 m +8389 m +1 h +10 h +8390 m +83 h +1 h +3 h +74 h +10 h +10 h +8391 m +4 h +1 h +4 h +4 h +1 h +1 h +4 h +2172 h +808 m +1 h +477 h +8392 m +674 m +13 h +1 h +8393 m +1 h +1 h +1 h +4 h +10 h +10 h +8394 m +4 h +4 h +1835 m +1070 m +1 h +56 h +8395 m +65 h +4 h +36 h +135 h +4 h +4 h +8396 m +4 h +4 h +10 h +4 h +3 h +4 h +4 h +1 h +1 h +2617 m +1 h +31 h +4 h +4 h +1030 h +13 h +181 h +8397 m +7709 m +1 h +3 h +10 h +8398 m +83 h +1 h +4 h +4 h +8399 m +8400 m +1 h +3 h +1337 m +10 h +10 h +10 h +4 h +11 h +1 h +92 h +2920 m +1 h +4 h +31 h +59 h +1 h +8401 m +4 h +10 h +83 h +8402 m +4 h +41 h +1 h +59 h +1 h +5917 h +41 h +4 h +4 h +125 h +73 h +4 h +8403 m +10 h +104 h +4 h +10 h +4 h +74 h +1 h +4 h +358 h +4 h +45 h +41 h +4 h +181 h +83 h +265 h +10 h +8404 m +10 h +195 h +11 h +1 h +4 h +8405 m +4 h +78 m +8406 m +8407 m +1 h +8408 m +4 h +10 h +8409 m +83 h +464 h +4 h +8410 m +4 h +10 h +8411 m +1 h +5929 h +4 h +11 h +186 h +82 h +119 h +1796 h +195 h +8412 m +265 h +4 h +13 h +1016 h +8413 m +536 h +2733 h +4 h +8414 m +1 h +4 h +10 h +8415 m +1 h +1 h +4 h +4 h +195 h +8206 h +8416 m +4 h +65 h +1 h +8417 m +125 h +8418 m +266 h +4 h +8419 m +10 h +265 h +1 h 
+4 h +1 h +11 h +4 h +4 h +1 h +10 h +8420 m +82 h +8421 m +1 h +8422 m +10 h +83 h +1 h +8423 m +4 h +8424 m +8425 m +1 h +4 h +1 h +10 h +10 h +10 h +94 h +8426 m +8427 m +8428 m +4 h +976 h +4 h +4256 m +1 h +8429 m +1 h +167 h +1 h +4 h +8430 m +196 h +8431 m +1 h +578 m +1 h +10 h +1 h +1 h +8432 m +4 h +8433 m +718 h +4 h +912 m +4 h +1 h +229 h +8434 m +10 h +1 h +4 h +8435 m +11 h +82 h +1886 m +167 h +10 h +8436 m +8437 m +10 h +4 h +73 h +4 h +4 h +8438 m +12 h +109 h +8439 m +73 h +4 h +4 h +11 h +25 h +2592 m +10 h +1939 m +2172 h +1 h +266 h +59 h +8440 m +4 h +104 h +4 h +4 h +10 h +1 h +195 h +109 h +8441 m +4 h +4 h +1 h +1 h +1 h +6851 m +8442 m +8188 m +4 h +4 h +444 m +65 h +11 h +1 h +74 h +10 h +8443 m +8444 m +8445 m +8446 m +8447 m +1 h +536 h +4 h +10 h +1796 h +4 h +3 h +3558 m +4 h +1571 m +11 h +10 h +8448 m +1 h +8449 m +4 h +97 h +1 h +172 h +4 h +7214 m +3170 m +59 h +4 h +10 h +4 h +10 h +10 h +11 h +10 h +10 h +10 h +1 h +82 h +79 h +1 h +1508 m +1 h +4 h +11 h +4 h +601 h +493 m +10 h +8450 m +10 h +13 h +65 h +8451 m +8452 m +1 h +10 h +8453 m +1 h +10 h +10 h +4 h +400 m +1 h +31 h +468 m +4 h +8454 m +250 h +8455 m +4 h +4 h +2494 m +1 h +4 h +59 h +1822 h +25 h +8456 m +1 h +11 h +1 h +4 h +4 h +371 h +8457 m +3847 m +124 h +447 h +1 h +4 h +10 h +4824 m +278 h +4 h +10 h +31 h +8458 m +4 h +1309 m +8459 m +347 h +157 h +57 h +10 h +10 h +1 h +8460 m +8461 m +8462 m +10 h +8463 m +4 h +8464 m +1 h +185 h +109 h +147 h +4 h +5863 m +10 h +65 h +25 h +4 h +1 h +8465 m +4 h +10 h +1981 m +59 h +2041 m +4 h +10 h +25 h +10 h +11 h +8466 m +4 h +1016 h +8467 m +10 h +1 h +4 h +8468 m +1 h +8469 m +1 h +4 h +25 h +1 h +10 h +57 h +2148 m +8470 m +8471 m +82 h +8472 m +8473 m +1 h +4 h +5436 m +8474 m +4 h +1 h +4 h +28 h +8475 m +4 h +8476 m +11 h +10 h +538 h +8477 m +10 h +4 h +8478 m +1 h +4496 m +4 h +4 h +170 h +1 h +10 h +1 h +1 h +55 h +8479 m +55 h +316 m +8480 m +10 h +2022 m +386 h +4 h +8481 m +41 h +65 h +196 h +4 h +74 h 
+25 h +454 m +4 h +1 h +4 h +4 h +4 h +4 h +10 h +10 h +1 h +10 h +83 h +31 h +4 h +8482 m +5060 m +10 h +12 h +8483 m +3 h +238 h +8484 m +109 h +110 h +1 h +1 h +10 h +4 h +4 h +57 h +4 h +10 h +8485 m +186 h +8486 m +1 h +146 h +1 h +8487 m +4 h +45 h +8488 m +169 h +8489 m +10 h +479 m +1 h +10 h +1 h +1 h +463 m +4 h +399 h +10 h +4 h +1 h +1 h +114 h +4 h +4 h +4 h +1 h +11 h +4 h +59 h +3161 m +4132 m +4 h +4 h +190 h +83 h +8490 m +64 h +4 h +204 h +1 h +10 h +8491 m +10 h +1 h +79 h +8492 m +8493 m +8494 m +1074 h +1 h +1117 m +113 h +11 h +4 h +10 h +4 h +964 m +124 h +4 h +22 h +10 h +11 h +1370 m +1 h +8495 m +10 h +10 h +8496 m +45 h +8497 m +10 h +1017 m +1 h +8498 m +8499 m +1 h +4 h +10 h +4 h +4 h +8500 m +125 h +31 h +8501 m +1 h +279 h +8502 m +8503 m +4 h +4 h +435 m +8504 m +1 h +8505 m +1 h +1 h +22 h +1 h +1056 m +4 h +4 h +4 h +1470 h +4 h +55 h +1 h +8506 m +8507 m +1 h +8508 m +1642 h +4 h +10 h +3 h +10 h +4 h +434 m +8509 m +1 h +536 h +75 m +10 h +8510 m +1250 h +4 h +114 h +65 h +4 h +2128 m +65 h +10 h +1 h +8511 m +4 h +195 h +157 h +8512 m +92 h +1 h +10 h +4 h +31 h +4 h +8513 m +146 h +1016 h +8514 m +4 h +4 h +4 h +8515 m +10 h +82 h +1 h +75 m +196 h +1 h +4 h +4 h +4 h +46 h +8516 m +12 h +307 h +185 h +1 h +307 h +4 h +10 h +8517 m +109 h +1 h +4 h +10 h +8518 m +82 h +8519 m +1 h +8520 m +4 h +4111 m +4 h +4 h +278 h +358 h +1 h +10 h +10 h +4 h +1 h +172 h +10 h +8521 m +4 h +8522 m +4 h +103 m +1 h +8523 m +4 h +25 h +8524 m +8525 m +1 h +41 h +10 h +7870 m +10 h +4 h +687 h +358 h +276 h +4 h +8526 m +4 h +10 h +4 h +1 h +8527 m +10 h +10 h +8 h +124 h +109 h +8528 m +174 h +114 h +533 m +1 h +10 h +4 h +1 h +1 h +1 h +57 h +1 h +4 h +1 h +10 h +8529 m +8530 m +8531 m +1 h +11 h +4 h +57 h +4 h +4 h +57 h +4 h +1 h +11 h +4 h +399 h +11 h +64 h +124 h +506 m +1 h +1 h +8532 m +4 h +4 h +6133 m +8533 m +8534 m +2625 h +11 h +1 h +8535 m +1137 h +4 h +10 h +11 h +1 h +1 h +10 h +8536 m +5035 m +8537 m +1619 h +158 h +3 h +10 
h +1 h +4 h +41 h +8538 m +11 h +1 h +1 h +4 h +172 h +1 h +10 h +11 h +119 h +8539 m +11 h +8540 m +3112 m +4 h +4 h +59 h +8541 m +11 h +10 h +22 h +8542 m +83 h +1642 h +8543 m +8544 m +4 h +4 h +184 h +2184 m +1 h +5757 m +1045 m +4 h +3303 m +4 h +1070 m +4 h +8545 m +4 h +1 h +10 h +1 h +8546 m +4 h +22 h +590 m +1 h +8547 m +10 h +1 h +1685 h +8548 m +10 h +10 h +447 h +806 m +4 h +83 h +1 h +8549 m +190 h +8550 m +10 h +8551 m +10 h +4 h +8552 m +4 h +57 h +1374 m +1 h +278 h +447 h +1 h +138 h +1083 h +3 h +82 h +11 h +1 h +1 h +8553 m +4 h +4 h +4 h +7839 h +1 h +10 h +8554 m +2532 m +4464 m +4651 m +83 h +12 h +10 h +59 h +4 h +10 h +1 h +4 h +4 h +59 h +1 h +8555 m +4 h +1 h +119 h +4 h +8556 m +1 h +83 h +1 h +104 h +4 h +10 h +4 h +4 h +124 h +8557 m +94 h +8558 m +4 h +4 h +1 h +8559 m +10 h +11 h +59 h +8560 m +1 h +10 h +10 h +10 h +10 h +8561 m +3 h +1261 h +55 h +65 h +8562 m +4 h +4 h +6399 m +11 h +12 h +4 h +4 h +4 h +8563 m +10 h +11 h +10 h +10 h +1 h +10 h +185 h +8564 m +8565 m +1 h +1 h +8566 m +10 h +4 h +3 h +143 h +139 h +8567 m +4 h +190 h +4 h +860 m +4 h +1 h +1939 h +8568 m +4 h +4 h +4 h +10 h +2308 m +4 h +4 h +4 h +1 h +4 h +27 h +125 h +2266 m +4 h +4 h +4 h +4 h +10 h +4 h +8569 m +4 h +10 h +8570 m +1 h +64 h +36 h +11 h +4 h +8571 m +1 h +73 h +8572 m +1 h +4 h +4 h +8573 m +4 h +4 h +1 h +170 h +118 m +10 h +4 h +3750 m +8574 m +8575 m +692 h +11 h +10 h +258 h +359 h +10 h +1 h +41 h +1 h +11 h +10 h +1105 h +8576 m +230 h +169 h +4 h +1 h +31 h +8577 m +10 h +8578 m +10 h +4 h +65 h +146 h +1 h +172 h +4 h +45 h +4 h +4 h +4 h +8579 m +4 h +8580 m +82 h +1 h +10 h +10 h +4 h +1 h +10 h +857 h +1 h +195 h +10 h +1 h +25 h +4 h +1 h +911 m +167 h +4 h +1 h +10 h +262 h +8581 m +8582 m +1 h +74 h +4 h +8583 m +79 h +41 h +8584 m +10 h +169 h +124 h +8585 m +2625 h +4 h +4 h +4 h +4 h +8586 m +10 h +10 h +4 h +4 h +109 h +11 h +4 h +10 h +1 h +8587 m +64 h +104 h +4 h +869 h +4 h +238 h +10 h +8588 m +110 h +1 h +4127 m +1508 
m +4 h +1 h +97 h +83 h +4 h +4 h +8589 m +4 h +143 h +1 h +8590 m +8591 m +1 h +1 h +1 h +4 h +83 h +109 h +1975 m +4 h +1 h +8592 m +114 h +1772 h +10 h +4 h +8593 m +1 h +31 h +10 h +4 h +4 h +92 h +10 h +332 h +4 h +10 h +143 h +4 h +10 h +65 h +3600 m +8594 m +10 h +1 h +1 h +65 h +10 h +687 h +17 h +11 h +8595 m +4 h +4 h +10 h +4 h +8596 m +4 h +114 h +4 h +10 h +73 h +1 h +4896 m +4 h +4 h +10 h +468 m +10 h +10 h +13 h +8597 m +1 h +59 h +1 h +4 h +125 h +1 h +229 h +1 h +4 h +8598 m +92 h +2923 m +57 h +8599 m +1 h +8600 m +4 h +97 h +6400 m +82 h +10 h +4 h +4 h +8601 m +4 h +3634 m +1 h +4 h +59 h +8602 m +1 h +4 h +10 h +649 m +3 h +2588 m +1 h +10 h +4 h +4 h +11 h +8603 m +185 h +4 h +4 h +123 h +181 h +4 h +1 h +65 h +69 h +4 h +41 h +10 h +4 h +4 h +1 h +8604 m +3 h +55 h +57 h +4 h +4 h +4 h +196 h +1 h +8605 m +10 h +8606 m +4 h +8607 m +10 h +4 h +3 h +4 h +1020 m +10 h +4 h +10 h +1 h +626 m +506 m +1 h +10 h +10 h +146 h +3555 m +640 h +4 h +125 h +10 h +1 h +124 h +820 m +1 h +1 h +4 h +533 m +55 h +1 h +82 h +8608 m +1 h +8609 m +11 h +4 h +110 h +3293 m +157 h +8610 m +1 h +8611 m +7 m +8612 m +1 h +10 h +57 h +1 h +687 h +109 h +4 h +4 h +92 h +124 h +1 h +8613 m +124 h +4 h +8614 m +4 h +28 h +4 h +4 h +10 h +447 h +1 h +4 h +1 h +1 h +11 h +4 h +996 m +8615 m +65 h +1 h +10 h +125 h +279 h +8616 m +10 h +578 m +10 h +10 h +7938 m +4714 m +11 h +1 h +84 h +8617 m +1 h +185 h +11 h +4 h +4 h +4 h +11 h +124 h +3 h +8618 m +10 h +4283 m +4 h +8619 m +8620 m +8621 m +8622 m +8623 m +41 h +2438 m +1 h +10 h +4 h +10 h +4 h +10 h +10 h +10 h +8624 m +8625 m +1 h +11 h +8626 m +10 h +11 h +1 h +10 h +1 h +11 h +8627 m +359 h +4 h +73 h +4 h +4 h +4 h +1 h +36 h +82 h +4 h +4 h +8628 m +45 h +4349 m +10 h +8629 m +4 h +1 h +11 h +4 h +13 h +1 h +4 h +8630 m +1 h +10 h +25 h +8631 m +1 h +1 h +11 h +8 h +8632 m +5411 m +10 h +4 h +4 h +77 h +55 h +4 h +195 h +8633 m +4 h +4 h +8634 m +8635 m +97 h +1 h +109 h +4 h +4 h +55 h +1 h +4 h +1 h +250 h 
+4 h +22 h +1 h +83 h +4 h +13 h +1 h +1835 m +4 h +1 h +935 h +8636 m +8637 m +4 h +285 m +10 h +4 h +1 h +119 h +4 h +8638 m +8639 m +8640 m +538 h +31 h +8641 m +4 h +332 h +4 h +8642 m +4 h +4 h +4 h +11 h +779 h +2148 m +4 h +1 h +22 h +8643 m +10 h +59 h +1 h +1 h +36 h +13 h +258 h +31 h +4 h +10 h +1 h +1 h +1 h +1 h +279 h +1 h +8644 m +147 h +1 h +110 h +10 h +4 h +10 h +195 h +8645 m +73 h +3622 m +8646 m +4 h +10 h +4 h +8647 m +8648 m +1 h +4 h +4 h +135 h +31 h +8147 m +8649 m +1 h +8650 m +1 h +8651 m +55 h +8652 m +8653 m +4 h +5059 m +4 h +4 h +4 h +4 h +8654 m +4 h +1 h +57 h +8655 m +172 h +1 h +146 h +1 h +1 h +8656 m +464 h +8657 m +10 h +4 h +8658 m +1 h +10 h +1 h +135 h +8659 m +1 h +4 h +83 h +4 h +1 h +15 m +10 h +146 h +8660 m +1 h +1 h +8661 m +8662 m +169 h +289 h +57 h +3303 m +1 h +556 h +8663 m +8664 m +8 h +1 h +10 h +4 h +10 h +8665 m +8666 m +6565 m +358 h +4 h +278 h +4 h +1 h +10 h +1 h +4 h +1 h +10 h +8667 m +10 h +4 h +22 h +8668 m +31 h +124 h +4 h +3 h +1214 m +1 h +4 h +158 h +10 h +8669 m +1309 m +109 h +1 h +4 h +1 h +8670 m +1 h +692 h +41 h +10 h +8671 m +443 h +8672 m +4 h +1 h +57 h +8673 m +258 h +4 h +1261 h +1 h +8674 m +31 h +10 h +1 h +4 h +1 h +299 h +4 h +13 h +8675 m +56 h +358 h +4 h +1 h +4 h +1 h +164 h +1 h +97 h +123 h +124 h +4 h +8676 m +4 h +125 h +1 h +1 h +4 h +4 h +8677 m +125 h +33 m +10 h +313 m +4 h +119 h +31 h +4 h +4 h +10 h +687 h +10 h +36 h +4 h +10 h +8678 m +25 h +25 h +8679 m +3 h +104 h +8680 m +687 h +447 h +181 h +8681 m +8682 m +4 h +4 h +1 h +11 h +79 h +10 h +1 h +4 h +45 h +1 h +8683 m +8684 m +1 h +8685 m +8686 m +8687 m +8688 m +10 h +386 h +4 h +4 h +118 h +4 h +3 h +4 h +8689 m +8690 m +4 h +4 h +4 h +2851 m +10 h +1 h +1 h +4 h +4 h +10 h +10 h +196 h +1 h +4 h +1 h +1 h +920 p +11 h +4 h +1083 h +4 h +4 h +4 h +57 h +8691 m +11 h +8692 m +8693 m +59 h +4 h +8694 m +8695 m +10 h +4 h +4 h +10 h +4 h +692 h +4 h +4 h +8696 m +124 h +8697 m +4 h +10 h +4 h +5567 m +8698 m +10 
h +1 h +10 h +104 h +4 h +8699 m +4 h +4 h +10 h +1 h +109 h +10 h +124 h +8700 m +41 h +1 h +114 h +1 h +11 h +1710 m +4 h +73 h +1 h +4 h +143 h +4 h +4 h +4 h +8701 m +4 h +536 h +1410 m +2815 m +935 h +8702 m +1 h +4 h +8703 m +520 h +1 h +8704 m +8705 m +10 h +8706 m +10 h +8707 m +1835 m +109 h +536 h +1 h +10 h +8708 m +4 h +8709 m +4 h +266 h +8710 m +3679 m +1 h +295 h +1 h +4 h +8711 m +276 h +1 h +1 h +1 h +8712 m +8713 m +4 h +10 h +10 h +8714 m +8715 m +10 h +10 h +1 h +4 h +8716 m +4 h +55 h +1 h +8717 m +10 h +4 h +8718 m +10 h +8 h +1 h +4 h +8719 m +4 h +295 h +8720 m +4 h +155 m +22 h +82 h +8721 m +1 h +8722 m +74 h +265 h +195 h +4 h +10 h +4 h +25 h +1 h +1299 m +57 h +4 h +119 h +4 h +4 h +4 h +4 h +8723 m +10 h +4 h +10 h +8724 m +74 h +27 h +10 h +8725 m +1177 m +8726 m +4 h +4 h +1 h +10 h +601 h +1 h +1 h +10 h +4 h +112 h +4 h +36 h +3837 m +11 h +278 h +11 h +1 h +10 h +1 h +33 m +8727 m +195 h +8728 m +10 h +4 h +75 h +45 h +10 h +1 h +4 h +8729 m +112 h +10 h +11 h +4 h +4 h +59 h +10 h +1 h +8730 m +1 h +4 h +5613 m +4 h +1796 h +278 h +1 h +4 h +770 m +4 h +4 h +630 m +8731 m +1 h +10 h +8732 m +170 h +4 h +10 h +4 h +92 h +8733 m +4 h +169 h +41 h +4 h +8734 m +4 h +1 h +3278 m +359 h +64 h +4 h +4 h +4 h +8735 m +4 h +238 h +4 h +4 h +139 h +8736 m +4 h +4 h +4 h +41 h +1 h +3 h +10 h +109 h +4 h +4 h +8737 m +8738 m +10 h +56 h +8739 m +238 h +1 h +8740 m +10 h +83 h +8741 m +4 h +1 h +4 h +55 h +10 h +1 h +4 h +4 h +1 h +4 h +10 h +10 h +92 h +10 h +5125 m +8742 m +10 h +1 h +4 h +11 h +10 h +10 h +8743 m +190 h +8744 m +4 h +11 h +10 h +4 h +1 h +172 h +10 h +4 h +1 h +31 h +10 h +488 h +25 h +8745 m +31 h +8746 m +1 h +4 h +8747 m +1 h +10 h +59 h +8748 m +4 h +4 h +8749 m +10 h +4 h +169 h +4 h +10 h +8750 m +1 h +10 h +8751 m +31 h +31 h +97 h +1 h +1 h +8752 m +4 h +8753 m +757 h +4 h +1 h +1 h +10 h +59 h +4 h +1 h +1 h +10 h +8754 m +1914 m +1 h +65 h +108 h +139 h +8755 m +4 h +1 h +339 m +92 h +779 h +8756 m +4 h +1214 m 
+92 h +8757 m +4 h +11 h +229 h +10 h +4 h +1 h +8758 m +8759 m +4 h +124 h +274 h +5963 m +10 h +4 h +12 h +4 h +1 h +10 h +1 h +5281 m +10 h +3799 m +10 h +167 h +8760 m +692 h +8761 m +4 h +8762 m +10 h +4 h +65 h +8763 m +3 h +10 h +2379 h +27 h +8764 m +1 h +4 h +4 h +195 h +383 h +8765 m +8766 m +82 h +4 h +4 h +10 h +8767 m +8768 m +4 h +146 h +1 h +6438 m +8769 m +94 h +4 h +718 h +4 h +1 h +2280 m +10 h +8770 m +1 h +74 h +692 h +83 h +4 h +59 h +56 h +7755 m +135 h +2794 m +8771 m +10 h +123 h +4 h +10 h +119 h +8772 m +3036 m +10 h +1 h +10 h +8773 m +1 h +25 h +11 h +4 h +1 h +976 h +443 h +626 m +8774 m +31 h +4 h +338 m +4 h +10 h +28 h +10 h +10 h +4 h +1790 h +986 h +4 h +10 h +11 h +4 h +8775 m +8776 m +8777 m +211 m +1 h +114 h +1 h +1 h +8778 m +1 h +8779 m +4 h +146 h +8780 m +4 h +332 h +25 h +8781 m +8782 m +4 h +27 h +1 h +186 h +601 h +65 h +6869 m +5053 m +82 h +8783 m +4 h +601 h +1642 h +4 h +10 h +238 h +56 h +11 h +1 h +10 h +4 h +8784 m +195 h +4 h +359 h +8785 m +4 h +4 h +10 h +332 h +1 h +10 h +1092 m +8786 m +1 h +8787 m +8788 m +10 h +106 h +1 h +4 h +4 h +2139 m +59 h +4 h +4 h +1 h +1 h +4 h +1 h +4 h +10 h +92 h +4 h +8789 m +8790 m +10 h +1 h +368 h +8791 m +109 h +204 h +842 m +8792 m +4 h +64 h +538 h +1 h +6200 m +4218 m +8793 m +8794 m +8795 m +114 h +1 h +1685 h +10 h +4 h +8796 m +578 m +4 h +4 h +8797 m +1406 h +57 h +10 h +25 h +4 h +1 h +1 h +4 h +55 h +1 h +8798 m +1 h +4 h +1454 m +8799 m +41 h +468 m +8800 m +8801 m +4 h +1 h +10 h +8802 m +8803 m +172 h +10 h +986 h +996 m +5379 m +10 h +146 h +8804 m +1 h +65 h +57 h +1284 m +3742 m +1 h +146 h +1 h +3 h +4 h +10 h +8805 m +8806 m +13 h +1 h +10 h +83 h +4 h +10 h +4 h +1 h +4 h +10 h +4 h +8807 m +8808 m +8809 m +1 h +11 h +8810 m +4 h +104 h +124 h +3 h +4 h +262 h +1 h +4 h +11 h +4 h +1 h +4 h +10 h +1 h +1 h +8811 m +124 h +1 h +1 h +1939 h +1 h +8812 m +8813 m +8814 m +1304 m +8815 m +82 h +59 h +8816 m +332 h +1 h +1 h +10 h +1 h +4 h +10 h +4 h +1 h +1 h 
+8817 m +4 h +1 h +3321 m +109 h +82 h +4 h +1 h +55 h +538 h +1 h +1 h +1 h +157 h +10 h +10 h +4 h +10 h +10 h +4 h +1 h +358 h +1 h +4 h +1478 m +1 h +156 h +1 h +1 h +3 h +2266 m +8818 m +135 h +8819 m +976 h +8820 m +64 h +59 h +570 h +10 h +109 h +56 h +25 h +1 h +8821 m +1 h +289 h +10 h +8822 m +4810 m +4 h +36 h +10 h +10 h +10 h +313 m +1 h +74 h +10 h +4 h +3 h +10 h +601 h +8823 m +1 h +10 h +4 h +1 h +4 h +1 h +2923 m +1 h +5907 m +620 m +4 h +677 m +8824 m +4 h +1 h +8825 m +10 h +10 h +25 h +10 h +40 h +1 h +1822 h +1250 h +8826 m +1 h +1 h +4 h +1 h +1 h +124 h +10 h +8827 m +109 h +4 h +1105 h +295 h +1 h +4 h +82 h +74 h +4 h +4 h +10 h +10 h +185 h +125 h +11 h +83 h +65 h +195 h +8828 m +10 h +56 h +1 h +4 h +11 h +10 h +1374 m +1 h +64 h +4 h +1 h +1 h +1 h +8829 m +1 h +5526 m +4 h +10 h +82 h +4 h +1 h +196 h +1 h +8830 m +3161 m +541 m +11 h +4203 m +4 h +8831 m +838 m +1 h +10 h +48 h +4 h +1 h +8832 m +10 h +139 h +4 h +4 h +4 h +8833 m +4 h +164 h +1725 m +8834 m +8835 m +2484 m +10 h +8836 m +10 h +4 h +4 h +1 h +11 h +8837 m +8838 m +1 h +8839 m +8840 m +3360 m +59 h +8841 m +82 h +8842 m +8843 m +167 h +8844 m +8 h +31 h +10 h +55 h +10 h +11 h +41 h +278 h +1737 m +3499 m +74 h +10 h +4 h +10 h +1309 m +31 h +224 h +8845 m +4 h +1 h +4 h +4 h +74 h +4 h +82 h +578 m +10 h +74 h +8846 m +4 h +8847 m +4 h +10 h +238 h +4 h +83 h +1 h +158 h +1 h +36 h +4 h +8848 m +4 h +6817 m +8849 m +83 h +195 h +1 h +196 h +25 h +10 h +3036 m +1 h +4 h +8850 m +4 h +4 h +10 h +582 m +1 h +986 h +110 h +12 h +190 h +1 h +435 m +8851 m +447 h +1045 m +258 h +135 h +583 m +4 h +1 h +45 h +4 h +124 h +181 h +4 h +8852 m +4 h +1 h +1 h +158 h +55 h +1 h +8853 m +114 h +327 m +112 h +1 h +8854 m +4 h +1 h +45 h +238 h +1 h +468 m +1 h +8855 m +8856 m +110 h +8857 m +1 h +4 h +1 h +119 h +8858 m +8859 m +1619 h +1 h +11 h +8860 m +10 h +8861 m +4 h +73 h +1685 h +36 h +8862 m +238 h +8863 m +4 h +4 h +4 h +4 h +40 h +91 h +114 h +4 h +61 m +10 h +10 h +10 h 
+1 h +8864 m +4 h +8865 m +4 h +10 h +1 h +1620 m +250 h +8866 m +4 h +8867 m +368 h +4 h +204 h +4 h +124 h +4 h +8868 m +10 h +8869 m +109 h +10 h +4 h +139 h +4 h +8870 m +8871 m +23 m +1 h +8872 m +1 h +1 h +4 h +170 h +8873 m +56 h +31 h +10 h +8874 m +2025 m +83 h +55 h +144 h +124 h +4 h +1685 h +27 h +4 h +8875 m +64 h +140 h +4 h +8876 m +1 h +1 h +1 h +8877 m +4 h +123 h +1 h +8878 m +1 h +8879 m +1 h +860 m +4 h +10 h +4 h +8880 m +8881 m +10 h +4 h +10 h +3 h +1 h +118 h +1 h +124 h +1 h +4 h +1 h +4 h +10 h +4 h +4 h +1 h +8882 m +307 h +4 h +8883 m +10 h +3 h +10 h +82 h +4 h +5254 m +1 h +1478 m +229 h +8884 m +124 h +8 h +4 h +4 h +4 h +860 m +1 h +10 h +8885 m +92 h +64 h +4 h +1 h +57 h +4 h +23 m +4350 m +41 h +4 h +8886 m +8887 m +4 h +8888 m +4 h +10 h +4 h +4 h +1 h +4 h +57 h +1 h +93 m +4 h +10 h +195 h +4 h +8889 m +4 h +22 h +11 h +41 h +626 m +8890 m +8891 m +8892 m +10 h +143 h +10 h +1 h +41 h +1 h +1 h +1 h +124 h +1 h +10 h +11 h +172 h +10 h +1 h +1 h +4 h +8893 m +10 h +1 h +266 h +4 h +4 h +10 h +1 h +125 h +8894 m +11 h +1619 h +109 h +12 h +4 h +65 h +10 h +61 m +97 h +74 h +147 h +8895 m +4 h +10 h +1 h +4 h +139 h +5537 m +4 h +1 h +10 h +11 h +4 h +1751 m +1 h +276 h +8896 m +297 h +3 h +125 h +1 h +8897 m +10 h +7760 m +11 h +83 h +74 h +164 h +230 h +3 h +4 h +8898 m +41 h +10 h +4 h +27 h +1 h +83 h +4 h +1685 h +65 h +4 h +4 h +4 h +185 h +4 h +10 h +8899 m +11 h +1 h +4 h +911 m +4 h +109 h +8900 m +278 h +8901 m +11 h +8902 m +8903 m +10 h +1 h +8904 m +295 h +4 h +4 h +8905 m +4132 m +1 h +1 h +307 h +4 h +8906 m +8907 m +170 h +4 h +8908 m +770 m +1 h +4 h +104 h +276 h +575 m +41 h +8909 m +10 h +8910 m +125 h +4 h +4 h +8911 m +10 h +353 m +11 h +1 h +10 h +7064 m +73 h +359 h +143 h +4 h +8912 m +1337 m +1 h +83 h +10 h +10 h +10 h +4 h +10 h +10 h +4 h +1 h +6135 m +1 h +79 h +1 h +4 h +8913 m +1250 h +4 h +10 h +10 h +256 m +4 h +36 h +64 h +2984 m +4 h +4 h +1 h +921 m +8914 m +4 h +1 h +4 h +316 m +10 h +4 h +4 
h +4 h +123 h +8915 m +4 h +41 h +319 m +10 h +976 h +8793 m +4 h +125 h +1 h +640 h +10 h +10 h +8916 m +5060 m +8917 m +4 h +8918 m +10 h +8919 m +4 h +10 h +8920 m +1 h +4 h +4 h +57 h +11 h +124 h +8653 m +10 h +1 h +2131 m +1 h +10 h +10 h +8161 m +4 h +69 h +4 h +4 h +10 h +412 m +1 h +1 h +1 h +8921 m +8922 m +1 h +8923 m +59 h +258 h +1 h +57 h +1 h +124 h +1 h +186 h +77 h +4 h +3 h +4 h +10 h +4 h +12 h +13 h +10 h +5801 m +4 h +8924 m +4 h +4 h +4 h +1 h +10 h +173 h +386 h +10 h +56 h +4 h +888 m +10 h +167 h +5923 m +10 h +1 h +31 h +10 h +1 h +8925 m +8926 m +3 h +10 h +82 h +1 h +8927 m +2923 m +10 h +1 h +1 h +4 h +25 h +57 h +4 h +4 h +10 h +1 h +4 h +11 h +1 h +8928 m +4 h +2520 m +8929 m +8930 m +59 h +4 h +8931 m +8932 m +1 h +10 h +109 h +10 h +1 h +229 h +56 h +8933 m +4 h +1 h +11 h +1 h +4 h +125 h +10 h +4 h +10 h +8934 m +1772 h +4 h +4 h +4 h +10 h +4 h +1 h +196 h +1 h +147 h +10 h +1 h +10 h +1 h +10 h +4 h +1 h +3100 m +8935 m +1 h +146 h +8936 m +8937 m +5 h +8938 m +8939 m +4 h +1 h +4 h +4 h +698 m +4 h +4 h +10 h +1 h +1 h +295 h +4 h +146 h +123 h +11 h +103 m +8940 m +82 h +4 h +8941 m +536 h +4 h +8942 m +8943 m +8944 m +266 h +1 h +10 h +8945 m +10 h +8946 m +4301 m +73 h +3025 m +31 h +1 h +276 h +146 h +319 h +10 h +59 h +1 h +4 h +1 h +10 h +10 h +1 h +4 h +8947 m +4 h +4 h +8948 m +4 h +10 h +8949 m +8950 m +156 h +25 h +4 h +1201 m +8951 m +10 h +31 h +10 h +1137 h +8952 m +8953 m +684 m +238 h +4 h +22 h +11 h +4 h +10 h +8954 m +8955 m +4 h +10 h +4 h +4 h +327 m +1 h +8956 m +40 h +4 h +2494 m +92 h +8957 m +1620 m +190 h +265 h +11 h +8958 m +124 h +10 h +8959 m +4 h +83 h +169 h +8960 m +4 h +733 m +25 h +10 h +1 h +1 h +224 h +94 h +1 h +8961 m +4 h +1 h +4469 m +109 h +1 h +359 h +8962 m +8963 m +59 h +8964 m +108 h +6399 m +5965 m +986 h +10 h +31 h +1 h +4 h +278 h +11 h +69 h +8965 m +1470 h +10 h +869 h +10 h +8966 m +64 h +10 h +8967 m +10 h +4 h +172 h +4 h +4 h +6592 m +10 h +3396 m +8968 m +7839 h +8969 m 
+57 h +10 h +4 h +23 h +10 h +583 m +1409 m +181 h +8970 m +319 h +8971 m +8972 m +1 h +4 h +59 h +4 h +8973 m +1 h +1 h +10 h +10 h +4 h +1 h +55 h +4 h +1 h +8974 m +8975 m +1 h +4 h +8976 m +10 h +57 h +167 h +10 h +55 h +10 h +4 h +265 h +13 h +10 h +1 h +1 h +1638 m +8977 m +4 h +109 h +8978 m +10 h +4 h +1370 m +10 h +4 h +8979 m +10 h +1 h +8980 m +1 h +3 h +4 h +8981 m +10 h +10 h +488 h +1 h +4 h +4 h +4 h +1 h +10 h +4 h +10 h +4 h +687 h +1 h +8982 m +10 h +124 h +1685 h +8983 m +4 h +10 h +1 h +8984 m +1 h +112 h +8985 m +8986 m +143 h +267 m +10 h +74 h +1 h +8324 m +10 h +1 h +11 h +1 h +10 h +56 h +55 h +82 h +4 h +83 h +27 h +1 h +8987 m +1138 m +125 h +140 h +8988 m +1 h +10 h +104 h +8989 m +73 h +4 h +1 h +1271 m +1 h +10 h +4 h +10 h +4 h +1 h +1620 h +4 h +4 h +1 h +319 h +1 h +103 m +1 h +10 h +11 h +10 h +464 h +11 h +8990 m +109 h +59 h +195 h +10 h +57 h +124 h +4 h +1 h +10 h +4 h +2733 h +6066 m +8991 m +57 h +488 h +57 h +10 h +185 h +8992 m +164 h +8993 m +518 m +4 h +860 h +1 h +4 h +64 h +1 h +129 h +443 h +3 h +1 h +4 h +4 h +8994 m +10 h +10 h +8995 m +1714 m +4 h +1 h +8996 m +79 h +1 h +464 h +4 h +59 h +4 h +4 h +4 h +1 h +5 h +10 h +10 h +57 h +8997 m +10 h +4 h +8998 m +10 h +986 h +245 m +1 h +10 h +8999 m +4 h +185 h +8716 m +1 h +104 h +119 h +9000 m +1 h +196 h +10 h +9001 m +82 h +9002 m +1 h +172 h +125 h +146 h +10 h +278 h +4 h +10 h +9003 m +4 h +9004 m +10 h +65 h +9005 m +9006 m +9007 m +10 h +4 h +1 h +156 h +104 h +4 h +25 h +1 h +1 h +45 h +1 h +79 h +767 m +10 h +1016 h +1 h +3 h +11 h +9008 m +9009 m +520 h +9010 m +9011 m +11 h +10 h +9012 m +9013 m +1 h +92 h +169 h +4 h +4 h +9014 m +9015 m +4 h +10 h +4 h +4 h +1 h +4 h +1 h +59 h +9016 m +4 h +204 h +4 h +123 h +9017 m +1 h +74 h +41 h +4 h +4 h +4 h +1822 h +1 h +1 h +10 h +9018 m +4 h +4 h +9019 m +533 h +4 h +185 h +129 h +4 h +195 h +196 h +167 h +4 h +10 h +9020 m +10 h +4 h +4 h +1 h +1 h +4 h +4381 m +1 h +9021 m +377 m +4 h +22 h +332 h +10 h +692 h 
+10 h +9022 m +425 m +9023 m +1 h +399 h +9024 m +1 h +4 h +12 h +1 h +146 h +1 h +4 h +4 h +9025 m +1 h +79 h +4151 m +463 m +4 h +1 h +1127 m +9026 m +4 h +4 h +2480 m +9027 m +1 h +192 h +10 h +10 h +4 h +9028 m +10 h +6112 m +10 h +3 h +4 h +9029 m +4 h +9030 m +9031 m +92 h +1 h +1 h +10 h +10 h +10 h +1 h +9032 m +9033 m +9034 m +285 m +1 h +11 h +4 h +1056 m +124 h +22 h +1249 m +4 h +4 h +1 h +4 h +4 h +10 h +124 h +9035 m +4 h +1 h +1 h +147 h +1 h +1 h +9036 m +976 h +55 h +10 h +4 h +4 h +1548 m +79 h +3 h +4 h +1 h +1 h +108 h +9037 m +10 h +1 h +1 h +9038 m +4 h +4 h +4 h +10 h +97 h +1 h +9039 m +4 h +1 h +9040 m +9041 m +403 h +9042 m +4240 m +1 h +57 h +4 h +10 h +265 h +169 h +9043 m +10 h +463 m +9044 m +1 h +1137 h +1 h +1218 m +9045 m +9046 m +2931 m +10 h +124 h +9047 m +9048 m +3177 m +9049 m +10 h +9050 m +11 h +1 h +4 h +4 h +1 h +74 h +1 h +9051 m +258 h +9052 m +4 h +368 h +10 h +9053 m +9054 m +4 h +129 h +1 h +4 h +4 h +1 h +353 m +3 h +1 h +10 h +4 h +9055 m +4 h +10 h +3533 m +9056 m +3943 m +9057 m +9058 m +1884 m +10 h +10 h +443 h +10 h +9059 m +1 h +1 h +1 h +1691 m +9060 m +4 h +4 h +10 h +104 h +9061 m +1 h +9062 m +10 h +11 h +332 h +9063 m +82 h +3913 m +4 h +10 h +1 h +5 h +1 h +9064 m +1 h +266 h +9065 m +1835 h +897 m +9066 m +139 h +9067 m +9068 m +204 h +172 h +74 h +1 h +9069 m +4 h +9070 m +57 h +1 h +9071 m +9072 m +11 h +11 h +4 h +4 h +3 h +156 h +10 h +9073 m +9074 m +12 h +250 h +9075 m +2706 m +10 h +74 h +4 h +4 h +1 h +41 h +10 h +10 h +1105 h +11 h +10 h +4 h +190 h +1 h +9076 m +48 h +103 h +4 h +25 h +74 h +262 h +9077 m +9078 m +4 h +195 h +1 h +9079 m +4 h +9080 m +4 h +9081 m +10 h +158 h +4 h +10 h +403 h +1 h +4 h +601 h +1 h +9082 m +10 h +1 h +359 h +1 h +11 h +192 h +1 h +1 h +2374 m +9083 m +9084 m +1 h +7585 m +569 h +9085 m +1 h +3 h +2617 m +4 h +109 h +146 h +9086 m +59 h +1619 h +9087 m +4 h +10 h +4 h +164 h +1 h +4 h +9088 m +9089 m +79 h +1 h +8477 m +4 h +65 h +10 h +4 h +9090 m +119 h +4 h 
+25 h +9091 m +976 h +1 h +578 h +11 h +82 h +10 h +4 h +4 h +59 h +1 h +1 h +9092 m +4 h +4 h +10 h +119 h +124 h +97 h +4 h +9093 m +4 h +9094 m +4 h +4 h +65 h +9095 m +10 h +9096 m +9097 m +123 h +143 h +3558 h +9098 m +10 h +4 h +41 h +82 h +9099 m +4 h +1 h +4 h +119 h +2054 m +1737 m +40 h +9100 m +1 h +9101 m +1 h +124 h +4 h +1 h +11 h +1 h +1 h +4 h +1 h +656 h +1308 m +1 h +1 h +258 h +9102 m +167 h +869 h +109 h +9103 m +1957 m +2281 m +57 h +9104 m +8 h +109 h +6549 m +10 h +11 h +4 h +10 h +195 h +4 h +1261 h +1685 h +9105 m +4 h +82 h +3558 h +9106 m +169 h +4 h +4 h +6941 m +10 h +1 h +4 h +1 h +4 h +9107 m +27 h +195 h +10 h +146 h +11 h +4 h +4 h +4 h +9108 m +10 h +9109 m +1 h +4 h +4 h +9110 m +10 h +10 h +4 h +82 h +11 h +9111 m +10 h +4 h +9112 m +4 h +4 h +9113 m +9114 m +332 h +119 h +10 h +4 h +10 h +9115 m +9116 m +4 h +31 h +1 h +10 h +4 h +1 h +1 h +278 h +4 h +4 h +9117 m +1 h +1 h +9118 m +110 h +4 h +319 h +9119 m +1 h +40 h +1 h +368 h +109 h +278 h +278 h +4 h +4 h +4 h +169 h +823 m +82 h +9120 m +4 h +1 h +10 h +25 h +9121 m +112 h +1 h +10 h +1835 h +4 h +146 h +4 h +295 h +10 h +9122 m +11 h +4 h +77 h +9123 m +9124 m +74 h +203 m +1409 m +27 h +27 h +5053 m +4 h +9125 m +4 h +9126 m +4 h +109 h +9127 m +10 h +9128 m +2205 m +4 h +10 h +258 h +25 h +110 h +9129 m +9130 m +4 h +1796 h +4 h +4 h +478 m +1074 h +359 h +12 h +10 h +1 h +10 h +9131 m +1541 m +9132 m +11 h +9133 m +4 h +976 h +4 h +307 h +10 h +11 h +1 h +297 h +9134 m +10 h +11 h +9135 m +1 h +9136 m +4 h +13 h +4 h +9137 m +10 h +109 h +10 h +1 h +4 h +82 h +10 h +1 h +4 h +9138 m +1 h +4 h +4 h +9139 m +1 h +4 h +118 h +1 h +147 h +4 h +3995 m +9140 m +9141 m +9142 m +4 h +9143 m +109 h +4 h +273 m +11 h +9144 m +9145 m +1 h +4 h +10 h +1766 h +9146 m +1 h +9147 m +114 h +4 h +4 h +170 h +1 h +4 h +9148 m +4 h +25 h +9149 m +10 h +114 h +9150 m +45 h +4 h +9151 m +9152 m +31 h +10 h +164 h +75 h +10 h +1 h +718 h +4 h +9153 m +1 h +9154 m +1 h +9155 m +10 h +538 h 
+10 h +1 h +9156 m +10 h +1 h +4 h +1685 h +83 h +4 h +9157 m +74 h +10 h +1 h +73 h +143 h +1 h +9158 m +10 h +4 h +1 h +9159 m +1 h +10 h +4 h +57 h +113 h +4 h +41 h +56 h +9160 m +13 h +10 h +4 h +4 h +1 h +195 h +265 h +2002 m +12 h +74 h +10 h +4 h +4 h +10 h +266 h +4 h +4 h +1 h +230 h +4 h +9161 m +1 h +1 h +4 h +9162 m +9163 m +230 h +11 h +8638 m +10 h +4 h +4 h +10 h +9164 m +4 h +9165 m +843 m +9166 m +1 h +4 h +10 h +4 h +9167 m +687 h +1 h +4 h +1 h +11 h +9168 m +10 h +110 h +28 h +31 h +82 h +9169 m +147 h +9170 m +10 h +1016 h +11 h +1 h +9171 m +1 h +4 h +9172 m +2824 m +1 h +143 h +9173 m +1 h +4 h +4 h +10 h +8 h +9174 m +10 h +57 h +94 h +1822 h +9175 m +4 h +4 h +4 h +4 h +4 h +9176 m +1 h +4 h +9177 m +258 h +224 h +4 h +9178 m +1 h +9179 m +258 h +4 h +4 h +307 h +123 h +9180 m +124 h +123 h +1 h +9181 m +4 h +186 h +4 h +64 h +1303 m +1 h +59 h +1 h +106 h +10 h +383 h +4 h +4 h +9182 m +10 h +4 h +9183 m +9184 m +65 h +4538 m +1 h +4 h +1884 m +3 h +9185 m +3555 m +4 h +31 h +1 h +4 h +11 h +9186 m +74 h +139 h +5976 m +9187 m +185 h +4 h +9188 m +9189 m +83 h +4 h +9190 m +4 h +9191 m +31 h +258 h +59 h +9192 m +4 h +1 h +1 h +1 h +57 h +1 h +10 h +103 h +31 h +9193 m +1 h +578 h +3 h +172 h +4 h +4 h +82 h +4 h +9194 m +82 h +4 h +9195 m +10 h +11 h +9196 m +4 h +1 h +9197 m +10 h +173 h +4 h +124 h +2815 m +4 h +4966 m +10 h +4 h +10 h +282 m +104 h +9198 m +1 h +11 h +4 h +9199 m +1 h +56 h +4 h +1 h +238 h +4 h +9200 m +9201 m +1 h +5230 m +9202 m +1 h +1 h +1 h +1 h +4 h +4 h +7394 m +4 h +4 h +4 h +1 h +4 h +477 m +620 m +36 h +1 h +9203 m +403 h +468 h +41 h +278 h +1 h +241 m +135 h +4 h +801 m +10 h +2984 m +692 h +1 h +10 h +3 h +9204 m +59 h +10 h +10 h +10 h +9205 m +4 h +124 h +9206 m +9207 m +3 h +1 h +4 h +9208 m +4 h +1 h +9209 m +167 h +31 h +3 h +12 h +4 h +258 h +109 h +4 h +9210 m +146 h +4 h +10 h +1 h +9211 m +10 h +10 h +9212 m +4 h +9213 m +9214 m +1 h +798 m +118 h +4 h +31 h +109 h +10 h +4 h +10 h +1 h +4 h +1 
h +9215 m +13 h +9216 m +1 h +1027 m +4 h +289 h +9217 m +119 h +10 h +192 h +10 h +1 h +4 h +1 h +6869 m +73 h +4 h +25 h +57 h +4 h +9218 m +11 h +1 h +9219 m +1 h +10 h +1 h +195 h +536 h +4 h +10 h +94 h +1 h +190 h +55 h +11 h +9220 m +4 h +464 h +74 h +1 h +9221 m +4 h +10 h +224 h +4 h +9222 m +59 h +1 h +4 h +10 h +4 h +1 h +9223 m +4 h +109 h +8221 m +4 h +9224 m +48 h +3 h +1 h +9225 m +9226 m +10 h +4 h +4 h +1772 h +129 h +4 h +123 h +3707 m +10 h +10 h +92 h +4 h +1 h +79 h +1 h +9227 m +108 h +1 h +10 h +4 h +1759 m +359 h +4 h +10 h +9228 m +4 h +4 h +976 h +1 h +174 h +9229 m +9230 m +4 h +4 h +4 h +1 h +56 h +9231 m +10 h +1 h +4 h +9232 m +9233 m +9234 m +4 h +9235 m +4 h +4 h +9236 m +3562 m +31 h +69 h +1 h +9237 m +4 h +9238 m +5141 m +4 h +4 h +10 h +4 h +9239 m +1 h +9240 m +9241 m +4 h +9242 m +65 h +73 h +167 h +570 h +3 h +4 h +9243 m +129 h +4 h +5523 m +9244 m +4 h +79 h +4 h +170 h +77 h +9245 m +10 h +1 h +1 h +10 h +25 h +4 h +11 h +9246 m +10 h +4 h +170 h +9247 m +1 h +626 h +25 h +1 h +4 h +1 h +124 h +9248 m +110 h +41 h +1619 h +9249 m +9250 m +4 h +124 h +4 h +3396 m +65 h +1 h +10 h +9251 m +614 m +4 h +56 h +144 h +9252 m +9253 m +1 h +4 h +10 h +1 h +1 h +3799 m +307 h +57 h +9254 m +1 h +1 h +1 h +173 h +9255 m +4 h +4 h +1772 h +109 h +1 h +1 h +82 h +4 h +109 h +10 h +124 h +114 h +9256 m +4 h +4 h +10 h +4 h +9257 m +4 h +4 h +228 m +1 h +12 h +9258 m +4 h +4 h +9259 m +9260 m +82 h +1375 m +319 h +4 h +4 h +59 h +9261 m +278 h +10 h +4 h +109 h +371 h +9262 m +1 h +112 h +114 h +9263 m +1 h +4 h +104 h +9264 m +10 h +1 h +4 h +9265 m +83 h +8179 m +4 h +1 h +1 h +129 h +4 h +4 h +1 h +9266 m +1955 m +4 h +9267 m +9268 m +10 h +4 h +10 h +170 h +9269 m +4 h +1 h +1 h +4 h +10 h +45 h +4 h +4 h +4 h +266 h +1 h +4 h +124 h +1 h +27 h +1 h +10 h +83 h +1 h +4 h +158 h +8 h +1 h +10 h +9270 m +3 h +4 h +1 h +10 h +1 h +48 h +9271 m +10 h +970 m +1 h +25 h +332 h +692 h +1 h +536 h +250 h +83 h +158 h +4 h +11 h +1 h +10 h +4 
h +9272 m +381 m +11 h +1 h +109 h +9273 m +1 h +1 h +642 m +9274 m +11 h +1 h +9275 m +4 h +4 h +1 h +4 h +1 h +4 h +3 h +9276 m +1 h +27 h +1 h +4 h +1 h +9277 m +9278 m +1016 h +4 h +9279 m +104 h +10 h +4 h +74 h +1 h +10 h +9280 m +10 h +1 h +1 h +297 h +1 h +4 h +10 h +9281 m +4 h +1337 m +10 h +82 h +4 h +1 h +1 h +9282 m +1 h +9283 m +5809 m +10 h +4 h +10 h +779 h +276 h +371 h +935 h +9284 m +9285 m +46 h +1 h +4 h +10 h +1 h +10 h +4 h +9286 m +1 h +31 h +4 h +1766 h +146 h +313 h +190 h +9287 m +4 h +10 h +10 h +109 h +10 h +135 h +1 h +195 h +94 h +1 h +31 h +4 h +10 h +4 h +4 h +184 h +1 h +123 h +4 h +463 h +9288 m +10 h +4 h +1 h +935 h +4 h +28 h +83 h +1 h +4 h +4 h +4 h +9289 m +4 h +9290 m +123 h +82 h +1 h +1 h +1 h +10 h +4 h +11 h +4 h +59 h +9291 m +11 h +10 h +11 h +9292 m +4 h +125 h +10 h +4 h +146 h +9293 m +10 h +31 h +97 h +41 h +41 h +1 h +1 h +25 h +25 h +1 h +4 h +9294 m +1 h +4 h +11 h +55 h +3 h +1 h +9295 m +4 h +1 h +158 h +229 h +11 h +4 h +4 h +10 h +4 h +4 h +4 h +114 h +9296 m +10 h +9297 m +10 h +59 h +278 h +9298 m +1 h +4 h +64 h +4 h +9299 m +114 h +229 h +278 h +1 h +9300 m +169 h +140 h +10 h +9301 m +4 h +1 h +1556 m +1 h +1 h +238 h +1 h +3 h +25 h +9302 m +10 h +10 h +1 h +1 h +9303 m +1 h +10 h +10 h +4 h +4 h +1 h +10 h +1 h +1191 m +10 h +79 h +4 h +92 h +41 h +27 h +3533 m +9304 m +114 h +13 h +4 h +1 h +2625 h +41 h +368 h +3 h +9305 m +10 h +1 h +9306 m +1 h +73 h +4 h +124 h +9307 m +69 h +10 h +4 h +4 h +1 h +83 h +4 h +4 h +10 h +11 h +307 h +358 h +536 h +464 h +31 h +976 h +1030 h +1 h +10 h +4 h +83 h +4 h +123 h +1 h +10 h +59 h +4 h +1 h +1 h +31 h +4 h +190 h +10 h +3 h +1 h +25 h +4 h +9308 m +224 h +1 h +9309 m +1 h +1 h +10 h +332 h +9310 m +4 h +4 h +10 h +1 h +4 h +1886 m +65 h +9311 m +106 h +124 h +195 h +10 h +4 h +569 h +10 h +4 h +1 h +1 h +2592 m +966 m +3 h +4 h +124 h +386 h +13 h +9312 m +9313 m +41 h +1 h +1 h +1 h +4 h +113 h +113 h +57 h +139 h +124 h +9314 m +69 h +1089 m +9315 m 
+9316 m +8 h +2184 m +4 h +9317 m +10 h +9318 m +11 h +1 h +1 h +256 m +13 h +10 h +11 h +9319 m +8 h +11 h +9320 m +307 h +4 h +55 h +139 h +1 h +1 h +4 h +2522 m +135 h +4 h +885 m +4 h +41 h +10 h +10 h +10 h +4 h +11 h +9321 m +9322 m +9323 m +4 h +9293 m +9324 m +9325 m +157 h +443 h +4 h +1 h +11 h +1 h +45 h +10 h +10 h +443 h +55 h +146 h +4 h +1 h +185 h +4 h +4 h +1642 h +4718 m +1 h +4 h +4 h +10 h +10 h +4 h +9326 m +1 h +97 h +10 h +258 h +377 m +10 h +73 h +1 h +125 h +125 h +74 h +11 h +9327 m +10 h +9328 m +4 h +9329 m +489 m +423 m +83 h +25 h +45 h +83 h +55 h +9330 m +94 h +10 h +9331 m +1 h +9332 m +4 h +9333 m +4 h +1822 h +238 h +157 h +10 h +10 h +4 h +10 h +123 h +9334 m +10 h +1 h +10 h +10 h +1 h +4 h +124 h +4 h +4 h +11 h +1 h +4 h +4 h +4 h +4 h +1 h +10 h +56 h +11 h +123 h +10 h +25 h +109 h +2769 m +1 h +9335 m +9336 m +124 h +1 h +124 h +4 h +109 h +4 h +4 h +1 h +1 h +4 h +4 h +270 m +11 h +1 h +266 h +10 h +4 h +10 h +1 h +9337 m +73 h +9338 m +1 h +4 h +4 h +9339 m +119 h +4 h +1 h +4 h +9340 m +9341 m +2425 m +146 h +2720 m +10 h +4 h +9342 m +4 h +4 h +4 h +9343 m +9344 m +367 m +9345 m +1 h +9346 m +4 h +4 h +9027 m +196 h +4 h +9347 m +1 h +84 h +1650 h +10 h +104 h +9348 m +4 h +2625 h +4 h +4 h +4 h +4 h +4 h +2733 h +1 h +146 h +4 h +109 h +9349 m +9350 m +4 h +114 h +9351 m +8 h +123 h +10 h +4 h +9352 m +4 h +4 h +1 h +10 h +3742 m +1 h +4 h +4 h +4 h +4 h +4 h +4 h +1 h +129 h +1 h +1 h +9353 m +4 h +1 h +124 h +170 h +9354 m +167 h +1 h +10 h +11 h +10 h +28 h +65 h +9355 m +9356 m +4 h +1 h +83 h +1 h +4 h +10 h +109 h +10 h +1 h +1 h +4 h +1 h +91 h +9357 m +9358 m +9359 m +1205 m +10 h +9360 m +9361 m +1 h +4 h +9362 m +9363 m +82 h +3 h +112 h +1 h +9364 m +82 h +10 h +1 h +5557 m +1 h +4 h +9365 m +4 h +4 h +9366 m +1 h +299 h +9367 m +9368 m +9007 m +10 h +447 h +1 h +10 h +4 h +9369 m +2281 m +965 m +10 h +1 h +601 h +4 h +9370 m +9371 m +737 m +4 h +10 h +9372 m +9373 m +9374 m +4 h +692 h +1 h +1 h +4 h +3 h 
+4 h +4 h +1 h +10 h +10 h +1 h +9375 m +124 h +4 h +12 h +1 h +9376 m +1 h +12 h +1 h +1 h +9377 m +1 h +9378 m +9379 m +9380 m +9381 m +4 h +4 h +185 h +9382 m +114 h +912 m +4 h +135 h +41 h +45 h +190 h +1 h +10 h +4 h +4 h +10 h +4 h +10 h +9383 m +10 h +1 h +538 h +9384 m +114 h +270 m +4 h +59 h +4 h +4 h +3341 m +1 h +4 h +4 h +4 h +4 h +4 h +4 h +4 h +1 h +9385 m +9386 m +282 m +82 h +1 h +124 h +1 h +488 h +11 h +4 h +83 h +1 h +1 h +1847 m +172 h +9387 m +9388 m +83 h +1 h +9389 m +9 m +10 h +4 h +4 h +167 h +94 h +10 h +1 h +4 h +9390 m +4 h +170 h +4 h +1 h +10 h +2391 m +9391 m +10 h +10 h +109 h +4 h +1 h +83 h +7253 m +125 h +4 h +9392 m +167 h +1105 h +8002 m +9393 m +11 h +195 h +4 h +10 h +4 h +1 h +109 h +9394 m +4 h +124 h +10 h +4 h +996 m +1 h +10 h +143 h +9395 m +8 h +10 h +278 h +4 h +12 h +1 h +4441 m +4 h +9396 m +9397 m +1 h +9398 m +1 h +10 h +9399 m +104 h +10 h +687 h +125 h +939 m +83 h +4 h +190 h +3622 m +4367 m +9400 m +1 h +11 h +4 h +9401 m +4 h +9402 m +295 h +9403 m +464 h +1 h +10 h +74 h +1 h +4378 m +123 h +10 h +10 h +447 h +4 h +104 h +195 h +25 h +332 h +1 h +9404 m +84 h +10 h +59 h +4 h +1 h +10 h +97 h +59 h +10 h +4 h +9405 m +1 h +140 h +135 h +7792 m +59 h +9406 m +1 h +4 h +113 h +10 h +4 h +4 h +4 h +9407 m +9408 m +3 h +1 h +9409 m +976 h +9410 m +9411 m +4 h +10 h +278 h +4 h +1 h +4 h +1445 m +10 h +9412 m +9413 m +4 h +10 h +4 h +295 h +9414 m +104 h +590 h +266 h +1 h +1 h +9415 m +196 h +1 h +4 h +1 h +10 h +4 h +125 h +1 h +4 h +4 h +4 h +1 h +10 h +92 h +1 h +1 h +25 h +10 h +238 h +464 h +10 h +97 h +4 h +1 h +11 h +1 h +9416 m +9417 m +11 h +4 h +1 h +9 m +10 h +9418 m +4 h +11 h +10 h +4 h +65 h +4 h +4 h +82 h +1284 m +10 h +10 h +1 h +147 h +9419 m +9420 m +4 h +57 h +4 h +4 h +4 h +10 h +4 h +10 h +9421 m +9422 m +9423 m +506 h +10 h +11 h +10 h +9424 m +1 h +10 h +9425 m +10 h +1 h +1 h +9426 m +9427 m +9428 m +172 h +1 h +83 h +1 h +4 h +4 h +1 h +157 h +4 h +1 h +4 h +4 h +10 h +9429 m +1650 h 
+9430 m +9431 m +4 h +4 h +4 h +124 h +9432 m +9433 m +36 h +4 h +57 h +185 h +10 h +109 h +1 h +808 m +9434 m +1 h +124 h +9435 m +10 h +262 h +31 h +4 h +195 h +10 h +1 h +278 h +147 h +313 h +65 h +109 h +9436 m +109 h +4 h +4 h +1 h +1 h +976 h +36 h +1 h +6549 m +13 h +9437 m +1 h +10 h +4 h +10 h +1 h +4 h +4 h +9438 m +9439 m +9440 m +9441 m +1 h +1 h +10 h +9442 m +4 h +4320 m +1 h +41 h +9443 m +190 h +1 h +1 h +10 h +3 h +9444 m +12 h +9445 m +4 h +1 h +1 h +9446 m +46 h +9447 m +278 h +10 h +1 h +4 h +9448 m +1 h +1 h +368 h +9449 m +59 h +156 h +5 h +4 h +41 h +9450 m +9451 m +9452 m +9453 m +4 h +4 h +1 h +383 h +45 h +4 h +31 h +10 h +4 h +92 h +4 h +11 h +172 h +9454 m +10 h +9455 m +59 h +1 h +10 h +124 h +9456 m +196 h +8133 m +9457 m +1 h +12 h +41 h +4 h +195 h +757 h +9458 m +40 h +1 h +10 h +9459 m +989 m +1 h +1470 h +4 h +1 h +65 h +25 h +9460 m +31 h +1780 m +83 h +27 h +10 h +1 h +10 h +11 h +1 h +4 h +4 h +479 m +41 h +4 h +1 h +9461 m +10 h +195 h +8 h +9462 m +4 h +4 h +1261 h +1 h +4 h +4 h +10 h +10 h +9463 m +4 h +1535 m +4 h +10 h +1 h +9464 m +9465 m +4 h +4 h +4 h +4 h +1 h +4 h +1 h +9466 m +10 h +4 h +181 h +9467 m +1 h +4 h +9468 m +8 h +11 h +3 h +4 h +124 h +83 h +4 h +4 h +1 h +10 h +1 h +25 h +4 h +139 h +10 h +92 h +1 h +4 h +181 h +9469 m +9470 m +104 h +10 h +9471 m +57 h +9472 m +9473 m +36 h +258 h +10 h +4 h +9474 m +9475 m +1 h +9476 m +8 h +9477 m +1 h +13 h +10 h +9478 m +31 h +1 h +10 h +9479 m +4 h +9480 m +9481 m +83 h +9482 m +4 h +65 h +1 h +114 h +9483 m +57 h +1 h +1 h +4 h +1 h +9484 m +10 h +12 h +1 h +9485 m +4 h +1 h +4 h +83 h +83 h +10 h +238 h +10 h +1780 m +4 h +9486 m +82 h +9487 m +4 h +4 h +31 h +6869 m +1 h +3 h +195 h +279 h +9488 m +1 h +9489 m +569 h +9490 m +10 h +4 h +4 h +1 h +11 h +164 h +9491 m +10 h +59 h +1 h +124 h +11 h +4538 m +10 h +9492 m +1 h +4 h +359 h +124 h +238 h +9493 m +4 h +9494 m +9495 m +9496 m +91 h +82 h +9497 m +1 h +4 h +1 h +65 h +3 h +1 h +73 h +1 h +4 h +9498 m +4 
h +146 h +31 h +11 h +10 h +1261 h +124 h +41 h +9499 m +1 h +10 h +83 h +124 h +9500 m +112 h +8533 m +1 h +4 h +1 h +83 h +4 h +55 h +4 h +10 h +4 h +1 h +9501 m +112 h +4 h +1 h +9502 m +1828 m +82 h +2278 m +185 h +11 h +538 h +104 h +4 h +4 h +1 h +9503 m +1642 h +4 h +10 h +518 m +114 h +4 h +4 h +9504 m +4 h +1646 m +103 h +1 h +57 h +1 h +4 h +83 h +41 h +9505 m +4 h +9506 m +4 h +4 h +10 h +4 h +109 h +10 h +2124 m +41 h +4 h +170 h +1 h +36 h +10 h +125 h +1 h +10 h +9507 m +9508 m +1 h +12 h +1 h +276 h +4 h +10 h +74 h +73 h +9509 m +118 h +27 h +113 h +9510 m +4 h +36 h +4 h +25 h +4 h +1 h +266 h +1 h +5225 m +9511 m +4 h +1 h +4 h +9512 m +467 m +9513 m +12 h +2314 m +4 h +10 h +911 m +9514 m +10 h +79 h +68 m +10 h +11 h +2163 m +1 h +4 h +1 h +11 h +9515 m +10 h +1 h +276 h +9516 m +94 h +10 h +196 h +7924 m +83 h +1 h +10 h +278 h +9517 m +4 h +57 h +1 h +11 h +1 h +4 h +74 h +238 h +4 h +4 h +371 h +4 h +9518 m +9519 m +45 h +65 h +1 h +4 h +4 h +4 h +13 h +1 h +1 h +358 h +9520 m +9521 m +9522 m +10 h +9523 m +10 h +164 h +4 h +1 h +10 h +1 h +9524 m +10 h +11 h +10 h +464 h +1083 h +31 h +9525 m +9526 m +11 h +4 h +196 h +11 h +1 h +4 h +106 h +1 h +147 h +79 h +9527 m +1 h +1 h +82 h +10 h +4 h +2172 h +10 h +9528 m +74 h +1 h +9529 m +4 h +1 h +204 h +4 h +9530 m +7253 m +97 h +4 h +9531 m +4 h +9532 m +4 h +1 h +196 h +9533 m +4 h +185 h +1127 m +97 h +1 h +9534 m +4 h +4 h +1 h +1 h +4 h +250 h +4 h +9535 m +9536 m +1 h +10 h +8 h +25 h +11 h +9537 m +1 h +1116 m +97 h +9538 m +4 h +9539 m +11 h +4 h +9540 m +4 h +4 h +9541 m +31 h +10 h +9542 m +1309 m +82 h +1 h +9543 m +9544 m +114 h +8 h +9545 m +9546 m +140 h +170 h +10 h +10 h +4 h +4 h +1 h +4 h +7999 m +123 h +113 h +447 h +1478 h +9547 m +10 h +1 h +9548 m +31 h +1 h +4 h +27 h +4 h +36 h +4 h +4 h +1 h +4 h +1 h +4 h +9549 m +1 h +9550 m +9551 m +1650 h +1 h +25 h +4 h +10 h +10 h +10 h +195 h +169 h +4 h +9552 m +74 h +57 h +1 h +9553 m +25 h +297 h +1 h +5456 m +11 h +4127 m 
+196 h +4 h +9554 m +1 h +4 h +11 h +8555 m +4 h +1 h +1 h +1 h +9555 m +1 h +9556 m +4 h +8485 m +4 h +10 h +5 h +4 h +10 h +31 h +65 h +9557 m +9558 m +25 h +4 h +9559 m +10 h +9560 m +1 h +4 h +9561 m +10 h +4 h +4 h +9562 m +4 h +1 h +1 h +359 h +4 h +31 h +9563 m +1 h +4257 m +4 h +4 h +172 h +9564 m +3 h +164 h +976 h +4 h +10 h +10 h +9565 m +125 h +22 h +9566 m +1 h +4 h +91 h +10 h +3768 m +4 h +1627 m +578 h +10 h +4 h +1 h +1 h +9567 m +4 h +10 h +2794 m +31 h +41 h +1 h +1 h +1 h +11 h +4 h +124 h +10 h +4 h +3095 m +1 h +91 h +174 h +276 h +4 h +9568 m +4 h +4 h +82 h +56 h +25 h +4 h +10 h +25 h +1 h +10 h +14 m +1 h +12 h +11 h +9569 m +1 h +147 h +59 h +2719 m +1 h +1 h +25 h +4 h +190 h +9570 m +4 h +170 h +4 h +1886 m +9571 m +478 m +9572 m +9573 m +4 h +1027 m +135 h +1 h +1 h +4 h +9574 m +10 h +2558 m +94 h +11 h +83 h +9575 m +4 h +40 h +9576 m +10 h +119 h +9577 m +1 h +258 h +94 h +820 m +7322 m +4 h +4 h +4 h +146 h +83 h +11 h +173 h +55 h +9578 m +41 h +97 h +1 h +104 h +4 h +1 h +10 h +4 h +4 h +874 m +4 h +1 h +9579 m +1 h +1 h +332 h +170 h +10 h +1 h +1 h +74 h +4 h +1493 m +10 h +9580 m +83 h +4 h +265 h +1454 m +9581 m +10 h +25 h +1 h +31 h +4 h +172 h +10 h +9582 m +4 h +195 h +1 h +1261 h +976 h +238 h +82 h +9583 m +4 h +1 h +1 h +9584 m +4 h +3704 m +1 h +4 h +11 h +986 h +1 h +1 h +332 h +11 h +1 h +1 h +93 h +8781 m +10 h +172 h +10 h +4 h +11 h +1 h +4 h +9585 m +9586 m +359 h +4 h +4 h +195 h +135 h +4177 m +4 h +146 h +11 h +11 h +4 h +48 h +9587 m +4 h +601 h +1 h +82 h +10 h +25 h +119 h +9588 m +124 h +9589 m +1201 m +4 h +1 h +1 h +4 h +601 h +11 h +82 h +91 h +6381 m +4 h +4616 m +10 h +4 h +135 h +10 h +119 h +2952 m +172 h +820 m +36 h +9590 m +4 h +9591 m +278 h +9592 m +4 h +1 h +125 h +9593 m +211 m +65 h +1 h +56 h +9594 m +4 h +4 h +1 h +10 h +4 h +4 h +1 h +167 h +9595 m +1 h +9596 m +10 h +1 h +1 h +4 h +9597 m +9598 m +9599 m +164 h +10 h +170 h +4 h +4 h +59 h +9600 m +4 h +11 h +1453 m +3068 m +74 h +4 h 
+4 h +11 h +1 h +4 h +4 h +1 h +3 h +4 h +124 h +4 h +10 h +4 h +97 h +1 h +557 m +9601 m +4 h +4 h +10 h +192 h +4 h +10 h +3 h +9602 m +9603 m +158 h +1 h +9604 m +4 h +2592 m +9605 m +1 h +11 h +9606 m +9607 m +82 h +307 h +4 h +9608 m +11 h +1 h +10 h +135 h +4 h +9609 m +10 h +1 h +10 h +82 h +9610 m +3 h +332 h +156 h +238 h +9611 m +4 h +196 h +104 h +1 h +4 h +3680 m +4 h +4 h +9612 m +1074 h +10 h +83 h +1 h +4 h +238 h +9613 m +1893 m +1261 h +4648 m +1 h +4 h +4 h +10 h +10 h +4 h +770 m +4 h +4 h +4 h +4 h +1 h +22 h +25 h +1 h +4 h +888 m +1 h +10 h +9614 m +4 h +9615 m +1 h +4 h +82 h +4 h +4 h +9616 m +23 h +4 h +11 h +1 h +1 h +4 h +1 h +8133 m +10 h +9617 m +10 h +3 h +1 h +9618 m +4 h +1 h +9619 m +92 h +9620 m +124 h +41 h +31 h +10 h +2984 m +10 h +4 h +3161 m +31 h +1 h +73 h +4 h +9621 m +10 h +4 h +10 h +10 h +82 h +10 h +146 h +4 h +181 h +4 h +109 h +330 m +1 h +1 h +4 h +1 h +1 h +1 h +41 h +1 h +1 h +10 h +1 h +1220 m +9622 m +11 h +9623 m +4 h +9624 m +1 h +4 h +57 h +4 h +1 h +976 h +1 h +55 h +9625 m +4 h +11 h +9626 m +196 h +9627 m +1 h +10 h +31 h +4 h +1 h +4 h +4 h +278 h +9628 m +1127 m +9629 m +196 h +3 h +3 h +1 h +31 h +4 h +4 h +3 h +11 h +1 h +4 h +1 h +10 h +9630 m +1260 m +403 h +12 h +9631 m +10 h +73 h +4 h +4 h +1 h +2844 m +278 h +124 h +119 h +4 h +31 h +11 h +4 h +9632 m +1 h +125 h +1 h +4 h +167 h +1 h +10 h +9633 m +295 h +1 h +9475 m +4 h +1 h +57 h +4 h +10 h +1 h +4 h +4 h +10 h +9634 m +10 h +1 h +4 h +4 h +4 h +10 h +238 h +57 h +1 h +10 h +1 h +9635 m +9636 m +146 h +1 h +9637 m +9638 m +1 h +10 h +4 h +1 h +4 h +4 h +10 h +9639 m +1 h +9640 m +11 h +258 h +4 h +41 h +3933 m +4 h +297 h +1 h +911 m +9641 m +1 h +11 h +4 h +124 h +3 h +1 h +196 h +4 h +10 h +2379 h +41 h +1 h +575 m +97 h +1 h +1 h +1 h +10 h +10 h +10 h +1 h +1 h +10 h +9642 m +57 h +82 h +4 h +9643 m +1 h +9644 m +810 m +9645 m +83 h +10 h +5470 m +10 h +1884 h +1 h +9646 m +9647 m +82 h +1 h +9648 m +4 h +1074 h +73 h +125 h +1030 h +9649 
m +10 h +4 h +10 h +9650 m +3246 m +186 h +1 h +57 h +10 h +1884 h +1 h +1 h +687 h +9651 m +4 h +9652 m +10 h +147 h +4 h +4 h +1 h +97 h +1 h +4 h +5869 m +9653 m +1 h +9654 m +4 h +4 h +170 h +10 h +1 h +4 h +139 h +4 h +1677 m +2418 m +1 h +11 h +69 h +1 h +1250 h +4 h +4 h +69 h +10 h +4 h +1 h +1 h +9655 m +2878 m +4 h +9656 m +9657 m +146 h +4 h +1 h +1 h +125 h +9658 m +9659 m +4 h +4 h +250 h +4 h +1 h +138 h +9660 m +1 h +104 h +9661 m +4 h +9662 m +9663 m +9664 m +1 h +31 h +1 h +9665 m +1 h +1 h +238 h +10 h +11 h +1 h +10 h +156 h +4 h +677 m +1 h +9666 m +57 h +186 h +4 h +1574 m +83 h +9667 m +1 h +13 h +9668 m +9669 m +9670 m +1 h +11 h +10 h +4 h +1 h +9671 m +10 h +9672 m +10 h +11 h +9673 m +55 h +9674 m +987 m +10 h +4 h +9675 m +3847 m +4 h +11 h +9676 m +4 h +195 h +11 h +10 h +9677 m +371 h +9678 m +3 h +83 h +4 h +9679 m +25 h +10 h +9680 m +170 h +4 h +9681 m +4 h +6869 m +3 h +169 h +6599 m +1 h +9682 m +9683 m +4 h +124 h +569 h +4 h +9684 m +1 h +5917 h +79 h +4 h +3820 m +55 h +4 h +170 h +4 h +4 h +74 h +1024 m +9685 m +9686 m +9687 m +9688 m +4 h +1 h +10 h +1 h +11 h +9689 m +447 h +10 h +443 h +9690 m +10 h +9691 m +4 h +9692 m +7395 m +125 h +4 h +4 h +113 h +9693 m +4 h +9694 m +167 h +57 h +9695 m +11 h +9696 m +9697 m +939 m +10 h +9698 m +65 h +10 h +146 h +4 h +4 h +4 h +3 h +9699 m +4 h +13 h +36 h +9700 m +195 h +92 h +74 h +9701 m +124 h +41 h +4 h +1 h +25 h +9702 m +1 h +55 h +1 h +10 h +109 h +4 h +10 h +447 h +82 h +1 h +1 h +433 m +1116 m +264 m +4 h +266 h +99 m +4 h +59 h +4 h +203 m +10 h +146 h +3 h +4 h +9703 m +4 h +9704 m +297 h +1 h +27 h +10 h +10 h +150 m +403 h +6102 m +77 h +4 h +4000 m +2186 m +1 h +135 h +9705 m +1 h +83 h +4 h +1 h +4 h +1 h +72 m +9706 m +4 h +4 h +10 h +3841 m +4 h +56 h +4 h +1201 m +82 h +447 h +6963 m +4 h +1 h +4 h +4 h +172 h +11 h +1 h +124 h +211 m +10 h +4 h +7572 m +1 h +2374 m +1 h +57 h +4 h +11 h +4 h +4 h +10 h +4 h +143 h +1 h +93 h +1 h +4 h +77 h +1 h +4 h +9707 m +4 h 
+4 h +9708 m +41 h +4 h +1 h +9709 m +4 h +10 h +9710 m +258 h +4 h +9711 m +9712 m +4 h +4 h +94 h +11 h +4 h +10 h +258 h +4 h +4 h +4 h +59 h +9713 m +9714 m +1 h +10 h +1 h +9715 m +4 h +10 h +9716 m +82 h +1 h +9717 m +9718 m +10 h +4 h +538 h +9719 m +1 h +1 h +307 h +114 h +4 h +56 h +12 h +4 h +4 h +70 m +10 h +10 h +9720 m +9721 m +4 h +319 h +4 h +10 h +4 h +1 h +1 h +9722 m +4 h +10 h +2002 m +1 h +10 h +4 h +9723 m +59 h +1 h +9724 m +146 h +64 h +4 h +1 h +10 h +181 h +9725 m +110 h +9726 m +279 h +79 h +83 h +9727 m +10 h +10 h +10 h +1 h +110 h +383 h +9728 m +7394 m +1137 h +9729 m +10 h +9730 m +9731 m +4 h +279 h +295 h +1 h +10 h +11 h +3 h +10 h +10 h +10 h +9732 m +10 h +1 h +73 h +4 h +104 h +2475 m +169 h +10 h +11 h +10 h +9733 m +4 h +9734 m +963 m +4 h +9735 m +358 h +9736 m +9737 m +10 h +1 h +1 h +4 h +4 h +1 h +139 h +83 h +10 h +1 h +1253 m +9738 m +11 h +10 h +82 h +10 h +4 h +190 h +156 h +74 h +4 h +11 h +4 h +533 h +1 h +9739 m +1 h +4 h +4 h +9740 m +3 h +9741 m +146 h +97 h +57 h +74 h +1 h +25 h +31 h +9742 m +10 h +10 h +169 h +4 h +11 h +196 h +97 h +10 h +3494 m +9743 m +4 h +4 h +9744 m +92 h +4 h +4 h +10 h +181 h +11 h +73 h +256 m +9745 m +829 m +10 h +82 h +11 h +4 h +9746 m +112 h +1 h +4 h +82 h +1 h +1 h +9747 m +1 h +9748 m +250 h +4 h +4 h +10 h +10 h +41 h +9749 m +10 h +104 h +9750 m +170 h +195 h +9751 m +3 h +1 h +59 h +4 h +4 h +9752 m +9753 m +4 h +1 h +359 h +9754 m +4 h +169 h +1 h +10 h +4 h +9755 m +4 h +1 h +4 h +114 h +332 h +10 h +4 h +9756 m +1 h +4 h +11 h +185 h +9757 m +1403 m +4 h +4 h +82 h +4 h +10 h +447 h +10 h +258 h +31 h +4 h +109 h +10 h +41 h +77 h +9758 m +83 h +9759 m +186 h +1083 h +4 h +164 h +4 h +219 m +4 h +9760 m +94 h +1 h +221 m +4 h +1 h +443 h +4 h +4 h +4 h +8 h +170 h +25 h +74 h +4 h +1 h +4 h +10 h +12 h +9761 m +10 h +4 h +1 h +1790 h +10 h +4 h +9762 m +79 h +125 h +4 h +190 h +601 h +4 h +9763 m +9764 m +9765 m +1 h +4 h +9766 m +11 h +11 h +1 h +124 h +1884 h +9767 m +1 
h +276 h +104 h +9768 m +358 h +10 h +10 h +1 h +9769 m +9770 m +4 h +1 h +4 h +533 h +4 h +83 h +2582 m +1 h +64 h +4 h +4 h +82 h +569 h +1 h +170 h +9771 m +82 h +9772 m +1 h +2418 m +1337 m +109 h +1 h +1 h +135 h +135 h +9773 m +229 h +1 h +2865 m +4 h +1 h +279 h +79 h +1 h +10 h +10 h +4 h +10 h +10 h +4 h +3561 m +4 h +6387 m +1470 h +69 h +4 h +304 m +4 h +687 h +9774 m +4 h +9775 m +9776 m +4 h +4 h +1 h +1 h +4 h +278 h +1 h +31 h +10 h +1 h +7641 m +9777 m +9778 m +1 h +9779 m +123 h +4 h +82 h +144 h +238 h +124 h +4 h +4 h +1 h +4 h +353 m +258 h +4 h +2184 m +265 h +1 h +489 m +1 h +124 h +10 h +10 h +83 h +9780 m +9781 m +11 h +10 h +10 h +1975 m +4 h +10 h +10 h +1 h +1 h +10 h +11 h +4 h +9782 m +109 h +9783 m +4 h +10 h +4 h +9784 m +59 h +4 h +4 h +1 h +9785 m +31 h +9786 m +110 h +9787 m +1 h +4 h +109 h +10 h +10 h +250 h +4 h +4702 m +4 h +5378 m +536 h +59 h +10 h +4 h +11 h +9788 m +1 h +4556 m +4 h +9789 m +9790 m +1 h +332 h +9791 m +4 h +4 h +4 h +9792 m +82 h +1 h +9793 m +1137 h +3 h +9794 m +10 h +9795 m +31 h +10 h +10 h +4 h +108 h +1 h +4 h +195 h +9796 m +1 h +1 h +4 h +143 h +1 h +520 h +9797 m +4 h +9798 m +4 h +986 h +1 h +9799 m +74 h +36 h +4 h +4 h +279 h +3704 m +4 h +4 h +10 h +3 h +9800 m +4 h +4 h +9801 m +4 h +10 h +31 h +9802 m +9803 m +4 h +238 h +10 h +9804 m +4 h +4 h +10 h +1 h +718 h +2101 m +1 h +1 h +2379 h +170 h +4 h +10 h +170 h +9805 m +10 h +147 h +172 h +1 h +9806 m +1 h +83 h +447 h +6197 m +9807 m +10 h +9808 m +4 h +591 m +123 h +4 h +1 h +265 h +9809 m +125 h +124 h +4 h +9810 m +10 h +10 h +119 h +4 h +82 h +1 h +339 m +368 h +403 h +9811 m +1955 m +8626 m +9812 m +4 h +250 h +69 h +4 h +109 h +1 h +82 h +1 h +4 h +289 h +192 h +10 h +278 h +9813 m +195 h +1 h +4 h +10 h +112 h +299 h +1 h +1406 h +10 h +1 h +4 h +73 h +1 h +12 h +9814 m +4 h +104 h +4 h +56 h +4 h +1 h +4 h +4 h +57 h +583 h +9815 m +4 h +65 h +82 h +170 h +4 h +9816 m +10 h +10 h +1 h +109 h +9817 m +22 h +447 h +10 h +4 h +4 h +4 h 
+10 h +9818 m +10 h +4 h +124 h +1 h +10 h +9819 m +65 h +57 h +1 h +4 h +4 h +1 h +11 h +4553 m +83 h +1 h +3369 m +278 h +196 h +10 h +82 h +10 h +109 h +4932 m +4 h +9820 m +10 h +1 h +1 h +10 h +195 h +10 h +4 h +1 h +4 h +1 h +4 h +9821 m +258 h +1030 h +4 h +4 h +9822 m +10 h +10 h +238 h +9823 m +9824 m +9825 m +1 h +270 h +1 h +10 h +629 m +10 h +109 h +1 h +195 h +10 h +9826 m +265 h +1 h +82 h +9827 m +9828 m +4320 m +60 m +1 h +114 h +4 h +1 h +109 h +10 h +9829 m +569 h +109 h +83 h +11 h +124 h +1 h +65 h +10 h +1 h +1201 h +238 h +22 h +10 h +6941 m +100 m +10 h +1 h +82 h +238 h +338 m +2148 m +1 h +9830 m +147 h +1 h +10 h +9831 m +195 h +31 h +92 h +9832 m +9833 m +10 h +4 h +4 h +196 h +278 h +9834 m +270 h +4 h +124 h +1117 m +278 h +9835 m +1 h +10 h +9836 m +108 h +1 h +1 h +4 h +1 h +11 h +9837 m +79 h +1 h +3 h +4 h +4 h +9838 m +1 h +10 h +4 h +1 h +1 h +1 h +4 h +10 h +4 h +4 h +1 h +57 h +4 h +4 h +1 h +4 h +10 h +9839 m +9840 m +94 h +4 h +83 h +59 h +4 h +114 h +10 h +4 h +9841 m +4 h +3 h +443 h +57 h +4 h +9842 m +1 h +1 h +3303 m +9843 m +1 h +196 h +104 h +9844 m +4 h +4 h +1 h +4 h +10 h +109 h +1 h +258 h +10 h +4 h +9845 m +4 h +10 h +83 h +1 h +10 h +10 h +9846 m +104 h +10 h +1 h +10 h +41 h +11 h +4 h +36 h +4 h +73 h +10 h +109 h +1 h +4 h +4 h +4 h +4 h +10 h +10 h +238 h +9847 m +10 h +10 h +1 h +4 h +57 h +9848 m +1 h +9849 m +10 h +9850 m +9851 m +4 h +10 h +488 h +92 h +10 h +196 h +41 h +9852 m +1 h +9853 m +4 h +1 h +4 h +1 h +399 h +4 h +25 h +10 h +4 h +4 h +4 h +1 h +8 h +1442 m +11 h +939 m +36 h +83 h +273 m +4 h +1 h +1 h +1 h +9854 m +4 h +1 h +1 h +3321 m +48 h +79 h +185 h +125 h +10 h +79 h +757 h +9855 m +173 h +9856 m +4 h +258 h +4 h +359 h +9857 m +4 h +10 h +4 h +10 h +1 h +1 h +4 h +9858 m +9859 m +1 h +82 h +9860 m +143 h +1016 h +9861 m +1 h +9862 m +10 h +9863 m +1835 h +31 h +4 h +9864 m +1 h +10 h +4 h +124 h +4 h +443 h +10 h +9865 m +10 h +10 h +4 h +156 h +238 h +40 h +9866 m +9867 m +4 h +3398 m 
+692 h +4 h +1 h +10 h +11 h +4 h +9868 m +9869 m +9870 m +1 h +31 h +4 h +9871 m +11 h +4 h +4 h +4 h +9872 m +7306 m +1 h +1 h +9873 m +10 h +55 h +185 h +10 h +1 h +10 h +1 h +1 h +4 h +10 h +10 h +196 h +10 h +73 h +4 h +6107 m +12 h +4 h +9874 m +59 h +8571 m +1 h +46 h +1 h +1 h +77 h +4 h +4 h +9875 m +9876 m +83 h +10 h +9877 m +1 h +1 h +10 h +10 h +9878 m +11 h +10 h +297 h +41 h +829 m +1 h +10 h +1 h +10 h +4 h +10 h +4 h +4 h +83 h +9879 m +8472 m +929 m +143 h +538 h +9880 m +1 h +97 h +12 h +4 h +1 h +3 h +278 h +156 h +13 h +10 h +4 h +1 h +4 h +4 h +238 h +64 h +125 h +1070 m +57 h +1 h +4 h +9881 m +4 h +10 h +11 h +10 h +11 h +108 h +1 h +1 h +9882 m +3111 m +3 h +82 h +9883 m +1068 m +1 h +4 h +9884 m +9885 m +869 h +332 h +4 h +10 h +4 h +9886 m +57 h +10 h +4 h +1 h +9887 m +4 h +69 h +1 h +4 h +9888 m +9889 m +2720 m +10 h +1 h +10 h +9890 m +4 h +307 h +4 h +4 h +1 h +83 h +10 h +10 h +4 h +10 h +3847 m +10 h +110 h +9891 m +73 h +4 h +104 h +1 h +274 h +4 h +9892 m +184 h +10 h +1 h +9893 m +4 h +1 h +241 m +1 h +4 h +4 h +109 h +11 h +9894 m +1 h +9895 m +9896 m +9897 m +9898 m +1 h +9899 m +9900 m +9901 m +9902 m +109 h +9903 m +1 h +4 h +10 h +9904 m +10 h +1 h +1083 h +83 h +4 h +124 h +9905 m +36 h +185 h +578 h +12 h +4 h +10 h +4 h +109 h +1 h +4 h +4 h +1 h +9906 m +1418 m +4 h +2928 m +2313 m +139 h +9907 m +10 h +9908 m +9909 m +1 h +11 h +1 h +94 h +195 h +1 h +9910 m +4 h +45 h +4 h +57 h +185 h +4 h +9911 m +119 h +4 h +64 h +1 h +27 h +9912 m +9913 m +4 h +169 h +4 h +10 h +10 h +9914 m +1 h +4 h +4 h +59 h +358 h +4 h +114 h +10 h +97 h +3 h +4 h +9915 m +1 h +4 h +4 h +4 h +4 h +4 h +10 h +8 h +9916 m +9917 m +9918 m +278 h +83 h +64 h +196 h +1 h +9919 m +4 h +9920 m +4 h +9921 m +386 h +4 h +10 h +4 h +7924 m +1 h +4 h +9922 m +4 h +9923 m +4 h +9924 m +1 h +185 h +4 h +1 h +4 h +10 h +77 h +10 h +9925 m +1 h +9926 m +4 h +1 h +11 h +4 h +4 h +79 h +4 h +631 m +4 h +4 h +9927 m +459 m +25 h +687 h +9928 m +82 h +10 h +57 h 
+10 h +31 h +1 h +10 h +4 h +4 h +9929 m +1 h +1 h +1835 h +9930 m +4 h +464 h +1 h +9931 m +10 h +112 h +9932 m +185 h +69 h +11 h +4 h +109 h +2041 m +9933 m +9934 m +108 h +9935 m +156 h +9936 m +1 h +4 h +9937 m +10 h +8193 m +195 h +9938 m +4 h +196 h +9939 m +10 h +1 h +4 h +83 h +9940 m +10 h +1 h +9941 m +767 m +10 h +59 h +1 h +143 h +307 h +1 h +9942 m +2751 m +125 h +9943 m +9944 m +10 h +143 h +4867 m +266 h +4 h +3293 m +9945 m +9946 m +1 h +3534 m +5483 m +4 h +1 h +10 h +2710 m +1 h +1 h +9947 m +10 h +10 h +10 h +11 h +4 h +1 h +4 h +9948 m +9949 m +1 h +9950 m +9951 m +1 h +10 h +1 h +73 h +9952 m +11 h +4 h +4 h +1 h +9953 m +10 h +41 h +146 h +4 h +6869 m +1 h +10 h +1 h +31 h +4 h +1 h +1 h +4 h +1 h +4 h +4 h +4 h +57 h +1 h +935 h +9954 m +1847 m +4 h +9955 m +1 h +9956 m +9957 m +11 h +109 h +4 h +9958 m +45 h +9959 m +1 h +10 h +82 h +143 h +1 h +371 h +10 h +4 h +4 h +4 h +4 h +146 h +1 h +56 h +4 h +1 h +10 h +1 h +258 h +10 h +10 h +1 h +65 h +1 h +9960 m +4 h +1 h +1 h +164 h +9961 m +4 h +1 h +1 h +9962 m +10 h +4 h +4 h +4 h +9963 m +1 h +443 h +31 h +9964 m +11 h +143 h +82 h +9965 m +4 h +25 h +4 h +1 h +9966 m +82 h +8 h +238 h +4 h +4 h +9967 m +9968 m +966 m +279 h +10 h +4 h +4 h +9969 m +471 m +9970 m +9971 m +10 h +4 h +12 h +9972 m +11 h +258 h +172 h +400 m +4 h +3155 m +9973 m +1 h +10 h +9974 m +9975 m +11 h +4 h +3025 h +124 h +10 h +9976 m +13 h +146 h +4 h +1 h +9977 m +57 h +9978 m +4 h +4 h +9 h +11 h +258 h +9979 m +74 h +359 h +55 h +371 h +41 h +1 h +307 h +258 h +9980 m +41 h +27 h +1220 m +10 h +1260 m +92 h +10 h +9981 m +10 h +1 h +113 h +9982 m +10 h +3 h +8070 m +4 h +9983 m +9984 m +10 h +195 h +10 h +9985 m +9986 m +9987 m +9988 m +371 h +4 h +9989 m +4 h +4 h +1 h +83 h +4 h +4 h +4 h +9990 m +31 h +1 h +1 h +9991 m +1 h +9992 m +9993 m +9994 m +4 h +4 h +1 h +59 h +65 h +1 h +4 h +4 h +1 h +10 h +1 h +4 h +10 h +10 h +74 h +124 h +1 h +4 h +4 h +9995 m +9996 m +9997 m +4 h +9998 m +1 h +986 h +59 h +3 h +4 
h +4 h +9999 m +4 h +48 h +3 h +10 h +167 h +1 h +1 h +4 h +4 h +3 h +9956 m +1 h +4 h +10000 m +1374 m +1504 m +4 h +4 h +10001 m +4 h +4 h +1619 h +169 h +10 h +1 h +6022 m +4 h +1 h +1 h +41 h +278 h +265 h +196 h +489 m +57 h +536 h +74 h +8809 m +4 h +10002 m +307 h +4 h +1886 m +4 h +10003 m +3 h +124 h +167 h +10 h +1 h +4 h +278 h +4 h +143 h +10004 m +10005 m +10 h +1 h +1 h +1 h +1 h +10 h +423 m +11 h +40 h +4 h +4 h +10006 m +4 h +4 h +10007 m +4 h +55 h +4 h +10008 m +82 h +10 h +10 h +1 h +4 h +1250 h +278 h +10009 m +10 h +83 h +1 h +3 h +109 h +10010 m +1 h +185 h +270 h +1122 m +1595 m +1 h +4 h +1 h +1 h +1074 h +4 h +1 h +10 h +1 h +125 h +4 h +1 h +4 h +4 h +3303 m +1 h +4 h +4376 m +4 h +4 h +872 m +4 h +4 h +123 h +4 h +1 h +10011 m +10 h +11 h +57 h +1 h +31 h +10012 m +1 h +83 h +4 h +10013 m +10 h +10 h +1 h +4 h +147 h +112 h +109 h +1 h +138 h +1 h +82 h +1074 h +10014 m +143 h +4 h +4 h +74 h +4 h +10015 m +4 h +4 h +4 h +185 h +1 h +10016 m +4 h +4 h +258 h +2788 m +4 h +4 h +297 h +10 h +1 h +104 h +41 h +1309 m +195 h +1 h +10017 m +138 h +1 h +4 h +31 h +10018 m +124 h +5036 m +4 h +10019 m +10 h +10020 m +1 h +1470 h +10 h +1 h +10021 m +1 h +10 h +10 h +10022 m +10023 m +4 h +4 h +7541 m +3 h +1 h +10 h +73 h +1 h +784 m +763 m +196 h +45 h +4 h +125 h +11 h +1 h +1 h +10024 m +10025 m +4 h +4 h +10026 m +285 m +1 h +1 h +1137 h +10027 m +4 h +82 h +10 h +4 h +1 h +10028 m +1309 m +371 h +4 h +1 h +10 h +4 h +135 h +10029 m +10030 m +83 h +258 h +620 m +1 h +123 h +447 h +10 h +4 h +10031 m +10032 m +4 h +1 h +10 h +10 h +4 h +10033 m +1 h +4 h +4 h +692 h +238 h +10 h +10034 m +79 h +1 h +4 h +10035 m +351 m +10 h +986 h +10036 m +10037 m +59 h +10038 m +1 h +10 h +4 h +4 h +186 h +146 h +4 h +10 h +10 h +408 m +25 h +185 h +1261 h +10 h +109 h +10039 m +1 h +4 h +3 h +10040 m +4 h +10 h +1 h +784 m +1 h +59 h +82 h +1835 h +10 h +10041 m +1 h +1 h +10 h +157 h +10 h +10 h +8 h +1 h +45 h +124 h +10 h +10 h +10042 m +10043 m +1 h 
+10 h +986 h +10 h +4 h +1 h +1892 m +10044 m +1 h +10 h +10045 m +4 h +10 h +1893 m +10046 m +10047 m +10 h +1 h +10048 m +900 m +1685 h +1 h +10049 m +4 h +1 h +92 h +4 h +10 h +11 h +4 h +4 h +10050 m +642 m +1 h +1 h +79 h +1638 m +57 h +173 h +125 h +57 h +1 h +1 h +10051 m +1 h +4 h +10052 m +4 h +4 h +10 h +295 h +10053 m +10054 m +196 h +4 h +383 h +82 h +1 h +1 h +4 h +10055 m +4 h +10056 m +1 h +1 h +10 h +10057 m +4 h +41 h +10058 m +1 h +10059 m +82 h +11 h +278 h +4 h +10060 m +1 h +10061 m +10 h +4 h +10062 m +1 h +1785 m +4 h +10063 m +11 h +10064 m +4 h +4 h +109 h +113 h +4 h +4 h +10065 m +4 h +4 h +1838 m +1 h +10066 m +82 h +1 h +10 h +73 h +4 h +3 h +1 h +4 h +1185 m +4 h +4 h +10 h +10 h +10 h +4 h +10 h +172 h +4 h +41 h +4 h +1 h +4 h +10067 m +59 h +4 h +4 h +1 h +2379 h +1 h +1 h +10068 m +114 h +1 h +10069 m +10 h +4 h +10070 m +10071 m +4 h +1 h +10072 m +4 h +4 h +4 h +10 h +146 h +1250 h +4 h +10073 m +31 h +10074 m +1 h +1642 h +36 h +59 h +1 h +6766 m +10 h +6124 m +10 h +4 h +10 h +1 h +1 h +156 h +25 h +4 h +1 h +262 h +4 h +4 h +10075 m +4 h +1261 h +371 h +10076 m +10 h +10 h +4 h +4 h +4 h +10 h +1 h +4 h +4 h +1 h +56 h +11 h +10 h +1 h +10 h +4 h +1 h +10 h +124 h +1 h +10077 m +10 h +1 h +4 h +1 h +1 h +489 m +10078 m +3 h +4 h +10 h +4 h +1 h +82 h +4 h +10 h +10079 m +1632 m +2379 h +2733 h +10080 m +10 h +82 h +1 h +4 h +147 h +36 h +10 h +10081 m +4 h +10082 m +10 h +1260 m +11 h +1027 m +1 h +4 h +4 h +4 h +388 m +1 h +4 h +114 h +82 h +214 m +4 h +10083 m +737 m +1 h +10084 m +10 h +230 h +10085 m +12 h +11 h +10086 m +1 h +1 h +4 h +1 h +1 h +10087 m +2840 m +556 h +4 h +10088 m +1 h +4 h +4 h +82 h +1 h +10 h +4 h +4 h +4 h +10089 m +146 h +123 h +10090 m +10 h +4 h +10091 m +1 h +10092 m +1 h +1 h +4 h +10093 m +268 m +4 h +10094 m +4240 m +59 h +10095 m +4 h +10 h +10096 m +10097 m +11 h +174 h +10 h +229 h +976 h +10098 m +13 h +10 h +4 h +10099 m +1137 h +73 h +25 h +274 h +4 h +1 h +601 h +1 h +1 h +4 h +4 h +770 
m +1 h +125 h +169 h +1 h +10 h +10 h +1 h +45 h +124 h +10100 m +57 h +4 h +4 h +1 h +1 h +10101 m +74 h +10 h +687 h +1 h +91 h +4 h +10102 m +1 h +83 h +10 h +10 h +4 h +1 h +4 h +1 h +10103 m +4 h +2617 m +10104 m +1 h +4 h +92 h +31 h +7870 m +4 h +10 h +4 h +10105 m +10 h +11 h +4 h +4 h +79 h +2303 m +230 h +11 h +10106 m +412 m +10 h +4 h +10 h +10107 m +1 h +10108 m +10 h +11 h +10109 m +3990 m +1 h +185 h +11 h +57 h +109 h +640 h +74 h +238 h +10 h +64 h +4 h +11 h +390 m +124 h +10110 m +4 h +219 m +135 h +4 h +10 h +1 h +65 h +11 h +4 h +1772 h +4 h +10 h +4 h +4 h +1 h +10 h +4 h +4 h +4 h +41 h +4 h +10 h +1 h +10 h +1 h +1 h +10 h +1 h +229 h +4 h +57 h +97 h +338 m +10 h +147 h +10111 m +1 h +4 h +4 h +10112 m +10 h +10113 m +79 h +10 h +3837 m +463 h +4 h +1 h +10114 m +4 h +10115 m +10116 m +4 h +4 h +59 h +1 h +1370 m +11 h +92 h +55 h +4 h +1 h +4 h +4 h +1 h +1 h +1714 h +1 h +4 h +10 h +10 h +10117 m +11 h +1 h +11 h +10 h +11 h +25 h +4 h +10 h +955 m +10118 m +8 h +1 h +692 h +591 m +10119 m +10120 m +4 h +1 h +4 h +5976 m +1 h +8243 m +10121 m +65 h +4 h +4 h +10122 m +4 h +1650 h +10123 m +4 h +10 h +13 h +64 h +125 h +3 h +10124 m +10 h +124 h +1 h +1 h +1 h +10125 m +1 h +10126 m +4 h +4 h +10127 m +65 h +10 h +4 h +135 h +57 h +10 h +31 h +4 h +22 h +124 h +1 h +10 h +10128 m +4 h +10129 m +1 h +79 h +10 h +4 h +10130 m +10131 m +124 h +124 h +11 h +10132 m +4 h +109 h +260 m +1 h +10 h +4 h +1 h +4 h +10133 m +1 h +55 h +295 h +10134 m +4 h +110 h +4 h +11 h +10 h +10135 m +297 h +1 h +10 h +169 h +629 m +10 h +4 h +82 h +1 h +11 h +520 h +1 h +109 h +10 h +10 h +1 h +4 h +4 h +1766 h +1 h +10136 m +4 h +4 h +1 h +4 h +4 h +109 h +933 m +4 h +83 h +10137 m +10 h +10 h +10 h +1 h +181 h +270 h +4 h +4 h +169 h +4 h +97 h +578 h +10138 m +4 h +1 h +4 h +109 h +1766 h +10139 m +55 h +4 h +2851 m +4 h +33 m +10 h +4 h +5863 m +10140 m +4 h +4 h +4 h +10141 m +4 h +172 h +4 h +25 h +4 h +1 h +2041 m +10 h +4 h +10142 m +4 h +1 h +10143 m 
+1 h +10 h +1 h +10 h +4 h +4 h +10 h +124 h +4 h +2374 m +4 h +10 h +1199 m +358 h +11 h +4 h +146 h +4 h +10144 m +74 h +57 h +4 h +10 h +55 h +125 h +4 h +10145 m +10 h +4 h +1 h +82 h +10 h +4 h +10 h +266 h +195 h +10 h +1 h +4 h +196 h +10146 m +10147 m +114 h +1 h +359 h +10 h +11 h +3177 m +4 h +10 h +4 h +1 h +10148 m +4 h +4 h +45 h +1157 m +10 h +10 h +4 h +1083 h +10149 m +169 h +1650 h +123 h +4 h +3 h +82 h +10 h +4 h +229 h +1 h +57 h +4 h +1 h +10 h +119 h +4 h +4 h +10 h +92 h +4 h +1 h +4 h +4 h +10 h +399 h +10150 m +83 h +1 h +307 h +4 h +570 h +124 h +4 h +1 h +1 h +10151 m +1 h +124 h +10152 m +4 h +1 h +10 h +64 h +1 h +73 h +4 h +123 h +10153 m +4 h +4 h +10 h +10154 m +10155 m +70 m +1250 h +10 h +1 h +10156 m +3680 m +1 h +97 h +10157 m +4 h +578 h +1016 h +4 h +13 h +4 h +4 h +1 h +779 h +10158 m +4 h +1 h +1 h +10 h +1 h +185 h +172 h +2475 m +1 h +1 h +10 h +4 h +1105 h +4 h +10159 m +1 h +56 h +1 h +10160 m +10 h +10161 m +41 h +1 h +1861 m +1650 h +10 h +83 h +59 h +4 h +4 h +10162 m +56 h +10 h +4 h +10163 m +10164 m +10165 m +4 h +11 h +8 h +41 h +55 h +4 h +10166 m +332 h +1646 m +10167 m +10168 m +11 h +167 h +10 h +1 h +10169 m +4 h +10 h +10 h +1 h +1070 m +10 h +1 h +4 h +10170 m +192 h +10171 m +10 h +459 m +4 h +10 h +4 h +4 h +1089 m +4 h +10 h +10172 m +4 h +55 h +57 h +1 h +10173 m +124 h +1 h +10 h +10 h +7521 m +388 m +1 h +1 h +170 h +10174 m +1 h +4 h +4 h +25 h +4 h +5567 m +109 h +31 h +11 h +10175 m +4 h +4 h +147 h +10176 m +135 h +4 h +10 h +11 h +10 h +4 h +10177 m +10178 m +1 h +1 h +10179 m +1884 h +10 h +4 h +1 h +12 h +12 h +10180 m +25 h +79 h +31 h +10181 m +10 h +167 h +1 h +4 h +10 h +4 h +4 h +12 h +10 h +229 h +10 h +10182 m +114 h +10 h +94 h +4 h +297 h +3 h +10183 m +10184 m +10185 m +698 m +3622 m +31 h +4 h +4 h +1 h +74 h +10 h +10186 m +4 h +1 h +1 h +104 h +10187 m +1796 h +1 h +23 h +167 h +1 h +10188 m +158 h +4 h +10 h +3 h +10189 m +57 h +28 h +109 h +10 h +61 m +1547 m +590 h +10 h +146 h 
+124 h +10190 m +10191 m +4 h +583 h +10 h +1 h +74 h +158 h +143 h +4 h +1 h +4 h +10 h +45 h +4 h +64 h +4 h +10192 m +313 h +10 h +4 h +4 h +57 h +10193 m +8 h +1 h +10194 m +10 h +10 h +1 h +279 h +4 h +10195 m +4 h +10 h +1 h +4 h +173 h +5863 m +10 h +4 h +1 h +1 h +2475 m +1 h +4 h +1 h +1 h +10196 m +10197 m +9 h +1 h +4 h +2865 m +4 h +4 h +10198 m +1 h +266 h +6851 m +10 h +4 h +1 h +10 h +10 h +258 h +1 h +4 h +10199 m +176 m +10200 m +10 h +185 h +4 h +10 h +1 h +4 h +10201 m +1 h +4 h +10 h +238 h +2760 m +73 h +1 h +2300 m +10202 m +4 h +10203 m +4 h +10204 m +109 h +10205 m +83 h +1 h +1 h +1 h +4 h +10 h +4 h +6586 m +10206 m +144 h +4 h +1 h +1 h +4 h +4 h +4 h +10 h +1 h +11 h +10207 m +4 h +4 h +27 h +4 h +1 h +1 h +11 h +1 h +4 h +4 h +6125 m +4 h +10208 m +13 h +1 h +10 h +4 h +4 h +857 h +1 h +1 h +4 h +1 h +10209 m +358 h +4 h +4 h +4 h +4 h +4 h +1 h +114 h +10210 m +10 h +2041 m +10211 m +10212 m +1 h +10 h +1 h +1 h +10213 m +4 h +4 h +114 h +10 h +74 h +10214 m +10215 m +10216 m +1454 m +4 h +2475 h +4514 m +4 h +11 h +172 h +10217 m +1 h +1 h +1 h +10 h +1 h +10218 m +172 h +10219 m +276 h +1027 m +10 h +10220 m +1 h +10 h +4 h +64 h +10221 m +10222 m +10 h +2788 m +4 h +118 h +10223 m +125 h +10224 m +31 h +11 h +10225 m +2532 m +1 h +4 h +1 h +1 h +1 h +1 h +10 h +1 h +10 h +147 h +10226 m +10227 m +10228 m +256 m +82 h +4 h +4 h +1 h +4 h +4 h +146 h +4 h +258 h +10229 m +10 h +73 h +97 h +10 h +10230 m +25 h +79 h +112 h +4 h +4 h +10231 m +97 h +976 h +10232 m +104 h +97 h +1309 h +13 h +278 h +10233 m +4 h +10234 m +10235 m +10236 m +4 h +4 h +4 h +10237 m +4 h +1 h +4 h +4 h +10238 m +230 h +10239 m +10240 m +4 h +10241 m +4 h +10242 m +146 h +190 h +1016 h +31 h +1 h +11 h +12 h +1 h +10243 m +10 h +10244 m +45 h +92 h +10245 m +82 h +10 h +10 h +447 h +4 h +4 h +7271 m +1 h +4 h +10 h +113 h +83 h +124 h +10 h +238 h +1 h +10246 m +195 h +443 h +10247 m +4 h +196 h +4 h +1766 h +1 h +10 h +73 h +181 h +10248 m +510 m +4 h +1137 
h +25 h +10249 m +10250 m +1471 m +10 h +1 h +12 h +4 h +1 h +4 h +10251 m +10252 m +118 h +10 h +4 h +238 h +10253 m +45 h +1 h +10 h +104 h +10254 m +10255 m +10 h +1 h +4263 m +10256 m +1 h +31 h +1619 h +1725 m +11 h +4 h +1 h +10257 m +41 h +59 h +10 h +10 h +4 h +10258 m +8571 m +4 h +125 h +4 h +266 h +10259 m +1 h +196 h +4 h +57 h +1116 m +10260 m +109 h +10261 m +184 h +10262 m +10263 m +36 h +4 h +11 h +4 h +3 h +10 h +10264 m +57 h +10 h +1 h +10265 m +4 h +1 h +4 h +64 h +10 h +1 h +569 h +1 h +4 h +167 h +108 h +10266 m +1 h +801 m +10267 m +10 h +1 h +1 h +1 h +4 h +31 h +4 h +4356 m +278 h +4 h +10268 m +124 h +4 h +31 h +4 h +4 h +124 h +4 h +10269 m +11 h +119 h +4 h +125 h +10 h +11 h +4 h +1 h +4 h +4 h +10270 m +4 h +57 h +25 h +10 h +10 h +4 h +59 h +10271 m +10 h +10272 m +10273 m +2002 m +2607 m +1685 h +73 h +10 h +10274 m +1047 m +4 h +4 h +4 h +4 h +10275 m +10276 m +10 h +209 m +4 h +1 h +1 h +4 h +83 h +114 h +1 h +72 m +4 h +45 h +10277 m +4 h +10 h +10278 m +5243 m +10 h +1 h +55 h +143 h +4 h +10 h +214 m +10279 m +1 h +238 h +1 h +4 h +10 h +11 h +10 h +1 h +10280 m +10 h +4 h +4 h +59 h +11 h +10281 m +687 h +4 h +10282 m +3 h +56 h +110 h +173 h +56 h +1 h +383 h +82 h +8 h +125 h +10283 m +1 h +11 h +113 h +4 h +4 h +10284 m +10 h +119 h +10 h +1 h +4 h +757 h +2379 h +10285 m +692 h +1 h +10286 m +4 h +3909 m +1 h +4 h +4 h +4 h +1 h +4 h +1 h +10287 m +10 h +4 h +1 h +1 h +10288 m +65 h +10289 m +10290 m +10291 m +1 h +3 h +10292 m +692 h +1620 h +10 h +10293 m +4 h +10 h +10294 m +10295 m +1 h +1 h +4 h +10296 m +31 h +172 h +143 h +123 h +10297 m +10 h +4 h +12 h +4 h +92 h +10298 m +4 h +4608 m +25 h +10299 m +4 h +10300 m +1 h +4 h +1 h +1 h +147 h +36 h +4 h +8 h +4 h +164 h +10301 m +109 h +10 h +186 h +4 h +1 h +10302 m +1 h +10303 m +10304 m +1790 h +1 h +1 h +195 h +4 h +10 h +1 h +10305 m +65 h +4 h +3299 m +1 h +4 h +4 h +114 h +10306 m +4 h +4 h +22 h +4 h +1 h +443 h +59 h +4 h +31 h +1 h +12 h +4 h +4 h +190 h +10 
h +3 h +10307 m +2591 m +10 h +10 h +10308 m +1 h +10309 m +1027 h +14 m +164 h +1 h +97 h +4 h +4 h +61 m +1 h +6438 m +1 h +82 h +57 h +10310 m +4 h +1 h +1 h +4 h +169 h +4 h +601 h +339 m +83 h +1 h +4 h +73 h +65 h +113 h +278 h +4 h +10311 m +4 h +1 h +41 h +4 h +79 h +10312 m +4 h +4 h +4 h +4 h +69 h +4 h +10313 m +10314 m +10315 m +1 h +1 h +556 h +1 h +82 h +4 h +1 h +4 h +31 h +4 h +59 h +4 h +11 h +10 h +1 h +1 h +173 h +61 h +10316 m +124 h +10 h +10317 m +1 h +82 h +4 h +1 h +229 h +304 m +10318 m +10319 m +10 h +25 h +10 h +113 h +10320 m +10 h +139 h +195 h +1 h +4 h +4 h +139 h +125 h +4 h +1790 h +1835 h +4 h +716 m +125 h +3 h +10321 m +4 h +10 h +59 h +5141 m +10322 m +4 h +1 h +4 h +73 h +10323 m +1 h +4 h +1261 h +10 h +1 h +10 h +733 m +10324 m +10 h +10 h +10 h +1 h +1 h +371 h +10325 m +1 h +4 h +10 h +10 h +10 h +4 h +10326 m +4 h +4 h +4 h +4 h +28 h +1 h +4 h +4 h +10 h +3 h +1 h +10 h +4 h +1 h +1 h +31 h +1 h +10327 m +10328 m +4 h +238 h +10329 m +10 h +10330 m +1 h +4 h +1 h +10 h +10 h +10331 m +4 h +59 h +157 h +4 h +1 h +10332 m +10333 m +1 h +4 h +10334 m +45 h +57 h +10 h +4 h +4 h +11 h +900 m +31 h +4 h +1 h +4 h +10335 m +1677 m +399 h +10 h +10 h +1 h +10 h +1374 m +4 h +10336 m +4 h +1 h +41 h +11 h +10 h +10337 m +3 h +4 h +10 h +4 h +1 h +1 h +448 m +10338 m +4 h +10339 m +1 h +13 h +358 h +164 h +146 h +10340 m +1 h +4 h +10341 m +4 h +10342 m +4 h +10 h +64 h +4 h +10343 m +10344 m +125 h +1 h +10 h +4 h +57 h +4 h +4 h +1722 m +4 h +10345 m +9321 m +146 h +1 h +1 h +10346 m +4 h +332 h +109 h +10347 m +10348 m +65 h +10349 m +10350 m +4 h +82 h +10351 m +10 h +55 h +4 h +4 h +4 h +4 h +1 h +10 h +10352 m +10 h +4 h +10 h +10 h +10353 m +10354 m +10 h +10 h +403 h +4 h +4 h +4 h +4905 m +1 h +10355 m +124 h +82 h +45 h +1 h +4 h +10 h +4 h +1 h +1 h +11 h +4 h +4 h +10 h +158 h +27 h +45 h +1790 h +4 h +556 h +31 h +4 h +10 h +10356 m +57 h +368 h +10357 m +10 h +10358 m +10 h +4 h +10359 m +55 h +31 h +1817 m +10360 m 
+11 h +1 h +10 h +10361 m +65 h +109 h +10 h +1 h +1953 m +4 h +125 h +10 h +55 h +195 h +10362 m +10363 m +447 h +4 h +1 h +1 h +94 h +1 h +10 h +1894 m +109 h +1 h +4 h +986 h +1 h +4 h +509 m +4 h +4 h +4 h +10364 m +1 h +279 h +4 h +10365 m +10366 m +10 h +41 h +10367 m +74 h +4 h +10368 m +124 h +114 h +10369 m +3 h +83 h +10370 m +109 h +4 h +10 h +4 h +28 h +170 h +272 m +1 h +4 h +10 h +9860 m +1 h +371 h +10 h +1619 h +1 h +1 h +1 h +109 h +1 h +48 h +10371 m +4 h +581 m +10372 m +1835 h +10373 m +1 h +258 h +10 h +94 h +44 m +10374 m +65 h +464 h +10375 m +104 h +10376 m +1 h +8 h +4 h +170 h +10377 m +10 h +10 h +114 h +4 h +4 h +307 h +1 h +1 h +10 h +10378 m +4 h +1 h +82 h +4 h +65 h +10379 m +10 h +1 h +299 h +4 h +10380 m +27 h +1 h +368 h +307 h +4 h +1 h +4 h +129 h +538 h +1478 h +295 h +10 h +4 h +1 h +276 h +104 h +10381 m +4 h +4 h +12 h +10382 m +10 h +4 h +10 h +2532 m +4 h +4 h +1472 m +10 h +109 h +10383 m +1 h +1 h +10384 m +10 h +2496 m +10 h +258 h +4 h +1 h +279 h +1 h +3435 m +1 h +10385 m +1 h +10386 m +1 h +1 h +97 h +12 h +4 h +109 h +1016 h +4 h +10 h +1 h +170 h +138 h +11 h +10387 m +1 h +4 h +10388 m +11 h +1666 m +10 h +10389 m +65 h +10 h +322 m +4 h +1 h +82 h +4 h +82 h +10 h +1 h +10 h +4 h +10 h +601 h +4 h +10390 m +1 h +468 h +10391 m +1293 m +10392 m +10393 m +4 h +10394 m +10395 m +10396 m +10 h +266 h +4 h +10397 m +83 h +1 h +10398 m +11 h +10 h +56 h +297 h +4 h +4 h +10 h +4 h +1 h +10399 m +1 h +144 h +124 h +1 h +109 h +1563 m +10 h +10400 m +4 h +4 h +10401 m +10 h +10402 m +1 h +4 h +82 h +10403 m +4 h +1 h +10404 m +1769 m +520 h +10405 m +4 h +1308 m +10 h +82 h +11 h +1201 h +10406 m +10407 m +4 h +10408 m +10 h +4 h +1948 m +157 h +1796 h +10 h +125 h +4 h +11 h +295 h +4 h +10 h +5 h +45 h +10 h +10 h +192 h +10409 m +172 h +10410 m +2733 h +104 h +10411 m +31 h +4 h +10 h +97 h +1 h +10 h +64 h +1 h +10412 m +104 h +4 h +5982 m +4 h +10 h +4 h +347 m +4 h +109 h +4 h +1 h +4 h +10413 m +1 h +250 h +10414 
m +4 h +4 h +119 h +4 h +1 h +10415 m +10 h +4 h +57 h +211 h +65 h +1 h +27 h +57 h +1284 m +10416 m +4 h +75 h +10417 m +10418 m +10419 m +10420 m +10421 m +1089 m +10 h +4 h +97 h +157 h +10422 m +10 h +13 h +11 h +56 h +147 h +109 h +10 h +3274 m +4 h +10423 m +10 h +1952 m +1260 h +4 h +10424 m +4 h +13 h +125 h +1 h +1 h +10 h +7938 m +10425 m +10426 m +113 h +10 h +10427 m +10 h +569 h +10 h +108 h +4 h +59 h +10 h +1 h +10428 m +10429 m +10430 m +10431 m +4 h +4 h +10432 m +4 h +1685 h +10433 m +4 h +1 h +2719 m +10434 m +1 h +125 h +4 h +10435 m +10 h +10436 m +1 h +41 h +1 h +358 h +10437 m +3 h +1 h +10438 m +10439 m +4 h +1 h +10440 m +45 h +4 h +1 h +10441 m +4 h +10 h +266 h +4 h +10442 m +94 h +10443 m +41 h +10 h +1 h +9411 m +1 h +224 h +6185 m +1576 m +4 h +10444 m +10445 m +4 h +224 h +10 h +10446 m +4 h +83 h +4 h +4 h +10447 m +1 h +4 h +109 h +59 h +1 h +1 h +4 h +10448 m +10 h +994 m +229 h +146 h +1 h +4 h +976 h +478 m +4 h +57 h +10449 m +10450 m +10 h +10451 m +10452 m +109 h +124 h +4 h +4 h +3622 m +914 m +4 h +4 h +1 h +488 h +2285 m +56 h +4 h +59 h +1 h +1 h +10453 m +4 h +125 h +1 h +10454 m +4 h +4 h +4 h +109 h +10 h +11 h +41 h +464 h +10 h +4 h +4 h +74 h +1027 h +4 h +10 h +109 h +147 h +4 h +185 h +10 h +1403 h +276 h +1 h +4 h +1 h +266 h +8 h +10455 m +31 h +368 h +8 h +10456 m +1 h +557 m +10457 m +1 h +1 h +195 h +10458 m +4 h +181 h +10459 m +4 h +1 h +59 h +4 h +10 h +10460 m +12 h +146 h +10461 m +10462 m +10 h +10463 m +109 h +4 h +10464 m +10 h +4 h +59 h +27 h +4 h +10465 m +4 h +10466 m +23 h +4 h +4 h +3 h +1 h +10 h +203 m +1 h +4 h +4 h +1 h +4 h +74 h +4215 m +10 h +31 h +138 h +10 h +6022 m +10467 m +10468 m +447 h +92 h +195 h +12 h +4 h +1 h +11 h +649 m +10469 m +4 h +2308 m +4 h +10470 m +11 h +45 h +307 h +10471 m +10472 m +4 h +10 h +10473 m +10 h +1685 h +31 h +124 h +4 h +578 h +4 h +2733 h +25 h +10 h +11 h +4 h +65 h +10 h +140 h +1 h +10474 m +4 h +1 h +4 h +55 h +10475 m +4 h +56 h +10476 m +10477 m 
+371 h +4 h +1 h +4 h +10 h +1 h +4 h +10 h +1 h +27 h +33 m +10478 m +10479 m +11 h +10 h +1 h +10 h +10480 m +3680 m +4 h +124 h +10481 m +10482 m +11 h +10 h +1 h +10483 m +10484 m +10485 m +10 h +1 h +4 h +1 h +1016 h +443 h +258 h +1 h +4 h +1 h +10486 m +4 h +10487 m +4 h +1 h +4 h +10488 m +1 h +164 h +1 h +1 h +4 h +10 h +10 h +22 h +45 h +4 h +10 h +1 h +10489 m +8 h +23 h +25 h +4 h +83 h +10490 m +4 h +10491 m +4 h +10492 m +186 h +11 h +10 h +10 h +1 h +94 h +10 h +195 h +4 h +2885 m +4 h +59 h +620 m +10493 m +2928 m +10 h +10494 m +4 h +1 h +1 h +10495 m +10 h +97 h +1 h +319 h +10496 m +59 h +4 h +800 m +229 h +4 h +124 h +307 h +1 h +10497 m +10498 m +104 h +10499 m +11 h +10500 m +4857 m +10501 m +4 h +4 h +1 h +74 h +1 h +4 h +330 h +1884 h +1 h +4 h +173 h +4 h +4 h +10502 m +4 h +1 h +10503 m +10504 m +1 h +83 h +1 h +125 h +1 h +10 h +196 h +1 h +135 h +10 h +10505 m +125 h +92 h +4 h +10 h +319 h +31 h +4 h +10506 m +4 h +1 h +4 h +10507 m +4 h +4 h +185 h +1 h +57 h +59 h +195 h +1 h +8497 m +1 h +4 h +190 h +1 h +4 h +10 h +59 h +10508 m +10509 m +4 h +10 h +10 h +4 h +94 h +10 h +10510 m +10511 m +4 h +10512 m +4 h +1 h +5309 m +4 h +4 h +11 h +276 h +10 h +4 h +57 h +190 h +10513 m +196 h +463 h +10 h +4 h +1 h +4 h +10514 m +1 h +1 h +77 h +1 h +295 h +10515 m +10 h +1 h +4 h +124 h +169 h +10516 m +74 h +4 h +1 h +13 h +1 h +4 h +332 h +4522 m +1 h +167 h +11 h +10517 m +12 h +4 h +169 h +10 h +4 h +41 h +4 h +4 h +4 h +56 h +1 h +10518 m +1 h +1056 m +4 h +1 h +109 h +1 h +10 h +1 h +1 h +10519 m +12 h +4 h +10520 m +289 h +10521 m +4 h +1 h +10522 m +10523 m +4 h +1 h +11 h +10524 m +1 h +4 h +124 h +10525 m +195 h +10526 m +10527 m +11 h +10528 m +4 h +74 h +123 h +109 h +1 h +195 h +1 h +10529 m +164 h +1 h +4 h +10530 m +10531 m +10 h +1 h +1 h +124 h +1 h +10532 m +1 h +82 h +82 h +4 h +258 h +11 h +10533 m +93 h +1 h +41 h +10 h +10534 m +4 h +10 h +219 m +1 h +11 h +146 h +10535 m +10 h +1 h +4 h +10 h +1 h +4 h +41 h +114 h +33 
m +10536 m +4 h +125 h +478 m +10 h +1045 m +135 h +601 h +10537 m +10538 m +447 h +1 h +45 h +4 h +36 h +10539 m +10540 m +7832 m +4 h +104 h +10 h +10 h +11 h +4 h +843 m +236 m +447 h +4 h +4 h +1 h +4 h +4 h +1 h +10541 m +10542 m +11 h +73 h +1 h +1 h +124 h +450 m +3 h +4 h +10543 m +10 h +10 h +57 h +4 h +238 h +10544 m +10545 m +185 h +10546 m +1 h +4 h +258 h +4 h +82 h +48 h +94 h +109 h +10547 m +1 h +10548 m +10 h +119 h +204 h +692 h +1 h +57 h +1 h +1 h +1 h +4 h +190 h +10 h +10 h +4 h +10549 m +1 h +10550 m +25 h +4 h +196 h +4 h +1454 m +10551 m +4 h +10 h +338 m +10552 m +4 h +10 h +265 h +10553 m +56 h +4 h +10554 m +4 h +10555 m +1796 h +1 h +4 h +82 h +11 h +106 h +10556 m +7352 m +10 h +172 h +83 h +10557 m +10 h +4 h +338 m +1 h +3558 m +164 h +104 h +195 h +536 h +1 h +10558 m +4 h +4 h +10559 m +4 h +4 h +104 h +3 h +4 h +10560 m +10561 m +12 h +10 h +1 h +11 h +10562 m +1 h +12 h +109 h +1 h +10563 m +11 h +10 h +10 h +3 h +147 h +69 h +316 m +10564 m +1861 m +4 h +10565 m +282 m +4 h +54 m +10566 m +4 h +1766 h +4 h +4 h +10567 m +1406 h +57 h +11 h +11 h +74 h +31 h +258 h +109 h +10568 m +1 h +1548 m +83 h +986 h +4 h +10569 m +125 h +10570 m +73 h +4 h +1 h +1 h +1201 h +10571 m +10 h +1 h +10572 m +3 h +4 h +119 h +10573 m +10574 m +4 h +10575 m +9 h +10 h +538 h +2961 m +10 h +10 h +139 h +4542 m +10576 m +10577 m +1 h +190 h +1 h +173 h +10578 m +9040 m +1650 h +4 h +4 h +79 h +279 h +1835 h +1 h +1 h +4 h +13 h +10579 m +3 h +10580 m +1 h +10 h +1 h +1 h +1 h +4 h +10 h +4 h +10581 m +8 h +135 h +1 h +433 m +57 h +41 h +10582 m +10583 m +10 h +1 h +4 h +10 h +10 h +184 h +1 h +10584 m +10 h +104 h +4 h +109 h +146 h +97 h +4 h +1 h +10585 m +10586 m +10 h +1 h +1362 m +55 h +11 h +1 h +5 h +1685 h +10 h +10587 m +36 h +135 h +10 h +10 h +10 h +4 h +185 h +57 h +4 h +10588 m +1 h +1535 m +1 h +8133 m +1278 m +91 h +4 h +459 m +4 h +25 h +10589 m +4 h +109 h +10 h +57 h +10590 m +1 h +41 h +82 h +4 h +64 h +4 h +146 h +1 h +8 h +1 h 
+1 h +10 h +1 h +10 h +124 h +1 h +2002 m +4 h +82 h +1 h +4 h +8 h +4 h +1 h +4 h +57 h +7915 m +1027 h +4 h +11 h +4 h +12 h +10 h +10 h +56 h +4 h +4 h +10591 m +10 h +195 h +1732 m +1 h +578 h +169 h +626 h +1 h +1 h +4 h +10592 m +3 h +4 h +10 h +2064 m +10593 m +1 h +82 h +10 h +12 h +4 h +1 h +1 h +10 h +10 h +4 h +109 h +489 h +6197 m +1 h +1 h +10594 m +4 h +4 h +4 h +104 h +1137 h +4 h +10 h +55 h +1 h +4 h +10595 m +10 h +10 h +3 h +11 h +119 h +10596 m +4867 m +97 h +82 h +10597 m +112 h +79 h +59 h +4 h +10598 m +25 h +10 h +196 h +10599 m +4 h +11 h +1953 m +2914 m +976 h +4 h +11 h +31 h +56 h +10600 m +11 h +56 h +4 h +10 h +83 h +57 h +10601 m +1 h +10602 m +59 h +10603 m +10604 m +10605 m +2788 m +123 h +10606 m +1 h +10607 m +10 h +1 h +10608 m +10609 m +146 h +10 h +4 h +57 h +10610 m +1 h +4 h +104 h +4 h +12 h +1 h +3240 m +1 h +1975 m +41 h +10 h +45 h +4 h +1 h +4 h +10 h +4 h +10611 m +195 h +10 h +4 h +10612 m +1250 h +124 h +1 h +10 h +12 h +4 h +805 m +4 h +11 h +10613 m +57 h +1 h +4 h +4 h +1 h +4 h +59 h +10 h +10614 m +4 h +10615 m +4 h +10 h +1 h +1 h +10 h +65 h +297 h +74 h +4 h +6399 m +4 h +10616 m +31 h +1 h +10617 m +1 h +4 h +10618 m +4 h +92 h +41 h +82 h +10619 m +1092 m +1 h +4 h +104 h +4 h +10620 m +10 h +36 h +692 h +10621 m +31 h +172 h +4 h +4 h +124 h +172 h +1 h +11 h +1 h +1 h +4 h +10622 m +10623 m +1 h +10 h +4 h +1 h +10624 m +10 h +4 h +164 h +10625 m +1 h +10626 m +57 h +11 h +1 h +4 h +11 h +10627 m +1 h +10628 m +10 h +59 h +1 h +4 h +1 h +10629 m +4 h +10630 m +74 h +4 h +10631 m +4 h +110 h +1137 h +1089 m +4 h +2887 m +1 h +4 h +4 h +10632 m +31 h +430 m +4 h +1 h +4 h +10 h +157 h +4 h +4 h +10633 m +57 h +10 h +1 h +4 h +11 h +4 h +190 h +4349 m +10634 m +4 h +1 h +10635 m +10 h +10 h +124 h +9757 m +10636 m +4 h +4240 m +83 h +33 h +4692 m +1 h +4 h +1261 h +40 h +1 h +295 h +888 m +10637 m +10 h +55 h +10638 m +1 h +1861 m +10639 m +25 h +10640 m +10 h +60 m +114 h +77 h +45 h +10 h +4 h +10641 m +10 
h +4 h +4 h +11 h +250 h +10 h +10642 m +114 h +4 h +92 h +10643 m +4 h +4 h +10644 m +1 h +1 h +73 h +124 h +4 h +1 h +10645 m +184 h +779 h +10 h +10 h +4 h +10 h +146 h +1 h +10646 m +114 h +10647 m +41 h +4 h +10648 m +581 m +10649 m +10 h +10650 m +56 h +113 h +10651 m +3 h +125 h +10652 m +4 h +10653 m +4 h +10654 m +4 h +1 h +4 h +97 h +4 h +10655 m +83 h +10656 m +11 h +10657 m +4 h +307 h +4 h +4 h +10 h +10 h +10 h +10658 m +1 h +11 h +1 h +10659 m +4 h +10 h +10660 m +64 h +83 h +4 h +295 h +4 h +4 h +92 h +477 m +10 h +10 h +4 h +1 h +4 h +1 h +4 h +4 h +10661 m +4 h +135 h +10662 m +27 h +4 h +10663 m +10664 m +10665 m +4 h +10666 m +1 h +4 h +2788 m +31 h +10667 m +4 h +4 h +1 h +4 h +4 h +146 h +12 h +4 h +478 h +4 h +146 h +10668 m +1024 m +10 h +82 h +10 h +109 h +10669 m +10 h +7128 m +1 h +10670 m +10671 m +1 h +196 h +125 h +1 h +1 h +57 h +11 h +1 h +135 h +83 h +4 h +135 h +10672 m +1136 m +1 h +82 h +692 h +1535 m +1 h +93 h +4 h +820 h +1 h +1 h +10 h +4 h +4 h +10673 m +4 h +4 h +1 h +10674 m +13 h +104 h +82 h +27 h +4 h +113 h +4 h +172 h +10 h +5008 m +4 h +4 h +10 h +4 h +10675 m +1 h +1 h +4 h +196 h +196 h +11 h +1 h +4 h +74 h +169 h +1 h +1 h +367 m +4 h +4 h +10676 m +10677 m +10 h +82 h +1 h +11 h +1 h +25 h +4 h +4 h +1 h +10678 m +4 h +10679 m +4 h +1 h +10 h +6252 m +4 h +488 h +4 h +10 h +10 h +11 h +104 h +1 h +371 h +109 h +10680 m +4 h +1 h +4 h +203 m +10 h +10 h +1 h +4 h +109 h +10 h +3013 m +104 h +262 h +10681 m +10 h +575 m +10682 m +4 h +1 h +1 h +1 h +1 h +10 h +55 h +11 h +4 h +1 h +4 h +109 h +10683 m +10 h +1 h +4 h +1 h +186 h +536 h +10684 m +4 h +4 h +779 h +10685 m +1 h +1 h +4 h +10 h +10 h +10 h +10686 m +223 m +110 h +4 h +10687 m +4 h +83 h +229 h +10688 m +4 h +124 h +1 h +10689 m +157 h +307 h +10690 m +10 h +10691 m +1 h +10 h +10692 m +10 h +173 h +1 h +10693 m +10 h +109 h +1 h +55 h +266 h +184 h +1 h +4 h +10694 m +109 h +4 h +1 h +4 h +1 h +124 h +238 h +140 h +10 h +82 h +1 h +10 h +82 h +10695 m 
+4 h +10696 m +1 h +10697 m +10 h +10698 m +4 h +92 h +4 h +4 h +10 h +4 h +1 h +4 h +109 h +1 h +65 h +1403 h +1027 h +10699 m +10 h +383 h +11 h +4 h +4 h +4 h +10700 m +4 h +1 h +1 h +3048 m +4 h +56 h +10 h +4 h +45 h +94 h +10701 m +1 h +1 h +10702 m +129 h +4 h +10703 m +435 m +156 h +164 h +10704 m +911 m +3 h +10705 m +10 h +1 h +146 h +1 h +181 h +109 h +167 h +124 h +10706 m +10 h +4 h +10707 m +10 h +10 h +4 h +4 h +10 h +1 h +10708 m +125 h +125 h +4 h +10 h +10709 m +113 h +10710 m +195 h +1 h +578 h +1 h +4 h +4 h +10711 m +82 h +4 h +65 h +4 h +10712 m +4 h +59 h +1 h +1 h +167 h +4 h +10713 m +74 h +10714 m +4 h +4 h +79 h +4 h +10715 m +4 h +13 h +4 h +10716 m +4 h +802 m +4292 m +83 h +10717 m +11 h +1 h +4 h +4 h +1 h +1 h +10718 m +22 h +1 h +4 h +3909 m +1 h +10 h +3 h +10 h +10719 m +10720 m +1 h +208 m +1 h +1137 h +443 h +12 h +10 h +10 h +10721 m +10722 m +770 m +1 h +1 h +2038 m +1 h +74 h +1 h +124 h +1 h +10 h +1 h +5866 m +195 h +1768 m +4 h +10723 m +10 h +1 h +4 h +10724 m +1 h +10725 m +10726 m +649 m +10727 m +386 h +4 h +1 h +147 h +4 h +1 h +4 h +10728 m +97 h +1 h +10729 m +4 h +1 h +4 h +1 h +82 h +110 h +10 h +82 h +1 h +4 h +2540 m +1 h +4 h +4 h +196 h +125 h +6855 m +10730 m +327 m +124 h +10 h +536 h +1 h +1 h +1 h +10 h +10731 m +869 h +195 h +10 h +4 h +10732 m +10733 m +1 h +10734 m +3 h +10 h +1309 h +125 h +2172 h +1 h +41 h +1 h +65 h +10735 m +4 h +10736 m +10737 m +4 h +10738 m +1 h +57 h +2418 h +4 h +83 h +10739 m +4 h +74 h +97 h +842 m +1 h +27 h +4 h +1 h +10 h +109 h +45 h +4 h +10 h +1 h +11 h +4 h +10740 m +10 h +10 h +536 h +1564 m +4 h +488 h +4 h +10741 m +4 h +1 h +108 h +4 h +2591 m +10742 m +114 h +79 h +11 h +4 h +79 h +10743 m +10744 m +10745 m +4 h +31 h +10685 m +1 h +10746 m +123 h +73 h +10747 m +3303 m +41 h +4 h +4 h +10748 m +10 h +10 h +83 h +8040 m +10 h +10749 m +1 h +938 m +70 m +10 h +10750 m +1 h +10 h +10 h +10751 m +59 h +64 h +10 h +4 h +48 h +109 h +1 h +10 h +1359 m +10752 m +1 h +4 
h +108 h +10 h +83 h +64 h +31 h +536 h +4 h +4 h +11 h +939 m +146 h +1 h +10 h +1 h +57 h +10753 m +10 h +56 h +195 h +10754 m +229 h +1 h +10755 m +10756 m +109 h +10757 m +10 h +11 h +147 h +3 h +295 h +196 h +4 h +10758 m +1 h +1 h +1 h +4 h +5470 m +10 h +219 m +4 h +1362 m +109 h +1 h +1 h +509 m +4 h +1 h +1 h +4 h +4 h +10759 m +4 h +185 h +4 h +10760 m +124 h +1 h +4 h +7119 m +10761 m +4 h +69 h +82 h +10762 m +4 h +10763 m +1137 h +97 h +27 h +1 h +4 h +10764 m +10765 m +10 h +11 h +10 h +1 h +7047 m +97 h +238 h +1 h +135 h +1 h +10 h +1 h +3088 m +1788 m +10 h +338 h +278 h +371 h +4 h +10766 m +10 h +4 h +536 h +4 h +238 h +203 m +1 h +10 h +57 h +181 h +25 h +10767 m +1 h +2308 m +4 h +4 h +10768 m +10 h +757 h +10 h +10769 m +10770 m +10 h +10 h +4 h +1 h +1 h +11 h +1 h +447 h +10771 m +135 h +4 h +4 h +10772 m +10 h +59 h +190 h +25 h +10773 m +4 h +1 h +1 h +124 h +10 h +11 h +10 h +4 h +4 h +1137 h +4 h +295 h +4 h +10774 m +4 h +10775 m +4 h +123 h +1 h +11 h +11 h +56 h +10 h +4 h +1 h +4 h +135 h +94 h +11 h +8133 m +1027 h +4 h +4 h +276 h +10 h +10 h +1 h +10776 m +10777 m +4 h +4 h +10778 m +10779 m +281 m +4 h +10780 m +1 h +4 h +1 h +11 h +4 h +3799 m +10781 m +10782 m +4 h +10 h +1 h +1 h +4 h +4 h +1 h +172 h +10783 m +104 h +4 h +10 h +5964 m +1 h +13 h +124 h +4 h +10784 m +4 h +55 h +2265 m +412 m +11 h +10 h +4 h +3 h +10785 m +10786 m +10 h +4 h +10 h +11 h +1 h +109 h +25 h +2379 h +265 h +10787 m +8386 m +976 h +10788 m +73 h +368 h +10789 m +10 h +10 h +10790 m +10 h +2594 m +1 h +4 h +10 h +10 h +10791 m +10792 m +10793 m +4 h +10794 m +4 h +1 h +10 h +10 h +1 h +1 h +1 h +10795 m +10796 m +65 h +4 h +10 h +10797 m +1 h +10 h +10798 m +4 h +146 h +4 h +10799 m +986 h +4 h +4 h +2265 m +4 h +4 h +10 h +1 h +10 h +4 h +10 h +10800 m +4 h +123 h +10 h +3 h +10 h +4 h +1137 h +10801 m +41 h +829 h +1 h +911 h +1 h +109 h +4 h +64 h +169 h +4 h +10 h +4 h +1 h +4 h +135 h +10 h +11 h +83 h +779 h +1 h +31 h +1 h +10802 m +10803 m 
+196 h +4 h +371 h +196 h +996 m +1 h +4 h +10804 m +307 h +4 h +1 h +8784 m +4 h +4 h +1 h +10805 m +10 h +3396 m +10806 m +2733 h +757 h +10 h +4 h +10807 m +10808 m +1 h +31 h +167 h +10 h +1 h +10809 m +1 h +1 h +11 h +1 h +1201 h +10810 m +1 h +10811 m +224 h +4 h +1 h +10812 m +10813 m +4 h +10814 m +10 h +10815 m +1 h +1 h +1027 h +11 h +203 h +10816 m +10817 m +57 h +190 h +97 h +104 h +45 h +25 h +41 h +4 h +278 h +1 h +10818 m +190 h +3 h +1 h +4 h +196 h +82 h +4 h +6766 m +1 h +45 h +10 h +10819 m +4 h +10 h +10820 m +4 h +4 h +938 m +10 h +10 h +1 h +1 h +3 h +114 h +10821 m +10822 m +10 h +110 h +4 h +10823 m +82 h +1 h +10824 m +1374 m +1 h +1 h +1886 m +4 h +7253 m +1 h +297 h +447 h +10 h +935 h +1 h +4 h +10825 m +124 h +3845 m +10826 m +10827 m +4 h +181 h +57 h +4033 m +10828 m +10829 m +4 h +1 h +12 h +332 h +10830 m +1 h +10831 m +82 h +399 h +10 h +10 h +1 h +10832 m +10833 m +8 h +10 h +4 h +4 h +109 h +11 h +1 h +911 h +4 h +4 h +138 h +57 h +10834 m +4 h +10 h +4 h +10835 m +4 h +1 h +56 h +4 h +109 h +238 h +10 h +1 h +1 h +112 h +113 h +1 h +4 h +4151 m +8332 m +1 h +12 h +25 h +4 h +11 h +55 h +266 h +4 h +10836 m +10837 m +1 h +10 h +109 h +10838 m +10839 m +1 h +1 h +124 h +82 h +10840 m +10 h +10 h +8332 m +186 h +10 h +10 h +4 h +1 h +1710 m +1 h +1 h +10 h +1016 h +4 h +4 h +4 h +10 h +4 h +4 h +108 h +4 h +10841 m +4 h +297 h +11 h +4 h +11 h +10 h +3161 m +10 h +10 h +4 h +1545 m +10 h +10842 m +109 h +10843 m +353 m +10844 m +57 h +4 h +265 h +4 h +10845 m +4 h +4 h +626 h +10 h +1 h +10846 m +10 h +1766 h +1 h +10 h +10847 m +11 h +10848 m +4 h +4 h +1 h +10849 m +83 h +195 h +11 h +185 h +1 h +147 h +4297 m +10850 m +10 h +4 h +229 h +1 h +10 h +1 h +4 h +4 h +4 h +181 h +10 h +10851 m +4 h +1 h +2442 m +10852 m +55 h +1 h +10 h +11 h +158 h +4 h +10853 m +1 h +10 h +1 h +912 m +10854 m +10855 m +10856 m +5445 m +10 h +4 h +195 h +2261 m +4 h +11 h +4 h +82 h +10857 m +4 h +10 h +10858 m +4668 m +10859 m +10860 m +10 h +184 h 
+11 h +10 h +6808 m +1 h +1410 m +1 h +10 h +4 h +10 h +10 h +1 h +10 h +10 h +170 h +97 h +10861 m +10862 m +1541 m +4 h +83 h +113 h +3 h +700 m +1 h +10863 m +10864 m +258 h +31 h +10865 m +3533 m +11 h +10 h +258 h +10866 m +22 h +1220 m +10867 m +10868 m +170 h +5917 m +11 h +3 h +59 h +109 h +10869 m +59 h +4 h +10870 m +4 h +31 h +1 h +10871 m +10872 m +307 h +1 h +4 h +10 h +4 h +10 h +1 h +3 h +10 h +1 h +270 h +185 h +11 h +4 h +10 h +10 h +1 h +10873 m +10874 m +10875 m +41 h +196 h +169 h +4 h +27 h +10876 m +1 h +4 h +93 h +41 h +10877 m +1 h +31 h +192 h +4 h +75 h +92 h +295 h +31 h +10 h +10 h +4 h +55 h +4 h +28 h +124 h +299 h +1674 m +10878 m +10879 m +1 h +1 h +4 h +10880 m +4 h +10 h +10881 m +10882 m +808 m +4 h +4 h +57 h +59 h +4 h +307 h +4 h +339 m +8386 m +10 h +167 h +10883 m +10884 m +10 h +57 h +10885 m +4 h +443 h +4 h +10 h +1620 h +10886 m +1 h +860 m +1 h +4 h +266 h +83 h +3 h +258 h +109 h +3 h +10 h +10887 m +4 h +25 h +566 m +65 h +10 h +1 h +319 h +265 h +10888 m +74 h +10 h +10889 m +964 m +4 h +4 h +322 m +338 h +4 h +10 h +569 h +1 h +64 h +10 h +692 h +1 h +1 h +11 h +4 h +25 h +1 h +4 h +1 h +4 h +1 h +4 h +10 h +10890 m +10 h +10 h +10891 m +10 h +10892 m +10893 m +1 h +10 h +10894 m +4 h +83 h +10 h +4 h +4 h +4 h +1 h +1 h +4 h +10 h +147 h +4 h +10895 m +806 m +399 h +10896 m +10897 m +3680 m +10898 m +11 h +11 h +1 h +138 h +92 h +1218 m +172 h +10 h +10899 m +4 h +1 h +11 h +4 h +169 h +108 h +83 h +4 h +10900 m +4 h +10 h +10901 m +10902 m +10 h +266 h +10903 m +10904 m +4542 m +83 h +45 h +4 h +1 h +192 h +10905 m +1 h +10906 m +1 h +10907 m +10908 m +10 h +4 h +10 h +10909 m +10 h +10 h +10 h +1 h +10910 m +33 h +10 h +1 h +10911 m +4 h +278 h +506 m +4 h +10912 m +9916 m +10913 m +4 h +74 h +41 h +4 h +74 h +4 h +4 h +10914 m +4 h +11 h +4 h +104 h +479 m +10 h +4 h +4 h +1 h +4 h +10 h +262 h +4 h +4 h +4 h +11 h +124 h +4 h +10 h +10915 m +124 h +10916 m +386 h +4 h +276 h +4 h +1 h +10 h +10 h +57 h +4 h +11 
h +10917 m +4 h +386 h +4 h +140 h +10 h +10918 m +10919 m +74 h +1 h +4 h +4 h +25 h +10920 m +626 h +4 h +1 h +10921 m +158 h +692 h +4 h +4 h +2434 m +82 h +10 h +258 h +1 h +4 h +4 h +4 h +4 h +10922 m +10923 m +938 h +4 h +22 h +4 h +10 h +10 h +4 h +27 h +1 h +1 h +986 h +10924 m +10 h +447 h +10925 m +77 h +22 h +41 h +1 h +10 h +8 h +1 h +3 h +1 h +2265 h +4 h +4 h +195 h +3732 m +4 h +195 h +1 h +1 h +10926 m +1 h +4 h +1 h +4 h +156 h +4 h +1 h +10 h +65 h +2265 h +59 h +10927 m +10 h +10 h +82 h +1 h +10 h +1 h +250 h +97 h +229 h +295 h +10928 m +10929 m +1 h +1 h +1 h +9831 m +10 h +123 h +118 h +10930 m +10 h +82 h +83 h +4 h +83 h +4 h +229 h +1 h +4 h +93 h +10 h +556 h +4 h +1 h +83 h +4 h +1 h +4 h +1 h +10931 m +4 h +278 h +4 h +74 h +1 h +4 h +10 h +14 m +10932 m +4 h +4 h +4 h +4 h +10933 m +10 h +10 h +10 h +10934 m +4 h +2625 h +4 h +4 h +10935 m +4 h +4 h +10 h +10 h +10936 m +4 h +4 h +10 h +4 h +1 h +1 h +3 h +3836 m +113 h +10 h +4 h +4 h +82 h +3 h +83 h +109 h +4 h +1 h +278 h +31 h +59 h +885 m +4 h +114 h +1 h +4 h +1 h +4 h +383 h +1 h +57 h +59 h +447 h +4 h +1 h +10937 m +1 h +10 h +1 h +10938 m +11 h +1 h +4 h +10939 m +4 h +1 h +10940 m +10 h +1 h +10 h +4 h +97 h +10941 m +1 h +195 h +4 h +10 h +1 h +10942 m +10943 m +1 h +1 h +3 h +1 h +1 h +10944 m +2205 m +10945 m +7479 m +4 h +31 h +10 h +1 h +4 h +10 h +10 h +6378 m +11 h +48 h +2172 h +3 h +1 h +4 h +1 h +114 h +1 h +10 h +626 h +1 h +4 h +4 h +11 h +4 h +1508 m +332 h +1 h +10 h +4 h +10 h +1 h +4 h +109 h +170 h +33 h +10946 m +10947 m +4 h +10 h +57 h +10948 m +4 h +1 h +4 h +4 h +10949 m +1796 h +12 h +10950 m +383 h +4 h +1 h +1 h +8305 m +4 h +10951 m +10952 m +435 m +196 h +4 h +1 h +1 h +1 h +10 h +4 h +10 h +808 m +4 h +10953 m +10954 m +4 h +4 h +10 h +10955 m +10956 m +10957 m +4 h +10958 m +929 m +1 h +10959 m +31 h +4 h +10960 m +92 h +10961 m +25 h +4 h +4 h +41 h +12 h +276 h +358 h +10962 m +22 h +4 h +4 h +74 h +4 h +74 h +1 h +11 h +1 h +358 h +57 h +4 h 
+4 h +40 h +10 h +41 h +358 h +10 h +125 h +10963 m +241 m +74 h +10 h +1 h +4 h +386 h +10964 m +1 h +4 h +2374 m +10 h +4 h +10 h +4 h +1 h +10 h +295 h +1 h +146 h +114 h +10965 m +1 h +10 h +295 h +1 h +11 h +10966 m +10967 m +74 h +10968 m +1470 h +1 h +10 h +4 h +4 h +10 h +4 h +10969 m +4 h +4 h +10 h +25 h +4 h +3 h +10 h +4 h +143 h +1470 h +4 h +195 h +601 h +1 h +11 h +10 h +1 h +10970 m +4 h +123 h +4 h +195 h +3026 m +1 h +10971 m +10 h +1 h +169 h +59 h +123 h +25 h +1 h +10 h +10 h +1 h +10972 m +82 h +10 h +124 h +4 h +94 h +4 h +4 h +41 h +164 h +9 h +4 h +12 h +1 h +146 h +1 h +1 h +4 h +10973 m +1 h +83 h +1 h +195 h +1309 h +10974 m +82 h +1499 m +10 h +10975 m +11 h +10 h +1 h +4 h +10976 m +55 h +10977 m +65 h +4 h +10978 m +1 h +10979 m +4 h +36 h +1 h +463 h +1127 m +10980 m +1 h +31 h +104 h +124 h +1 h +4 h +1 h +55 h +10981 m +55 h +3 h +1 h +1 h +10982 m +97 h +1 h +4 h +10 h +297 h +276 h +1 h +10983 m +114 h +114 h +1 h +68 m +4 h +138 h +4 h +10984 m +4 h +10985 m +1 h +4 h +4124 m +10986 m +104 h +2625 h +10 h +10987 m +1 h +4 h +10988 m +10 h +4 h +10 h +1 h +4 h +10989 m +12 h +10 h +1 h +45 h +10990 m +1642 h +1016 h +4 h +10991 m +10 h +900 m +10 h +10992 m +8486 m +10993 m +11 h +4 h +2215 m +4 h +4 h +4 h +4 h +4 h +478 h +1 h +125 h +125 h +5869 m +4 h +4 h +57 h +11 h +4596 m +10 h +1 h +45 h +124 h +1 h +10 h +109 h +167 h +10 h +10994 m +10 h +10995 m +692 h +10996 m +124 h +578 h +10 h +10997 m +10 h +10998 m +10999 m +11000 m +59 h +4 h +4 h +11001 m +10 h +10 h +74 h +11002 m +4 h +10 h +1 h +11003 m +172 h +11004 m +4 h +10 h +10 h +4 h +10 h +313 h +181 h +10 h +1 h +1 h +36 h +4 h +64 h +11005 m +11006 m +4 h +11007 m +172 h +10 h +82 h +11008 m +10 h +113 h +11009 m +11010 m +1 h +83 h +1 h +11 h +1 h +1 h +4 h +1 h +4 h +4 h +4 h +1 h +10 h +1 h +103 m +11011 m +4 h +1083 h +4 h +4 h +4 h +11012 m +1 h +737 m +1 h +57 h +10 h +1 h +928 m +4 h +74 h +4 h +11013 m +383 h +3 h +4 h +119 h +4 h +8179 m +1 h +48 h +1 h 
+74 h +9228 m +4 h +8 h +190 h +1 h +2887 m +82 h +367 m +10 h +11014 m +10 h +4 h +146 h +11015 m +4 h +1 h +82 h +10324 m +520 h +1 h +4 h +1 h +82 h +139 h +1 h +443 h +11016 m +6668 m +10 h +295 h +11017 m +229 h +11018 m +1 h +10 h +11 h +10 h +4 h +307 h +124 h +57 h +196 h +4 h +7913 m +10 h +4 h +1 h +2484 m +4 h +25 h +4 h +3680 m +196 h +7798 m +279 h +10 h +57 h +82 h +11019 m +31 h +358 h +11020 m +4 h +172 h +11021 m +11022 m +4 h +11023 m +4 h +1 h +11024 m +4 h +4 h +1 h +4 h +11025 m +143 h +4 h +4 h +1 h +79 h +3 h +4 h +11026 m +1 h +11027 m +11028 m +3134 m +4 h +4 h +11029 m +31 h +109 h +74 h +1 h +13 h +11030 m +10 h +4 h +124 h +1 h +124 h +4 h +9065 m +4 h +140 h +10 h +10 h +4 h +10 h +10 h +10 h +59 h +185 h +1 h +3707 m +11 h +11031 m +1 h +11032 m +11033 m +11034 m +11035 m +4 h +124 h +4 h +10 h +4 h +1 h +1406 h +4 h +1 h +11036 m +11037 m +135 h +1 h +146 h +10 h +11038 m +4 h +1 h +83 h +11039 m +11 h +4 h +10 h +4 h +77 h +2412 m +3095 m +4 h +11040 m +1 h +569 h +692 h +1 h +571 m +1 h +3 h +1 h +4 h +4 h +124 h +11041 m +11042 m +10 h +1 h +1 h +11043 m +11044 m +31 h +5 h +27 h +1 h +79 h +135 h +11045 m +10 h +4 h +13 h +25 h +11046 m +10 h +10 h +7253 m +1 h +1 h +11047 m +1 h +4 h +118 h +10 h +4 h +4 h +11048 m +4 h +11049 m +11050 m +41 h +4 h +79 h +1 h +11051 m +4 h +4 h +11052 m +73 h +11053 m +1 h +4 h +1 h +11054 m +8 h +170 h +4 h +10 h +65 h +1 h +4 h +1 h +986 h +4 h +4 h +104 h +148 m +1 h +276 h +11055 m +11056 m +5206 m +1 h +11 h +1953 m +11057 m +4 h +1 h +10 h +41 h +11058 m +10 h +10 h +11059 m +4 h +69 h +11060 m +11061 m +4 h +4 h +1 h +11062 m +114 h +976 h +11063 m +1 h +129 h +10 h +4 h +3 h +10 h +31 h +692 h +10 h +1 h +464 h +83 h +10 h +11064 m +109 h +10 h +55 h +10 h +1 h +4 h +1 h +400 m +1 h +10 h +4 h +4 h +4 h +11065 m +11066 m +4 h +83 h +1 h +4 h +1 h +1 h +4 h +64 h +65 h +10 h +11067 m +10 h +10 h +4 h +1 h +1 h +3177 m +274 h +4 h +83 h +4 h +79 h +82 h +4 h +1 h +1 h +11068 m +11069 m 
+11070 m +11071 m +10 h +1 h +4 h +104 h +10 h +1359 m +10 h +8 h +265 h +99 m +4 h +4 h +1 h +4 h +11 h +13 h +11072 m +1 h +1 h +4 h +10 h +11073 m +1 h +10 h +2435 m +11074 m +172 h +83 h +82 h +10 h +40 h +1 h +11075 m +11076 m +4 h +11077 m +1 h +12 h +77 h +4 h +4 h +11078 m +124 h +4 h +10 h +1 h +10 h +10 h +192 h +11079 m +11080 m +4 h +124 h +11081 m +1 h +11082 m +10 h +4 h +1 h +295 h +4 h +601 h +1 h +520 h +4 h +386 h +1 h +11083 m +11084 m +11085 m +4 h +25 h +11086 m +11087 m +425 m +11088 m +1677 m +11089 m +181 h +601 h +4 h +4 h +65 h +464 h +157 h +4 h +11090 m +10 h +11091 m +307 h +1 h +1 h +169 h +1 h +10 h +1 h +1 h +11092 m +1650 h +11093 m +1 h +59 h +464 h +10 h +11094 m +57 h +11095 m +2116 m +109 h +11096 m +1 h +4 h +4 h +22 h +109 h +11097 m +4 h +4 h +79 h +173 h +114 h +11098 m +1137 h +1 h +10 h +4 h +10 h +4 h +730 m +11099 m +10689 m +4 h +4 h +11100 m +11101 m +10 h +11102 m +10 h +1981 m +3177 m +4 h +118 h +1 h +1 h +4 h +11103 m +10 h +4 h +146 h +11 h +4 h +10 h +11 h +10 h +4 h +11104 m +11105 m +4 h +1 h +2733 h +264 m +1 h +4 h +1105 h +4 h +36 h +996 m +158 h +1 h +11106 m +1 h +10 h +1 h +2245 m +12 h +124 h +164 h +83 h +196 h +11107 m +11108 m +8114 m +4 h +11109 m +4 h +1 h +11110 m +307 h +195 h +7535 m +11111 m +91 h +25 h +208 m +92 h +1 h +10 h +11112 m +11113 m +11114 m +4 h +4 h +4 h +196 h +359 h +11115 m +31 h +4 h +65 h +10 h +11116 m +167 h +250 h +124 h +4 h +1 h +4 h +10 h +57 h +10 h +11117 m +4 h +4 h +10 h +55 h +11118 m +4 h +1083 h +11119 m +1 h +10 h +110 h +11120 m +4030 m +11 h +11121 m +143 h +1 h +1309 h +976 h +11122 m +1 h +4 h +4 h +10 h +4 h +1 h +10 h +1 h +11123 m +4 h +3 h +10 h +4 h +331 m +11124 m +64 h +135 h +4 h +11125 m +147 h +5225 m +4 h +1 h +11126 m +1 h +4 h +4 h +11127 m +124 h +295 h +4 h +10 h +11128 m +172 h +4 h +11129 m +4 h +73 h +73 h +1261 h +5046 m +4 h +4 h +11130 m +1 h +1 h +10 h +10 h +11131 m +10 h +11132 m +11133 m +10 h +4 h +1 h +601 h +10 h +11134 m +10 h 
+279 h +4 h +11135 m +109 h +1 h +4 h +146 h +4 h +8035 m +569 h +8767 m +367 m +1 h +4 h +1 h +104 h +10 h +97 h +25 h +10 h +4 h +57 h +1 h +181 h +4 h +56 h +4 h +1 h +11136 m +1261 h +10 h +8571 m +7641 m +181 h +3293 m +109 h +59 h +11137 m +57 h +65 h +11138 m +73 h +10 h +11139 m +1771 m +4 h +4 h +2591 m +11 h +1 h +59 h +11140 m +10 h +4 h +4 h +2887 m +4 h +11141 m +3 h +994 m +11142 m +11143 m +41 h +110 h +4 h +113 h +1 h +4 h +1 h +11144 m +11145 m +4 h +1 h +556 h +11146 m +1 h +4 h +1 h +97 h +10 h +4 h +31 h +109 h +147 h +82 h +83 h +139 h +935 h +11147 m +4 h +10 h +10 h +4 h +1016 h +1 h +1 h +11148 m +4 h +447 h +123 h +1 h +97 h +12 h +10 h +1 h +3 h +338 h +10 h +307 h +1796 h +74 h +4 h +57 h +1 h +4 h +59 h +1 h +1 h +77 h +1 h +4 h +11149 m +82 h +11150 m +5785 m +11151 m +11152 m +104 h +11153 m +25 h +11 h +11154 m +11155 m +74 h +1 h +11156 m +104 h +82 h +258 h +41 h +10 h +10 h +1 h +11157 m +10 h +10 h +4 h +109 h +11158 m +65 h +4 h +59 h +4 h +3 h +1 h +139 h +11159 m +1089 m +10 h +9691 m +1 h +4 h +45 h +4 h +83 h +4 h +11160 m +11161 m +11162 m +190 h +3 h +10 h +1 h +1337 m +10 h +11 h +135 h +28 h +4 h +3 h +1 h +10 h +10 h +31 h +443 h +4 h +1 h +1 h +4 h +11163 m +11164 m +4 h +4 h +10 h +1 h +195 h +4 h +10 h +1 h +22 h +4 h +3 h +146 h +11165 m +256 m +45 h +11166 m +2788 h +1 h +10 h +4 h +779 h +11167 m +1 h +4 h +110 h +1 h +1 h +82 h +2887 h +4 h +4 h +4 h +3455 m +4 h +10 h +1 h +4 h +1 h +10 h +11168 m +3 h +4 h +1 h +10 h +57 h +170 h +10 h +1 h +11 h +10 h +444 m +55 h +1 h +11169 m +11170 m +103 h +4 h +10 h +109 h +11171 m +4 h +11172 m +4 h +94 h +1389 m +4 h +4 h +1 h +4 h +359 h +11173 m +4 h +1 h +10 h +10 h +4 h +79 h +1 h +146 h +10 h +11174 m +10 h +4 h +11175 m +1 h +11176 m +10283 m +11177 m +10 h +4 h +1 h +2840 m +82 h +4 h +4 h +11178 m +10 h +6469 m +10111 m +1 h +1 h +10 h +4 h +4 h +358 h +278 h +10 h +4 h +4 h +10 h +4 h +4 h +1 h +125 h +10 h +4 h +1 h +11 h +1 h +4 h +4 h +11179 m +9372 m +4 h +4 
h +11180 m +4 h +3272 m +1201 h +11181 m +383 h +1 h +10 h +3737 m +1 h +11182 m +11183 m +1 h +208 m +11184 m +1 h +11185 m +297 h +1737 m +10 h +11186 m +11187 m +83 h +4 h +11188 m +4 h +73 h +1 h +10 h +11189 m +1092 m +11190 m +4 h +4 h +11191 m +57 h +10 h +10 h +10 h +1 h +4 h +172 h +4 h +4 h +11192 m +83 h +104 h +1 h +4 h +1691 m +1 h +4 h +10 h +1 h +4 h +1 h +59 h +1482 m +11193 m +1 h +1 h +6139 m +73 h +11194 m +4 h +11 h +11 h +59 h +1 h +1 h +10 h +4 h +11195 m +4 h +2374 m +10 h +10 h +4 h +11196 m +10 h +990 m +1 h +64 h +208 h +536 h +83 h +4 h +5567 m +10 h +11197 m +1 h +11198 m +4 h +1 h +1 h +276 h +1 h +11199 m +11200 m +358 h +56 h +4 h +4 h +185 h +10 h +59 h +11201 m +4 h +57 h +939 m +4 h +10 h +4 h +109 h +996 m +4 h +4 h +109 h +185 h +1 h +4 h +11202 m +23 h +11203 m +10 h +11204 m +55 h +11205 m +5976 m +13 h +59 h +4 h +11 h +276 h +11206 m +10 h +11207 m +4 h +569 h +1 h +11208 m +4 h +11 h +11 h +41 h +10 h +10 h +181 h +64 h +11209 m +11 h +41 h +4 h +10 h +11210 m +4 h +4 h +10 h +196 h +1 h +4 h +3 h +3679 m +2883 m +10 h +1 h +4 h +10 h +10 h +322 m +11211 m +1 h +82 h +1016 h +65 h +31 h +48 h +146 h +11 h +1 h +1 h +11 h +4 h +10 h +4857 m +229 h +11212 m +1 h +4 h +1 h +13 h +11213 m +1 h +10 h +488 h +10 h +1 h +4 h +10 h +4 h +1 h +11214 m +4 h +1 h +1 h +2433 m +11215 m +11216 m +11217 m +11218 m +358 h +10 h +11219 m +10 h +10 h +10 h +83 h +4 h +10 h +1 h +10 h +11220 m +4 h +4 h +11221 m +4 h +157 h +1 h +82 h +104 h +4 h +10 h +11222 m +185 h +4 h +3 h +4 h +4 h +195 h +11223 m +146 h +4 h +13 h +11224 m +4 h +4 h +1868 m +3 h +4 h +1 h +11225 m +4 h +10 h +11226 m +10 h +1 h +10 h +4 h +4 h +1 h +82 h +147 h +4 h +1 h +1 h +31 h +10 h +383 h +4 h +4 h +1 h +118 h +4 h +4 h +1 h +443 h +1 h +447 h +169 h +371 h +4 h +1 h +2733 h +4 h +10 h +11227 m +10 h +4 h +4 h +11228 m +69 h +1 h +1 h +1 h +322 m +1 h +1 h +11229 m +146 h +4 h +11 h +4 h +83 h +2309 m +10 h +147 h +11230 m +11 h +4 h +10 h +41 h +59 h +1 h +10 h 
+4 h +143 h +4 h +4 h +1 h +4 h +9501 m +45 h +1 h +170 h +41 h +138 h +173 h +4 h +1 h +109 h +4 h +4 h +4 h +4 h +114 h +3 h +172 h +12 h +4 h +74 h +10 h +27 h +11231 m +265 h +10 h +109 h +4 h +10 h +1 h +10 h +1 h +10 h +74 h +1 h +1 h +10 h +11232 m +4 h +1253 m +10 h +6855 m +4 h +4 h +57 h +31 h +11233 m +10 h +192 h +125 h +1 h +11234 m +4 h +146 h +10 h +1 h +10 h +170 h +4 h +167 h +4 h +1 h +11 h +1 h +4 h +2846 m +5199 m +11235 m +124 h +11 h +4 h +11236 m +4 h +10 h +4 h +10 h +289 h +11237 m +4 h +4 h +3768 m +11238 m +158 h +119 h +1 h +36 h +4 h +147 h +1 h +338 h +4 h +109 h +83 h +1 h +112 h +11239 m +11240 m +1 h +11241 m +4 h +4 h +4 h +125 h +477 m +109 h +4 h +10 h +1 h +110 h +11242 m +1 h +10 h +10 h +10 h +11243 m +4 h +97 h +82 h +10 h +11 h +13 h +1 h +4 h +435 m +11244 m +1 h +41 h +4 h +181 h +1 h +11245 m +4 h +1 h +4 h +11246 m +1 h +25 h +11247 m +11248 m +1 h +10 h +31 h +2958 m +11249 m +10 h +10 h +11250 m +1 h +4 h +11251 m +10 h +1030 h +125 h +1261 h +1 h +1 h +10 h +40 h +4 h +4 h +2265 h +1330 m +533 h +4 h +1 h +10 h +11252 m +11253 m +1 h +11254 m +57 h +4 h +124 h +1 h +59 h +4292 m +4 h +110 h +1 h +97 h +1 h +11255 m +4 h +59 h +83 h +109 h +10 h +4 h +10 h +1 h +11256 m +1 h +4 h +170 h +4 h +10 h +110 h +4 h +45 h +11257 m +4 h +11258 m +4 h +11259 m +4 h +11 h +1 h +4 h +10 h +4 h +11260 m +1 h +11261 m +403 h +1 h +10 h +41 h +11262 m +1374 m +169 h +11263 m +27 h +45 h +11264 m +583 h +1691 m +4 h +11 h +4 h +10 h +4 h +4 h +11265 m +4 h +69 h +5053 m +11266 m +1 h +1619 h +185 h +1 h +3 h +4 h +11267 m +4 h +258 h +1 h +4 h +1 h +11 h +1 h +4 h +11268 m +11269 m +11270 m +59 h +1 h +11271 m +10 h +4 h +11272 m +4 h +10 h +83 h +11273 m +353 m +124 h +1 h +4 h +11274 m +4 h +94 h +10 h +1 h +1 h +10 h +1880 m +10 h +1 h +1 h +1 h +11275 m +11276 m +1 h +25 h +64 h +1 h +4 h +56 h +1 h +11277 m +4 h +11278 m +4 h +10 h +11279 m +11280 m +147 h +103 h +10 h +83 h +56 h +1 h +10 h +11281 m +11282 m +770 m +5526 m 
+11283 m +129 h +11284 m +10 h +124 h +195 h +1 h +1714 m +10 h +10 h +125 h +169 h +55 h +9139 m +4 h +11285 m +1 h +1 h +11 h +4 h +4 h +1691 h +4 h +630 m +124 h +4 h +10 h +1 h +4 h +4 h +1 h +11286 m +4 h +4 h +1 h +11 h +1 h +97 h +4 h +1 h +3 h +1 h +1454 m +1 h +3 h +10 h +10 h +4 h +4 h +265 h +41 h +11287 m +4 h +10 h +11288 m +55 h +4 h +1 h +1 h +4 h +1137 h +11289 m +4 h +10 h +25 h +4 h +1 h +10 h +114 h +4 h +1 h +1 h +10 h +10 h +11290 m +4 h +1 h +10 h +10 h +109 h +4 h +11291 m +25 h +3141 m +4 h +3 h +10 h +10 h +4 h +1822 m +119 h +12 h +1 h +3 h +82 h +4 h +36 h +73 h +4 h +692 h +1 h +1 h +11292 m +4 h +11293 m +10 h +1 h +4 h +22 h +124 h +11294 m +10 h +11295 m +11296 m +2794 m +64 h +1 h +4 h +4 h +146 h +195 h +10 h +1 h +10 h +109 h +79 h +169 h +1 h +358 h +11297 m +103 h +4 h +10 h +4 h +4 h +1 h +10 h +4 h +11298 m +1 h +10 h +1 h +1 h +4 h +4 h +10 h +82 h +10 h +262 h +11299 m +10 h +10 h +6129 m +114 h +31 h +11300 m +1 h +57 h +10 h +82 h +4 h +4 h +4 h +1 h +4 h +109 h +443 h +11301 m +11 h +4 h +10 h +25 h +1 h +123 h +1 h +74 h +6770 m +10 h +493 m +11 h +11302 m +4 h +4 h +1 h +10 h +4 h +1 h +11303 m +1470 h +4 h +4 h +11304 m +11305 m +4 h +11 h +4 h +4 h +1 h +11306 m +11 h +11307 m +97 h +1 h +10 h +109 h +4 h +1 h +10 h +4 h +11308 m +10 h +11309 m +11310 m +4 h +25 h +1 h +258 h +10 h +4 h +195 h +74 h +536 h +10 h +10 h +801 m +1 h +2002 m +109 h +10 h +11311 m +4714 m +11312 m +4 h +82 h +4 h +1 h +10 h +1 h +1 h +4 h +11313 m +172 h +1 h +109 h +27 h +1 h +1 h +10 h +4 h +4 h +1 h +4 h +109 h +25 h +11 h +1 h +640 m +11314 m +4 h +10 h +1 h +4 h +4 h +11315 m +4 h +109 h +4 h +4 h +11 h +11316 m +1478 h +4 h +1 h +2235 m +59 h +4 h +4 h +1 h +11 h +11317 w +1372 m +11318 m +11319 m +359 h +11320 m +4 h +1089 m +10 h +73 h +1 h +1 h +1 h +59 h +1 h +11321 m +4 h +1 h +10 h +97 h +10 h +1 h +4 h +4 h +843 m +4 h +4 h +4 h +1 h +4 h +1 h +3 h +601 h +4 h +11322 m +447 h +10 h +4 h +4 h +4 h +10 h +11323 m +1 h +11324 m +1 
h +10 h +1 h +123 h +4 h +11325 m +4 h +4 h +10 h +1470 h +3240 m +4 h +11326 m +1 h +10 h +10 h +11327 m +11328 m +4 h +601 h +41 h +4 h +147 h +4 h +13 h +4 h +11329 m +10 h +57 h +258 h +10 h +10 h +208 h +1 h +83 h +4 h +11330 m +11331 m +1 h +11 h +1 h +1 h +359 h +48 h +10 h +4 h +59 h +11332 m +1 h +41 h +1 h +4 h +146 h +4 h +1 h +4 h +4 h +1 h +4338 m +10 h +11333 m +1 h +10 h +10 h +3351 m +31 h +10 h +10 h +4 h +195 h +10 h +10 h +119 h +4 h +10 h +10 h +40 h +4 h +11334 m +25 h +139 h +146 h +97 h +125 h +147 h +1 h +1 h +11335 m +11336 m +10 h +4 h +147 h +11337 m +1 h +103 h +1403 h +10 h +123 h +11 h +11338 m +1 h +4 h +3 h +1 h +11339 m +8879 m +1 h +11340 m +10 h +10 h +11341 m +4 h +74 h +11342 m +1 h +4 h +158 h +4 h +10 h +4 h +4 h +74 h +4 h +4 h +4 h +11343 m +4 h +11344 m +1 h +10 h +272 m +11345 m +4 h +10 h +174 m +4 h +1 h +425 m +1 h +1 h +2205 m +4 h +109 h +10 h +4 h +270 h +1 h +56 h +3 h +11346 m +31 h +109 h +1820 m +1 h +4 h +10 h +1 h +195 h +1 h +10 h +4 h +11347 m +10345 m +1 h +11348 m +1 h +10 h +1 h +11349 m +10 h +258 h +140 h +11350 m +79 h +11351 m +3089 m +11352 m +186 h +1 h +6558 m +4 h +10 h +1 h +4 h +10 h +4 h +1766 h +1 h +11353 m +1 h +109 h +169 h +4 h +11354 m +10 h +97 h +73 h +4 h +1780 m +11355 m +1 h +11 h +3 h +10 h +10 h +4 h +11356 m +11 h +123 h +11357 m +4 h +64 h +10 h +4 h +119 h +4 h +181 h +4 h +3150 m +56 h +1 h +4 h +11358 m +4 h +1722 m +698 m +10 h +11359 m +4 h +443 h +4 h +65 h +6144 m +4 h +10 h +4 h +4 h +13 h +25 h +307 h +4 h +157 h +22 h +1 h +196 h +12 h +371 h +1 h +4 h +1 h +4 h +1 h +4 h +1 h +4 h +11360 m +119 h +36 h +4 h +11361 m +11362 m +4 h +224 h +1 h +109 h +97 h +4 h +10 h +4 h +4 h +12 h +4 h +11363 m +5863 m +4 h +10 h +10 h +1442 m +1 h +10 h +82 h +601 h +1 h +11364 m +11365 m +10 h +1 h +59 h +124 h +10 h +1 h +1 h +698 m +11 h +1 h +4 h +1 h +10 h +11366 m +11367 m +4 h +11 h +1 h +1 h +109 h +10 h +1 h +10 h +1 h +204 h +4 h +11368 m +10 h +4 h +11369 m +4 h +1 h +12 h 
+57 h +4 h +11370 m +4 h +55 h +11371 m +4 h +146 h +1796 h +83 h +10 h +11372 m +478 h +4 h +4 h +4 h +41 h +11 h +266 h +1 h +155 m +1 h +124 h +642 m +92 h +1847 m +11373 m +147 h +1 h +10 h +4 h +104 h +4 h +104 h +763 m +1 h +10 h +1 h +1 h +914 m +11374 m +4 h +7913 m +1 h +4 h +10 h +22 h +4 h +1 h +1 h +10 h +4 h +1 h +10 h +11375 m +4 h +1 h +3 h +4 h +4 h +94 h +4 h +11376 m +124 h +135 h +4 h +7243 m +1 h +368 h +11377 m +1 h +4 h +582 m +1 h +1 h +11378 m +367 h +4 h +698 h +4 h +4 h +4 h +4 h +1 h +10 h +377 m +11379 m +1 h +10 h +4 h +124 h +57 h +10 h +11380 m +1 h +11381 m +4 h +25 h +1 h +1 h +4 h +10 h +4 h +3 h +10 h +9075 m +119 h +1 h +11382 m +1 h +1685 h +1 h +10 h +11383 m +229 h +104 h +1116 m +4 h +114 h +1 h +4 h +10 h +1 h +4 h +185 h +5145 m +4 h +4 h +4 h +4 h +125 h +3877 m +4 h +4 h +59 h +10 h +270 h +11384 m +125 h +1 h +11385 m +172 h +2090 m +9120 m +692 h +4 h +4 h +11386 m +4 h +258 h +1 h +10 h +1 h +4 h +307 h +1 h +4 h +10 h +512 m +4 h +4 h +125 h +4 h +10 h +4 h +1 h +4 h +4 h +11387 m +12 h +4 h +4 h +124 h +4 h +10 h +11388 m +278 h +10 h +1 h +4 h +4 h +10 h +1 h +11389 m +4 h +1 h +10 h +11390 m +4 h +4 h +11 h +3028 m +10 h +10 h +10 h +10 h +11 h +11391 m +3 h +4 h +125 h +1 h +10 h +4 h +11392 m +1 h +11 h +1 h +11393 m +1 h +164 h +4 h +1 h +1 h +1 h +1 h +146 h +4 h +104 h +1 h +11394 m +172 h +4 h +27 h +4 h +82 h +10 h +4 h +1 h +79 h +1 h +4 h +1 h +4 h +10 h +11395 m +10 h +266 h +295 h +1 h +10 h +10 h +1 h +4 h +10 h +1493 m +1027 h +4 h +11 h +65 h +4 h +1822 m +4 h +10 h +1 h +4 h +4 h +4 h +11396 m +167 h +1 h +181 h +114 h +1 h +1 h +11397 m +10 h +11 h +5122 m +4904 m +1 h +4 h +488 h +1 h +1 h +10 h +11398 m +9669 m +156 h +125 h +10 h +4 h +1 h +4 h +3 h +556 h +383 h +1 h +10 h +601 h +1 h +4 h +11399 m +1 h +4 h +10 h +10 h +10 h +11400 m +1403 h +125 h +1 h +48 h +4 h +4 h +1 h +1089 m +4 h +1089 h +4 h +7243 m +41 h +10 h +1 h +4 h +8 h +11401 m +11402 m +11403 m +164 h +256 m +1 h +10 h +4 h +10 
h +626 h +1 h +1 h +278 h +55 h +10 h +4 h +11404 m +31 h +10 h +10 h +4 h +4 h +4 h +9372 m +65 h +10 h +4 h +190 h +4 h +1780 m +1 h +11405 m +11406 m +146 h +4 h +11 h +10 h +55 h +22 h +1 h +1 h +123 h +278 h +4 h +82 h +4 h +11407 m +83 h +1 h +4 h +91 h +4 h +4 h +11408 m +10 h +129 h +11409 m +4 h +104 h +11410 m +4 h +11411 m +70 m +9411 m +10 h +11412 m +2532 m +11413 m +4 h +4 h +2045 m +1 h +11414 m +10 h +1 h +83 h +22 h +10 h +1261 h +4 h +1780 h +4 h +1 h +1 h +11415 m +1 h +11416 m +110 h +4 h +4 h +4 h +4 h +11417 m +4 h +10 h +69 h +1 h +59 h +4 h +1 h +11418 m +83 h +11419 m +642 m +59 h +4 h +25 h +1 h +4 h +83 h +1697 m +10 h +1 h +3680 h +4 h +11420 m +10 h +1 h +36 h +3089 m +79 h +1 h +167 h +10 h +10 h +4 h +10 h +11421 m +124 h +11 h +57 h +109 h +11422 m +11423 m +10 h +10 h +1 h +10 h +11424 m +1 h +124 h +4 h +1053 m +4 h +4 h +1 h +4 h +4 h +488 h +1 h +113 h +57 h +195 h +4 h +1 h +4 h +10 h +1 h +73 h +10 h +1 h +358 h +11425 m +1 h +11426 m +1 h +11427 m +1 h +2887 h +1 h +11428 m +4 h +75 h +4 h +563 m +10 h +1 h +4 h +278 h +65 h +278 h +319 h +22 h +1 h +11429 m +11430 m +1 h +1939 m +10 h +1 h +1 h +1 h +10 h +4 h +4 h +3 h +10 h +11431 m +8212 m +125 h +10 h +1 h +2788 h +4 h +1 h +11432 m +167 h +4 h +4 h +124 h +1 h +626 h +11 h +125 h +4 h +123 h +25 h +125 h +11433 m +4 h +10 h +2508 m +8889 m +1 h +82 h +11434 m +4 h +11435 m +911 h +1 h +4 h +368 h +10 h +65 h +11436 m +4 h +164 h +25 h +4177 m +1 h +11437 m +258 h +4 h +11438 m +4 h +4 h +129 h +4 h +1 h +3396 m +65 h +167 h +3484 m +195 h +1 h +4 h +1 h +10 h +4 h +258 h +3 h +11439 m +11440 m +11441 m +1 h +22 h +9335 m +25 h +4 h +172 h +1038 m +45 h +73 h +170 h +1650 h +578 h +10 h +11442 m +4516 m +1 h +10 h +11443 m +377 m +1619 h +4 h +536 h +10 h +4 h +1646 m +1 h +1 h +10 h +1016 h +4 h +5526 m +82 h +11444 m +94 h +31 h +4 h +185 h +4 h +1184 m +124 h +31 h +11445 m +124 h +4 h +1 h +73 h +11446 m +169 h +4 h +4 h +4 h +11447 m +6129 m +4 h +10 h +4 h +2126 m 
+158 h +11448 m +1 h +11449 m +22 h +4 h +11450 m +11451 m +1642 h +11 h +10 h +4 h +11452 m +1185 m +11453 m +11454 m +11455 m +23 h +118 h +25 h +1595 m +1 h +91 h +1 h +73 h +4 h +4 h +4 h +4 h +82 h +25 h +1 h +10 h +74 h +104 h +1 h +11456 m +83 h +11457 m +109 h +143 h +109 h +108 h +10 h +10 h +4 h +10 h +5 h +4 h +4 h +4 h +56 h +1 h +79 h +4 h +4 h +4 h +10 h +4 h +11458 m +36 h +5869 m +4 h +12 h +1 h +4 h +4 h +359 h +11459 m +11 h +1 h +11460 m +13 h +11461 m +10 h +1 h +11462 m +4 h +4 h +4 h +105 m +11 h +1 h +4 h +164 h +1 h +11463 m +11464 m +11465 m +1564 m +11466 m +48 h +11467 m +1017 m +10 h +11468 m +109 h +433 m +83 h +4 h +124 h +10 h +10 h +11469 m +1 h +1 h +1 h +10 h +262 h +4 h +97 h +1 h +3 h +11 h +119 h +10 h +11470 m +11471 m +10 h +3768 m +10 h +11472 m +4 h +262 h +10 h +10 h +1 h +82 h +1 h +4 h +1 h +10 h +4 h +4 h +4 h +172 h +11473 m +1 h +4 h +11474 m +11475 m +266 h +7395 m +10 h +10 h +10 h +59 h +1 h +1 h +57 h +1261 h +83 h +4 h +1 h +4 h +123 h +3 h +4 h +11476 m +10 h +4 h +1 h +3184 m +11477 m +1 h +4 h +1 h +1 h +1 h +11478 m +224 h +118 h +4 h +91 h +1 h +1 h +109 h +64 h +82 h +146 h +11 h +57 h +4 h +10 h +4 h +11 h +1 h +10 h +1 h +10 h +1 h +4 h +1977 m +4 h +135 h +10 h +11479 m +10 h +10 h +10 h +4 h +10 h +4 h +4 h +11480 m +124 h +1642 h +1 h +1 h +1 h +214 m +4 h +10 h +4127 m +399 h +1 h +10 h +7938 m +11 h +109 h +4 h +11481 m +82 h +1 h +1556 m +4 h +1 h +1754 m +900 m +11482 m +11483 m +4 h +11484 m +25 h +181 h +124 h +1 h +57 h +2423 m +4 h +11485 m +83 h +4 h +1 h +4 h +41 h +1030 h +10 h +4 h +164 h +10 h +4 h +11486 m +1 h +11487 m +10512 m +10 h +11 h +124 h +4 h +36 h +10 h +27 h +11488 m +4 h +1 h +4 h +10 h +4 h +1 h +11489 m +97 h +10 h +146 h +28 h +1 h +146 h +10 h +124 h +4 h +10 h +143 h +57 h +11490 m +6187 m +74 h +181 h +74 h +1 h +4 h +10 h +1 h +83 h +97 h +2128 m +10 h +1403 h +8610 m +1261 h +190 h +164 h +11491 m +4 h +97 h +4 h +31 h +57 h +10 h +4 h +11492 m +1 h +4 h +170 h +433 m 
+1 h +11493 m +11494 m +11495 m +158 h +4 h +11496 m +4 h +4 h +1 h +10 h +4 h +1 h +10 h +1 h +11497 m +11498 m +10 h +11499 m +11500 m +1017 m +289 h +3161 m +10 h +56 h +11501 m +11502 m +1 h +4 h +11 h +82 h +4 h +135 h +4 h +1 h +11503 m +190 h +733 m +65 h +601 h +125 h +97 h +11504 m +4 h +186 h +10 h +1 h +11505 m +241 m +83 h +412 m +10 h +125 h +11506 m +4 h +10 h +59 h +83 h +146 h +4 h +3 h +6461 m +190 h +4 h +59 h +11507 m +190 h +8 h +11 h +9156 m +1 h +11508 m +4 h +11509 m +97 h +10 h +4 h +109 h +1 h +41 h +143 h +11510 m +1 h +11511 m +4 h +987 m +10 h +4 h +65 h +1 h +11512 m +124 h +1 h +11513 m +4 h +10 h +307 h +10 h +4 h +808 h +10 h +11514 m +11515 m +4 h +4 h +135 h +124 h +1 h +10 h +185 h +4 h +332 h +1 h +1 h +1 h +358 h +857 m +1 h +10 h +45 h +147 h +11 h +10 h +4 h +4 h +10 h +11516 m +4 h +447 h +285 m +1 h +10 h +10 h +109 h +1260 h +4 h +4 h +104 h +31 h +11517 m +4 h +164 h +219 m +10 h +11518 m +4 h +109 h +10 h +11519 m +4 h +94 h +10 h +1 h +11 h +11520 m +124 h +196 h +22 h +11521 m +4 h +125 h +164 h +11522 m +11141 m +92 h +10 h +10 h +11523 m +1 h +1 h +1 h +10 h +1 h +13 h +4 h +4 h +10 h +1 h +4 h +11524 m +5017 m +6381 m +4 h +10 h +219 m +11525 m +4 h +1 h +4 h +79 h +195 h +10 h +11526 m +82 h +11527 m +164 h +1 h +82 h +11528 m +125 h +10 h +1 h +10 h +1 h +1 h +11529 m +1 h +10 h +4 h +4 h +4 h +4 h +4 h +11530 m +3 h +4 h +123 h +1 h +4 h +911 h +11531 m +3 h +36 h +10 h +11532 m +229 h +383 h +4 h +11533 m +1 h +4 h +371 h +59 h +10 h +1 h +45 h +10 h +4 h +2442 m +4 h +94 h +4 h +55 h +4 h +11534 m +119 h +10 h +4 h +4 h +1 h +4 h +10 h +4 h +9385 m +11535 m +459 m +11536 m +11537 m +1137 h +4 h +10 h +1 h +10 h +4 h +11538 m +1 h +83 h +11539 m +196 h +1 h +1 h +692 h +11540 m +83 h +11541 m +4 h +172 h +190 h +3558 m +82 h +11 h +1 h +4 h +1 h +1 h +11542 m +11543 m +4 h +10 h +114 h +11544 m +4 h +1 h +4 h +11545 m +11546 m +4 h +82 h +11547 m +4 h +10 h +11548 m +11 h +4 h +4 h +359 h +11 h +104 h +11549 m +1 
h +4 h +4 h +11 h +4 h +10 h +1 h +4 h +60 m +31 h +965 m +1 h +4 h +1016 h +11550 m +11551 m +11552 m +4 h +1281 m +113 h +4 h +10 h +12 h +11553 m +1 h +57 h +4 h +806 m +4 h +82 h +4 h +11554 m +2840 m +4 h +4 h +4 h +31 h +41 h +10 h +10 h +1 h +4 h +10 h +10 h +10 h +82 h +10 h +1185 m +11555 m +10 h +1 h +11556 m +11557 m +1 h +124 h +4 h +11558 m +65 h +158 h +73 h +11559 m +1 h +190 h +119 h +185 h +11 h +11 h +1 h +10 h +11560 m +1 h +1 h +31 h +4 h +10 h +4 h +74 h +11561 m +4 h +4 h +4256 m +11 h +1250 h +359 h +1 h +11562 m +307 h +11563 m +11564 m +4 h +493 m +4 h +10 h +10 h +94 h +4 h +4 h +2537 m +1 h +250 h +4 h +4 h +84 m +11565 m +1 h +1038 m +1 h +11566 m +41 h +219 h +4 h +196 h +11567 m +4 h +10 h +104 h +1 h +195 h +10 h +11568 m +4 h +11569 m +371 h +11570 m +79 h +1 h +55 h +10 h +196 h +11571 m +1403 h +10 h +10 h +4 h +10 h +1105 h +10 h +11572 m +11573 m +11574 m +11575 m +83 h +11576 m +22 h +4 h +4 h +1 h +11485 m +10 h +11577 m +11578 m +11579 m +125 h +4 h +4 h +109 h +1 h +79 h +10 h +4 h +1 h +1 h +11580 m +195 h +11581 m +332 h +1 h +656 m +4 h +4 h +25 h +11582 m +164 h +10 h +56 h +104 h +156 h +4 h +1 h +93 h +4 h +4 h +3025 m +1 h +11583 m +11584 m +1 h +124 h +10 h +1 h +11585 m +11586 m +4 h +167 h +11587 m +10 h +129 h +1 h +1 h +59 h +11588 m +4 h +36 h +1478 h +4 h +138 h +11589 m +11 h +150 m +4 h +4 h +11590 m +5093 m +1 h +229 h +4 h +1 h +11591 m +11592 m +3 h +1 h +10 h +1 h +74 h +4 h +1030 h +11593 m +1 h +4 h +1 h +4735 m +10 h +1016 h +1 h +1016 h +82 h +757 h +1 h +4 h +5475 m +11594 m +11595 m +4 h +1 h +11596 m +4 h +11597 m +57 h +11598 m +11599 m +74 h +10 h +109 h +11600 m +4 h +1 h +10 h +57 h +10 h +11601 m +11602 m +3 h +11603 m +1 h +10 h +1 h +1261 h +10 h +368 h +272 m +25 h +11604 m +31 h +10 h +59 h +97 h +11605 m +11606 m +1 h +11607 m +4 h +4 h +1027 h +11 h +1 h +1 h +1 h +57 h +493 m +1 h +1 h +10 h +110 h +4 h +4 h +11608 m +27 h +4 h +10 h +4 h +11 h +4 h +119 h +22 h +10 h +45 h +1017 h 
+11609 m +4 h +278 h +196 h +74 h +4 h +447 h +857 m +1 h +36 h +1444 m +172 h +11610 m +41 h +4 h +195 h +125 h +557 m +11611 m +4 h +238 m +4 h +82 h +4 h +48 h +55 h +4 h +4 h +11 h +8496 m +57 h +11612 m +10 h +1 h +1 h +1 h +1 h +181 h +10 h +4 h +371 h +11613 m +1678 m +11614 m +11615 m +1 h +10 h +1 h +11616 m +533 h +10 h +11617 m +41 h +11618 m +4 h +1 h +1 h +1 h +11619 m +10 h +83 h +8 h +59 h +1 h +1 h +4 h +4 h +135 h +135 h +4 h +11620 m +1 h +195 h +1 h +506 m +59 h +1 h +238 h +278 h +4 h +57 h +4 h +4 h +11621 m +4 h +272 m +59 h +10 h +700 m +57 h +59 h +4 h +4 h +59 h +4 h +11622 m +1278 m +1 h +1 h +10 h +11623 m +10 h +1 h +5650 m +11624 m +10 h +10 h +104 h +1 h +1 h +1 h +41 h +282 m +11625 m +1 h +4 h +4 h +8332 h +31 h +316 m +4 h +238 h +119 h +82 h +11626 m +238 h +4 h +4 h +11627 m +367 h +1 h +59 h +258 h +10 h +10 h +10 h +119 h +266 h +4 h +11628 m +1 h +258 h +2300 m +4 h +4 h +4 h +4 h +10 h +3 h +11 h +25 h +10 h +4 h +31 h +4 h +158 h +4 h +4 h +11629 m +31 h +64 h +139 h +4 h +57 h +4 h +11630 m +10 h +12 h +31 h +1 h +4 h +258 h +1 h +1 h +4 h +10 h +2251 m +3 h +4 h +10 h +97 h +10 h +15 m +11631 m +82 h +25 h +11632 m +2755 m +1 h +10 h +11633 m +4 h +41 h +110 h +4 h +4 h +1835 m +4 h +10 h +1 h +11 h +11634 m +4 h +1 h +186 h +11635 m +124 h +11636 m +1 h +11637 m +1 h +371 h +4 h +3 h +4 h +41 h +4 h +11638 m +4 h +1016 h +4 h +11639 m +79 h +1 h +11640 m +5387 m +4 h +25 h +412 m +11641 m +1 h +104 h +11642 m +11 h +104 h +1 h +156 h +4 h +295 h +11643 m +11644 m +1 h +11645 m +31 h +278 h +10914 m +82 h +4 h +11646 m +125 h +4297 m +11647 m +83 h +1 h +11648 m +4 h +13 h +11649 m +3473 m +11650 m +4 h +11651 m +1 h +238 h +10 h +4 h +167 h +297 h +1 h +4 h +4 h +146 h +4 h +463 h +1 h +11652 m +11653 m +4 h +11654 m +737 m +10 h +4 h +1893 m +1 h +4 h +1 h +1070 m +10 h +11 h +1822 h +109 h +10 h +297 h +230 m +4 h +1 h +143 h +386 h +569 h +4 h +11655 m +4 h +123 h +3 h +65 h +1 h +4 h +10 h +11656 m +10 h +59 h +1 h 
+124 h +12 h +500 m +1 h +4 h +4 h +138 h +4 h +10 h +11657 m +1 h +1 h +1 h +601 h +4 h +31 h +11 h +1 h +74 h +4 h +8496 m +4 h +1 h +11658 m +4 h +11659 m +64 h +1 h +10 h +97 h +1 h +2925 m +1 h +11660 m +1583 m +25 h +4 h +92 h +4 h +10 h +4 h +11661 m +1 h +10 h +11662 m +4 h +11663 m +1 h +1 h +1 h +1 h +1271 m +2719 m +11664 m +1 h +48 h +11665 m +11666 m +4 h +1074 m +4 h +11667 m +10 h +4 h +11668 m +10 h +371 h +4 h +31 h +640 m +4 h +1 h +10 h +158 h +1 h +4 h +11669 m +1 h +82 h +3 h +4 h +109 h +1 h +10 h +104 h +11670 m +1 h +124 h +4 h +4 h +11671 m +41 h +11672 m +4 h +45 h +4 h +156 h +1 h +11 h +11673 m +181 h +4 h +11674 m +4 h +4 h +11675 m +1 h +1 h +11676 m +371 h +4 h +238 h +1 h +1 h +4 h +1953 m +10 h +4 h +45 h +11 h +1 h +4 h +185 h +10 h +11677 m +11678 m +4 h +139 h +1265 m +4 h +28 h +4 h +10 h +11679 m +4 h +956 m +11680 m +4 h +6784 m +976 h +1 h +6001 m +11681 m +4 h +11682 m +1 h +4 h +1 h +4 h +3 h +4 h +10 h +108 h +11683 m +82 h +31 h +10 h +4 h +1 h +10 h +11 h +59 h +11684 m +4 h +4 h +1764 m +10 h +4 h +109 h +11685 m +11686 m +11687 m +11688 m +41 h +1 h +1 h +1 h +11689 m +4333 m +10 h +266 h +57 h +108 h +124 h +1 h +11690 m +4 h +4 h +808 h +11691 m +11692 m +307 h +1 h +10 h +757 h +172 h +10 h +11693 m +11694 m +4 h +104 h +1 h +119 h +57 h +1 h +196 h +10970 m +1 h +11 h +11695 m +11696 m +92 h +10 h +538 m +10 h +1 h +10 h +83 h +10 h +167 h +4 h +808 h +10 h +4 h +4 h +11697 m +10 h +69 h +48 h +4 h +4 h +11698 m +332 h +278 h +57 h +57 h +2258 m +4 h +41 h +125 h +1 h +4 h +1 h +4 h +11699 m +4 h +172 h +114 h +10 h +11 h +1 h +10 h +1 h +11700 m +1 h +1 h +147 h +10 h +4 h +4 h +195 h +4437 m +10 h +10 h +4 h +1 h +10 h +1 h +1 h +4 h +4 h +45 h +1 h +10 h +10 h +124 h +10 h +10 h +124 h +1 h +1 h +10 h +4 h +5230 m +1 h +10 h +59 h +11701 m +447 h +3 h +939 m +2418 m +4 h +1499 m +1642 h +167 h +10 h +12 h +10 h +79 h +4 h +97 h +59 h +4 h +11702 m +1 h +264 m +371 h +10 h +10 h +4 h +1 h +4 h +10 h +1 h +97 h +4 
h +11427 m +10 h +74 h +125 h +4 h +4 h +4 h +124 h +10 h +65 h +1 h +1030 h +4 h +36 h +31 h +4 h +4 h +2028 m +1 h +1470 h +185 h +4542 m +10 h +10 h +11 h +109 h +965 m +11 h +164 h +11 h +10 h +10 h +282 m +109 h +4 h +4 h +83 h +11703 m +4 h +11704 m +229 h +11705 m +5864 m +1 h +10 h +4 h +65 h +11706 m +4 h +41 h +4 h +4297 m +1 h +4 h +1 h +1 h +11707 m +109 h +4 h +10 h +11708 m +11709 m +11710 m +10 h +278 h +109 h +10 h +4 h +11711 m +4 h +13 h +4 h +73 h +3 h +4 h +57 h +11712 m +11713 m +11714 m +11715 m +11716 m +629 m +4 h +104 h +1 h +1 h +1 h +31 h +1 h +109 h +1 h +4 h +10 h +109 h +3 h +11717 m +4 h +4 h +640 m +4 h +1 h +10 h +109 h +1 h +6776 m +11718 m +1 h +4 h +146 h +11 h +11719 m +13 h +1 h +779 h +4 h +10 h +4 h +4929 m +10 h +4 h +4 h +57 h +1 h +1 h +11720 m +4 h +1 h +11721 m +1 h +307 h +11722 m +56 h +11723 m +11724 m +221 m +4 h +11725 m +1 h +4 h +4 h +1 h +4 h +10 h +169 h +123 h +104 h +146 h +10 h +185 h +11726 m +9282 m +11727 m +1 h +110 h +4 h +2300 m +4 h +11728 m +1 h +11729 m +11730 m +1 h +11731 m +135 h +10 h +1 h +1 h +4 h +2594 m +4 h +4 h +1 h +11732 m +196 h +82 h +11733 m +11734 m +10 h +11735 m +1 h +65 h +1 h +4 h +4 h +22 h +4 h +10 h +10 h +11736 m +1024 m +11737 m +4 h +1 h +11738 m +1 h +11739 m +1 h +4 h +10 h +4 h +1 h +1 h +1 h +195 h +4 h +11740 m +10 h +11741 m +45 h +10 h +1 h +10 h +74 h +27 h +4 h +1 h +4 h +11742 m +4188 m +11743 m +4 h +368 h +4 h +10 h +146 h +4 h +45 h +1189 m +11744 m +1 h +1 h +4 h +1 h +1 h +4 h +1 h +11745 m +11 h +11746 m +31 h +1 h +338 h +11747 m +11748 m +1 h +4 h +1 h +10 h +1027 h +83 h +64 h +4 h +41 h +1 h +1 h +11749 m +1 h +4 h +1 h +4 h +4 h +4 h +1 h +55 h +10 h +4 h +41 h +10 h +36 h +358 h +1 h +4 h +11750 m +10918 m +278 h +6311 m +172 h +1128 m +4 h +307 h +279 h +10 h +4 h +1 h +1886 m +278 h +4 h +195 h +11751 m +82 h +3772 m +11752 m +1 h +8188 m +10 h +1 h +1 h +59 h +358 h +186 h +999 m +83 h +3 h +4 h +11753 m +167 h +4 h +11754 m +1 h +10 h +11755 m +359 
h +11756 m +124 h +10 h +4 h +10 h +10 h +6747 m +4 h +10 h +113 h +10 h +25 h +4 h +8423 m +11757 m +82 h +4 h +11758 m +190 h +1 h +10 h +10 h +4 h +10070 m +1 h +2438 m +31 h +1 h +11759 m +1 h +4 h +4 h +204 h +4 h +11760 m +124 h +4 h +4 h +11761 m +10 h +4 h +11762 m +10 h +10 h +4 h +11763 m +10 h +459 m +1677 m +11 h +262 h +3 h +11764 m +4 h +135 h +4 h +3112 m +4 h +11765 m +195 h +386 h +4 h +124 h +1 h +170 h +10 h +1 h +10 h +108 h +11766 m +10 h +1 h +125 h +11767 m +4 h +203 h +190 h +11768 m +40 h +4 h +4 h +11769 m +4 h +11770 m +10 h +82 h +3 h +73 h +11771 m +12 h +386 h +4 h +1 h +10 h +10 h +536 h +4 h +918 m +73 h +11772 m +10 h +692 h +4 h +4 h +4 h +10 h +1250 h +935 h +939 m +11773 m +11774 m +4 h +4 h +123 h +1 h +4 h +11 h +4728 m +13 h +10 h +104 h +4 h +10 h +4 h +11 h +10 h +4 h +4 h +358 h +447 h +10 h +11775 m +4 h +238 h +802 m +4 h +4 h +1 h +119 h +1027 h +1 h +11776 m +11777 m +2265 h +10 h +10 h +4 h +185 h +4 h +74 h +10 h +265 h +4 h +11778 m +13 h +801 m +74 h +13 h +371 h +266 h +11779 m +11780 m +14 m +11781 m +11782 m +124 h +4 h +82 h +11783 m +459 m +1 h +4 h +4 h +3 h +687 m +59 h +11784 m +464 h +11785 m +10 h +1 h +10 h +1249 m +4 h +4 h +1 h +10 h +1 h +1 h +4 h +1 h +23 h +4 h +10 h +2891 m +1 h +4 h +10070 m +4 h +1 h +1 h +583 h +4 h +10 h +11786 m +8243 m +1 h +73 h +4 h +4 h +4 h +4 h +238 h +4 h +358 h +10 h +4 h +10 h +11787 m +274 h +4 h +4 h +4 h +4 h +57 h +4520 m +10 h +97 h +338 h +82 h +4 h +3558 m +4 h +65 h +6066 m +119 h +425 m +1619 h +4 h +1 h +4 h +4 h +57 h +10 h +4 h +278 h +195 h +4 h +11 h +4 h +11788 m +3 h +139 h +11789 m +4 h +1454 m +11790 m +195 h +104 h +11791 m +31 h +4 h +181 h +10 h +8 h +1 h +25 h +11792 m +11793 m +4 h +82 h +4 h +3216 m +11794 m +11795 m +10 h +4 h +11796 m +11797 m +41 h +1 h +2266 m +10 h +10 h +83 h +8 h +1 h +4 h +1 h +4 h +129 h +10 h +11798 m +11799 m +4 h +11800 m +196 h +4 h +10 h +718 m +4 h +4 h +935 h +4 h +82 h +1 h +11801 m +4 h +11802 m +4 h +4 h +10 h 
+4 h +10 h +1261 h +297 h +11803 m +10 h +4 h +4 h +57 h +36 h +4 h +4 h +4 h +11804 m +330 m +144 m +10 h +10 h +4 h +1 h +11805 m +31 h +289 h +1 h +2920 m +181 h +4 h +1406 h +4 h +92 h +4 h +135 h +897 m +10 h +1 h +4 h +478 h +11806 m +11807 m +195 h +1 h +109 h +104 h +11808 m +4 h +11809 m +1 h +103 h +11810 m +601 h +8497 m +169 h +1 h +10 h +10 h +57 h +4 h +10 h +4 h +4 h +4 h +10 h +4 h +41 h +4 h +11 h +11811 m +1 h +10 h +4 h +4 h +11 h +1 h +4 h +83 h +1 h +4 h +4 h +4 h +1 h +11812 m +1 h +11813 m +41 h +4 h +59 h +114 h +41 h +1 h +1 h +1 h +1 h +11 h +103 h +11814 m +73 h +3 h +4 h +147 h +620 m +10 h +599 m +1 h +11815 m +11816 m +278 h +11817 m +1 h +1018 m +13 h +4 h +332 h +10 h +41 h +4 h +10 h +10 h +118 h +258 h +74 h +1 h +4 h +800 m +1 h +4 h +11818 m +353 m +1 h +10 h +1 h +1 h +31 h +79 h +4 h +97 h +10 h +185 h +3 h +4 h +4 h +147 h +11819 m +11820 m +1 h +11821 m +41 h +4 h +386 h +11822 m +1 h +1 h +11 h +8 h +10 h +265 h +10 h +40 h +4 h +4 h +1 h +11823 m +10 h +3 h +4 h +10 h +1 h +4 h +4 h +3 h +114 h +4 h +8 h +109 h +10 h +4 h +4 h +1478 h +11824 m +10 h +11825 m +1 h +11826 m +10 h +11827 m +8974 m +41 h +10 h +4 h +31 h +169 h +1 h +3188 m +1 h +5562 m +4 h +1137 h +1 h +1 h +146 h +173 h +11828 m +4 h +10 h +11829 m +4 h +1 h +10 h +41 h +97 h +11 h +25 h +138 h +986 h +11830 m +11831 m +358 h +11832 m +31 h +4 h +256 m +4 h +11833 m +478 h +55 h +57 h +10 h +1 h +6613 m +692 h +11834 m +397 m +11835 m +1 h +10 h +435 m +1 h +10 h +10 h +4 h +4 h +1 h +110 h +25 h +1 h +146 h +1030 h +11836 m +11837 m +4 h +124 h +4 h +13 h +297 h +11838 m +11839 m +3 h +258 h +256 m +4 h +10 h +59 h +1 h +11840 m +1952 m +1 h +10 h +4 h +11841 m +1 h +4 h +11842 m +4 h +4 h +11843 m +41 h +10 h +10 h +11 h +1 h +4 h +10 h +10 h +28 h +3 h +4966 m +36 h +11844 m +4 h +1 h +1 h +10 h +11845 m +4 h +169 h +195 h +10 h +11846 m +10 h +1 h +11847 m +190 h +11848 m +11849 m +4 h +1 h +4 h +4 h +1 h +4 h +1 h +11850 m +1 h +4 h +4 h +11851 m +11852 
m +4 h +1650 h +11853 m +4 h +4 h +4 h +999 m +192 h +11854 m +687 h +146 h +11855 m +94 h +11 h +4 h +11856 m +10 h +11857 m +11858 m +536 h +11859 m +11860 m +10 h +11861 m +4 h +11862 m +733 m +1650 h +33 h +4 h +22 h +1 h +10 h +1 h +55 h +4 h +1 h +190 h +157 h +36 h +1 h +82 h +1445 m +10 h +11 h +307 h +11863 m +184 h +4 h +1 h +464 h +1 h +10 h +4 h +4 h +11864 m +3344 m +124 h +11865 m +11866 m +4 h +11867 m +1 h +48 h +10 h +57 h +172 h +4 h +94 h +11868 m +4 h +1 h +266 h +4 h +4 h +11 h +1 h +31 h +1 h +11869 m +4 h +4 h +4 h +1 h +10 h +1 h +4 h +10 h +108 h +10 h +1 h +1 h +11870 m +4 h +11871 m +4 h +124 h +1 h +1 h +11872 m +569 h +11873 m +1 h +4 h +10 h +11874 m +10 h +10 h +1 h +4 h +196 h +55 h +57 h +1 h +10 h +4 h +1 h +1403 h +11875 m +119 h +170 h +11876 m +1 h +4 h +74 h +10 h +11877 m +146 h +4 h +4 h +4 h +57 h +82 h +10 h +172 h +4 h +124 h +1 h +332 h +2585 m +10 h +11878 m +146 h +4 h +4 h +112 h +109 h +1 h +307 h +4 h +11879 m +4 h +4 h +11880 m +83 h +1 h +11881 m +4 h +10 h +74 h +1 h +4 h +11882 m +10 h +1 h +10 h +1 h +4 h +1 h +11883 m +11884 m +4 h +11885 m +59 h +11886 m +41 h +164 h +4 h +4 h +10 h +1 h +1 h +1 h +129 h +1 h +129 h +1817 m +11887 m +10 h +59 h +1 h +104 h +10 h +1 h +5600 m +57 h +14 m +10 h +11888 m +4 h +11889 m +11890 m +4 h +93 h +144 m +4 h +109 h +4 h +124 h +4 h +124 h +174 m +687 h +4 h +412 h +109 h +4 h +4 h +1 h +11891 m +581 m +40 h +82 h +147 h +11892 m +1 h +129 h +4 h +31 h +8 h +114 h +11893 m +4 h +1 h +536 h +11894 m +4 h +2002 m +2865 m +11 h +270 h +11895 m +10 h +1 h +31 h +11896 m +10 h +11897 m +4 h +59 h +139 h +11898 m +195 h +9482 m +1 h +4 h +4 h +10 h +185 h +4 h +11 h +11899 m +10 h +716 m +1 h +31 h +4 h +10 h +279 h +4 h +4 h +57 h +4 h +10 h +4 h +4 h +59 h +1 h +11900 m +4 h +224 h +11901 m +4 h +10 h +82 h +65 h +10 h +4 h +4 h +11902 m +10 h +83 h +10 h +69 h +10 h +10 h +4 h +11903 m +57 h +4 h +91 h +11904 m +1 h +1 h +1 h +1 h +4 h +10 h +4 h +1 h +1 h +295 h +11905 m +1 
h +36 h +4 h +1 h +1 h +104 h +10 h +11906 m +1 h +1 h +1771 m +158 h +11907 m +4 h +11908 m +10 h +4 h +6784 m +196 h +4 h +11909 m +123 h +10 h +4 h +10 h +172 h +196 h +31 h +10 h +1 h +4 h +65 h +11910 m +4 h +11911 m +73 h +11912 m +4 h +4 h +11 h +185 h +10 h +4 h +124 h +114 h +601 h +4 h +10 h +11913 m +147 h +330 m +70 m +173 h +1 h +570 m +1410 m +110 h +10 h +4 h +4 h +1 h +4 h +784 m +4 h +164 h +4 h +185 h +11914 m +169 h +266 h +258 h +119 h +1 h +4 h +10 h +10 h +359 h +124 h +4 h +10 h +11915 m +10 h +1 h +10 h +1 h +15 m +10 h +1 h +27 h +1 h +1 h +1 h +11 h +11916 m +4 h +4849 m +114 h +11917 m +11918 m +11919 m +10 h +538 h +124 h +11920 m +4 h +1 h +110 h +146 h +4 h +1 h +1 h +170 h +3 h +1 h +1 h +11921 m +1 h +10 h +1 h +4 h +113 h +4 h +1 h +1 h +114 h +11922 m +45 h +276 h +10 h +69 h +1470 h +241 m +2623 m +11 h +11923 m +10 h +10 h +11924 m +1 h +11925 m +83 h +1 h +1 h +4 h +10 h +59 h +4 h +4 h +10 h +73 h +11 h +10 h +307 h +1030 h +11926 m +11927 m +25 h +11928 m +11929 m +11930 m +4 h +11931 m +1 h +4 h +11932 m +4 h +10 h +1 h +1 h +4 h +10 h +11933 m +1 h +966 m +10 h +1 h +10 h +10 h +11934 m +4 h +61 m +83 h +93 h +11935 m +31 h +109 h +83 h +11936 m +57 h +4 h +11937 m +11938 m +10 h +104 h +11939 m +11940 m +11941 m +4 h +403 h +11942 m +4 h +1 h +307 h +4 h +10 h +10 h +4 h +3539 m +5505 m +4 h +104 h +4 h +10 h +10 h +31 h +11943 m +10 h +1281 m +11 h +4 h +11944 m +4 h +10 h +109 h +11945 m +4 h +25 h +11 h +1 h +1 h +4 h +11 h +11946 m +11947 m +109 h +10 h +1284 m +10 h +4 h +139 h +10 h +4 h +1 h +11948 m +11949 m +170 h +4 h +11950 m +10 h +4 h +11951 m +11 h +11952 m +143 h +3177 m +97 h +25 h +109 h +11953 m +11954 m +4 h +11955 m +11956 m +4 h +11957 m +4 h +1027 h +1 h +1 h +135 h +4 h +4 h +266 h +4 h +45 h +11958 m +938 h +10 h +196 h +10 h +4 h +11959 m +11960 m +4 h +4 h +11961 m +114 h +11962 m +1710 m +11 h +147 h +1 h +1 h +10 h +274 h +4 h +1 h +11963 m +1 h +1830 m +4 h +11 h +4 h +59 h +10 h +11 h +11964 m 
+1 h +4 h +1 h +4 h +4 h +1 h +4 h +11965 m +11966 m +11967 m +8 h +11968 m +939 h +1 h +82 h +11 h +1 h +1 h +4 h +4 h +190 h +4 h +147 h +10 h +11969 m +10 h +164 h +692 h +11970 m +1470 h +109 h +104 h +59 h +447 h +82 h +10 h +7585 m +11971 m +10 h +4 h +73 h +1 h +4 h +11972 m +4 h +1 h +11973 m +6784 m +4 h +82 h +4 h +31 h +11 h +41 h +124 h +4 h +31 h +11974 m +10 h +319 h +403 h +124 h +11975 m +11976 m +1 h +1 h +1 h +1632 m +11977 m +11978 m +4 h +82 h +1 h +11 h +74 h +4 h +4 h +4 h +11979 m +11980 m +4 h +1 h +264 m +11 h +11981 m +4 h +1 h +575 m +4 h +11982 m +4 h +4 h +4 h +4 h +236 m +4 h +4 h +57 h +10 h +4 h +10 h +10 h +11983 m +4 h +11984 m +1542 m +4 h +1957 m +11 h +11985 m +10 h +10 h +1 h +135 h +506 m +10 h +1 h +11 h +4 h +4 h +307 h +11986 m +82 h +10 h +11987 m +10 h +4 h +4 h +1 h +1 h +1 h +1 h +11988 m +11989 m +10 h +10 h +10 h +10 h +10 h +11990 m +1 h +1 h +57 h +57 h +11991 m +4 h +11 h +1 h +4 h +11992 m +82 h +83 h +4 h +278 h +4 h +11993 m +1 h +11 h +83 h +1 h +11994 m +10 h +11995 m +11996 m +4 h +1 h +170 h +82 h +11997 m +1 h +11998 m +11999 m +12000 m +1 h +10 h +1 h +10 h +124 h +195 h +2096 m +4 h +125 h +12001 m +185 h +74 h +6015 m +65 h +83 h +266 h +444 m +4 h +123 h +12002 m +10 h +12003 m +12004 m +578 h +4 h +10 h +12005 m +12006 m +1 h +10 h +4 h +276 h +190 h +4 h +11 h +12007 m +4 h +12008 m +1 h +11 h +1697 m +4 h +110 h +12009 m +1 h +12010 m +12011 m +4 h +4 h +10 h +4 h +1 h +12012 m +25 h +1 h +4 h +4 h +4 h +358 h +196 h +57 h +4 h +4 h +1 h +459 h +4 h +1 h +190 h +4 h +10 h +1 h +12013 m +274 h +10 h +12014 m +74 h +83 h +4 h +1 h +10 h +4 h +10 h +12015 m +3 h +966 m +1 h +160 m +45 h +190 h +125 h +1 h +12016 m +12017 m +10 h +1 h +12018 m +377 h +11 h +79 h +41 h +1 h +10 h +10 h +12019 m +65 h +2813 m +12020 m +10 h +1 h +12021 m +10 h +12022 m +4 h +1886 m +1 h +65 h +4 h +4 h +1 h +4 h +1 h +12023 m +12024 m +12025 m +114 h +447 h +10 h +10 h +10 h +1 h +83 h +687 h +4 h +1 h +12026 m +434 m +1 h 
+10 h +0 m +12027 m +4 h +10 h +2592 m +4 h +156 h +4 h +4 h +1 h +1 h +12028 m +10 h +4 h +4 h +4 h +12029 m +57 h +4 h +4 h +10 h +12030 m +11 h +1 h +1 h +69 h +4 h +1 h +12 h +169 h +136 m +12031 m +1299 m +4 h +12032 m +1 h +10 h +12033 m +12034 m +1 h +12035 m +124 h +4 h +1 h +4 h +10 h +12036 m +97 h +1 h +12037 m +1 h +12038 m +109 h +4471 m +4 h +12039 m +138 h +5964 m +536 h +4 h +4 h +1 h +4 h +10 h +185 h +59 h +10 h +4 h +4 h +181 h +12040 m +12041 m +10 h +1 h +4 h +4 h +1 h +1 h +601 h +520 m +3 h +12042 m +4 h +1189 m +2733 m +10 h +125 h +41 h +36 h +1 h +1 h +4 h +12043 m +4 h +1 h +4 h +192 h +1 h +135 h +12044 m +12045 m +12046 m +10 h +12047 m +11 h +12048 m +4 h +174 m +1 h +4 h +4 h +11 h +10 h +12049 m +1016 h +358 h +25 h +4 h +1 h +10 h +170 h +10 h +10 h +1137 h +11 h +1 h +10 h +10 h +69 h +4 h +110 h +4 h +4 h +65 h +11 h +12050 m +10 h +1 h +4 h +4 h +11 h +4 h +4 h +1089 h +1 h +12051 m +12052 m +4 h +31 h +12053 m +12054 m +12055 m +12056 m +1 h +11 h +4 h +186 h +10 h +11 h +12057 m +170 h +1016 h +4 h +124 h +196 h +229 h +939 h +1 h +1220 m +1751 m +82 h +124 h +10 h +40 h +129 h +4 h +92 h +12058 m +1 h +1 h +0 m +125 h +10 h +1 h +12059 m +1 h +1 h +4 h +4 h +56 h +113 h +4 h +12060 m +1 h +4 h +477 m +45 h +11 h +4 h +4 h +97 h +332 h +12061 m +12062 m +12063 m +11 h +2540 m +4 h +1 h +12064 m +11 h +1 h +55 h +4 h +45 h +4 h +104 h +911 h +4229 m +36 h +4 h +4 h +4 h +10 h +45 h +1 h +4 h +1 h +4 h +13 h +11 h +447 h +10 h +10330 m +10 h +8 h +1 h +12065 m +55 h +4 h +1266 m +12066 m +110 h +12067 m +55 h +4 h +273 m +12068 m +73 h +1 h +1 h +12069 m +1359 m +10 h +1 h +12070 m +4 h +12071 m +6025 m +109 h +12072 m +1 h +4 h +4 h +4 h +12073 m +4 h +12074 m +74 h +10 h +10 h +264 m +12075 m +12076 m +12077 m +276 h +278 h +4 h +4 h +377 h +2786 m +1 h +12078 m +4 h +12079 m +83 h +285 m +83 h +4 h +4 h +383 h +143 h +10 h +3341 m +12080 m +1 h +4 h +146 h +12081 m +278 h +1 h +181 h +1 h +4 h +124 h +12082 m +1 h +1 h +31 h 
+4 h +12083 m +1 h +11 h +1 h +4 h +4 h +79 h +10 h +12084 m +1017 h +12085 m +1 h +265 h +4 h +12086 m +383 h +59 h +4 h +3321 m +1409 m +12087 m +1 h +13 h +109 h +1 h +4 h +82 h +4 h +4 h +1 h +1 h +1 h +1 h +12088 m +12089 m +82 h +1 h +59 h +4 h +12090 m +1 h +10 h +12091 m +2788 h +1650 h +156 h +629 m +104 h +4 h +109 h +45 h +4 h +6788 m +1 h +167 h +1 h +1 h +2379 m +2116 m +12092 m +12093 m +1 h +10 h +1 h +12094 m +1 h +4 h +794 m +12095 m +12096 m +4744 m +1 h +4 h +10 h +59 h +278 h +1 h +1 h +928 m +11 h +1 h +4 h +4 h +1 h +3 h +12097 m +10 h +954 m +4 h +250 h +10 h +83 h +4 h +190 h +4 h +12098 m +12099 m +1 h +57 h +4 h +10 h +11 h +2665 m +10 h +578 h +4 h +36 h +533 h +12100 m +82 h +1 h +10 h +1574 m +6413 m +1 h +1 h +12101 m +4 h +93 h +12102 m +59 h +4 h +69 h +1 h +4 h +1 h +1250 h +12103 m +12104 m +10 h +2309 m +1 h +4 h +4 h +97 h +12105 m +64 h +41 h +4 h +4 h +1 h +28 h +1 h +1 h +12106 m +59 h +1 h +1 h +430 m +578 h +1 h +4 h +1 h +4 h +10 h +4 h +110 h +4 h +12107 m +1 h +4 h +4 h +4 h +3 h +12108 m +12109 m +10 h +57 h +73 h +10 h +1 h +4 h +4 h +1 h +4 h +12110 m +10 h +4 h +1 h +258 h +4 h +12111 m +25 h +1 h +12112 m +4 h +12113 m +3 h +12114 m +12115 m +3845 m +10 h +10 h +10 h +4 h +307 h +186 h +12116 m +124 h +12117 m +4 h +12118 m +1 h +12119 m +45 h +4106 m +12120 m +10 h +4 h +12121 m +169 h +64 h +124 h +4 h +1 h +4 h +167 h +12122 m +4 h +11 h +1 h +4 h +1 h +12123 m +4 h +10 h +1 h +4 h +12124 m +1 h +10 h +4 h +4 h +10 h +11 h +4 h +27 h +12125 m +4 h +1 h +79 h +10 h +4 h +1 h +8 h +270 h +1 h +41 h +12126 m +172 h +1 h +41 h +12127 m +12128 m +4 h +12129 m +59 h +2733 h +195 h +1359 m +1959 m +12130 m +12131 m +4 h +12132 m +4 h +12133 m +12134 m +12 h +297 h +12135 m +12136 m +1 h +1722 m +10 h +129 h +114 h +4 h +97 h +12137 m +383 h +229 h +10 h +10 h +1 h +6766 m +1 h +1 h +10 h +4 h +4 h +1 h +1 h +4127 m +4 h +12138 m +114 h +11 h +10 h +10 h +173 h +31 h +1 h +11 h +1406 h +10 h +12139 m +1 h +1 h +25 h +10 h 
+25 h +173 h +279 h +45 h +4 h +4 h +6095 m +83 h +224 h +4 h +114 h +1 h +939 h +4 h +4 h +11 h +57 h +4 h +10 h +10 h +10 h +4 h +687 h +146 h +10 h +12140 m +1 h +12141 m +4 h +25 h +1 h +1 h +1 h +170 h +10 h +4 h +31 h +4 h +83 h +12142 m +12143 m +307 h +12144 m +1 h +12145 m +4 h +1 h +4 h +83 h +10 h +4 h +74 h +13 h +4 h +10 h +10 h +1 h +4 h +258 h +4 h +124 h +10 h +1 h +1 h +12146 m +4 h +1 h +12147 m +4 h +770 m +57 h +266 h +4 h +104 h +1 h +4 h +10 h +167 h +57 h +4 h +1 h +1 h +8206 m +1 h +11 h +4 h +4 h +59 h +1 h +10 h +1138 m +83 h +4 h +241 m +12148 m +1 h +1409 m +1016 h +4 h +4 h +4 h +4 h +1 h +4 h +4 h +1 h +12149 m +11 h +12150 m +112 h +4 h +31 h +12151 m +583 h +10 h +12152 m +10 h +4 h +12153 m +1 h +4 h +4 h +10 h +12154 m +4 h +1556 m +135 h +2923 m +22 h +4 h +65 h +4 h +270 h +2928 m +12155 m +1 h +12156 m +4 h +12157 m +158 h +59 h +278 h +31 h +57 h +7649 m +1 h +10 h +59 h +1666 m +109 h +25 h +5125 m +2951 m +1 h +10 h +4 h +64 h +582 m +4 h +4 h +1 h +12158 m +963 m +12159 m +1 h +12160 m +4 h +2794 m +1 h +2205 m +83 h +4 h +1 h +4 h +57 h +1 h +1 h +1 h +4 h +25 h +1454 m +6437 m +10 h +4 h +10 h +10 h +307 h +25 h +94 h +2041 m +1 h +10 h +12161 m +4 h +25 h +4 h +4 h +3668 m +4 h +4 h +1 h +196 h +195 h +65 h +11 h +31 h +12162 m +4 h +10 h +10 h +12163 m +12164 m +1 h +1486 m +2339 m +4 h +4 h +13 h +12165 m +4 h +12166 m +45 h +12167 m +12168 m +4 h +4 h +10 h +10 h +94 h +1 h +289 h +12169 m +10 h +1 h +1 h +124 h +358 h +12170 m +1 h +10 h +97 h +4 h +25 h +1016 h +4 h +4297 h +888 m +124 h +146 h +59 h +4 h +258 h +12171 m +10 h +4 h +4 h +10 h +4 h +480 m +57 h +400 m +4 h +4 h +1 h +4 h +10 h +10 h +444 m +4 h +4 h +520 m +12172 m +10 h +65 h +4 h +4 h +266 h +4 h +10 h +104 h +104 h +4 h +4 h +270 h +82 h +10 h +12173 m +12131 m +147 h +114 h +1 h +10 h +10 h +12174 m +4 h +12175 m +1 h +12176 m +4 h +196 h +143 h +1 h +10 h +11 h +11 h +64 h +1 h +1 h +4 h +4 h +4 h +4 h +4 h +12 h +74 h +1 h +104 h +1796 m +12177 
m +1 h +10 h +1 h +113 h +57 h +4 h +1 h +4 h +4 h +12178 m +1 h +1 h +82 h +4 h +4 h +386 h +12179 m +12180 m +4 h +4 h +4 h +4 h +1 h +1105 h +104 h +12181 m +1 h +10 h +4 h +92 h +4 h +10 h +1 h +1 h +104 h +12182 m +12183 m +4 h +10 h +12184 m +203 h +104 h +10 h +12185 m +4 h +4 h +871 m +12186 m +4 h +65 h +10 h +1 h +147 h +266 h +170 h +11 h +12187 m +124 h +12188 m +10 h +1 h +82 h +4 h +1 h +11 h +12189 m +124 h +1 h +4 h +4 h +25 h +13 h +4 h +4 h +195 h +1 h +64 h +10 h +12190 m +464 h +1 h +31 h +1 h +4 h +1 h +1 h +169 h +4 h +1 h +4 h +447 h +82 h +10 h +73 h +282 h +129 h +12191 m +79 h +4 h +10 h +25 h +1 h +196 h +4 h +12192 m +4 h +8274 m +10 h +10 h +10 h +4 h +12193 m +295 h +1 h +1 h +1 h +12194 m +12195 m +12196 m +10 h +3402 m +125 h +143 h +12197 m +4 h +12 h +10 h +1 h +4 h +12198 m +74 h +4849 m +10 h +4 h +12199 m +12200 m +31 h +4 h +10 h +12201 m +1 h +139 h +4 h +11 h +12202 m +4 h +1 h +12203 m +4 h +4 h +83 h +1261 h +119 h +83 h +4 h +4 h +1 h +4 h +463 h +4 h +118 h +4 h +41 h +4 h +12 h +12204 m +1 h +65 h +1 h +4 h +3 h +1 h +1 h +10 h +3702 m +12205 m +12206 m +4 h +57 h +22 h +4 h +12207 m +12208 m +1 h +1 h +1 h +4 h +4 h +4 h +12209 m +3 h +10 h +1 h +12210 m +10 h +4 h +12211 m +55 h +12212 m +1 h +170 h +156 h +1 h +4 h +4 h +12213 m +1 h +12214 m +4 h +10 h +1 h +10 h +4 h +12215 m +4 h +4 h +11 h +221 m +4 h +196 h +1137 h +4 h +327 m +10 h +12216 m +12217 m +11 h +10 h +4 h +10 h +4 h +10 h +10 h +10 h +56 h +12218 m +1 h +4 h +184 h +1 h +986 h +12219 m +12220 m +12221 m +4 h +73 h +73 h +124 h +10 h +10 h +57 h +170 h +12222 m +55 h +12223 m +4 h +196 h +57 h +12224 m +12225 m +10 h +4 h +1 h +4 h +12226 m +4 h +109 h +276 h +146 h +74 h +575 m +104 h +1 h +12227 m +11 h +4 h +1 h +12228 m +12229 m +10 h +388 m +12230 m +12231 m +82 h +4 h +4 h +185 h +1 h +185 h +1828 m +1 h +4 h +12232 m +10 h +10 h +4 h +1 h +278 h +4 h +12233 m +4 h +12234 m +12235 m +1 h +10 h +4 h +12236 m +112 h +1 h +12237 m +169 h +4 h +1 h 
+1835 m +74 h +1 h +1 h +10 h +4 h +10 h +4 h +82 h +10 h +10 h +8608 m +12238 m +4 h +12239 m +10659 m +4 h +4 h +12240 m +10 h +3278 m +12241 m +2002 m +3 h +82 h +55 h +4 h +1 h +4 h +25 h +124 h +57 h +172 h +10 h +12242 m +186 h +195 h +10 h +10 h +986 h +82 h +1403 h +4 h +10 h +31 h +1 h +57 h +1 h +12243 m +1 h +1822 h +12244 m +4 h +12245 m +10 h +10 h +83 h +12246 m +82 h +4 h +124 h +4 h +10 h +114 h +12 h +31 h +12247 m +109 h +692 h +4 h +8854 m +1137 h +238 h +12248 m +12249 m +4 h +41 h +31 h +31 h +12250 m +4 h +12251 m +10 h +332 h +13 h +57 h +1 h +109 h +4 h +36 h +12252 m +83 h +4 h +10 h +12253 m +4 h +447 h +12254 m +195 h +4 h +82 h +1 h +1 h +12255 m +10 h +4 h +10 h +57 h +4 h +10 h +4 h +4 h +1 h +10 h +12256 m +110 h +10 h +250 h +12257 m +82 h +4 h +10 h +10 h +258 h +73 h +25 h +59 h +65 h +1470 h +25 h +10 h +12258 m +1379 m +41 h +31 h +1 h +1 h +12259 m +31 h +4 h +12260 m +3 h +4 h +4 h +279 h +196 h +181 h +1 h +12261 m +4 h +266 h +12262 m +10 h +4 h +12263 m +146 h +4 h +25 h +278 h +12264 m +4 h +12265 m +12266 m +12267 m +4 h +12268 m +10 h +10 h +12269 m +12270 m +1 h +1 h +12271 m +104 h +4 h +2923 m +1 h +1 h +11 h +1 h +12272 m +8556 m +4 h +10 h +990 m +1 h +4 h +83 h +4 h +8643 m +536 h +4 h +143 h +12273 m +1 h +57 h +912 m +1 h +4 h +3 h +4 h +11 h +4 h +12274 m +4 h +74 h +386 h +10 h +538 h +91 h +1027 h +40 h +1074 h +1 h +10 h +1 h +4 h +12275 m +4 h +12276 m +4 h +1 h +1 h +4 h +173 h +124 h +124 h +4 h +10 h +4 h +109 h +74 h +1299 m +12277 m +4 h +146 h +1 h +4 h +12278 m +8104 m +4 h +139 h +1 h +1 h +7306 m +12279 m +297 h +135 h +10 h +12280 m +12281 m +1 h +4 h +1201 m +1 h +10 h +11 h +4815 m +10 h +10 h +196 h +10 h +1 h +83 h +4 h +12282 m +12283 m +10 h +11 h +4 h +147 h +1 h +1 h +4 h +2719 m +12284 m +157 h +1 h +11 h +4 h +4 h +4 h +1 h +79 h +1201 h +12285 m +12 h +12286 m +1 h +4 h +12287 m +4 h +1 h +12288 m +4 h +12289 m +10 h +4 h +4 h +12290 m +10 h +1 h +4 h +10745 m +1 h +1 h +1 h +4 h +1698 m 
+1 h +74 h +11 h +1 h +4 h +4 h +425 m +10 h +10 h +4 h +12291 m +4 h +1 h +12292 m +270 h +10 h +3 h +12293 m +10 h +4 h +12294 m +12295 m +1453 m +4 h +4 h +4 h +1953 m +12296 m +83 h +1 h +488 h +757 h +170 h +4 h +1 h +443 h +12297 m +12298 m +2374 m +1504 m +74 h +1 h +13 h +10 h +144 h +10 h +147 h +399 h +4 h +10 h +12299 m +1 h +4 h +1 h +6503 m +10 h +10 h +82 h +10 h +4 h +1089 h +124 h +538 h +12300 m +1 h +110 h +12301 m +12302 m +4 h +73 h +238 h +4 h +36 h +4 h +185 h +4 h +12303 m +11 h +12304 m +12305 m +730 m +1 h +56 h +10 h +12306 m +578 h +6129 m +12307 m +4 h +124 h +12308 m +12309 m +10 h +4 h +1 h +12310 m +12311 m +110 h +12312 m +13 h +1 h +4 h +1 h +297 h +1 h +1 h +12313 m +11 h +12314 m +4 h +4 h +1 h +12194 m +10 h +10 h +4 h +4701 m +4 h +94 h +12 h +10 h +4 h +4 h +10 h +83 h +13 h +41 h +10 h +10 h +250 h +12315 m +11 h +4 h +12316 m +1 h +135 h +12317 m +1 h +41 h +12318 m +1 h +1 h +4 h +238 h +109 h +10 h +4 h +12319 m +10 h +1 h +109 h +12320 m +12 h +48 h +12321 m +4 h +10 h +83 h +4 h +12322 m +14 h +57 h +124 h +22 h +146 h +12323 m +57 h +4 h +12324 m +1 h +83 h +6457 m +1 h +4 h +1 h +164 h +146 h +10 h +616 m +4 h +97 h +12325 m +4 h +4 h +10 h +4 h +1 h +1 h +12326 m +1 h +1337 m +195 h +1650 h +10 h +10 h +12327 m +13 h +140 m +1016 h +1858 m +4 h +1 h +12328 m +12329 m +12330 m +196 h +4 h +5621 m +12331 m +1 h +4 h +12332 m +12333 m +1 h +10 h +11 h +332 h +82 h +4 h +4 h +4 h +11 h +82 h +4 h +12334 m +2379 m +82 h +10 h +1953 m +4 h +4 h +10 h +1 h +1 h +119 h +10 h +12335 m +4 h +1 h +12336 m +12337 m +12 h +45 h +181 h +25 h +196 h +12338 m +4 h +12339 m +4 h +74 h +1 h +1089 h +443 h +1137 h +4 h +91 h +12340 m +10 h +1 h +12341 m +12342 m +104 h +1 h +157 h +12343 m +113 h +41 h +12344 m +4 h +4 h +192 h +12345 m +1 h +737 m +1 h +59 h +12346 m +12347 m +1 h +170 h +23 h +12348 m +4 h +371 h +31 h +57 h +10 h +6187 m +12349 m +1 h +4 h +184 h +27 h +274 h +4 h +12350 m +274 h +1766 m +57 h +4 h +10 h +3 h +10 h 
+11 h +1 h +59 h +4 h +7 m +125 h +12351 m +12352 m +4 h +12353 m +12354 m +4 h +10 h +4 h +1 h +12355 m +4 h +4 h +12356 m +196 h +4 h +3 h +56 h +4 h +10 h +56 h +10 h +256 h +1 h +4 h +869 m +4 h +1 h +12 h +124 h +4 h +6784 h +4 h +4 h +230 m +403 h +1 h +266 h +4 h +12357 m +4 h +1 h +57 h +12358 m +4 h +164 h +77 h +1 h +12359 m +92 h +4 h +12360 m +1 h +55 h +2205 m +4 h +3 h +10 h +59 h +83 h +4 h +1 h +57 h +82 h +74 h +174 m +73 h +10 h +27 h +1 h +10 h +4 h +65 h +4 h +4 h +1 h +146 h +12361 m +12362 m +12363 m +10 h +12364 m +359 h +4 h +12365 m +12366 m +538 h +4 h +1 h +12367 m +10 h +2285 m +1 h +4 h +65 h +1 h +11 h +1 h +4 h +1 h +10 h +4 h +106 m +10 h +104 h +1 h +59 h +185 h +125 h +1406 h +4 h +10 h +164 h +1 h +12368 m +41 h +10 h +3 h +114 h +1 h +520 h +1 h +10 h +10 h +386 h +59 h +4 h +556 h +1 h +10 h +12369 m +12370 m +4 h +12371 m +4 h +4 h +976 h +10 h +11 h +1 h +4 h +4 h +23 h +124 h +1 h +4 h +4 h +59 h +196 h +12372 m +4 h +4 h +10 h +10 h +11 h +1 h +1 h +10 h +4240 m +1 h +1 h +4 h +4 h +1 h +4 h +56 h +93 h +12373 m +4 h +74 h +4 h +4 h +125 h +12374 m +1 h +536 h +4 h +31 h +12375 m +4 h +10 h +3 h +2923 h +443 h +12376 m +1 h +258 h +12377 m +10 h +570 m +12378 m +4 h +976 h +640 h +195 h +1089 h +12 h +2172 m +87 m +1 h +737 m +575 m +1137 h +64 h +488 h +12379 m +319 h +104 h +12380 m +4 h +4 h +4 h +10 h +238 h +12381 m +1 h +56 h +1 h +4 h +104 h +3 h +4 h +4 h +1 h +195 h +4 h +4 h +144 h +506 m +4 h +196 h +11 h +2002 m +1 h +27 h +73 h +4 h +4111 m +195 h +266 h +1 h +10 h +4 h +3 h +316 m +10 h +1 h +12382 m +83 h +282 h +12383 m +1 h +91 h +4 h +10 h +4 h +1 h +443 h +143 h +83 h +27 h +1016 h +185 h +12384 m +12385 m +4 h +12386 m +10 h +11 h +12387 m +12388 m +3 h +65 h +1 h +10 h +10 h +12389 m +12390 m +12391 m +12392 m +307 h +1 h +59 h +12393 m +1 h +1 h +4 h +1 h +10 h +12394 m +169 h +12395 m +12396 m +0 h +124 h +82 h +10 h +4 h +270 h +124 h +1 h +10 h +1 h +1 h +146 h +4 h +11 h +1 h +10 h +77 h +93 h +4 h 
+1 h +12397 m +57 h +12398 m +4 h +4 h +1 h +4 h +601 h +10 h +11 h +1 h +10 h +4 h +10 h +4 h +79 h +10 h +1 h +12399 m +358 h +4 h +10 h +274 h +1 h +4 h +10 h +83 h +1 h +4 h +4 h +10 h +135 h +332 h +12400 m +1 h +1 h +41 h +4229 m +4 h +10 h +4 h +4 h +358 h +1 h +1 h +1 h +12401 m +4 h +4 h +190 h +371 h +109 h +1 h +1 h +219 h +10 h +12402 m +12403 m +1714 m +196 h +12404 m +1 h +1 h +1 h +10 h +1 h +10 h +1822 h +12405 m +1 h +1 h +1955 m +109 h +11 h +1 h +10 h +12406 m +12407 m +258 h +82 h +4966 m +274 h +10 h +10 h +270 h +12408 m +1 h +4 h +48 h +12409 m +27 h +12410 m +4 h +1737 m +12411 m +4 h +4 h +4 h +12412 m +4 h +10 h +4 h +12413 m +73 h +4 h +4 h +1 h +10 h +12414 m +1 h +45 h +123 h +4 h +4 h +12415 m +12416 m +4 h +12417 m +4 h +94 h +169 h +1957 m +4 h +83 h +12418 m +4 h +97 h +4 h +1 h +12419 m +3 h +83 h +4 h +97 h +1 h +4 h +11 h +1 h +1 h +10 h +1 h +1478 h +12420 m +12421 m +4 h +12422 m +173 h +12423 m +1 h +4 h +10 h +1 h +4 h +229 h +4 h +4 h +110 h +12424 m +1 h +4 h +4 h +338 h +190 h +10 h +31 h +10 h +10 h +10 h +1 h +874 m +12425 m +10 h +10 h +4 h +125 h +10 h +104 h +79 h +12426 m +4 h +4 h +238 h +12427 m +1 h +10 h +3 h +11 h +2442 m +12428 m +10 h +59 h +12429 m +10 h +10 h +377 h +12430 m +1 h +1 h +12431 m +1 h +4 h +533 h +4 h +25 h +82 h +10 h +1 h +4 h +10 h +4 h +10 h +4 h +4 h +11 h +4520 m +10 h +4 h +12432 m +64 h +4 h +1 h +10 h +27 h +4 h +12433 m +12434 m +276 h +12 h +4 h +124 h +82 h +1 h +110 h +45 h +12435 m +12436 m +4 h +4 h +11 h +10 h +4 h +823 m +12437 m +3 h +31 h +1 h +125 h +4 h +82 h +12438 m +1 h +4 h +4 h +22 h +4 h +12 h +4 h +1 h +230 m +12439 m +4 h +332 h +885 m +146 h +1 h +12440 m +4 h +4 h +12441 m +11 h +125 h +4 h +4 h +12442 m +82 h +12443 m +4 h +1 h +1 h +10 h +92 h +359 h +1 h +4 h +12444 m +583 h +59 h +2617 m +12445 m +4 h +1 h +41 h +1 h +112 h +12446 m +1437 m +601 h +4 h +83 h +59 h +4 h +6753 m +4 h +12447 m +241 m +10 h +125 h +1 h +157 h +31 h +92 h +358 h +12448 m +82 h 
+12449 m +1 h +82 h +36 h +27 h +1499 m +12450 m +4 h +12451 m +124 h +125 h +4 h +74 h +12452 m +12453 m +11 h +1261 h +986 h +4 h +1 h +1838 m +4 h +4 h +4 h +4 h +12454 m +1 h +11 h +12455 m +581 m +4 h +12456 m +4 h +25 h +83 h +4 h +12457 m +4 h +4 h +10 h +104 h +10 h +10 h +4 h +27 h +4 h +10 h +41 h +1053 m +1 h +146 h +1 h +10 h +4 h +12458 m +4 h +1642 h +109 h +196 h +1 h +12459 m +1 h +12460 m +10 h +31 h +3299 m +1 h +1 h +1 h +1 h +4 h +1 h +4 h +4 h +4 h +4 h +12461 m +4 h +1 h +10 h +359 h +97 h +4 h +1499 m +55 h +1359 h +12462 m +11 h +10 h +4 h +229 h +1 h +4 h +4 h +10 h +1 h +10 h +27 h +12463 m +1 h +4 h +4 h +9 m +1 h +358 h +12464 m +104 h +167 h +31 h +1 h +11 h +11 h +12465 m +12 h +6469 m +1 h +1 h +1 h +12466 m +59 h +146 h +4 h +1 h +10 h +57 h +10 h +1 h +1308 m +57 h +104 h +25 h +7 m +4 h +10 h +258 h +112 h +1 h +976 h +31 h +4 h +1 h +386 h +1 h +4 h +1 h +1 h +12467 m +332 h +12 h +10 h +4 h +74 h +4 h +1 h +140 h +4 h +57 h +2418 m +124 h +1205 m +4 h +10 h +10 h +1 h +10 h +1 h +3 h +124 h +12468 m +10 h +4 h +1 h +10 h +4 h +1 h +4 h +1 h +12469 m +1 h +601 h +4 h +10 h +4 h +12470 m +1 h +12471 m +1 h +3 h +2465 m +1 h +1 h +124 h +1 h +4 h +12272 m +4 h +12472 m +10 h +12473 m +307 h +64 h +272 m +4 h +12474 m +1535 m +114 h +10 h +4 h +109 h +295 h +10 h +4 h +12475 m +1 h +4 h +4 h +1 h +33 h +123 h +12476 m +31 h +10 h +1 h +1 h +12477 m +181 h +12478 m +97 h +56 h +4 h +12479 m +4 h +1 h +1 h +1 h +1 h +1 h +181 h +12480 m +8 h +4 h +10 h +12481 m +4 h +770 m +170 h +1 h +1 h +4 h +83 h +1 h +1 h +1 h +12482 m +12483 m +4 h +1 h +12484 m +4 h +4 h +10 h +12485 m +1 h +12486 m +12487 m +1299 m +4 h +2927 m +12488 m +322 m +77 h +10 h +10 h +135 h +12489 m +4 h +4 h +1822 h +12490 m +4 h +1 h +1 h +109 h +104 h +4 h +536 h +11490 m +12491 m +12492 m +4 h +1089 h +1 h +1 h +4 h +12493 m +4 h +1 h +12494 m +4 h +10 h +4 h +12495 m +10 h +12496 m +10 h +1 h +31 h +12497 m +10 h +2625 m +79 h +4 h +12498 m +10 h +1 h +125 h 
+186 h +12 h +1 h +79 h +4 h +1 h +10 h +12499 m +4 h +169 h +935 h +1 h +7572 m +12500 m +718 m +4 h +10 h +4 h +12501 m +12502 m +1 h +12503 m +11 h +3 h +10243 m +1 h +1667 m +4 h +8146 m +12504 m +10 h +4 h +4 h +10 h +4 h +3 h +36 h +82 h +10 h +4 h +12505 m +4 h +1 h +447 h +4 h +1 h +4 h +146 h +146 h +4 h +10 h +640 h +4 h +1 h +1261 h +110 h +1 h +10 h +4 h +4 h +10 h +4 h +12506 m +4 h +13 h +4 h +1 h +25 h +1955 m +10 h +1 h +12507 m +12508 m +4 h +1 h +4 h +1 h +3 h +12509 m +4 h +25 h +1074 h +114 h +1 h +1 h +11 h +4 h +10 h +93 h +4 h +4 h +12510 m +10 h +1 h +1 h +4 h +1024 m +601 h +158 h +1 h +12511 m +82 h +10 h +73 h +12512 m +12513 m +4 h +12514 m +1 h +4 h +12515 m +4 h +1 h +12516 m +10 h +124 h +4 h +1574 m +12 h +1 h +45 h +146 h +6187 m +1 h +83 h +1 h +167 h +12517 m +59 h +4 h +4 h +1 h +10 h +250 h +10 h +10 h +12518 m +4 h +4 h +12519 m +1 h +31 h +3177 m +4 h +1 h +11 h +109 h +1 h +10 h +12520 m +1619 h +12521 m +478 h +1 h +12522 m +7809 m +10 h +10 h +368 h +1 h +1 h +12523 m +1 h +12524 m +82 h +4 h +55 h +1499 h +119 h +4 h +2379 m +1939 m +10 h +12525 m +1504 m +4 h +355 m +538 h +1 h +1 h +1 h +74 h +124 h +12526 m +82 h +77 h +10 h +11 h +1189 m +5348 m +265 h +322 m +1 h +1 h +74 h +1 h +4 h +10 h +806 m +10 h +185 h +10 h +4 h +4 h +1 h +1 h +4815 m +1 h +195 h +1 h +10 h +170 h +986 h +12527 m +4 h +4 h +12528 m +12529 m +447 h +5224 m +124 h +12530 m +91 h +10 h +4 h +1 h +4 h +146 h +4 h +1 h +10 h +1 h +1366 m +1 h +12531 m +1 h +10099 m +12532 m +10 h +12533 m +367 h +114 h +12534 m +12535 m +3704 m +1646 m +1 h +1 h +41 h +447 h +41 h +219 h +109 h +82 h +1 h +219 h +4 h +4 h +55 h +1 h +13 h +3 h +10 h +12536 m +1 h +12537 m +1 h +55 h +3 h +10 h +10 h +1 h +12538 m +1 h +94 h +11 h +4 h +59 h +368 h +4 h +83 h +4 h +12539 m +10 h +10 h +4 h +1 h +4 h +1 h +12540 m +1 h +10 h +1 h +1 h +12541 m +12542 m +307 h +1642 h +4 h +4 h +12543 m +64 h +10 h +4 h +65 h +4 h +4 h +10 h +82 h +4 h +1 h +12544 m +12545 m +12546 m 
+12547 m +11 h +1389 m +1 h +12548 m +12549 m +10 h +4 h +10 h +4 h +10 h +274 h +12550 m +1 h +1 h +12551 m +4 h +10 h +10 h +124 h +4 h +12552 m +10 h +12553 m +1 h +57 h +1 h +4 h +79 h +82 h +147 h +10 h +12554 m +4 h +15 m +4 h +167 h +1 h +12555 m +79 h +10 h +569 h +4 h +219 h +12556 m +1714 m +3 h +146 h +12557 m +12558 m +12559 m +4 h +601 h +1766 h +1 h +1337 m +12560 m +1 h +4 h +12561 m +11 h +12562 m +82 h +12563 m +12564 m +4 h +55 h +12565 m +195 h +4 h +12566 m +4 h +1 h +13 h +12567 m +538 h +4 h +1 h +1 h +938 h +12568 m +12569 m +258 h +10 h +12570 m +74 h +4 h +10 h +12571 m +12572 m +4 h +307 h +1 h +4 h +1 h +443 h +11 h +1 h +11 h +4 h +4 h +10 h +4 h +10 h +11 h +13 h +12573 m +1 h +4 h +12574 m +12575 m +4 h +12576 m +1 h +10 h +12577 m +4 h +718 m +4 h +144 h +56 h +12578 m +12579 m +12580 m +4 h +12581 m +4 h +12582 m +4 h +94 h +1650 h +4 h +367 h +55 h +4 h +45 h +4 h +12583 m +12584 m +12585 m +1 h +10 h +104 h +1948 m +10 h +1359 h +28 h +12586 m +4 h +10 h +4 h +1919 m +4 h +12587 m +4 h +1309 m +238 h +46 m +1 h +12588 m +12589 m +10 h +8 h +578 h +112 h +83 h +1 h +1056 m +4 h +10 h +10 h +1751 m +1 h +124 h +10 h +11 h +1 h +1 h +12590 m +82 h +4 h +172 h +57 h +12591 m +4 h +1 h +12592 m +4 h +10 h +10 h +12593 m +4 h +12594 m +4 h +4 h +3499 m +4 h +10 h +1 h +4 h +195 h +4 h +45 h +1 h +12595 m +4 h +10 h +12596 m +146 h +4 h +1508 m +1 h +203 h +238 h +2733 h +12597 m +4 h +7901 m +158 h +10 h +9347 m +169 h +888 m +1 h +4 h +1 h +12598 m +1 h +41 h +4 h +12599 m +1 h +12600 m +4 h +10 h +4 h +12 h +274 h +11 h +10 h +4 h +1835 m +10 h +4 h +1 h +4 h +124 h +170 h +118 h +10 h +11 h +1 h +1 h +4 h +12601 m +12602 m +170 h +4 h +295 h +1486 m +92 h +10 h +4 h +11 h +124 h +10 h +4 h +1 h +109 h +4 h +3161 m +12603 m +4 h +10 h +4 h +10 h +12604 m +83 h +59 h +4 h +31 h +747 m +10 h +10 h +11 h +3 h +10 h +12605 m +57 h +4 h +185 h +1 h +12606 m +4 h +12607 m +1 h +12 h +4810 m +82 h +169 h +3 h +10 h +12608 m +10 h +4 h +1020 m 
+1030 h +12609 m +279 h +10 h +1619 h +4 h +181 h +4 h +12610 m +4 h +10 h +4 h +27 h +4 h +12611 m +12612 m +1 h +11 h +25 h +4 h +11 h +10 h +4 h +156 h +4 h +10 h +25 h +4 h +83 h +1981 m +31 h +976 h +4 h +83 h +10 h +25 h +147 h +12613 m +4 h +299 m +1 h +1 h +4 h +1 h +10 h +4 h +266 h +718 h +12614 m +1 h +1 h +258 h +1 h +358 h +6851 m +12615 m +939 h +147 h +4 h +1 h +12616 m +4 h +4 h +10 h +1 h +1 h +4 h +12617 m +12618 m +12619 m +4 h +4 h +12620 m +4 h +59 h +4 h +12621 m +1 h +4 h +4 h +94 h +307 h +108 h +12622 m +172 h +125 h +12623 m +1886 m +1 h +83 h +1 h +1 h +3342 m +109 h +10 h +4 h +1 h +11 h +1 h +158 h +12624 m +12625 m +1 h +184 h +4 h +1089 h +56 h +4 h +59 h +205 m +1 h +1 h +4 h +10 h +12626 m +295 h +11 h +4 h +4 h +687 h +1 h +10 h +4 h +10 h +4 h +164 h +109 h +279 h +10 h +4 h +4 h +4 h +4 h +1 h +4 h +157 h +82 h +626 m +11 h +196 h +109 h +278 h +8147 m +10 h +4 h +59 h +5 m +1 h +104 h +83 h +3 h +83 h +1 h +82 h +104 h +361 m +4 h +10 h +57 h +12627 m +4 h +10 h +4 h +1 h +4 h +12 h +11 h +4 h +10 h +1 h +9431 m +124 h +4 h +1 h +82 h +4 h +10 h +82 h +109 h +10 h +55 h +4 h +1 h +1 h +4 h +1 h +10 h +196 h +4 h +11 h +238 h +124 h +4 h +1532 m +368 h +1 h +73 h +332 h +1 h +12628 m +124 h +4 h +12629 m +9902 m +113 h +1127 m +10 h +4 h +41 h +1322 m +12 h +108 h +10 h +1235 m +435 m +169 h +92 h +4 h +10 h +12630 m +1 h +12631 m +447 h +10 h +250 h +147 h +196 h +4 h +4 h +4 h +10 h +258 h +1 h +1 h +1 h +12632 m +4 h +4 h +57 h +12633 m +9040 m +10 h +4 h +41 h +1 h +258 h +82 h +12634 m +4 h +27 h +146 h +4 h +41 h +12635 m +146 h +12636 m +4 h +10 h +1 h +125 h +10 h +12637 m +4 h +10 h +82 h +83 h +1 h +12638 m +1 h +12639 m +2072 m +1 h +12640 m +4 h +10 h +36 h +4 h +1 h +3396 m +4 h +10 h +4 h +1 h +83 h +12641 m +74 h +4 h +10 h +1 h +1 h +12642 m +31 h +10 h +4 h +400 m +4 h +1 h +4 h +1 h +1 h +4 h +1 h +12643 m +12644 m +25 h +82 h +238 h +12645 m +173 h +97 h +538 h +12 h +74 h +1576 m +10 h +4 h +10 h +4 h +12646 m 
+4 h +59 h +4 h +4 h +4 h +12647 m +83 h +4 h +12648 m +4 h +4 h +1 h +4 h +10 h +1 h +818 m +12649 m +10 h +4 h +129 h +869 m +3 h +1 h +4 h +1 h +4 h +12650 m +11 h +4 h +12651 m +1 h +1 h +11 h +4 h +10 h +12652 m +1 h +1 h +4 h +4 h +12653 m +4 h +1 h +4 h +443 h +238 h +1 h +1 h +4 h +278 h +3 h +77 h +4 h +1 h +1 h +4 h +10 h +12654 m +4 h +4 h +687 h +10 h +1 h +118 h +10 h +4 h +109 h +368 h +10 h +10 h +12655 m +430 m +104 h +1 h +1 h +4 h +4 h +12656 m +1 h +139 h +12657 m +12658 m +1 h +10 h +45 h +114 h +10 h +55 h +258 h +1 h +57 h +12659 m +4 h +82 h +1 h +1 h +119 h +368 h +4 h +113 h +12660 m +1 h +4 h +13 h +82 h +4524 m +1 h +12661 m +4 h +4 h +12662 m +4 h +4 h +4 h +12663 m +1 h +1 h +7553 m +12664 m +12665 m +12666 m +109 h +12667 m +281 m +12668 m +1772 m +4 h +1 h +10 h +12669 m +156 h +4 h +1261 h +94 h +196 h +124 h +1655 m +156 h +4 h +4 h +966 h +10 h +1 h +12670 m +25 h +10 h +1 h +12671 m +83 h +4 h +802 m +587 m +1 h +10 h +94 h +147 h +1 h +135 h +3 h +4 h +10 h +258 h +12672 m +4 h +11 h +4 h +57 h +12 h +12673 m +4 h +4 h +12674 m +109 h +12675 m +12676 m +55 h +146 h +1 h +12677 m +10 h +1659 m +3373 m +4 h +1 h +4 h +12678 m +48 h +12679 m +4 h +4 h +1 h +143 h +125 h +169 h +4 h +1271 m +4 h +146 h +4 h +173 h +12680 m +10 h +4 h +10 h +4 h +1 h +1 h +10 h +123 h +135 h +4 h +1 h +156 h +181 h +4 h +1 h +1 h +4 h +12681 m +1 h +4 h +124 h +10 h +10 h +40 h +12682 m +4 h +13 h +11 h +125 h +1 h +4 h +4 h +11 h +196 h +114 h +1 h +10 h +4 h +1250 h +1201 h +4 h +12683 m +10 h +12684 m +12685 m +4 h +4 h +4 h +119 h +13 h +10 h +25 h +82 h +4 h +4 h +4 h +10 h +195 h +1 h +10 h +307 h +74 h +12686 m +1 h +1 h +10 h +4 h +12687 m +1 h +4 h +12688 m +4 h +1 h +4 h +1 h +3188 m +83 h +1 h +125 h +10 h +10 h +1 h +10 h +4905 m +11 h +12689 m +167 h +1 h +10 h +146 h +11 h +92 h +12690 m +9228 m +1 h +4 h +4 h +104 h +4 h +65 h +1 h +10 h +12691 m +79 h +195 h +4 h +57 h +12692 m +4 h +12693 m +4 h +1486 m +13 h +12694 m +10 h +12695 m 
+10 h +104 h +4 h +4 h +1 h +12696 m +1 h +146 h +4 h +59 h +10 h +4 h +4 h +12697 m +10 h +59 h +1 h +12698 m +4 h +12699 m +82 h +123 h +104 h +10 h +4 h +124 h +784 m +278 h +10 h +767 m +129 h +4 h +4 h +1 h +1542 m +10 h +4 h +4378 m +12700 m +147 h +4 h +10 h +124 h +10 h +181 h +4 h +386 h +10 h +4 h +92 h +82 h +1 h +147 h +10 h +1 h +12701 m +1 h +4 h +79 h +12702 m +4 h +169 h +41 h +172 h +11 h +82 h +4 h +4 h +109 h +4 h +1 h +1 h +106 m +10 h +3 h +12703 m +11358 m +1 h +479 m +885 m +12704 m +266 h +1 h +192 h +10 h +1 h +1 h +12705 m +1261 h +4 h +10 h +399 h +12706 m +4 h +9893 m +1 h +10 h +538 h +1 h +79 h +4 h +4 h +12707 m +10 h +82 h +1 h +12708 m +12709 m +4 h +57 h +12710 m +12711 m +11 h +12712 m +2794 m +4 h +195 h +1 h +12713 m +1 h +1261 h +57 h +4 h +1 h +1 h +10 h +10 h +1 h +4 h +4 h +104 h +10 h +601 h +10 h +4 h +5422 m +1 h +486 m +83 h +10 h +45 h +4 h +4 h +4 h +10 h +12714 m +4 h +4 h +55 h +12715 m +1 h +4 h +48 h +4 h +1359 h +4 h +12716 m +12717 m +10 h +57 h +1 h +11 h +1201 h +1 h +82 h +353 m +12718 m +1089 h +1 h +4 h +31 h +48 h +4 h +1 h +1 h +1 h +143 h +12719 m +1 h +4 h +10 h +10 h +25 h +250 h +4 h +10 h +113 h +10 h +25 h +1 h +4 h +12720 m +110 h +59 h +1 h +10 h +1 h +4 h +4 h +10 h +1 h +1 h +12721 m +1 h +12722 m +4 h +92 h +1366 m +10 h +10 h +4 h +143 h +10 h +1 h +1 h +104 h +12199 m +1185 m +169 h +12723 m +4 h +4 h +601 h +124 h +12724 m +57 h +265 h +3 h +11 h +1 h +12725 m +10 h +12726 m +4 h +10 h +1 h +4 h +4 h +11 h +4 h +59 h +1 h +4 h +1 h +74 h +4 h +196 h +1804 m +1 h +146 h +12727 m +4 h +164 h +11 h +59 h +158 h +46 m +4 h +82 h +4 h +1 h +464 h +1 h +1 h +12728 m +12729 m +97 h +12730 m +1 h +3 h +3 h +11 h +83 h +1 h +59 h +12731 m +12732 m +4 h +4 h +146 h +31 h +4 h +83 h +41 h +10 h +12733 m +10 h +11 h +103 h +109 h +12734 m +4 h +1 h +338 h +4 h +10 h +3067 m +11 h +4 h +1 h +10 h +125 h +10 h +12735 m +4 h +124 h +4 h +135 h +1 h +12736 m +2846 m +12737 m +4 h +12738 m +229 h +12739 m +10 
h +11 h +1535 m +1 h +10 h +123 h +124 h +12740 m +1737 m +10 h +82 h +31 h +1 h +10 h +4 h +12741 m +1718 m +4 h +13 h +4 h +4 h +358 h +1 h +11477 m +12742 m +83 h +4 h +10 h +12743 m +12744 m +55 h +125 h +4 h +4 h +12745 m +12746 m +10 h +10 h +996 m +4 h +403 h +1 h +83 h +1 h +1 h +12747 m +1 h +1 h +147 h +4 h +4 h +4 h +2935 m +1 h +4 h +41 h +10 h +124 h +1 h +91 h +12748 m +4 h +10 h +1 h +12749 m +430 m +12750 m +10 h +4 h +6391 m +3 h +10 h +25 h +64 h +1 h +1 h +12751 m +12752 m +83 h +4 h +1 h +4 h +4 h +12753 m +82 h +10 h +4 h +4 h +964 m +12754 m +1 h +1 h +4 h +12755 m +4 h +12756 m +109 h +10177 m +83 h +10 h +1 h +10 h +4 h +10 h +3 h +10 h +1 h +10 h +12757 m +1278 m +12758 m +1 h +12759 m +403 h +83 h +92 h +1 h +4 h +510 m +195 h +124 h +10 h +4 h +10 h +12760 m +1 h +12761 m +12762 m +4 h +10 h +1 h +12 h +6808 m +10 h +4 h +195 h +4 h +4 h +258 h +1691 h +10 h +82 h +4 h +4 h +10 h +10 h +56 h +258 h +1 h +83 h +4 h +12763 m +12764 m +1 h +12765 m +1 h +4 h +10 h +1 h +1 h +57 h +4 h +1 h +4 h +12766 m +135 h +36 h +25 h +12767 m +12768 m +10 h +12769 m +12770 m +10 h +1 h +4 h +4 h +59 h +4 h +12771 m +12772 m +1 h +4 h +4 h +10 h +10 h +82 h +4 h +648 m +733 m +11 h +1304 m +92 h +4 h +181 h +4 h +109 h +12773 m +299 m +3 h +4 h +12774 m +4 h +1 h +1 h +1 h +1 h +196 h +1 h +4 h +939 h +12775 m +4 h +4 h +110 h +1 h +12776 m +109 h +4 h +4 h +4 h +59 h +4 h +97 h +1 h +1 h +1027 h +4 h +10 h +1 h +12777 m +12778 m +1 h +12779 m +25 h +10 h +10 h +135 h +12780 m +185 h +10 h +10 h +4 h +36 h +4 h +4 h +264 h +4 h +11 h +82 h +4 h +368 h +4 h +1 h +4 h +10 h +4 h +4 h +31 h +59 h +10 h +74 h +10 h +4 h +4 h +4 h +1 h +10 h +11 h +1 h +1 h +3 h +4 h +147 h +1 h +10 h +41 h +3070 m +4 h +4718 m +10 h +763 m +295 h +12781 m +1 h +12782 m +4 h +4 h +4 h +4 h +12783 m +4 h +1 h +124 h +1 h +109 h +1 h +41 h +4 h +4 h +12784 m +12785 m +82 h +109 h +10 h +25 h +110 h +10 h +2506 m +10 h +12 h +13 h +1 h +125 h +1 h +10 h +1 h +12786 m +12787 m 
+124 h +383 h +31 h +4 h +1 h +12788 m +12789 m +4 h +12790 m +4 h +4 h +74 h +1 h +4 h +4 h +1 h +11 h +10 h +443 h +4 h +12791 m +36 h +12792 m +1 h +82 h +12793 m +59 h +12794 m +12795 m +10221 m +12796 m +1027 h +10 h +12797 m +11 h +190 h +12798 m +4 h +10 h +4 h +692 h +31 h +82 h +94 h +59 h +7479 m +1265 m +1 h +124 h +41 h +4 h +1 h +12799 m +109 h +82 h +1535 m +12800 m +114 h +12801 m +4 h +4 h +4 h +124 h +12802 m +1454 m +4 h +69 h +10 h +4 h +10 h +383 h +36 h +4 h +57 h +4 h +1 h +109 h +57 h +4 h +1 h +10 h +4 h +4 h +3 h +1 h +4 h +12803 m +4 h +10 h +73 h +12804 m +4 h +10 h +10 h +1016 h +10 h +1 h +1359 h +4 h +12805 m +12806 m +11 h +359 h +3 h +54 m +1 h +4 h +10 h +4 h +1 h +10 h +964 m +12807 m +4 h +12808 m +4 h +12809 m +1271 m +12810 m +4 h +73 h +273 m +3 h +10 h +1 h +1 h +10 h +1499 h +12811 m +12812 m +4 h +74 h +143 h +12813 m +12814 m +1 h +114 h +12815 m +1016 h +674 m +12816 m +4 h +10 h +1 h +1 h +12817 m +939 h +12818 m +4 h +687 h +10 h +4 h +10 h +1 h +12819 m +1 h +1 h +12820 m +12821 m +4 h +55 h +12822 m +1 h +12823 m +1 h +4 h +4074 m +4 h +12824 m +447 h +82 h +12825 m +1 h +59 h +12826 m +1 h +1 h +10 h +11 h +10 h +1 h +97 h +31 h +4 h +12827 m +10 h +4 h +4 h +12828 m +258 h +10 h +146 h +135 h +12829 m +12830 m +124 h +74 h +10 h +10 h +1 h +1 h +10 h +4 h +12831 m +108 h +478 h +1 h +1 h +1 h +1 h +10 h +10 h +1 h +1 h +332 h +12832 m +12833 m +4723 m +22 h +4 h +4 h +4 h +1619 h +10 h +12834 m +885 m +4 h +1 h +4 h +1 h +83 h +1 h +4 h +4 h +4 h +12835 m +4 h +4 h +1470 h +1 h +1 h +1 h +4 h +10 h +1 h +4 h +10 h +10 h +4 h +4 h +31 h +10 h +1 h +12836 m +73 h +4 h +4 h +74 h +12837 m +92 h +4252 m +1 h +4 h +59 h +12838 m +1016 h +10 h +12839 m +10 h +4 h +12840 m +1 h +4 h +10 h +4 h +103 h +146 h +10 h +4 h +4 h +12841 m +1374 m +229 h +12842 m +12 h +114 h +4 h +12843 m +56 h +12844 m +1 h +885 h +4 h +12845 m +124 h +11 h +10 h +266 h +238 h +1 h +12846 m +181 h +119 h +10272 m +146 h +4 h +73 h +1 h +11 h +195 
h +65 h +12847 m +12848 m +41 h +4 h +10 h +12849 m +4 h +79 h +4 h +12850 m +10 h +1 h +41 h +313 m +4 h +1 h +10 h +64 h +1650 h +1 h +1 h +1 h +12851 m +12852 m +13 h +12853 m +4 h +2625 m +278 h +10 h +10 h +570 m +55 h +12854 m +12855 m +10 h +238 h +12856 m +4 h +10 h +353 m +266 h +1 h +12857 m +124 h +1 h +4 h +1 h +4 h +4 h +1 h +12858 m +4 h +12859 m +27 h +114 h +10 h +109 h +125 h +1 h +1 h +41 h +12860 m +12861 m +1 h +10 h +12862 m +1 h +8332 m +8663 m +11 h +338 h +12863 m +25 h +4 h +358 h +1 h +1 h +1 h +97 h +1 h +1 h +11 h +1 h +10 h +12864 m +4 h +147 h +12865 m +57 h +55 h +12866 m +1 h +10 h +297 h +4 h +11 h +1 h +12867 m +12868 m +1 h +1 h +4 h +4 h +10 h +4 h +12869 m +12870 m +10 h +12871 m +140 h +4 h +125 h +2813 m +4 h +10 h +4 h +4 h +1 h +4 h +124 h +124 h +12872 m +1 h +73 h +12873 m +4 h +12874 m +12875 m +12876 m +11 h +4 h +4 h +4 h +1 h +11 h +1 h +57 h +97 h +3 h +10 h +59 h +11 h +12877 m +270 h +12878 m +3 h +1 h +12879 m +2257 m +4 h +6290 m +12880 m +2438 m +1261 h +1 h +12881 m +4 h +1 h +12882 m +4 h +4 h +1 h +12883 m +238 h +250 h +316 m +4 h +1 h +1 h +1 h +59 h +1 h +57 h +12884 m +1 h +174 m +1 h +11 h +1 h +12885 m +990 m +4 h +12886 m +4 h +4 h +12887 m +794 m +3 h +10 h +4 h +399 h +45 h +10 h +12888 m +4 h +12889 m +12890 m +150 m +4 h +147 h +104 h +1 h +4 h +12891 m +1 h +10 h +10 h +109 h +4 h +12892 m +12893 m +12894 m +4 h +12895 m +538 h +258 h +5093 m +119 h +10783 m +1030 h +1 h +12896 m +3 h +806 m +4 h +12897 m +4 h +4 h +4 h +4 h +12898 m +82 h +1 h +27 h +4 h +1 h +10 h +28 h +10 h +7135 m +4 h +170 h +4 h +181 h +10 h +1 h +1 h +73 h +4 h +358 h +1 h +158 h +4 h +4 h +4 h +4 h +10 h +4 h +10 h +4 h +4 h +4 h +1 h +3 h +4 h +4 h +1 h +4 h +181 h +4 h +1 h +12899 m +59 h +12900 m +4 h +110 h +229 h +92 h +55 h +10 h +11 h +119 h +104 h +12901 m +10 h +10 h +4 h +12902 m +57 h +10 h +1 h +1 h +185 h +110 h +1 h +12903 m +1 h +12904 m +1 h +1261 h +10 h +1 h +167 h +190 h +10 h +12905 m +1309 h +4 h +124 
h +12906 m +4 h +1 h +27 h +4 h +12907 m +12908 m +763 m +4 h +4 h +25 h +12909 m +12910 m +10 h +1 h +12911 m +11 h +289 h +265 h +12912 m +4 h +1 h +4596 m +147 h +83 h +10 h +10 h +1 h +158 h +1 h +4 h +1 h +4 h +1 h +4 h +12913 m +10 h +12914 m +4 h +12915 m +4 h +4 h +1 h +12916 m +10 h +4 h +12917 m +12918 m +10 h +4 h +36 h +1 h +109 h +10 h +1 h +109 h +10 h +10 h +97 h +10 h +167 h +4 h +185 h +4 h +4 h +10 h +4 h +11 h +12919 m +10 h +1 h +12920 m +1 h +10 h +74 h +1 h +4 h +109 h +3 h +12921 m +59 h +4 h +4 h +114 h +307 h +1 h +4 h +1 h +10 h +11 h +4 h +4 h +4 h +91 h +4 h +12922 m +56 h +1 h +10 h +10 h +4 h +3345 m +12923 m +10 h +4 h +97 h +270 h +4 h +10 h +10 h +4 h +1 h +4 h +12924 m +10 h +4 h +1 h +12925 m +10 h +4 h +1 h +12926 m +147 h +112 h +1 h +1 h +4 h +1 h +4 h +124 h +4 h +64 h +4 h +10 h +4 h +640 h +181 h +359 h +4 h +1642 h +4 h +10 h +109 h +4 h +114 h +1 h +74 h +1409 h +4 h +4 h +57 h +1 h +109 h +10 h +77 h +4 h +12927 m +1 h +11 h +265 h +1 h +4 h +12928 m +10 h +1650 h +25 h +4 h +1 h +1 h +1 h +12929 m +146 h +4 h +12930 m +4 h +4 h +4 h +74 h +1 h +12931 m +83 h +11 h +4 h +12932 m +13 h +12933 m +4 h +4 h +1 h +12934 m +79 h +12935 m +196 h +1017 h +10 h +11 h +12936 m +12 h +172 h +1 h +4 h +41 h +4 h +4 h +4 h +4 h +4 h +12937 m +4 h +27 h +4 h +172 h +10 h +265 h +12938 m +4 h +82 h +4 h +59 h +1 h +10 h +10 h +297 h +82 h +4 h +258 h +1 h +2617 m +10 h +11 h +4 h +11 h +10 h +41 h +12939 m +219 h +4 h +4 h +97 h +12940 m +4 h +10 h +1822 h +12941 m +1 h +4 h +278 h +12942 m +12943 m +4 h +4 h +12944 m +12945 m +12946 m +1713 m +10 h +83 h +332 h +425 m +4 h +12947 m +41 h +125 h +104 h +4 h +3742 m +12948 m +10 h +1 h +12949 m +1 h +447 h +10 h +10 h +258 h +83 h +4 h +181 h +1 h +10 h +172 h +92 h +92 h +56 h +12950 m +186 h +4 h +12951 m +1 h +4 h +10 h +10 h +12952 m +56 h +447 h +10 h +12953 m +12954 m +1 h +274 h +4 h +25 h +4 h +12955 m +4 h +135 h +108 h +1 h +12956 m +10 h +1 h +4 h +2582 m +4 h +12957 m +12958 
m +4 h +10 h +1137 h +805 m +59 h +82 h +8133 m +4 h +4 h +10 h +195 h +114 h +1 h +10 h +12959 m +8017 m +4 h +12960 m +4 h +4 h +10 h +11 h +169 h +12961 m +1 h +5141 m +10 h +3988 m +12962 m +8327 m +11 h +12963 m +12964 m +10 h +4 h +1 h +12965 m +172 h +12966 m +238 h +1 h +77 h +10 h +104 h +1 h +10 h +4 h +1 h +692 h +10 h +4177 m +109 h +1 h +4 h +4 h +1 h +1 h +12967 m +12968 m +4 h +195 h +12969 m +1 h +1685 m +10 h +10 h +4 h +12970 m +3742 m +12971 m +1 h +1780 h +4 h +4 h +10 h +10 h +12972 m +10 h +4 h +12973 m +12974 m +4 h +1 h +12975 m +1 h +10 h +124 h +57 h +12976 m +10 h +6187 m +196 h +11 h +79 h +12977 m +12978 m +1003 m +1 h +124 h +4 h +12979 m +1 h +1 h +11 h +82 h +59 h +4 h +4 h +3 h +10 h +5348 m +266 h +4 h +12980 m +1 h +11 h +1 h +109 h +12981 m +94 h +4 h +10 h +1271 h +4 h +4 h +12982 m +10 h +104 h +10 h +12983 m +327 m +41 h +4 h +4 h +1454 m +1 h +1 h +1 h +10 h +25 h +12984 m +10 h +4 h +10 h +10 h +3115 m +4 h +11 h +4 h +10 h +4 h +81 m +31 h +4 h +4 h +258 h +59 h +12985 m +10 h +10 h +114 h +4 h +4 h +4 h +109 h +1 h +4 h +172 h +82 h +2374 m +11 h +64 h +27 h +1 h +10 h +4 h +12986 m +12987 m +12988 m +41 h +1504 m +4 h +12989 m +4 h +4 h +10 h +12990 m +1 h +4 h +266 h +12991 m +4 h +4 h +31 h +12992 m +4 h +12993 m +1 h +274 h +1389 m +10 h +4 h +4 h +3 h +1 h +170 h +11 h +386 h +1 h +10 h +10 h +10 h +10 h +11 h +203 h +12994 m +12995 m +4 h +1 h +12996 m +4 h +146 h +1 h +10 h +27 h +299 m +1454 m +4229 m +258 h +56 h +10 h +1250 h +4 h +94 h +1 h +10 h +4 h +4 h +1 h +10 h +1 h +12 h +146 h +229 h +82 h +5363 m +1 h +25 h +12997 m +4 h +10 h +3680 m +59 h +1 h +10 h +4 h +2592 m +12998 m +10 h +4 h +158 h +4 h +56 h +59 h +55 h +12999 m +10 h +57 h +13000 m +104 h +1 h +2865 m +10 h +4 h +1 h +1 h +11 h +1 h +4 h +135 h +4 h +1 h +1 h +13001 m +4 h +1 h +990 m +79 h +10 h +1003 m +45 h +1 h +10378 m +4 h +1 h +1 h +10 h +225 m +1 h +10 h +10 h +13002 m +41 h +92 h +1 h +2617 m +13003 m +10 h +97 h +1766 h +74 h +10 h 
+4 h +25 h +3344 m +266 h +1 h +4 h +13004 m +4 h +1 h +10 h +1 h +13005 m +297 h +4 h +143 h +4 h +1 h +10 h +4 h +13006 m +1 h +1 h +4 h +13007 m +1 h +10 h +13008 m +10 h +158 h +10 h +82 h +4 h +4 h +464 h +22 h +4 h +195 h +4 h +5863 m +61 m +4 h +4 h +1 h +10 h +10 h +1 h +702 m +11 h +4 h +13009 m +13010 m +204 m +13011 m +4 h +13012 m +164 h +278 h +4 h +41 h +109 h +13013 m +57 h +13014 m +1 h +13015 m +1 h +9771 m +13016 m +4 h +169 h +368 h +5 m +10 h +4 h +4 h +79 h +10 h +1 h +12 h +1220 m +1 h +4 h +3 h +4 h +266 h +64 h +4 h +4 h +13017 m +13018 m +1 h +10 h +4 h +10 h +113 h +1 h +1 h +13019 m +4 h +250 h +10 h +3 h +11 h +104 h +10 h +4 h +13020 m +1261 h +13021 m +64 h +10 h +13022 m +10 h +1 h +13023 m +10 h +13024 m +4 h +3 h +10 h +4 h +1 h +1 h +41 h +4 h +11 h +4 h +186 h +12 h +13025 m +59 h +1406 h +13026 m +1 h +1 h +10 h +885 h +1 h +4 h +13027 m +22 h +139 h +11 h +4 h +13028 m +4 h +4 h +83 h +976 h +11 h +13029 m +125 h +1 h +10 h +10 h +13030 m +4 h +10 h +10 h +1 h +1 h +59 h +4 h +4 h +13031 m +4 h +1 h +10 h +13032 m +40 h +10 h +1 h +4 h +4 h +13033 m +4 h +59 h +1 h +4 h +1822 h +109 h +45 h +13034 m +4 h +4 h +541 m +10 h +92 h +1 h +55 h +10 h +4 h +13035 m +11 h +10 h +4 h +10 h +11 h +192 h +4 h +57 h +1 h +13036 m +1 h +11 h +4 h +400 m +4 h +65 h +114 h +13037 m +10 h +4 h +4 h +13038 m +124 h +4 h +31 h +135 h +10 h +10 h +8 h +83 h +1 h +5 m +4 h +1 h +1751 m +1 h +1 h +10 h +4 h +1 h +1 h +13039 m +4 h +1 h +13040 m +190 h +3702 m +4 h +371 h +1 h +1 h +4 h +1 h +13041 m +13042 m +1 h +10 h +1 h +25 h +4 h +1 h +1 h +195 h +4 h +4 h +13043 m +1 h +56 h +1 h +4 h +10 h +10700 m +11246 m +1 h +4 h +13044 m +13045 m +4 h +517 m +4 h +13046 m +1 h +1 h +1544 m +2496 m +4 h +13047 m +7862 m +11 h +4 h +113 h +135 h +871 m +10 h +10 h +11 h +986 h +2374 m +4 h +10 h +109 h +2418 m +278 h +13048 m +10 h +1 h +13049 m +4 h +1 h +1 h +13050 m +124 h +1 h +4516 m +167 h +69 h +13 h +278 h +359 h +4 h +4 h +10 h +83 h +1 h +13051 m 
+7616 m +13052 m +13053 m +13054 m +1 h +13055 m +1 h +358 h +157 h +11990 m +10 h +4 h +11 h +1632 m +11 h +57 h +6863 m +10 h +77 h +36 h +1016 h +11 h +914 m +276 h +4 h +110 h +13056 m +110 h +4 h +4 h +13057 m +25 h +83 h +4 h +10 h +13058 m +4 h +4 h +13059 m +276 h +143 h +4 h +125 h +270 h +13060 m +4 h +1 h +1981 m +1 h +10 h +4 h +83 h +4 h +114 h +45 h +64 h +10 h +31 h +538 h +2885 m +25 h +1 h +1 h +4 h +10 h +83 h +13061 m +1 h +13062 m +13063 m +4 h +1785 m +520 h +4 h +10 h +74 h +186 h +10 h +1 h +1 h +10 h +1 h +4 h +2617 h +4 h +196 h +13064 m +119 h +11 h +13065 m +4 h +13066 m +27 h +1 h +8 h +7243 m +13067 m +13068 m +1 h +13069 m +10 h +911 m +4 h +4 h +1 h +1 h +36 h +4 h +4 h +10 h +11 h +10 h +1 h +13070 m +13071 m +13072 m +4 h +13073 m +13074 m +1 h +4 h +74 h +2433 m +4 h +4 h +125 h +358 h +13075 m +13076 m +13077 m +13078 m +12 h +4 h +97 h +13079 m +1 h +1 h +25 h +4 h +109 h +4 h +4 h +1284 m +13080 m +1 h +13081 m +1 h +13082 m +13083 m +13084 m +172 h +1 h +10 h +173 h +4 h +13085 m +4 h +1 h +10 h +10 h +4 h +10 h +4 h +332 h +114 h +2813 m +4 h +4 h +4 h +167 h +4 h +1 h +13086 m +11 h +25 h +13087 m +4 h +1 h +74 h +1 h +110 h +2865 m +10 h +184 h +41 h +57 h +4 h +13088 m +4 h +13089 m +4 h +83 h +10 h +104 h +4 h +1 h +109 h +10 h +13090 m +3558 m +59 h +285 m +3 h +109 h +1 h +1 h +1 h +10 h +1 h +13091 m +4 h +4 h +4 h +10 h +109 h +4 h +4 h +1 h +297 h +125 h +10 h +4 h +386 h +4 h +13092 m +109 h +10 h +295 h +8 h +1 h +4 h +124 h +13093 m +64 h +10 h +1281 m +1714 m +10 h +13094 m +1 h +10 h +10 h +1 h +4824 m +57 h +10 h +10 h +1 h +4 h +1359 h +3 h +13095 m +1 h +13096 m +146 h +4 h +164 h +10 h +3278 m +13097 m +1 h +3909 m +13098 m +13099 m +4 h +10 h +12 h +3383 m +41 h +119 h +1 h +4 h +4 h +10 h +10 h +124 h +2591 m +1 h +4 h +4 h +13100 m +10 h +5053 m +10 h +267 m +11 h +1 h +65 h +4 h +13101 m +10 h +1 h +57 h +104 h +4 h +1 h +13102 m +13103 m +4 h +13104 m +1 h +3089 m +6821 m +1 h +4 h +10 h +13105 m +1 h 
+195 h +13106 m +1 h +4 h +4 h +1 h +4 h +91 h +196 h +4 h +123 h +4 h +13107 m +10 h +119 h +1 h +13108 m +4 h +13109 m +13110 m +4 h +4 h +1 h +13111 m +64 h +13112 m +10 h +1 h +10 h +4 h +156 h +73 h +196 h +10 h +4 h +4 h +368 h +10 h +45 h +13113 m +4 h +4 h +167 h +1 h +1118 m +1 h +13114 m +10 h +8 h +10 h +11 h +4 h +1 h +83 h +11 h +118 h +1 h +10 h +4 h +13115 m +13116 m +13117 m +1 h +7 h +4 h +13118 m +8496 m +935 h +13119 m +69 h +1 h +4 h +10 h +3 h +59 h +10 h +1 h +1006 m +13120 m +114 h +31 h +489 m +1 h +109 h +13121 m +164 h +124 h +4 h +4 h +1 h +12218 m +13122 m +1 h +4 h +4 h +13123 m +4 h +97 h +4 h +1403 h +278 h +4 h +10 h +13124 m +278 h +57 h +4 h +4 h +4 h +276 h +13125 m +13126 m +289 h +56 h +1 h +4 h +1766 h +1646 m +10 h +4 h +11 h +13127 m +4 h +13128 m +1 h +1 h +1556 m +129 h +82 h +1 h +13129 m +4 h +4 h +229 h +1 h +359 h +92 h +10 h +11 h +10 h +13130 m +1 h +13131 m +4 h +1030 h +157 h +124 h +13132 m +13133 m +73 h +11 h +4 h +110 h +939 h +3 h +4 h +13134 m +13135 m +10 h +4 h +4 h +10 h +31 h +13136 m +4 h +13137 m +82 h +10 h +4 h +12 h +1 h +4 h +1 h +1528 m +10 h +124 h +4 h +10 h +11 h +4 h +4 h +10 h +41 h +10 h +10 h +1 h +125 h +13138 m +1 h +4 h +109 h +169 h +4 h +13139 m +13140 m +4 h +82 h +4 h +82 h +1 h +885 h +13141 m +12 h +1 h +13142 m +4 h +1 h +4 h +1 h +229 h +4 h +172 h +4 h +13143 m +4 h +11 h +10 h +4 h +10 h +266 h +13144 m +10 h +4 h +4 h +13145 m +1 h +4 h +558 m +4 h +4 h +4 h +569 h +82 h +13146 m +1 h +1 h +140 h +13147 m +10 h +13148 m +10 h +13149 m +13150 m +10 h +4 h +704 m +4 h +97 h +4 h +57 h +13151 m +1 h +4 h +4 h +4 h +1003 h +10 h +10 h +4 h +8 h +4 h +13152 m +4 h +1 h +1 h +13153 m +10 h +278 h +1 h +10 h +4 h +1 h +276 h +1 h +4 h +4 h +13154 m +1796 m +1 h +31 h +13155 m +13156 m +1 h +57 h +4 h +59 h +57 h +4 h +13157 m +488 h +1 h +4 h +9282 m +663 m +119 h +10 h +31 h +4 h +13158 m +4 h +4 h +11 h +10 h +11 h +11 h +4 h +82 h +10 h +4 h +13159 m +10 h +4 h +10 h +4 h +13160 m 
+13161 m +4 h +11 h +1 h +173 h +109 h +4 h +1 h +4 h +10 h +27 h +10 h +125 h +4 h +4 h +1 h +13162 m +1 h +10324 m +4 h +13163 m +1 h +75 m +13164 m +13165 m +25 h +1 h +4 h +4 h +1 h +10 h +10 h +97 h +10 h +1 h +13166 m +4 h +10 h +1 h +1 h +203 h +59 h +10 h +4 h +13167 m +1 h +1642 h +307 h +13 h +4 h +27 h +1 h +13168 m +10 h +1 h +13169 m +4 h +4 h +83 h +104 h +13170 m +0 h +276 h +1 h +4 h +601 h +230 h +3675 m +3112 m +1 h +464 h +114 h +4 h +13171 m +13172 m +13 h +4 h +266 h +4 h +13173 m +10 h +1 h +13174 m +13175 m +4 h +10 h +1 h +5348 m +4 h +7419 m +135 h +4 h +857 m +1 h +13176 m +219 h +13177 m +13178 m +1 h +2923 h +1 h +10 h +10 h +10 h +13179 m +4 h +4 h +113 h +114 h +276 h +13180 m +13181 m +195 h +377 h +1 h +13182 m +3 h +13183 m +4 h +124 h +1016 h +124 h +13184 m +4623 m +4 h +1 h +13185 m +4 h +109 h +4 h +25 h +5093 m +3 h +4 h +4 h +25 h +13186 m +114 h +1 h +3 h +13187 m +10 h +92 h +4 h +1 h +10 h +1089 h +13188 m +10 h +4 h +10 h +10 h +1 h +10 h +11 h +1 h +10 h +59 h +2924 m +4 h +10 h +13 h +4 h +4 h +1 h +73 h +965 m +157 h +13189 m +4 h +124 h +4 h +1309 h +10 h +338 h +230 h +687 h +4 h +13190 m +83 h +13191 m +11 h +11 h +13192 m +196 h +4 h +1116 m +13193 m +11 h +10 h +10 h +4 h +1 h +12 h +10 h +13194 m +13195 m +10 h +13196 m +13197 m +4 h +4 h +3555 m +10 h +1 h +4 h +97 h +267 m +135 h +1 h +1 h +11 h +10 h +4 h +10 h +1 h +4 h +41 h +1 h +1359 h +13198 m +59 h +4 h +1 h +13199 m +13200 m +13201 m +11 h +4 h +195 h +143 h +1 h +367 h +4 h +124 h +5504 m +1 h +1 h +3398 m +114 h +10 h +4 h +13202 m +13203 m +57 h +10 h +4 h +1 h +4314 m +13204 m +25 h +74 h +68 m +11 h +9400 m +13205 m +4 h +4 h +2510 m +1 h +1189 m +4 h +1 h +83 h +45 h +4 h +10 h +4 h +41 h +41 h +990 h +1 h +1 h +167 h +1886 m +1 h +4 h +13206 m +13207 m +13208 m +147 h +1 h +124 h +3558 m +31 h +1822 h +10 h +10 h +13209 m +13210 m +10 h +94 h +10 h +45 h +13211 m +1 h +13212 m +4 h +119 h +1 h +13213 m +147 h +13214 m +1915 m +4 h +10 h +4 h +1 h 
+4 h +4 h +383 h +13215 m +4 h +64 h +1 h +1620 m +10 h +5917 m +990 h +11 h +1 h +110 h +4 h +10 h +1 h +4 h +55 h +13216 m +195 h +4 h +4 h +147 h +6663 m +1 h +256 h +13217 m +13218 m +1 h +185 h +1 h +4 h +4 h +386 h +726 m +1 h +412 h +147 h +278 h +45 h +536 h +1 h +4 h +4 h +4 h +119 h +1 h +1 h +13219 m +1 h +25 h +10 h +4 h +13220 m +338 h +4 h +4 h +13221 m +10 h +124 h +204 m +13222 m +4 h +4 h +1 h +4 h +31 h +4 h +12 h +4 h +13223 m +13224 m +4 h +1 h +10 h +169 h +13225 m +10682 m +13226 m +13227 m +258 h +57 h +4 h +109 h +13228 m +307 h +57 h +1 h +59 h +4 h +13229 m +22 h +4 h +12 h +1 h +1 h +13230 m +4 h +13231 m +109 h +12 h +4 h +1 h +10 h +757 m +40 h +170 h +10 h +4 h +10468 m +258 h +1 h +13232 m +22 h +4 h +10 h +13233 m +10862 m +4 h +4 h +4 h +10 h +10 h +13234 m +13235 m +4 h +4 h +12 h +4 h +1 h +4 h +1 h +1 h +13236 m +1 h +1 h +1 h +104 h +119 h +4 h +4802 m +10 h +64 h +4 h +911 h +13237 m +13238 m +13239 m +4 h +1 h +10 h +4 h +1 h +4 h +1 h +1 h +1 h +965 m +536 h +4 h +4 h +1 h +4 h +112 h +4 h +4 h +4 h +13240 m +4 h +73 h +4 h +41 h +10 h +92 h +1 h +3 h +2920 m +146 h +4 h +1 h +13241 m +1 h +1 h +10 h +124 h +1 h +31 h +31 h +10 h +888 m +843 m +109 h +59 h +4 h +203 h +13 h +4 h +4 h +83 h +1 h +4 h +285 m +11 h +73 h +13242 m +124 h +83 h +10 h +4 h +1 h +4 h +10 h +3 h +4 h +4 h +11 h +195 h +59 h +195 h +1 h +10 h +10 h +10 h +13243 m +13244 m +11 h +11 h +4 h +297 h +10 h +4 h +468 m +698 m +10 h +1 h +11 h +4 h +124 h +167 h +4 h +13245 m +4 h +4 h +83 h +278 h +1 h +11 h +1 h +4 h +4 h +1 h +4 h +28 h +10 h +1 h +13246 m +10 h +109 h +13247 m +4 h +4 h +83 h +11 h +1 h +4 h +1619 h +1 h +1 h +73 h +4 h +400 m +112 h +4 h +276 h +13248 m +1 h +57 h +1 h +10 h +13249 m +4 h +4 h +1 h +4 h +3 h +1220 m +1 h +10 h +57 h +270 h +1 h +11485 m +109 h +3 h +1137 h +11 h +1 h +172 h +1 h +4 h +10 h +1 h +10 h +1 h +13250 m +2040 m +6107 m +1 h +65 h +1 h +10 h +10 h +192 h +4 h +1 h +4 h +4 h +238 h +2928 m +144 h +13251 m +1 h 
+4 h +1 h +196 h +13252 m +10 h +1 h +1 h +1 h +13253 m +185 h +4 h +13254 m +1 h +1 h +10 h +94 h +13255 m +41 h +4 h +4 h +157 h +911 h +1 h +4 h +656 m +13256 m +10 h +13257 m +97 h +143 h +204 m +4 h +169 h +1 h +4 h +13258 m +4 h +1137 h +1 h +6851 m +1 h +125 h +123 h +11 h +129 h +1 h +10 h +181 h +4 h +10 h +332 h +13259 m +10 h +109 h +1 h +4 h +13260 m +10 h +3539 m +4 h +185 h +11 h +13261 m +4 h +13262 m +4 h +1 h +11 h +123 h +4 h +10 h +109 h +4 h +4 h +10 h +74 h +8 h +10 h +1 h +1772 m +147 h +36 h +10 h +4 h +83 h +4 h +1 h +313 m +13263 m +74 h +104 h +124 h +1 h +1 h +1 h +4 h +10 h +83 h +13264 m +10 h +3 h +4 h +4 h +10 h +13265 m +109 h +4 h +4 h +1089 h +57 h +4 h +4 h +1 h +322 h +10 h +10 h +11 h +5567 m +139 h +1 h +1 h +1 h +10 h +4 h +4 h +1 h +4 h +1 h +13266 m +74 h +266 h +1 h +74 h +3558 h +114 h +31 h +10 h +10 h +1039 m +13267 m +10 h +157 h +1 h +4 h +10 h +108 h +1 h +4 h +1 h +13268 m +4 h +13269 m +13270 m +4 h +1835 m +278 h +4 h +13271 m +1 h +82 h +57 h +8 h +4 h +1 h +5557 m +13272 m +10 h +1 h +1 h +4 h +262 m +536 h +119 h +478 h +295 h +4561 m +13273 m +1 h +4 h +1 h +10 h +104 h +4 h +4 h +4 h +124 h +10 h +4 h +4 h +13274 m +4 h +4 h +4 h +4 h +4 h +23 h +4 h +13275 m +1 h +10 h +4145 m +4 h +4 h +1 h +4 h +488 h +1 h +4 h +1 h +65 h +4 h +1 h +1 h +1 h +4 h +1 h +10 h +13276 m +11 h +1137 h +1 h +4 h +4 h +13277 m +1 h +13278 m +10 h +4 h +84 m +295 h +509 m +113 h +10 h +1 h +1 h +45 h +170 h +13279 m +167 h +1 h +4 h +1 h +13280 m +4 h +4 h +3 h +13281 m +13282 m +4 h +13283 m +1 h +13284 m +1 h +10 h +2238 m +1 h +4 h +4 h +13285 m +10 h +4 h +4 h +4 h +10 h +12 h +4 h +83 h +167 h +4 h +4 h +4 h +1 h +4 h +195 h +3 h +4 h +10 h +4 h +1 h +976 h +4 h +13286 m +10 h +1 h +1 h +31 h +1 h +135 h +319 h +13287 m +4 h +4 h +10 h +4 h +4 h +4 h +3 h +1 h +59 h +13288 m +1 h +4 h +1 h +13289 m +4 h +59 h +13290 m +11 h +13291 m +1284 m +10 h +4 h +1 h +10 h +4 h +1 h +11 h +1261 h +1 h +119 h +6985 m +13292 m +6705 m +1 h 
+1 h +1 h +4 h +4 h +10 h +4 h +82 h +1553 m +1953 m +147 h +64 h +11 h +4 h +4 h +13293 m +1 h +443 h +10 h +4 h +10 h +82 h +4 h +4 h +10 h +109 h +4 h +13294 m +4 h +1 h +10 h +13295 m +41 h +65 h +82 h +5526 m +65 h +1 h +10 h +250 h +1 h +10 h +4 h +737 m +4 h +10 h +147 h +1 h +1 h +3 h +3 h +13296 m +13297 m +4 h +10 h +10 h +13298 m +13299 m +10414 m +11 h +4 h +4 h +4 h +1 h +82 h +4 h +4 h +1 h +4 h +4 h +11 h +4 h +1 h +1 h +4 h +59 h +1 h +11 h +3303 m +4 h +7999 m +73 h +124 h +1016 h +10 h +10 h +4 h +109 h +74 h +143 h +13300 m +4 h +13301 m +57 h +79 h +36 h +13302 m +1 h +13303 m +4509 m +10 h +4 h +11 h +1 h +181 h +1 h +13304 m +1 h +13305 m +1 h +10 h +10 h +13306 m +82 h +1 h +3815 m +10 h +1 h +10 h +10 h +1 h +13307 m +27 h +3 h +11 h +10 h +109 h +1 h +13308 m +8535 m +10 h +1 h +5682 m +10 h +57 h +1 h +4 h +1 h +262 h +1 h +4 h +1 h +10 h +4 h +13309 m +4 h +692 h +109 h +4 h +4 h +109 h +4 h +1 h +377 h +13310 m +146 h +11 h +1 h +295 h +4 h +4 h +4 h +112 h +125 h +4 h +4 h +13311 m +4 h +1619 h +10 h +1 h +1 h +195 h +1 h +412 h +718 h +109 h +1 h +4 h +1 h +692 h +4 h +13312 m +10 h +10 h +64 h +11 h +4 h +124 h +73 h +1 h +4 h +82 h +1 h +169 h +4 h +118 h +190 h +83 h +4 h +48 h +55 h +1 h +10 h +1 h +45 h +9027 m +4 h +1 h +4 h +1 h +11 h +10 h +278 h +10 h +939 h +1 h +55 h +258 h +4 h +10 h +2887 m +10 h +9450 m +13313 m +104 h +10 h +1 h +112 h +4 h +13314 m +4 h +10 h +25 h +10 h +4 h +13315 m +10 h +82 h +1 h +4 h +13316 m +109 h +13317 m +41 h +129 h +97 h +11 h +13318 m +13319 m +74 h +13320 m +1751 m +1 h +4 h +4 h +1 h +1 h +4 h +4 h +13321 m +123 h +11 h +4 h +36 h +1 h +74 h +10 h +620 m +4 h +10 h +10 h +1 h +4 h +1 h +77 h +13322 m +4 h +10 h +1619 h +1 h +147 h +4 h +13323 m +59 h +82 h +1 h +4 h +97 h +57 h +10 h +11 h +4 h +1409 h +1685 m +82 h +4 h +10 h +1 h +13324 m +13325 m +692 h +124 h +190 h +4 h +57 h +307 h +4 h +13326 m +10 h +358 h +601 h +4 h +4 h +295 h +4 h +4 h +4 h +174 m +1650 h +10 h +4 h +124 h 
+157 h +10 h +4 h +124 h +31 h +1 h +13327 m +4 h +13 h +2374 h +1 h +1619 h +388 m +13328 m +10 h +196 h +4 h +10 h +13329 m +13330 m +13331 m +82 h +55 h +31 h +13332 m +4 h +1 h +332 h +13333 m +139 h +13334 m +3 h +13335 m +25 h +4 h +3 h +1 h +307 h +601 h +10 h +12 h +13336 m +103 h +4 h +10 h +10 h +4 h +13337 m +4 h +1 h +10 h +13338 m +640 h +11 h +238 h +1137 h +359 h +172 h +1 h +112 h +11 h +1 h +4 h +13339 m +4 h +1478 h +4 h +4 h +65 h +10 h +83 h +4 h +1 h +59 h +4 h +10 h +1 h +4 h +230 h +313 m +3 h +10 h +4 h +10 h +4 h +4 h +4 h +1 h +1642 h +13340 m +1470 h +4 h +4 h +4 h +1 h +10 h +1 h +11 h +123 h +146 h +10 h +4 h +4 h +13341 m +4 h +1 h +31 h +4 h +1 h +10 h +41 h +1 h +13342 m +31 h +4 h +172 h +13343 m +104 h +4 h +10 h +45 h +4 h +10 h +118 h +1 h +3 h +3 h +1 h +13344 m +4 h +307 h +83 h +13345 m +13346 m +10 h +13347 m +10 h +1 h +10 h +1 h +11 h +1359 h +1117 m +146 h +4 h +11 h +1016 h +4 h +25 h +164 h +371 h +3177 m +13 h +109 h +124 h +11 h +808 m +1 h +196 h +459 h +4 h +4 h +13348 m +2788 h +1 h +48 h +13349 m +1 h +4 h +4 h +447 h +4 h +4 h +31 h +13350 m +104 h +10 h +13351 m +4 h +13352 m +10 h +82 h +1 h +10 h +13353 m +4 h +13354 m +10 h +57 h +10 h +13355 m +4 h +82 h +3307 m +31 h +4 h +13356 m +1 h +3070 m +13357 m +109 h +25 h +8324 m +536 h +1 h +10 h +4 h +4 h +13358 m +1 h +640 h +1 h +73 h +22 h +1 h +1 h +996 m +1 h +13359 m +11 h +10 h +10 h +1 h +11 h +13360 m +1 h +4 h +4 h +4 h +10 h +1 h +358 h +109 h +157 h +13361 m +757 h +10 h +1 h +4 h +3 h +11 h +4 h +13362 m +10 h +10 h +4 h +1 h +1 h +10 h +3028 m +1 h +1 h +4 h +10 h +13363 m +13364 m +10 h +1 h +8184 m +1 h +13365 m +4 h +4 h +4 h +4 h +4 h +4 h +11 h +13366 m +13367 m +13368 m +13369 m +1 h +4 h +1 h +262 h +779 h +124 h +4 h +13 h +65 h +13370 m +1470 h +125 h +59 h +10 h +25 h +124 h +11 h +10 h +4 h +4 h +31 h +27 h +113 h +10 h +1 h +4 h +10 h +13371 m +4 h +172 h +4 h +13372 m +91 h +4 h +1 h +8105 m +4 h +4 h +82 h +13373 m +3 h +1 h +4 h +4 h 
+8879 m +13 h +10 h +4 h +112 h +12 h +4 h +4 h +164 h +4 h +13374 m +1 h +135 h +114 h +13375 m +1 h +4 h +13376 m +11 h +4 h +10 h +4 h +1 h +1 h +1 h +82 h +10 h +4 h +25 h +13377 m +4 h +13378 m +79 h +13379 m +13380 m +4 h +10 h +4 h +1 h +83 h +1 h +4 h +143 h +1359 h +276 h +1 h +1 h +1 h +4 h +94 h +258 h +10 h +1 h +13 h +158 h +4 h +4 h +31 h +10 h +10 h +36 h +1 h +13381 m +4 h +1055 m +1 h +6381 m +10 h +13382 m +36 h +143 h +4 h +4 h +25 h +109 h +278 h +4 h +13383 m +1 h +13384 m +1772 m +10 h +1993 m +330 m +57 h +13385 m +338 h +4 h +109 h +10 h +4 h +13386 m +4 h +13387 m +4 h +13388 m +150 m +13389 m +13390 m +4 h +10 h +4 h +1 h +1 h +3 h +687 h +1 h +1 h +13391 m +4 h +27 h +10 h +4 h +1 h +13392 m +4 h +82 h +430 h +10 h +75 m +10 h +10 h +13 h +1 h +2004 m +11 h +1 h +10 h +238 h +31 h +4 h +31 h +124 h +109 h +976 h +4 h +2124 m +1 h +11 h +114 h +190 h +4 h +10 h +3 h +13393 m +4 h +4 h +13394 m +265 h +4 h +4 h +4 h +13395 m +4 h +10 h +41 h +12 h +7074 m +4 h +74 h +4 h +4 h +13396 m +13397 m +13398 m +4 h +125 h +1 h +4 h +125 h +13399 m +1 h +13400 m +13401 m +13402 m +737 h +31 h +10 h +13403 m +1 h +10 h +4 h +4 h +4 h +13404 m +13405 m +1 h +1 h +13406 m +4 h +13407 m +56 h +4 h +10 h +1 h +13408 m +332 h +1017 h +4 h +258 h +124 h +13409 m +10 h +1 h +1 h +109 h +13410 m +13411 m +13412 m +13413 m +4 h +13414 m +10 h +4 h +125 h +4 h +1 h +11 h +13415 m +4 h +169 h +1089 h +4 h +109 h +1309 h +1642 h +204 h +82 h +601 h +59 h +4 h +4 h +601 h +13416 m +11 h +13417 m +4 h +13418 m +4 h +4 h +4 h +10 h +4 h +169 h +1 h +10 h +10 h +11 h +11 h +1 h +250 h +4 h +10 h +10 h +10 h +1 h +10 h +1 h +13419 m +13420 m +4 h +4 h +4 h +1 h +1 h +4 h +11 h +10 h +11 h +57 h +1016 h +13421 m +1 h +1 h +104 h +13422 m +2285 m +1 h +109 h +25 h +10 h +3 h +4 h +4 h +10 h +1 h +13423 m +13424 m +4 h +4 h +13425 m +51 m +332 h +4 h +13426 m +1 h +73 h +4 h +13427 m +265 h +65 h +278 h +4 h +299 m +13428 m +10 h +1 h +332 h +4 h +13429 m +4 h +13430 m 
+10 h +1 h +1 h +1 h +13431 m +1 h +10 h +8 h +10 h +1127 m +1559 m +1796 m +4 h +4 h +359 h +1 h +57 h +4 h +4966 m +4 h +1 h +97 h +1 h +1 h +10 h +114 h +204 h +2148 m +10 h +13432 m +1074 m +91 h +412 h +13433 m +124 h +10 h +1 h +10 h +13434 m +45 h +57 h +135 h +13435 m +687 h +13436 m +1 h +61 m +82 h +4 h +1 h +104 h +1 h +4 h +4 h +13437 m +13438 m +4 h +4 h +10 h +1 h +13439 m +9397 m +4 h +10 h +4 h +56 h +976 h +10990 m +10 h +4 h +4 h +1 h +195 h +4 h +124 h +13440 m +4 h +3 h +10 h +459 h +520 h +4 h +1016 h +13441 m +4 h +10 h +10 h +10 h +13442 m +4 h +119 h +4 h +13443 m +4 h +13444 m +1 h +1 h +13445 m +13446 m +358 h +13447 m +238 h +1 h +104 h +10 h +40 h +1 h +4 h +13448 m +10 h +13449 m +13450 m +13451 m +1 h +6678 m +143 h +4 h +31 h +13452 m +13453 m +1 h +13454 m +4 h +4 h +156 h +3616 m +25 h +4 h +1504 m +4 h +82 h +1 h +25 h +73 h +4 h +109 h +4 h +1 h +4 h +13455 m +13456 m +10 h +10 h +146 h +156 h +4 h +4 h +172 h +65 h +2788 h +3 h +196 h +1 h +4 h +10 h +13457 m +1 h +13458 m +13459 m +4 h +1 h +510 m +13460 m +11 h +13461 m +1 h +13462 m +13463 m +13464 m +5296 m +1 h +13465 m +423 m +13466 m +164 h +65 h +10 h +4 h +13467 m +1 h +13468 m +1 h +10 h +10 h +13469 m +1 h +368 h +1 h +13470 m +1 h +4245 m +4 h +10 h +13471 m +1 h +11 h +13472 m +83 h +10 h +4 h +10 h +4 h +10 h +13473 m +13474 m +1 h +13475 m +295 h +2308 m +13476 m +4 h +1771 m +1 h +146 h +258 h +123 h +4 h +1764 m +13477 m +13478 m +1 h +59 h +55 h +1 h +4 h +10 h +13479 m +146 h +4 h +13480 m +13481 m +4 h +10 h +10 h +10 h +4 h +181 h +10 h +13482 m +13483 m +1 h +4 h +41 h +4 h +1100 m +10 h +146 h +22 h +1 h +4 h +2733 h +1 h +4 h +1 h +359 h +823 m +1 h +4 h +23 h +124 h +10 h +10 h +4 h +190 h +1 h +13484 m +57 h +10 h +578 h +172 h +1 h +13485 m +31 h +10 h +4 h +57 h +64 h +8346 m +1 h +1 h +258 h +1127 m +4 h +10 h +10 h +1 h +692 h +278 h +265 h +4 h +640 h +229 h +10 h +13486 m +11 h +13487 m +4 h +172 h +104 h +4 h +4 h +4 h +4 h +91 h +13488 m +10 h 
+10 h +1 h +1 h +10101 m +10 h +82 h +1 h +13489 m +4 h +4 h +4 h +13490 m +13491 m +4 h +5708 m +10 h +41 h +1 h +4 h +170 h +13492 m +65 h +4 h +1 h +13493 m +56 h +4030 m +4 h +124 h +4 h +135 h +2851 m +1993 m +1 h +82 h +146 h +4 h +4 h +1 h +1 h +10 h +4 h +13494 m +13495 m +2022 m +4 h +57 h +1 h +4 h +4 h +13496 m +4 h +332 h +4 h +1 h +1 h +4 h +31 h +1 h +386 h +4 h +1137 h +4 h +40 h +146 h +13497 m +3 h +1 h +823 m +57 h +10 h +124 h +13498 m +74 h +135 h +1 h +10 h +41 h +1 h +13499 m +10 h +54 m +5348 m +13500 m +11 h +1 h +1 h +13501 m +204 h +167 h +13502 m +1595 m +31 h +13503 m +1 h +1 h +687 h +1 h +4 h +1 h +4 h +1 h +147 h +1 h +124 h +1 h +1 h +801 m +1 h +1138 m +13504 m +1 h +11 h +10 h +13505 m +83 h +13506 m +11 h +4 h +4 h +73 h +13507 m +13508 m +3737 m +8 h +109 h +13509 m +258 h +258 h +9692 m +4 h +4 h +10 h +1 h +10 h +13510 m +1 h +13511 m +10 h +4 h +10 h +10 h +4 h +59 h +3396 m +4 h +195 h +4 h +1 h +41 h +109 h +13512 m +13513 m +13514 m +1 h +10 h +91 h +270 h +1 h +1 h +1 h +1 h +56 h +13515 m +4 h +169 h +4 h +13516 m +10 h +1 h +13517 m +13518 m +11 h +4 h +73 h +10 h +469 m +167 h +4 h +13519 m +146 h +4 h +4 h +196 h +403 h +7444 m +13520 m +13521 m +2625 m +4 h +4 h +114 h +146 h +4 h +73 h +13522 m +4 h +82 h +10 h +1 h +4 h +13523 m +1 h +4 h +13524 m +11 h +11 h +59 h +11 h +1403 h +10 h +13525 m +10 h +1 h +4 h +10 h +4 h +83 h +1 h +4 h +986 h +4 h +13526 m +4 h +11 h +270 h +10 h +4 h +4 h +208 m +3 h +13527 m +1 h +4 h +13528 m +10 h +1 h +74 h +10 h +4 h +3 h +297 h +13529 m +10 h +4 h +13530 m +13531 m +13532 m +1 h +13533 m +6852 m +4 h +13534 m +1 h +10 h +1 h +1 h +4 h +1 h +1 h +13535 m +13536 m +10 h +10 h +114 h +4538 m +13537 m +74 h +13538 m +59 h +1 h +82 h +1 h +2374 h +13539 m +125 h +10 h +4 h +1030 h +4 h +1 h +10 h +124 h +13540 m +1 h +13541 m +82 h +1 h +13542 m +13543 m +4 h +10 h +687 h +1 h +10 h +6226 m +4 h +10 h +36 h +174 m +13544 m +10 h +1 h +4 h +146 h +13545 m +6863 m +4 h +104 h +4 h 
+10 h +1 h +13546 m +13547 m +65 h +36 h +1 h +4867 m +190 h +1 h +4 h +4 h +13548 m +4 h +4 h +13549 m +4 h +4 h +74 h +4 h +4 h +10 h +5632 m +1 h +1 h +13550 m +1 h +1 h +13551 m +1 h +13552 m +1 h +11 h +13553 m +1 h +13554 m +4 h +1 h +1 h +10 h +11 h +13555 m +169 h +13556 m +1 h +13557 m +129 h +272 m +10 h +13558 m +4 h +83 h +1 h +4 h +4 h +1 h +59 h +3768 m +109 h +1 h +41 h +4 h +4 h +10 h +11 h +4 h +10 h +11559 m +13559 m +10 h +4 h +13560 m +94 h +4 h +12898 m +4 h +434 m +297 h +1 h +4 h +143 h +25 h +110 h +10 h +146 h +1 h +278 h +41 h +13561 m +1 h +25 h +488 h +11 h +10 h +1 h +124 h +1 h +4 h +13562 m +1 h +1 h +82 h +1 h +13563 m +4 h +10 h +447 h +1 h +808 m +11 h +13564 m +13565 m +10 h +10 h +4 h +443 h +10 h +143 h +4 h +4 h +443 h +13566 m +13567 m +1 h +10 h +10 h +1 h +1499 h +195 h +4 h +109 h +13 h +1 h +13568 m +104 h +4 h +13569 m +4 h +4 h +13570 m +10 h +13571 m +307 h +1 h +10 h +4 h +13572 m +83 h +13573 m +1 h +1 h +7395 m +13574 m +10 h +327 m +25 h +1 h +57 h +4 h +4 h +10 h +1 h +190 h +10 h +13575 m +4 h +13576 m +10 h +10 h +124 h +4 h +13577 m +4 h +1 h +10 h +1 h +1 h +13578 m +10 h +10 h +10 h +13579 m +93 h +45 h +412 h +360 m +13580 m +4 h +10 h +157 h +1 h +4 h +1 h +13581 m +11 h +4 h +13582 m +112 h +83 h +109 h +4 h +11 h +463 h +4 h +1 h +13583 m +4 h +119 h +367 h +4 h +41 h +1 h +4 h +1 h +718 h +13584 m +11 h +10 h +258 h +4471 m +4 h +41 h +13585 m +13586 m +2625 m +147 h +113 h +1 h +4 h +13587 m +59 h +1 h +59 h +13588 m +4 h +4 h +4 h +13589 m +41 h +1 h +4 h +150 m +250 h +10 h +13590 m +4 h +109 h +13591 m +1 h +13592 m +4 h +4 h +10 h +13593 m +238 h +4 h +359 h +1470 h +4 h +1 h +13173 m +10 h +11 h +13594 m +4 h +4 h +4 h +4 h +4 h +1765 m +4 h +11 h +1 h +125 h +5619 m +13595 m +1 h +13596 m +4 h +4 h +536 h +41 h +4 h +1 h +1620 m +4 h +4 h +146 h +4 h +1 h +28 h +1 h +185 h +1 h +113 h +11 h +1 h +1 h +36 h +91 h +10 h +31 h +79 h +10 h +4 h +1 h +620 m +10 h +13597 m +13598 m +13599 m +83 h +1 h +1 
h +11 h +55 h +4 h +184 h +10 h +167 h +10 h +48 h +59 h +82 h +1 h +10 h +11 h +124 h +4 h +10 h +4 h +31 h +13600 m +4 h +13601 m +4 h +4 h +13602 m +779 h +1685 m +13603 m +4 h +1 h +13604 m +36 h +4 h +4 h +92 h +4 h +10 h +13605 m +13606 m +1 h +1 h +92 h +65 h +4874 m +10 h +6851 m +10 h +4 h +170 h +13607 m +4 h +4 h +4 h +1 h +1 h +4 h +12 h +4 h +1 h +10 h +4 h +4 h +13608 m +4 h +1 h +285 m +10 h +1 h +1642 h +1 h +13609 m +5917 m +10 h +2308 m +4 h +169 h +4 h +4 h +13610 m +13611 m +25 h +11 h +69 h +1620 m +1 h +10 h +1 h +986 h +1 h +4 h +4 h +12 h +167 h +2717 m +266 h +10 h +196 h +124 h +13612 m +250 h +4 h +55 h +4 h +4 h +1619 h +1 h +10 h +860 m +109 h +185 h +59 h +1 h +4 h +11 h +1 h +31 h +10 h +1 h +13613 m +25 h +4 h +13614 m +103 h +13615 m +1 h +108 h +307 h +82 h +13616 m +3161 m +1 h +4 h +13617 m +13618 m +1 h +4 h +1 h +13619 m +11 h +119 h +4 h +13620 m +433 m +102 m +181 h +40 h +1016 h +8 h +444 m +124 h +13621 m +124 h +4 h +463 h +41 h +10 h +10 h +109 h +13622 m +12 h +4 h +11 h +57 h +64 h +74 h +143 h +4 h +69 h +1 h +83 h +7535 m +13623 m +4 h +1 h +1201 h +4440 m +1559 m +4 h +13624 m +443 h +13625 m +1 h +10 h +13626 m +195 h +10 h +10 h +10 h +4 h +10 h +13627 m +1411 m +36 h +10 h +83 h +4 h +41 h +6699 m +10 h +13628 m +1454 h +13629 m +13630 m +10 h +1 h +1 h +1403 h +4 h +4 h +1 h +869 m +13631 m +1 h +1 h +1 h +1 h +1 h +4 h +13632 m +4 h +4 h +4 h +4 h +4 h +13633 m +13634 m +13635 m +1 h +108 h +1 h +13636 m +4 h +4 h +13637 m +13638 m +2229 m +59 h +25 h +4 h +1 h +10 h +13639 m +4 h +4 h +1 h +13640 m +13641 m +82 h +1 h +4 h +1 h +4 h +12739 m +4 h +4 h +25 h +468 m +1780 h +4 h +266 h +10 h +41 h +13642 m +1 h +4 h +10 h +359 h +1 h +10 h +4 h +4 h +83 h +94 h +104 h +94 h +4 h +1532 m +4 h +3 h +1 h +4 h +2379 m +147 h +4 h +11 h +10 h +3 h +118 h +146 h +146 h +4 h +59 h +10 h +4 h +1 h +13643 m +4 h +4 h +1 h +1 h +228 m +1 h +31 h +10 h +36 h +338 h +13644 m +13645 m +10 h +13646 m +4 h +4 h +1 h +13647 m 
+11 h +164 h +13648 m +4 h +4 h +69 h +4 h +143 h +1 h +172 h +11 h +4 h +13649 m +630 m +10 h +4 h +4 h +8486 m +13650 m +1092 m +1 h +737 h +106 m +1 h +65 h +157 h +1 h +41 h +110 h +13651 m +13652 m +4 h +4 h +13653 m +10 h +1 h +4 h +64 h +25 h +13654 m +1 h +1 h +794 m +272 m +4 h +1 h +64 h +10 h +110 h +13655 m +13656 m +4 h +10 h +4 h +1 h +4625 m +4 h +4 h +13657 m +307 h +13658 m +4 h +1 h +1 h +13659 m +332 h +13660 m +146 h +4 h +13661 m +13662 m +10 h +181 h +4 h +4 h +1 h +6370 m +10 h +13663 m +36 h +10177 m +4 h +278 h +13664 m +4 h +13665 m +4 h +4 h +13666 m +57 h +4 h +10 h +4 h +13667 m +4 h +10 h +4 h +124 h +4 h +13668 m +4 h +83 h +4 h +13 h +11 h +1 h +79 h +4 h +10 h +4 h +1 h +1 h +125 h +443 h +13669 m +11 h +10 h +27 h +124 h +36 h +27 h +196 h +1 h +57 h +135 h +1761 m +10 h +1 h +40 h +11 h +4 h +10 h +31 h +72 m +4 h +195 h +4 h +10 h +1 h +3 h +13670 m +22 h +1955 m +4 h +4 h +13671 m +1 h +110 h +13672 m +13673 m +1003 h +10 h +57 h +8497 m +13674 m +164 h +4 h +13675 m +1 h +3143 m +1445 m +65 h +1 h +1 h +1 h +1 h +4 h +9940 m +4 h +10925 m +13676 m +10 h +4 h +13677 m +11 h +13678 m +1 h +13679 m +4 h +10 h +1 h +13680 m +4 h +3 h +104 h +10 h +13681 m +11 h +41 h +1 h +2788 h +11 h +13682 m +13683 m +4 h +1 h +1309 h +4 h +10 h +11 h +1 h +13684 m +195 h +83 h +116 m +13685 m +13686 m +10 h +8040 m +181 h +13687 m +13688 m +94 h +2309 m +13689 m +1 h +10 h +4 h +4 h +4 h +1 h +13690 m +13691 m +1975 m +125 h +1 h +10 h +10 h +10 h +4 h +4 h +1 h +10 h +4 h +7128 m +4 h +1281 m +146 h +10 h +1 h +1 h +83 h +10 h +13692 m +2418 m +4 h +1 h +10 h +10 h +1 h +83 h +82 h +1 h +1 h +4 h +10 h +91 h +13693 m +13694 m +1 h +11 h +4 h +13695 m +56 h +307 h +1 h +4 h +1 h +295 h +15 m +4 h +4 h +10 h +4 h +1 h +11 h +13696 m +4 h +1 h +31 h +299 m +13697 m +83 h +13698 m +36 h +4 h +4975 m +4 h +1 h +13699 m +114 h +13700 m +1 h +92 h +4 h +7630 m +1 h +1 h +13701 m +4 h +109 h +181 h +13702 m +1 h +10 h +1 h +1 h +2720 m +1 h +299 m 
+119 h +13703 m +36 h +59 h +1 h +433 m +41 h +10 h +13704 m +11 h +110 h +4 h +520 h +448 m +4 h +10 h +4 h +912 m +10 h +1 h +1 h +4 h +3 h +4 h +13705 m +10 h +1 h +13706 m +4 h +2914 m +146 h +135 h +13707 m +4 h +10 h +1 h +109 h +4 h +4 h +74 h +13708 m +4 h +10 h +1822 h +10 h +4 h +1 h +4 h +4 h +1 h +25 h +13709 m +13710 m +13711 m +10 h +59 h +13712 m +4 h +1 h +1 h +478 h +109 h +59 h +156 h +11 h +13713 m +1 h +184 h +13714 m +1 h +4 h +10 h +258 h +13715 m +7814 m +184 h +1 h +65 h +4 h +1 h +13716 m +11 h +11 h +276 h +97 h +4 h +13717 m +1 h +109 h +31 h +1 h +4 h +270 h +1 h +3 h +10272 m +4 h +2308 h +13718 m +4 h +1 h +143 h +4 h +1 h +1100 m +4 h +4 h +4 h +1 h +109 h +10 h +13719 m +779 h +13720 m +1 h +4 h +10 h +57 h +10 h +4 h +1 h +13721 m +59 h +4 h +10 h +1 h +1847 m +13722 m +195 h +10 h +1 h +125 h +13723 m +13724 m +5653 m +4 h +820 m +10 h +10 h +1 h +13725 m +4 h +488 h +1 h +4 h +1 h +4 h +4 h +4 h +4 h +4 h +25 h +13726 m +1 h +105 m +11 h +319 h +73 h +28 h +10 h +1 h +181 h +10 h +4 h +332 h +1685 m +74 h +4 h +4 h +4 h +1 h +13727 m +10 h +1 h +371 h +10 h +82 h +13728 m +250 h +13729 m +1 h +13730 m +4 h +4 h +1 h +31 h +258 h +143 h +13731 m +4 h +11 h +10 h +4 h +1780 h +10 h +10 h +1 h +7253 m +7243 m +4 h +73 h +3622 m +10 h +13732 m +13733 m +4 h +112 h +125 h +124 h +1 h +4 h +13734 m +10 h +2072 m +59 h +83 h +13735 m +1 h +297 h +13736 m +4 h +4 h +83 h +46 m +13737 m +299 h +601 h +10 h +1 h +64 h +1 h +10 h +13738 m +4 h +1261 h +10 h +1 h +10 h +1 h +4 h +13739 m +61 m +1 h +10 h +13740 m +4 h +4 h +1 h +4 h +4521 m +116 m +1564 m +4 h +4 h +82 h +10 h +265 h +1 h +13741 m +4 h +4 h +11 h +10 h +1 h +82 h +1 h +4 h +4 h +857 m +1 h +4 h +13742 m +4 h +4 h +119 h +13743 m +4 h +109 h +10 h +4 h +1 h +1 h +4 h +1 h +13744 m +13745 m +172 h +11 h +56 h +10 h +196 h +1 h +91 h +4 h +1714 m +1 h +4 h +10 h +4 h +1 h +64 h +10 h +4 h +13746 m +1497 m +10 h +64 h +25 h +265 h +83 h +109 h +27 h +109 h +1 h +109 h +4 h +112 h 
+13747 m +13748 m +11 h +1 h +13749 m +399 h +4 h +1 h +4 h +13750 m +13751 m +13752 m +46 m +112 h +1 h +10 h +13753 m +1 h +22 h +4 h +1 h +10 h +467 m +4 h +1 h +4 h +358 h +13754 m +1 h +2379 m +779 h +1 h +45 h +83 h +13755 m +13756 m +4 h +1337 m +332 h +114 h +4 h +108 h +92 h +124 h +10 h +10 h +13757 m +1 h +1 h +123 h +1 h +4 h +4 h +4 h +1 h +13758 m +59 h +11 h +13759 m +4 h +10 h +4 h +1 h +4 h +359 h +4 h +1 h +4 h +83 h +1 h +1 h +13760 m +649 m +140 h +13761 m +13762 m +4 h +13763 m +1 h +13764 m +124 h +185 h +4 h +241 m +1 h +10 h +4 h +41 h +13765 m +31 h +4 h +11 h +4 h +4 h +65 h +10 h +13766 m +10 h +11 h +11 h +10 h +1 h +57 h +13767 m +4 h +13768 m +3742 h +170 h +4 h +2625 h +1 h +4 h +13769 m +1 h +7585 m +1 h +1 h +4 h +10 h +10612 m +13770 m +4 h +4 h +9 m +1 h +640 h +4 h +13771 m +10 h +10 h +4 h +97 h +4 h +1 h +1 h +4 h +13772 m +4 h +11 h +72 m +4 h +10 h +4 h +10 h +1 h +13773 m +109 h +4 h +104 h +1 h +13774 m +276 h +10 h +10 h +4 h +13775 m +94 h +10 h +173 h +1 h +124 h +4 h +25 h +4 h +4 h +10 h +4 h +13776 m +368 h +11 h +13777 m +109 h +299 h +1 h +1 h +1 h +4986 m +687 h +266 h +1 h +10 h +109 h +11 h +1128 m +4 h +4 h +4 h +10 h +10 h +4 h +10 h +28 h +112 h +13778 m +1 h +4 h +1 h +13779 m +11 h +276 h +297 h +1548 m +13780 m +13781 m +13782 m +13783 m +10 h +1790 m +82 h +8 h +570 m +4 h +13784 m +1655 m +1 h +2041 m +4 h +4 h +1 h +13785 m +13786 m +13787 m +1796 m +698 m +12 h +13788 m +13789 m +1 h +4 h +13790 m +1 h +195 h +383 h +4 h +478 h +4 h +13791 m +13792 m +10588 m +737 h +4 h +4 h +11 h +4 h +211 m +264 h +112 h +4 h +10 h +538 h +124 h +13793 m +4 h +13794 m +1030 h +13795 m +1 h +4 h +1138 m +601 h +124 h +295 h +1 h +4 h +1016 h +4 h +181 h +10 h +4 h +195 h +83 h +10 h +10 h +13796 m +4 h +55 h +4 h +1 h +13797 m +10 h +1 h +41 h +4 h +1 h +4 h +4 h +185 h +13798 m +4 h +278 h +1 h +23 h +109 h +1 h +83 h +274 h +4 h +10 h +10 h +27 h +1 h +13799 m +13800 m +92 h +4 h +4 h +4 h +4 h +104 h +13 h +13801 m 
+1 h +1 h +4 h +1 h +13802 m +1 h +10 h +13803 m +97 h +25 h +13804 m +11 h +1 h +4 h +97 h +41 h +4 h +3 h +94 h +10 h +10 h +3 h +1714 m +1 h +1 h +13805 m +4 h +10 h +10 h +13806 m +4 h +1 h +172 h +13807 m +1 h +1 h +332 h +13808 m +13809 m +1 h +185 h +332 h +10 h +10 h +83 h +1 h +4 h +4 h +4 h +13810 m +4 h +13811 m +4 h +146 h +278 h +103 h +11 h +1 h +10 h +13812 m +13813 m +10 h +10 h +687 h +10 h +109 h +4 h +64 h +164 h +4 h +4 h +1 h +109 h +4 h +1 h +10 h +4 h +10 h +4 h +13814 m +478 h +10 h +13815 m +13816 m +59 h +13817 m +1 h +1822 h +13818 m +143 h +172 h +4 h +4 h +65 h +13819 m +10 h +13820 m +4 h +4 h +3435 m +4 h +184 h +661 m +11 h +13821 m +3 h +1 h +4 h +1 h +842 m +4 h +13822 m +488 h +82 h +4 h +1 h +1020 m +124 h +59 h +13823 m +2710 m +1 h +4 h +13824 m +3 h +687 h +1 h +82 h +10 h +4 h +371 h +265 h +10 h +4 h +124 h +9397 m +13825 m +8 h +4 h +1 h +4 h +1 h +4 h +4 h +190 h +13826 m +10 h +83 h +4 h +1 h +41 h +11 h +74 h +129 h +167 h +10 h +13827 m +1 h +113 h +185 h +64 h +4 h +4 h +119 h +4 h +11 h +10 h +4 h +570 m +13828 m +13829 m +332 h +4 h +10 h +4 h +10 h +13830 m +4 h +4 h +1 h +1953 m +27 h +13392 m +4 h +10 h +59 h +208 m +13831 m +13832 m +13833 m +1 h +1 h +4 h +10 h +82 h +11 h +1 h +4 h +447 h +114 h +13834 m +3 h +10 h +10 h +3 h +13835 m +94 h +1 h +4 h +1418 m +13836 m +4 h +1 h +13837 m +1 h +4 h +13838 m +13839 m +467 m +146 h +124 h +4 h +4 h +13840 m +4 h +13841 m +13842 m +13843 m +13844 m +4 h +10 h +273 m +10 h +109 h +332 h +4 h +1556 m +10 h +1442 m +1 h +1 h +4 h +13845 m +13846 m +13847 m +13848 m +1 h +185 h +1 h +447 h +1 h +1 h +10 h +124 h +266 h +4 h +4 h +1 h +11 h +110 h +10 h +10 h +12 h +10 h +97 h +13849 m +1 h +13850 m +371 h +1 h +64 h +4 h +13851 m +186 h +4 h +10 h +4 h +3 h +11 h +13852 m +156 h +4 h +4 h +82 h +13853 m +10 h +36 h +371 h +3 h +4 h +10 h +4 h +10 h +4 h +11 h +358 h +10 h +1 h +2920 m +4 h +4 h +1 h +757 h +1 h +13854 m +13855 m +13856 m +1 h +13857 m +4426 m +13858 m 
+170 h +1 h +4 h +1 h +1 h +4 h +158 h +4 h +10 h +13859 m +4 h +13860 m +10 h +10 h +57 h +13861 m +10 h +1 h +4 h +1 h +13862 m +125 h +83 h +114 h +13863 m +13864 m +4 h +3 h +13865 m +82 h +278 h +59 h +10 h +1 h +1 h +1541 m +1 h +4 h +104 h +13866 m +1 h +1 h +10 h +4 h +147 h +125 h +2508 m +82 h +13867 m +13868 m +40 h +10 h +44 m +146 h +13869 m +13870 m +3 h +172 h +912 m +11 h +104 h +10 h +939 h +73 h +10 h +10 h +4 h +10 h +4 h +10 h +10 h +74 h +13871 m +45 h +3 h +195 h +10 h +13872 m +1 h +1470 h +13873 m +10 h +10 h +1 h +4 h +390 m +1 h +41 h +10 h +1 h +8 h +123 h +368 h +36 h +13874 m +1 h +509 m +4 h +195 h +57 h +1 h +4 h +1 h +1 h +59 h +13875 m +13876 m +4 h +59 h +4 h +4 h +3396 m +1 h +91 h +1 h +57 h +3360 m +13877 m +4 h +119 h +1 h +97 h +113 h +4 h +1 h +1 h +4 h +4 h +10 h +8555 m +13878 m +1 h +4 h +4 h +13879 m +4 h +4 h +79 h +1 h +195 h +31 h +10 h +13880 m +82 h +13881 m +13882 m +1 h +10 h +135 h +4849 m +13883 m +110 h +82 h +27 h +4 h +65 h +1 h +1 h +1 h +266 h +10 h +4 h +4 h +82 h +10 h +1 h +10 h +74 h +196 h +13884 m +10 h +4 h +1 h +13885 m +10 h +1 h +10 h +1 h +10 h +4 h +1 h +4 h +10 h +181 h +25 h +4 h +1 h +124 h +196 h +1 h +41 h +459 h +94 h +13886 m +4 h +124 h +4 h +4 h +83 h +1 h +4 h +10 h +10 h +13887 m +13888 m +11 h +13889 m +73 h +83 h +4 h +4 h +4 h +258 h +4 h +1 h +13890 m +59 h +22 h +36 h +13891 m +1 h +4 h +278 h +13892 m +4 h +4 h +13893 m +4 h +13894 m +13 h +4 h +13895 m +10 h +10 h +794 m +13896 m +1 h +13897 m +1 h +13898 m +109 h +7950 m +11 h +10 h +4 h +2494 m +4 h +575 m +13899 m +59 h +13900 m +4 h +1 h +1914 m +353 m +13901 m +13902 m +4 h +4 h +1 h +295 h +4 h +5374 m +25 h +4 h +918 m +4 h +13903 m +297 h +82 h +13904 m +4 h +13905 m +1 h +13906 m +10 h +12 h +10 h +4 h +4 h +1 h +4 h +1089 h +1 h +10 h +13907 m +1 h +13908 m +4 h +5348 m +1 h +4 h +1 h +1 h +479 m +13909 m +4 h +10 h +12237 m +13910 m +13911 m +1822 h +4 h +13912 m +110 h +299 h +13913 m +13914 m +3704 m +520 h +13915 m 
+10 h +4 h +59 h +57 h +276 h +13916 m +13917 m +1 h +13918 m +13919 m +195 h +10 h +10 h +1 h +4 h +1 h +13920 m +109 h +13921 m +13922 m +4 h +4 h +4 h +112 h +4 h +124 h +73 h +10 h +13923 m +1 h +31 h +59 h +1 h +10 h +976 h +13924 m +104 h +10 h +135 h +69 h +65 h +4 h +1 h +4 h +109 h +13925 m +4905 m +10044 m +1 h +4 h +4 h +9586 m +97 h +13926 m +578 h +1020 m +4 h +4 h +27 h +4 h +4 h +190 h +1725 m +190 h +13927 m +13928 m +4 h +4 h +1 h +13929 m +13930 m +83 h +1 h +4 h +1 h +4 h +10 h +10 h +10 h +10 h +45 h +169 h +4 h +10 h +104 h +13931 m +4 h +7169 m +13932 m +13933 m +10 h +13 h +91 h +3 h +11 h +185 h +976 h +3 h +156 h +4 h +112 h +10 h +57 h +10 h +13934 m +10 h +4 h +386 h +1650 h +190 h +82 h +1 h +1 h +13935 m +1 h +11 h +11 h +1 h +4 h +13936 m +83 h +1635 m +13937 m +3982 m +4 h +83 h +1 h +0 h +353 m +4 h +4 h +10 h +616 m +4 h +13938 m +10 h +1 h +1 h +570 h +13939 m +2215 m +13940 m +4 h +119 h +4 h +1 h +13941 m +13942 m +4 h +10 h +11639 m +10 h +13943 m +11 h +1 h +1 h +109 h +103 h +55 h +10 h +13944 m +13945 m +10 h +4 h +10 h +1 h +493 m +1016 h +64 h +1 h +13946 m +1 h +1 h +1470 h +31 h +4 h +4 h +1 h +4 h +408 m +1 h +3025 m +12 h +4 h +1 h +13947 m +10 h +109 h +41 h +146 h +13948 m +57 h +10 h +4 h +167 h +4 h +13949 m +1 h +13950 m +1 h +13951 m +4 h +4 h +693 m +13952 m +74 h +125 h +1 h +57 h +1 h +124 h +57 h +4 h +1 h +10 h +1137 h +1 h +10 h +55 h +1 h +266 h +4 h +1 h +124 h +4 h +13953 m +1 h +4 h +13954 m +10 h +4 h +13955 m +10 h +13956 m +4 h +1 h +13957 m +97 h +1 h +7565 m +11 h +1 h +13958 m +4 h +4 h +10 h +36 h +10 h +1 h +11 h +1713 m +1 h +1 h +4 h +4 h +10 h +1 h +4 h +1 h +1 h +135 h +7950 m +332 h +3344 m +10 h +13959 m +4 h +13960 m +10 h +1 h +4 h +10 h +13961 m +4 h +1 h +10 h +4 h +36 h +13962 m +10 h +4 h +158 h +13963 m +13964 m +4 h +1 h +10 h +289 h +4 h +1 h +295 h +13965 m +109 h +1 h +13966 m +4 h +4 h +4 h +11 h +4 h +4 h +295 h +1556 m +11522 m +10 h +59 h +1 h +976 h +258 h +11 h +124 h 
+13753 m +444 m +4 h +258 h +10 h +10 h +13967 m +13968 m +83 h +4 h +11 h +4 h +1 h +4 h +1 h +13969 m +31 h +13970 m +10 h +307 h +677 m +1 h +3322 m +10 h +109 h +13971 m +4 h +4 h +31 h +1 h +13972 m +4 h +4 h +10 h +1 h +1 h +4 h +13973 m +1 h +36 h +59 h +1 h +260 m +4 h +1 h +13974 m +10 h +258 h +1003 h +11 h +4 h +83 h +1 h +31 h +3508 m +885 h +10 h +10 h +1 h +13975 m +1 h +13976 m +10 h +250 h +13977 m +1 h +13978 m +10 h +11 h +1053 m +4 h +140 h +13979 m +109 h +4 h +4 h +109 h +965 h +10 h +124 h +4 h +13980 m +4 h +13981 m +59 h +1 h +10 h +10 h +10 h +55 h +13982 m +59 h +10 h +59 h +57 h +13983 m +146 h +1 h +1 h +74 h +170 h +10 h +1261 h +59 h +10 h +13 h +13984 m +4 h +1 h +1 h +1 h +1 h +10 h +13985 m +65 h +13986 m +170 h +109 h +1 h +4 h +13987 m +74 h +10 h +13988 m +1 h +1 h +113 h +11 h +10 h +4 h +146 h +1 h +1 h +1 h +4 h +307 h +13989 m +10 h +2733 h +4 h +3 h +41 h +4 h +1 h +3 h +4 h +11117 m +13990 m +4 h +13991 m +1 h +2733 h +109 h +10 h +10 h +124 h +23 h +10 h +11691 m +4 h +1 h +4 h +4 h +104 h +13992 m +11672 m +13993 m +10 h +1 h +4 h +570 h +13994 m +1 h +5197 m +13995 m +10 h +4 h +4 h +1 h +11 h +10 h +1 h +94 h +4 h +4 h +91 h +1 h +73 h +3 h +1 h +4 h +13996 m +1 h +4 h +13997 m +4 h +10 h +1122 m +10 h +4 h +181 h +4 h +82 h +358 h +4 h +13998 m +4 h +13999 m +10 h +4 h +1796 m +10 h +843 m +1 h +125 h +1 h +1105 m +1 h +10 h +359 h +45 h +1 h +1 h +114 h +1 h +4 h +14000 m +1 h +45 h +4 h +185 h +185 h +1 h +1 h +4 h +4 h +114 h +7713 m +125 h +109 h +1038 m +12 h +186 h +3 h +14001 m +5111 m +2623 m +1 h +11 h +4 h +1 h +10 h +2002 m +124 h +4 h +1 h +14002 m +4 h +14003 m +59 h +11 h +14004 m +779 h +10 h +1 h +642 m +12131 m +4 h +14005 m +4 h +4 h +83 h +4 h +181 h +36 h +4 h +4 h +114 h +1 h +2840 m +4 h +1038 m +1 h +11 h +68 m +45 h +14006 m +307 h +36 h +14007 m +1 h +14008 m +1 h +1 h +10 h +4 h +167 h +4 h +14009 m +59 h +488 h +10 h +687 h +1 h +2846 m +1 h +295 h +110 h +10 h +14010 m +1955 m +1250 h +4 h 
+1 h +4 h +14011 m +14012 m +1 h +4 h +3882 m +1 h +14013 m +4 h +1 h +4 h +9228 m +358 h +14014 m +1 h +2041 m +4 h +11 h +135 h +4 h +8251 m +1 h +4 h +195 h +59 h +14015 m +59 h +10 h +1 h +11 h +56 h +1 h +10 h +1620 h +14016 m +10 h +4 h +1 h +1 h +1 h +266 h +4 h +4 h +196 h +14017 m +4 h +4966 m +1 h +4 h +1 h +11 h +4 h +1 h +1 h +1 h +986 h +4 h +59 h +10 h +1 h +79 h +4 h +4 h +1 h +4 h +1 h +4 h +109 h +1 h +146 h +8882 m +4 h +10 h +4 h +4 h +1619 h +4 h +146 h +4 h +1 h +332 h +4 h +5 m +1 h +538 h +1067 m +4 h +4 h +14018 m +4 h +10 h +14019 m +169 h +10 h +14020 m +65 h +4 h +11 h +4 h +79 h +12 h +10 h +14021 m +14022 m +125 h +4 h +4 h +4 h +14023 m +14024 m +1 h +14025 m +14026 m +10 h +83 h +14027 m +266 h +59 h +1 h +14028 m +4 h +536 h +124 h +10 h +14029 m +10 h +4 h +169 h +14030 m +4 h +14031 m +14032 m +1 h +1 h +196 h +4 h +1 h +83 h +3533 m +4 h +14033 m +1 h +4 h +41 h +12 h +10 h +14034 m +533 m +1 h +59 h +11 h +14035 m +11 h +79 h +4 h +14036 m +1 h +4 h +1 h +109 h +10 h +14037 m +4 h +14038 m +11 h +11 h +1 h +31 h +464 h +1 h +4 h +3 h +64 h +56 h +10 h +14039 m +14040 m +14041 m +14042 m +14043 m +1 h +10 h +10 h +4 h +4 h +4 h +4 h +4 h +4378 m +4 h +4 h +57 h +1 h +4 h +14044 m +1 h +2592 m +11 h +41 h +10 h +27 h +14045 m +229 h +1299 m +1 h +4 h +443 h +10 h +241 m +25 h +1 h +399 h +11 h +14046 m +10 h +10 h +73 h +4 h +10 h +94 h +2281 m +14047 m +266 h +1 h +25 h +1 h +1 h +14048 m +4 h +10 h +4 h +10 h +14049 m +1 h +77 h +1379 m +3 h +10 h +4 h +11 h +4 h +4 h +276 h +1 h +4848 m +4 h +150 m +3 h +14050 m +11 h +14051 m +1 h +14052 m +1 h +10 h +14053 m +964 m +1 h +4 h +57 h +1 h +164 h +10 h +1 h +74 h +14054 m +83 h +3177 m +14055 m +93 h +1 h +4 h +4 h +4 h +14056 m +10 h +11 h +538 h +55 h +14057 m +10 h +93 h +69 h +4 h +258 h +104 h +164 h +11 h +36 h +14058 m +4 h +13 h +1 h +1 h +10 h +59 h +14059 m +1185 m +1 h +5475 m +4 h +541 m +1 h +10 h +10 h +10 h +4 h +104 h +82 h +14060 m +11 h +10 h +4 h +274 h +1 h +4 
h +7827 m +1 h +1 h +1 h +14061 m +10 h +4966 m +4 h +4 h +4 h +195 h +4 h +4 h +14062 m +1 h +14063 m +4 h +79 h +1 h +14064 m +59 h +138 m +13435 m +1 h +14065 m +4481 m +4 h +10 h +433 h +14066 m +10 h +11 h +871 m +4 h +14067 m +4 h +185 h +11 h +4 h +4 h +10 h +79 h +4 h +195 h +4 h +4 h +83 h +276 h +14068 m +1027 m +4 h +12655 m +10 h +146 h +156 h +250 h +4 h +55 h +258 h +229 h +1886 m +4 h +1 h +4 h +10 h +1 h +73 h +4 h +7950 h +31 h +10 h +14069 m +14070 m +92 h +1 h +14071 m +45 h +1 h +124 h +14072 m +1 h +14073 m +1 h +1 h +1 h +10 h +14074 m +4 h +1 h +69 h +1 h +185 h +59 h +4 h +4 h +12964 m +10 h +10 h +1 h +1 h +4 h +1 h +1 h +10 h +124 h +59 h +14075 m +14076 m +1 h +196 h +11 h +1 h +11 h +1 h +1 h +14077 m +41 h +1 h +82 h +10 h +4 h +114 h +229 h +4 h +258 h +10 h +4 h +10649 m +13 h +4 h +6882 m +371 h +1 h +10 h +1 h +97 h +14078 m +1 h +25 h +1 h +14079 m +10 h +4 h +74 h +332 h +10 h +14080 m +14081 m +1 h +2607 m +181 h +124 h +14082 m +11 h +1764 m +4 h +4 h +185 h +332 h +69 h +4 h +4 h +4 h +185 h +114 h +1 h +1 h +4645 m +10 h +73 h +173 h +31 h +14083 m +1105 m +1 h +6726 m +10 h +1847 m +674 m +94 h +4 h +4 h +14084 m +45 h +1 h +888 m +10 h +14085 m +4 h +1 h +14086 m +1 h +10 h +4 h +1 h +10 h +4 h +1 h +4 h +4 h +10 h +1 h +10 h +10 h +1 h +10 h +14087 m +4 h +36 h +4 h +10 h +14088 m +14089 m +1 h +1886 m +14090 m +14091 m +14092 m +1 h +14093 m +146 h +1 h +11 h +4 h +125 h +4 h +14094 m +135 h +276 h +4 h +10 h +14095 m +25 h +14096 m +1 h +1 h +1 h +4 h +642 m +4 h +11 h +1 h +4 h +1 h +10 h +11 h +14097 m +10 h +14098 m +10 h +1 h +14099 m +1 h +14100 m +3 h +10 h +124 h +45 h +14101 m +14102 m +104 h +10 h +14103 m +14104 m +4 h +10 h +238 h +1 h +1 h +14105 m +10 h +45 h +2617 h +4 h +14106 m +1 h +65 h +82 h +31 h +4 h +64 h +172 h +14107 m +1 h +124 h +92 h +1796 m +770 m +276 h +1 h +4 h +36 h +4 h +10 h +139 h +82 h +82 h +14108 m +1 h +119 h +1 h +14109 m +94 h +1 h +10 h +5944 m +14110 m +41 h +1884 m +1 h +14111 m 
+22 h +1 h +4 h +14112 m +1458 m +4 h +14113 m +14114 m +4 h +14115 m +167 h +14116 m +4 h +228 m +14117 m +1 h +135 h +11 h +11779 m +14118 m +1 h +109 h +2824 m +10 h +4 h +4 h +1 h +11 h +4 h +4 h +4 h +147 h +138 m +4 h +704 m +10 h +10 h +74 h +82 h +4 h +10 h +4 h +4 h +4 h +307 h +57 h +4 h +4 h +125 h +4 h +10 h +14119 m +4 h +2535 m +3837 m +14120 m +11 h +83 h +11 h +114 h +14121 m +14122 m +55 h +4 h +10 h +31 h +10 h +57 h +25 h +59 h +14123 m +1 h +1 h +1 h +10 h +10 h +22 h +10 h +4 h +196 h +109 h +1 h +83 h +57 h +4 h +1 h +135 h +14124 m +10 h +1 h +4 h +258 h +1 h +4 h +10 h +14125 m +1 h +195 h +10 h +25 h +124 h +4 h +109 h +74 h +97 h +4 h +14126 m +57 h +358 h +1 h +14127 m +1 h +82 h +14128 m +1886 h +65 h +4 h +1 h +172 h +4 h +4 h +229 h +4 h +1 h +10 h +13 h +14129 m +477 m +4 h +4 h +4 h +4 h +857 m +10 h +4 h +4 h +4 h +14130 m +14131 m +10 h +1 h +14132 m +4 h +2438 m +1 h +481 m +2582 m +5728 m +4 h +10 h +4 h +10 h +14133 m +65 h +1 h +4 h +10 h +1655 m +1 h +10 h +146 h +11 h +10 h +10 h +82 h +1 h +4 h +1 h +1 h +10 h +169 h +1 h +1 h +1 h +1 h +146 h +10 h +4 h +4 h +65 h +14134 m +83 h +82 h +109 h +14135 m +1 h +2412 m +11 h +14136 m +14137 m +10 h +144 h +196 h +14138 m +27 h +14139 m +10 h +1 h +3 h +14140 m +4 h +73 h +10 h +1 h +4 h +1137 h +10 h +92 h +4 h +10 h +1 h +14141 m +4 h +4 h +4 h +258 h +272 h +4 h +4 h +22 h +1 h +587 m +1 h +4 h +1250 h +4 h +5470 m +14142 m +447 h +1957 m +912 m +4 h +10 h +7181 m +1 h +14143 m +1 h +4 h +10 h +14144 m +14145 m +11806 m +185 h +4 h +4 h +4 h +10 h +2928 m +11 h +2056 m +1 h +4 h +10 h +82 h +10 h +10 h +1 h +14146 m +4 h +109 h +14147 m +118 h +1 h +14148 m +10 h +14149 m +1 h +82 h +14150 m +10 h +4 h +45 h +1403 h +4 h +10 h +258 h +1201 h +4 h +266 h +307 h +11 h +1 h +140 h +4 h +1 h +14151 m +2265 m +990 h +64 h +10 h +14152 m +266 h +403 h +4 h +14153 m +4 h +13 h +4 h +125 h +4 h +4 h +1 h +4 h +1016 h +1 h +74 h +1 h +36 h +10 h +10 h +8 h +10 h +14154 m +4 h +14155 m 
+4 h +386 h +92 h +12 h +4 h +184 h +14156 m +4 h +109 h +1 h +368 h +1 h +11 h +10 h +509 m +10 h +4 h +10 h +10 h +353 h +1 h +4 h +12 h +478 h +4 h +10 h +1 h +10 h +434 m +538 h +556 m +1 h +14157 m +4 h +10 h +4359 m +4 h +1 h +1 h +1 h +10 h +4 h +14158 m +4 h +330 m +169 h +1261 h +630 m +1 h +10 h +10 h +278 h +10 h +45 h +4 h +12 h +4 h +2435 m +1053 m +23 h +4 h +267 m +1 h +10 h +75 m +124 h +57 h +3115 m +7474 m +1 h +14159 m +1 h +14160 m +10 h +4 h +1 h +4 h +14161 m +1 h +125 h +82 h +4 h +14162 m +112 h +14163 m +860 m +386 h +31 h +41 h +4 h +4 h +1 h +1 h +10 h +14164 m +185 h +104 h +14165 m +10 h +297 h +14166 m +14167 m +4 h +31 h +91 h +4 h +7271 m +14168 m +4 h +10 h +4 h +1 h +4 h +869 m +1 h +83 h +10 h +10 h +1 h +4 h +14169 m +14170 m +10 h +10 h +104 h +83 h +4 h +59 h +211 m +10 h +25 h +123 h +1 h +1 h +4 h +4297 m +14171 m +4 h +332 h +173 h +31 h +10 h +1 h +83 h +14172 m +14173 m +14174 m +4 h +10 h +1 h +10 h +1 h +1138 m +4 h +82 h +74 h +14175 m +195 h +57 h +5 h +4 h +10 h +4 h +1 h +82 h +1 h +1685 h +14176 m +14177 m +1017 h +4 h +4 h +935 h +135 h +4 h +14178 m +12301 m +4 h +1 h +1 h +4 h +4 h +14179 m +10 h +40 h +4 h +5504 m +92 h +14180 m +124 h +1 h +139 h +10 h +10 h +1 h +10 h +4 h +1 h +4 h +4 h +4 h +3742 h +1 h +14181 m +10 h +575 m +1 h +1 h +14182 m +143 h +1 h +82 h +10 h +1 h +10 h +4 h +3479 m +10 h +1 h +14183 m +4 h +1 h +4 h +3424 m +1 h +4 h +1 h +14184 m +278 h +388 m +124 h +1 h +2172 m +83 h +4 h +14185 m +1 h +368 h +1 h +11 h +4 h +82 h +1 h +4 h +4 h +965 h +1 h +10 h +14186 m +4 h +4 h +1 h +581 m +65 h +14187 m +843 m +109 h +14188 m +14189 m +83 h +109 h +11 h +241 m +10 h +4 h +986 h +4 h +4256 m +4 h +1 h +4 h +1 h +14190 m +1 h +83 h +10 h +911 h +4 h +295 h +14191 m +4 h +1218 m +83 h +1 h +4 h +4 h +59 h +10 h +4 h +478 h +14192 m +4 h +1 h +82 h +10 h +4 h +14193 m +10 h +3 h +14194 m +192 h +1 h +4 h +12571 m +1 h +1 h +14195 m +10 h +31 h +14196 m +4 h +4 h +82 h +10 h +10 h +57 h +14197 m 
+14198 m +4 h +4 h +447 h +74 h +91 h +14199 m +14200 m +10 h +1 h +83 h +1 h +1 h +14201 m +4 h +4 h +8890 m +4 h +4 h +1 h +4 h +1 h +4 h +10 h +1 h +196 h +10 h +1045 m +4 h +4 h +10 h +195 h +14202 m +14203 m +2920 m +4 h +1 h +4 h +14204 m +4 h +1 h +1278 m +14205 m +1 h +1 h +4 h +1 h +10 h +14206 m +307 h +97 h +11 h +14207 m +4 h +4 h +4 h +1 h +4 h +1261 h +4 h +14208 m +295 h +14209 m +330 m +14210 m +14211 m +1 h +4 h +13 h +10 h +14212 m +10 h +4 h +10 h +869 m +4 h +14213 m +359 h +10 h +14214 m +6747 m +13 h +4 h +10 h +4 h +169 h +4 h +14215 m +14216 m +1 h +79 h +4 h +2442 m +14217 m +82 h +1 h +4 h +4 h +1 h +14218 m +4 h +48 h +3 h +4 h +11 h +358 h +4 h +4 h +64 h +307 h +4151 m +14219 m +1 h +1 h +1 h +195 h +14220 m +27 h +1 h +4 h +4 h +14221 m +4 h +129 h +4 h +10 h +1 h +139 h +10 h +295 h +14222 m +13 h +601 h +14223 m +10 h +12 h +14224 m +10 h +10 h +1 h +4 h +1 h +10 h +4 h +14225 m +700 m +14226 m +14227 m +4 h +1 h +14228 m +10 h +14229 m +1 h +1 h +22 h +14230 m +4 h +196 h +10 h +14231 m +1 h +14232 m +14233 m +14234 m +10 h +1 h +4 h +83 h +97 h +1 h +10 h +888 m +1 h +4 h +14235 m +10 h +55 h +1 h +692 h +1 h +4 h +1 h +14236 m +1 h +4 h +14237 m +4 h +195 h +556 m +4 h +22 h +1 h +23 h +13 h +2309 m +1299 m +4 h +4 h +10 h +11 h +4 h +10 h +1 h +1 h +4 h +1504 m +10 h +14238 m +3 h +97 h +1 h +4 h +4 h +575 m +8395 m +14239 m +82 h +14240 m +56 h +172 h +1 h +195 h +4 h +14241 m +4 h +4 h +31 h +1027 m +4 h +59 h +4 h +14242 m +14243 m +10 h +10 h +4 h +4 h +1 h +1685 h +4 h +1 h +3177 m +4 h +104 h +125 h +109 h +1 h +4895 m +4 h +4 h +378 m +4 h +10 h +25 h +1 h +4 h +59 h +1 h +125 h +143 h +10 h +14244 m +14245 m +14246 m +22 h +1 h +14247 m +4 h +82 h +4481 m +79 h +11 h +1 h +36 h +1 h +4 h +10 h +14248 m +14078 m +33 m +14249 m +27 h +79 h +1 h +4 h +1 h +4 h +4 h +40 h +4 h +10 h +14250 m +14251 m +14252 m +169 h +109 h +64 h +125 h +155 m +1 h +1454 h +14253 m +4 h +139 h +14254 m +1 h +10 h +14255 m +1 h +123 h +1 h +143 
h +82 h +10 h +10 h +82 h +196 h +4 h +4 h +73 h +1454 h +4 h +14256 m +14257 m +1 h +10 h +110 h +10 h +41 h +4 h +4 h +25 h +4 h +57 h +4 h +4 h +1 h +4 h +976 h +10 h +10 h +10 h +14258 m +10 h +158 h +4 h +1 h +4 h +1 h +10 h +82 h +10 h +184 h +4 h +4 h +1 h +295 h +14259 m +172 h +1185 m +1 h +10 h +4 h +14260 m +14261 m +36 h +14262 m +1 h +4 h +10 h +14263 m +4 h +2879 m +4 h +1 h +4 h +4 h +91 h +110 h +4 h +1 h +147 h +4 h +1027 m +1 h +4 h +57 h +463 m +1 h +14264 m +4 h +1 h +125 h +533 m +1 h +109 h +1 h +1 h +4 h +10 h +14265 m +1 h +14266 m +181 h +4 h +1 h +1470 h +4867 m +1 h +14267 m +4 h +124 h +109 h +125 h +10 h +10 h +14268 m +82 h +4 h +4 h +4 h +4 h +14269 m +1 h +4 h +10 h +1 h +4 h +169 h +4 h +10 h +1 h +1 h +3923 m +4 h +14270 m +4 h +14271 m +10 h +10 h +36 h +4 h +11 h +4 h +4 h +14272 m +14273 m +14274 m +10 h +4 h +1 h +59 h +1454 h +14275 m +10 h +14276 m +1 h +3 h +4 h +156 h +6413 m +14277 m +8535 m +4 h +1 h +14278 m +10 h +1 h +11 h +14279 m +14280 m +14281 m +14282 m +10 h +1 h +307 h +4 h +459 h +14283 m +276 h +3 h +1 h +4 h +10 h +4 h +10 h +14284 m +27 h +14285 m +4 h +4 h +459 h +955 m +4 h +14286 m +14287 m +14288 m +10 h +1445 m +4 h +109 h +10 h +1 h +533 m +10 h +3299 m +4 h +14289 m +4 h +4 h +1 h +601 h +14290 m +84 m +14291 m +10 h +14292 m +1 h +57 h +25 h +1 h +170 h +92 h +129 h +1 h +1 h +250 h +14293 m +4 h +4 h +14294 m +4 h +250 h +14295 m +1822 h +14296 m +14297 m +4 h +25 h +1 h +10 h +31 h +14298 m +1 h +14299 m +94 h +10 h +124 h +14300 m +14301 m +12 h +4 h +4 h +1 h +3307 m +4 h +10 h +14302 m +125 h +14303 m +4 h +10 h +4 h +124 h +1070 m +4 h +4 h +1 h +4 h +307 h +10 h +57 h +14304 m +14305 m +10 h +3 h +110 h +4 h +82 h +276 h +83 h +4 h +25 h +4 h +135 h +65 h +4 h +14306 m +14307 m +14308 m +4 h +10 h +45 h +299 h +109 h +4 h +1 h +383 h +5567 m +1 h +14309 m +270 h +8697 m +4 h +14310 m +1 h +3 h +14311 m +10 h +14312 m +186 h +1 h +10 h +11 h +4 h +10 h +557 m +156 h +104 h +1 h +14313 m +4 h +1 
h +14314 m +4 h +520 h +14315 m +10 h +14316 m +4 h +45 h +1 h +14317 m +4 h +14318 m +14319 m +14320 m +109 h +1250 h +4 h +59 h +28 h +124 h +1 h +4 h +687 h +13 h +10 h +601 h +1 h +757 h +14321 m +12675 m +4 h +4 h +31 h +620 m +10 h +10 h +14322 m +447 h +11 h +14323 m +1 h +10 h +1 h +10 h +4 h +4 h +4 h +8 h +1 h +14324 m +10267 m +10 h +1 h +1 h +478 h +10 h +10 h +3837 m +4 h +1 h +10 h +14325 m +14326 m +14327 m +83 h +185 h +11 h +41 h +4 h +4 h +4 h +135 h +10 h +11 h +1 h +83 h +82 h +1261 h +1 h +1 h +4 h +113 h +1 h +2308 h +1 h +14328 m +297 h +11 h +4 h +2475 m +14329 m +4 h +25 h +2046 m +4 h +4 h +4 h +14330 m +10 h +12 h +4 h +4 h +2139 m +4 h +4 h +73 h +368 h +59 h +10 h +4 h +1 h +56 h +83 h +14331 m +14332 m +59 h +10 h +129 h +4 h +25 h +4 h +14333 m +124 h +1 h +196 h +4 h +4 h +1 h +4 h +59 h +4 h +1 h +10 h +3 h +4 h +4 h +4 h +114 h +1 h +14334 m +4 h +4 h +2212 m +10 h +10 h +125 h +172 h +109 h +10 h +14335 m +14336 m +278 h +4 h +1 h +158 h +520 h +1 h +14337 m +14338 m +578 h +4 h +1 h +12 h +14339 m +14340 m +92 h +1 h +10 h +14341 m +14342 m +10 h +31 h +718 h +65 h +57 h +358 h +1 h +1 h +14343 m +14344 m +332 h +1 h +4 h +14345 m +10 h +146 h +1 h +1 h +14346 m +10 h +143 h +1 h +135 h +4 h +1 h +7727 m +14 m +14347 m +82 h +1 h +986 h +31 h +14348 m +4 h +14349 m +14350 m +83 h +4 h +4 h +11 h +14351 m +93 h +4 h +119 h +14352 m +4 h +146 h +14353 m +167 h +6057 m +14354 m +4 h +157 h +41 h +112 h +172 h +4 h +11 h +124 h +258 h +109 h +1 h +1 h +383 h +1 h +109 h +14355 m +10 h +14356 m +10 h +6749 m +383 h +1 h +1 h +4 h +69 h +1 h +14357 m +82 h +14358 m +1 h +1 h +109 h +167 h +4 h +1 h +4 h +14359 m +10 h +11 h +14360 m +1 h +14361 m +82 h +41 h +4 h +143 h +14362 m +1 h +14363 m +4 h +1 h +367 m +14364 m +10 h +4 h +41 h +14365 m +14366 m +10 h +119 h +10 h +10 h +10 h +59 h +4 h +4 h +1 h +1642 m +2418 m +1 h +4 h +14367 m +143 h +91 h +10 h +10 h +4 h +104 h +620 m +4 h +14368 m +4 h +25 h +14369 m +1 h +4 h +82 h 
+14370 m +10 h +996 m +97 h +12570 m +11417 m +1 h +1 h +4 h +1269 m +4 h +4 h +4 h +5205 m +27 h +10 h +4 h +4 h +1 h +56 h +14371 m +1 h +4 h +14372 m +4 h +10 h +1535 m +1 h +1 h +1 h +1 h +4 h +4 h +857 m +1089 h +14373 m +1096 m +10 h +22 h +14374 m +4 h +10 h +10 h +1 h +146 h +94 h +83 h +45 h +5616 m +13 h +10 h +4 h +1 h +4 h +1089 h +97 h +14375 m +10391 m +13 h +4 h +10 h +1 h +14376 m +65 h +82 h +4 h +10 h +10 h +1 h +10 h +1642 h +14377 m +4 h +36 h +4 h +1 h +3 h +14378 m +12020 m +258 h +11 h +4 h +14379 m +14380 m +2770 m +146 h +25 h +14381 m +2028 m +14382 m +1 h +10 h +14383 m +14384 m +4 h +68 m +4 h +14385 m +1 h +14386 m +10 h +4 h +11 h +4 h +4 h +14387 m +238 h +10 h +368 h +14388 m +10 h +4 h +4 h +4 h +4 h +307 h +14389 m +14390 m +14391 m +1 h +4 h +1 h +1 h +4 h +57 h +1 h +297 h +5 h +1 h +4 h +5025 m +11 h +14392 m +11 h +4 h +10 h +4 h +3 h +1 h +74 h +14393 m +10 h +10 h +10 h +3143 m +4 h +4 h +794 m +14394 m +4530 m +110 h +10 h +10 h +13 h +41 h +1 h +104 h +4 h +1 h +10 h +124 h +4 h +36 h +4 h +10 h +14395 m +4 h +1 h +4 h +4 h +4 h +10 h +4 h +10 h +10 h +10 h +371 h +124 h +14396 m +1 h +278 h +1 h +322 m +4 h +3 h +4 h +3293 m +59 h +10 h +4 h +1 h +4 h +41 h +10 h +10 h +109 h +4 h +12047 m +12 h +14397 m +4 h +4 h +14398 m +3704 m +1759 m +1016 h +1766 h +4 h +10 h +4 h +4524 m +10 h +10 h +11 h +10 h +10 h +4 h +2194 m +10 h +4 h +13969 m +1 h +6869 m +1 h +143 h +135 h +14399 m +25 h +1 h +4 h +1137 h +4 h +31 h +1309 h +4 h +65 h +1 h +14400 m +79 h +4030 m +976 h +10 h +1 h +10 h +14401 m +10 h +1 h +689 m +14402 m +181 h +4 h +92 h +104 h +10 h +1 h +4 h +11 h +1 h +11 h +4 h +109 h +1 h +4 h +1309 h +14403 m +4 h +195 h +93 h +1 h +14404 m +91 h +4 h +1 h +14405 m +4 h +2339 m +10 h +1 h +10 h +10 h +1 h +104 h +4 h +1 h +1 h +3 h +4 h +1 h +1 h +1 h +57 h +11 h +1 h +278 h +1 h +1 h +642 h +10 h +1 h +1 h +1 h +59 h +14406 m +4 h +1 h +14407 m +14408 m +4 h +14409 m +4 h +4 h +4 h +4 h +4 h +11 h +4 h +10 h +57 h +4 
h +1 h +4 h +3 h +2851 m +83 h +10 h +14410 m +601 h +14411 m +823 m +10 h +1650 h +57 h +176 m +14412 m +3 h +1822 h +1 h +1619 h +14413 m +4 h +1 h +4 h +4 h +14414 m +10 h +14415 m +10 h +195 h +14416 m +1 h +100 m +41 h +4 h +1 h +10 h +4 h +124 h +14417 m +1 h +4 h +14418 m +1 h +3523 m +14419 m +4 h +10 h +808 m +1 h +10 h +1 h +4 h +74 h +4 h +14420 m +4 h +4 h +14421 m +5475 m +14422 m +1 h +27 h +1 h +14423 m +10 h +4 h +14424 m +10 h +4538 m +14425 m +1 h +74 h +1 h +1 h +4 h +4 h +12 h +14426 m +3307 m +8 h +14427 m +14428 m +1 h +1 h +14429 m +211 m +172 h +14430 m +14431 m +14432 m +245 m +14433 m +4 h +10 h +1 h +1 h +14434 m +14435 m +327 m +4 h +57 h +4 h +6726 m +14436 m +4 h +10 h +14437 m +278 h +1 h +4 h +5475 m +11 h +4 h +14438 m +14439 m +14440 m +4 h +4 h +692 h +10 h +14441 m +14442 m +14443 m +10 h +4 h +10 h +10 h +4 h +4 h +25 h +14444 m +4 h +1 h +83 h +1 h +14445 m +4 h +14446 m +1 h +14447 m +8114 m +14448 m +14449 m +1 h +1 h +14450 m +14451 m +59 h +14452 m +4 h +4 h +4 h +4 h +22 h +10 h +1 h +14453 m +10 h +1 h +167 h +14454 m +1 h +109 h +4 h +125 h +14455 m +4 h +1 h +14456 m +4 h +1 h +10 h +4 h +14457 m +143 h +4 h +139 h +10 h +14458 m +4 h +14459 m +4 h +10 h +4 h +1 h +1 h +31 h +1137 h +14460 m +1 h +14461 m +1 h +692 h +64 h +10 h +14462 m +4 h +14463 m +1261 h +1 h +14464 m +1 h +10 h +4 h +1 h +14465 m +4 h +108 h +10 h +297 h +10 h +601 h +952 m +36 h +1 h +1 h +1261 h +124 h +14466 m +10 h +1 h +10 h +3 h +92 h +1027 h +3 h +10 h +14467 m +156 h +124 h +1 h +69 h +1 h +10 h +14468 m +10 h +1 h +125 h +11 h +4 h +14469 m +14470 m +10 h +10 h +14471 m +1 h +10 h +14472 m +4 h +10 h +14473 m +10 h +4 h +143 h +1 h +14474 m +1261 h +69 h +10 h +11147 m +2379 m +14475 m +14476 m +192 h +31 h +41 h +10 h +25 h +10 h +4788 m +4 h +4 h +1 h +1 h +10 h +14477 m +4 h +1 h +4 h +1822 h +41 h +3913 m +14478 m +1 h +4 h +322 m +1 h +14479 m +22 h +14480 m +119 h +11 h +139 h +10 h +1 h +14481 m +14482 m +4 h +125 h +157 h +1 h 
+9837 m +1 h +3 h +14483 m +4 h +319 h +4 h +4932 m +10 h +4 h +5709 m +13 h +2923 h +14484 m +14485 m +1 h +14486 m +61 m +10 h +109 h +10 h +57 h +4 h +14487 m +4 h +167 h +2710 m +14488 m +990 h +258 h +4 h +11 h +10 h +10 h +14489 m +14490 m +1359 h +4 h +14491 m +41 h +14492 m +82 h +692 h +4 h +1128 m +10 h +1196 m +4 h +170 h +4 h +1 h +4 h +10 h +74 h +536 h +14493 m +14494 m +10 h +143 h +4 h +4 h +1886 h +124 h +109 h +1 h +1 h +718 h +10 h +10 h +4 h +10 h +1 h +1 h +10 h +55 h +147 h +10 h +6784 m +339 m +25 h +11 h +1089 h +94 h +57 h +124 h +14495 m +14496 m +358 h +4 h +22 h +57 h +1 h +4 h +14497 m +10 h +59 h +2265 m +279 m +1 h +82 h +520 h +10 h +4 h +36 h +4 h +195 h +14498 m +14499 m +14500 m +28 h +11 h +1092 m +4 h +10 h +74 h +4 h +10 h +1 h +1 h +11 h +2110 m +104 h +4 h +1 h +1 h +10 h +1 h +11 h +109 h +11 h +14501 m +1 h +4 h +2418 m +1 h +25 h +172 h +1 h +918 m +10 h +1 h +14502 m +10 h +11 h +10 h +1 h +10 h +2534 m +1 h +4 h +10 h +4 h +322 h +4 h +4 h +1 h +274 h +59 h +10 h +4 h +1 h +4 h +1 h +4 h +14503 m +10 h +1 h +4 h +104 h +601 h +11 h +14504 m +14505 m +6399 m +147 h +4 h +146 h +10 h +14506 m +169 h +4 h +124 h +14507 m +1 h +4 h +4 h +4 h +238 h +11 h +4 h +4 h +83 h +4 h +4 h +14508 m +104 h +14509 m +1 h +129 h +1 h +250 h +10 h +57 h +14510 m +1 h +10 h +4 h +13 h +14511 m +1 h +11 h +25 h +4 h +1646 m +14512 m +14513 m +4 h +10 h +1 h +14514 m +10 h +10 h +125 h +10 h +10 h +10 h +97 h +4 h +4 h +27 h +1 h +10 h +14515 m +4 h +4 h +14516 m +10 h +11 h +170 h +10 h +1 h +1 h +10 h +14517 m +147 h +31 h +135 h +14518 m +12170 m +1 h +14519 m +4 h +10 h +11 h +1 h +4 h +4297 m +14520 m +4 h +10 h +4 h +184 h +4 h +1 h +13 h +4 h +4 h +11 h +14521 m +4 h +1 h +1 h +7553 m +1884 m +265 h +14522 m +10 h +1 h +104 h +14523 m +56 h +4 h +14524 m +1 h +14525 m +195 h +1 h +14526 m +1 h +10 h +273 m +538 h +14527 m +3484 m +4 h +256 m +4131 m +307 h +195 h +332 h +4 h +158 h +10 h +359 h +11 h +14528 m +12 h +4 h +10 h +4 h 
+14529 m +11 h +1 h +1 h +1 h +125 h +4 h +506 m +10 h +297 h +57 h +4 h +10 h +434 m +57 h +4 h +14530 m +14531 m +4 h +4 h +83 h +4 h +4 h +295 h +110 h +135 h +83 h +278 h +1 h +14532 m +13 h +4 h +1 h +114 h +10 h +10 h +14533 m +4 h +14534 m +3607 m +1 h +266 h +4 h +1 h +1 h +14535 m +10 h +4 h +14536 m +4 h +1796 h +14537 m +10615 m +14538 m +10 h +4 h +804 m +185 h +104 h +358 h +14539 m +4 h +14540 m +10 h +10 h +238 h +125 h +12 h +14541 m +10 h +276 h +10 h +4 h +114 h +4 h +10 h +1 h +125 h +4 h +4 h +14542 m +55 h +181 h +181 h +14543 m +4 h +114 h +1 h +14544 m +1 h +1 h +6135 m +10 h +181 h +4 h +4 h +10 h +1 h +4 h +447 h +4 h +164 h +14545 m +10 h +10 h +146 h +14546 m +10 h +10 h +1 h +195 h +104 h +4 h +4 h +4 h +4 h +14547 m +113 h +74 h +367 h +4 h +1 h +10 h +114 h +4 h +119 h +4 h +4 h +123 h +10 h +104 h +11 h +1 h +14548 m +386 h +10 h +14549 m +1 h +79 h +1 h +1 h +25 h +14550 m +12005 m +14551 m +10 h +14552 m +1 h +4 h +5929 m +10 h +1 h +4 h +4 h +11 h +10 h +119 h +10 h +1 h +10 h +10 h +11 h +14553 m +14554 m +1 h +91 h +10 h +4 h +1 h +97 h +14555 m +4 h +25 h +224 m +10 h +279 m +10 h +1 h +185 h +14556 m +4 h +4 h +4 h +14557 m +125 h +167 h +40 h +124 h +1137 h +25 h +14558 m +4 h +123 h +692 h +14559 m +5505 m +1 h +4 h +1 h +447 h +1 h +10 h +172 h +2379 h +4 h +14560 m +14561 m +14562 m +10 h +10 h +4 h +10 h +82 h +14563 m +14564 m +83 h +10 h +64 h +28 h +1 h +14565 m +1677 m +14566 m +11 h +10 h +10 h +14567 m +14568 m +14569 m +4 h +12329 m +10 h +1 h +377 h +4 h +4 h +367 h +190 h +464 h +124 h +1016 h +11 h +104 h +11 h +14570 m +119 h +31 h +4 h +10 h +73 h +4 h +14571 m +4 h +640 h +4 h +14572 m +124 h +11 h +31 h +167 h +1 h +41 h +1 h +11 h +4 h +857 m +11 h +10 h +11 h +14573 m +204 h +192 h +25 h +10028 m +1089 h +4 h +4 h +1 h +73 h +14574 m +196 h +1 h +104 h +10 h +57 h +14575 m +4 h +14576 m +1 h +14577 m +10 h +4 h +4 h +4 h +57 h +1 h +4 h +14578 m +73 h +10 h +14579 m +25 h +59 h +4 h +4 h +1 h +14580 m +12 h 
+1 h +10 h +2379 h +124 h +1 h +1 h +14581 m +78 m +4 h +14582 m +14583 m +4 h +14584 m +14585 m +1 h +1 h +10 h +4 h +14586 m +14587 m +10 h +82 h +25 h +14588 m +41 h +4 h +10 h +14589 m +10 h +1 h +1 h +1 h +4 h +1 h +10 h +82 h +10 h +4 h +14590 m +14591 m +11 h +219 h +4 h +4 h +4 h +1 h +196 h +11 h +4 h +4 h +55 h +258 h +1 h +10 h +10 h +1 h +14592 m +4 h +169 h +143 h +297 h +1 h +14593 m +14594 m +74 h +82 h +14595 m +241 m +4 h +12 h +123 h +1 h +124 h +4 h +1 h +4 h +83 h +4 h +124 h +1 h +14596 m +9176 m +2447 m +1 h +4 h +4 h +14597 m +14598 m +31 h +4 h +459 h +10 h +4 h +14599 m +10 h +1 h +1 h +10 h +55 h +1 h +11 h +4 h +278 h +146 h +1 h +146 h +1 h +935 h +601 h +10 h +28 h +4 h +14600 m +4 h +10 h +4 h +4 h +14601 m +4 h +4 h +13544 m +4 h +1 h +14602 m +41 h +10 h +1 h +14603 m +14604 m +1697 m +25 h +1 h +14605 m +104 h +14606 m +4 h +1 h +4 h +146 h +82 h +25 h +14607 m +4 h +10 h +14608 m +4 h +4 h +10 h +74 h +14609 m +14610 m +591 m +1 h +1 h +14611 m +10 h +520 h +4 h +4 h +1 h +14612 m +14613 m +4 h +238 h +10 h +2788 m +266 h +4 h +338 m +5 h +1714 m +14614 m +258 h +4 h +1 h +4 h +447 h +55 h +114 h +10 h +181 h +57 h +1 h +4 h +1 h +4 h +196 h +10 h +4 h +823 m +4 h +1 h +1 h +1 h +12 h +11 h +4 h +10 h +129 h +14615 m +10 h +1 h +14616 m +1 h +185 h +14617 m +8950 m +1 h +14618 m +14619 m +10 h +569 m +1 h +14620 m +124 h +4 h +185 h +14621 m +14622 m +1 h +57 h +14623 m +1 h +14624 m +65 h +14625 m +93 h +4 h +14626 m +196 h +3 h +109 h +4 h +1 h +10 h +14627 m +14628 m +4 h +1 h +12131 m +31 h +14629 m +14630 m +1016 h +25 h +14631 m +57 h +4 h +10 h +5478 m +14632 m +109 h +1 h +14633 m +10 h +278 h +10 h +14634 m +10 h +109 h +10 h +4 h +10 h +10 h +14635 m +10 h +83 h +4 h +65 h +14636 m +1137 h +353 h +1 h +1 h +10 h +10 h +14637 m +5 h +109 h +1 h +4 h +4 h +1772 m +10089 m +92 h +10 h +10 h +1 h +1470 h +10 h +1 h +4 h +118 h +1 h +737 h +31 h +14638 m +14639 m +1 h +1595 m +14640 m +299 h +2172 m +14641 m +28 h +4 h +1 h 
+14642 m +3 h +4 h +10 h +65 h +4 h +4 h +1 h +14643 m +11 h +109 h +10 h +1 h +4 h +4 h +1 h +10 h +14644 m +4 h +4 h +14645 m +692 h +14646 m +14647 m +14648 m +57 h +4 h +125 h +4 h +1 h +2733 h +109 h +14649 m +14650 m +1 h +1 h +4 h +10 h +4 h +258 h +109 h +1 h +14651 m +1 h +536 h +4 h +2494 m +1 h +4 h +10 h +383 h +2367 m +4 h +1 h +109 h +14652 m +59 h +3 h +1838 m +195 h +11 h +10 h +4 h +10 h +4 h +4 h +4 h +10 h +1 h +1 h +10 h +14653 m +14654 m +124 h +92 h +10 h +14655 m +1 h +1 h +4 h +14656 m +1227 m +4 h +4 h +4 h +14657 m +1 h +10 h +4 h +10 h +14658 m +913 m +56 h +10 h +4 h +1 h +135 h +14659 m +10 h +83 h +1 h +14660 m +4 h +1 h +1 h +4 h +31 h +4 h +14661 m +1 h +169 h +14662 m +1 h +14663 m +10 h +64 h +4 h +4 h +4 h +274 h +14664 m +1957 m +14665 m +41 h +10 h +135 h +14666 m +11 h +4 h +10 h +10 h +1 h +13 h +10 h +10 h +14667 m +289 h +195 h +2418 h +190 h +10 h +74 h +14668 m +59 h +14669 m +4 h +4 h +4 h +4 h +10 h +104 h +4 h +11 h +3028 m +4 h +1074 m +10 h +4 h +536 h +10 h +1 h +31 h +3 h +4 h +4 h +57 h +1 h +1 h +10 h +10 h +74 h +1 h +10 h +135 h +6869 m +10 h +4 h +14670 m +14671 m +22 h +195 h +109 h +1 h +1 h +124 h +172 h +10 h +10 h +10 h +4 h +14672 m +307 h +2971 m +10 h +4 h +264 m +1 h +97 h +11 h +1 h +4 h +4 h +124 h +1 h +14673 m +1 h +4 h +14674 m +4 h +4 h +4 h +14675 m +184 h +12 h +1 h +2072 m +1030 m +14676 m +11 h +64 h +4 h +13 h +195 h +83 h +14677 m +1137 h +477 m +1 h +3083 m +4 h +10 h +14678 m +14679 m +4 h +10 h +14680 m +1 h +28 h +14681 m +1 h +1 h +1 h +9861 m +4 h +1 h +4 h +10 h +14682 m +83 h +1 h +1 h +9757 m +10 h +14683 m +14684 m +14685 m +10 h +14686 m +1 h +1 h +11 h +11 h +4 h +10 h +104 h +4 h +10 h +10 h +14687 m +14688 m +4 h +14689 m +860 m +1 h +10 h +11 h +11 h +1 h +14690 m +4 h +338 h +4 h +4 h +4 h +14691 m +10 h +104 h +1 h +11 h +4 h +1039 m +10 h +13361 m +114 h +156 h +146 h +1 h +4 h +9727 m +156 h +14692 m +124 h +4 h +1 h +6567 m +1 h +14693 m +295 h +4 h +4 h +143 h +1 h +4 h 
+11 h +14694 m +4 h +13 h +4 h +4 h +14695 m +4 h +195 h +4 h +4 h +1419 m +3 h +964 m +1 h +1 h +109 h +60 m +730 m +1 h +1 h +4 h +109 h +562 m +10 h +10 h +124 h +10 h +143 h +14696 m +10 h +1 h +172 h +1 h +14697 m +4 h +14698 m +4 h +143 h +3 h +4900 m +36 h +4 h +14699 m +1 h +4 h +45 h +10 h +10 h +82 h +1 h +55 h +10 h +976 h +4 h +1 h +2359 m +10 h +4 h +59 h +1 h +4 h +74 h +4 h +1697 m +4 h +3 h +36 h +1 h +4 h +1 h +1 h +4 h +14700 m +124 h +4 h +289 h +4 h +1 h +57 h +3 h +10 h +4 h +4 h +110 h +4 h +1 h +1 h +11 h +332 h +4 h +14701 m +14702 m +1 h +112 h +10 h +1 h +4 h +1 h +1 h +10 h +258 h +14703 m +82 h +14704 m +172 h +14705 m +10 h +10 h +10 h +1 h +10 h +1 h +10 h +12 h +640 h +59 h +8 h +4 h +1 h +14706 m +4 h +10 h +125 h +4 h +4 h +10 h +10 h +4 h +57 h +4 h +1 h +1 h +74 h +11 h +1 h +147 h +4 h +1 h +1 h +10464 m +10 h +31 h +14707 m +147 h +10640 m +14708 m +4 h +4 h +4 h +230 h +8938 m +843 m +4 h +83 h +10 h +4 h +4 h +14709 m +4 h +1713 m +4 h +4 h +278 h +64 h +10 h +104 h +6702 m +4 h +1 h +230 h +278 h +14710 m +10 h +57 h +383 h +11 h +4 h +4 h +10 h +4 h +82 h +125 h +1 h +1 h +10 h +10 h +10 h +10 h +4 h +14711 m +520 h +4 h +4 h +4 h +10 h +1 h +1 h +69 h +4 h +1 h +1 h +1 h +1639 m +4 h +569 m +14712 m +986 h +9933 m +4441 m +4 h +1 h +258 h +4 h +14713 m +40 h +4 h +509 m +857 m +4 h +83 h +4 h +14714 m +4 h +4441 m +447 h +1 h +190 h +14715 m +266 h +45 h +1 h +110 h +412 h +146 h +4 h +278 h +1 h +143 h +10 h +10 h +4 h +169 h +1 h +14716 m +258 h +1027 h +1 h +10 h +10 h +82 h +4 h +195 h +2163 m +1 h +14717 m +1 h +25 h +10 h +10 h +104 h +4 h +10 h +164 h +185 h +1337 m +4 h +27 h +10 h +147 h +4 h +147 h +4 h +167 h +4 h +14718 m +4 h +4 h +14719 m +114 h +184 h +359 h +57 h +4 h +1 h +14720 m +238 h +2442 m +4 h +1 h +1 h +4 h +4 h +258 h +10 h +10 h +10 h +1 h +4 h +14721 m +10 h +139 h +1 h +10 h +10 h +1 h +13879 m +10 h +4 h +10 h +13536 m +4 h +146 h +125 h +45 h +14722 m +192 h +1 h +94 h +1 h +10 h +4 h +4 h +4 
h +1 h +14723 m +14724 m +10 h +125 h +1 h +10 h +14725 m +536 h +14726 m +4 h +1 h +14727 m +14728 m +125 h +307 h +11 h +238 h +109 h +1 h +14729 m +1 h +5125 m +181 h +91 h +4 h +13 h +4 h +45 h +104 h +4 h +1 h +4 h +4 h +14730 m +4 h +4 h +10 h +1 h +55 h +4 h +14731 m +45 h +59 h +14732 m +1 h +4 h +14733 m +10 h +4 h +59 h +4 h +4 h +12 h +10 h +41 h +1 h +1 h +92 h +14734 m +10 h +41 h +4 h +1 h +1 h +31 h +4 h +14735 m +10 h +14736 m +1 h +14737 m +11 h +14738 m +1 h +195 h +1 h +170 h +4 h +14739 m +59 h +1 h +11 h +4 h +4 h +124 h +181 h +1 h +238 h +14740 m +4 h +10 h +14741 m +14742 m +1337 m +114 h +14743 m +31 h +45 h +4 h +14744 m +8197 m +3 h +14745 m +10 h +79 h +27 h +10 h +4 h +1737 m +386 h +4 h +14746 m +4905 m +1 h +1 h +10 h +229 h +1 h +4 h +4 h +195 h +10 h +11 h +295 h +14747 m +11 h +1 h +2769 m +14748 m +14749 m +14750 m +4 h +92 h +14751 m +1 h +581 m +4 h +4 h +195 h +14752 m +4 h +332 h +278 h +12805 m +4 h +14753 m +14754 m +169 h +10 h +10 h +4177 m +36 h +14755 m +4 h +12218 m +4 h +14756 m +10 h +5557 m +31 h +10 h +4 h +383 h +4 h +14757 m +4 h +1 h +4 h +2591 m +14758 m +1 h +1 h +3995 m +146 h +119 h +108 h +1 h +10 h +14759 m +1 h +265 h +79 h +146 h +4 h +1 h +14760 m +4 h +2490 m +10 h +123 h +109 h +14761 m +1 h +10 h +7479 m +11 h +4 h +14762 m +359 h +104 h +125 h +14763 m +4 h +1 h +41 h +1 h +282 m +14764 m +9933 m +276 h +4 h +4 h +14765 m +4 h +1 h +1 h +14766 m +12 h +10 h +14767 m +104 h +1 h +14768 m +11 h +10 h +14769 m +4 h +1 h +1 h +4 h +4 h +41 h +358 h +1 h +1 h +10 h +4 h +1 h +1 h +4 h +27 h +1 h +92 h +14770 m +14771 m +10 h +124 h +332 h +1 h +4 h +1 h +4 h +14772 m +1 h +10 h +10 h +4528 m +3 h +103 h +31 h +1 h +10 h +10 h +4 h +4 h +10 h +614 m +626 m +173 h +57 h +14773 m +14774 m +10 h +4 h +167 h +4 h +4 h +14775 m +14776 m +1 h +31 h +238 h +1 h +4 h +1 h +10 h +109 h +14777 m +319 h +10 h +4 h +4 h +1 h +83 h +14778 m +11 h +4 h +14779 m +4 h +4 h +10 h +10 h +1 h +196 h +8683 m +443 h +83 h +10 
h +124 h +11 h +4 h +4 h +4 h +4 h +14780 m +10 h +14781 m +8890 m +11 h +10 h +1 h +14782 m +10 h +4 h +11 h +1 h +124 h +4 h +4 h +4 h +14783 m +4 h +12338 m +1791 m +1796 h +4 h +10 h +10 h +4 h +82 h +10 h +4 h +4 h +4 h +258 h +14784 m +1 h +124 h +3 h +1 h +1 h +1 h +4 h +14785 m +4 h +14786 m +14787 m +124 h +4 h +14788 m +4 h +4 h +14789 m +113 h +13 h +4 h +14790 m +4 h +4 h +14791 m +14792 m +106 m +1261 h +4188 m +10 h +14793 m +14794 m +4 h +1 h +1 h +10 h +3750 m +4 h +14795 m +1 h +1 h +14796 m +11 h +119 h +14797 m +1 h +10 h +73 h +4 h +10 h +25 h +14798 m +83 h +14477 m +10 h +955 m +1 h +14799 m +4744 m +14800 m +1780 m +14801 m +104 h +14802 m +45 h +14803 m +276 h +10 h +1 h +55 h +14804 m +4 h +14805 m +75 m +14806 m +11 h +1 h +4 h +14807 m +69 h +4 h +399 h +4 h +4 h +14808 m +104 h +733 m +31 h +14809 m +10 h +14810 m +14811 m +124 h +4 h +11 h +14812 m +10 h +1952 m +4 h +14813 m +110 h +11 h +14814 m +83 h +10 h +4 h +1 h +4 h +10 h +4 h +4 h +1 h +4 h +57 h +10 h +57 h +692 h +10 h +114 h +64 h +12551 m +4 h +10 h +4 h +167 h +4 h +4 h +4 h +124 h +1 h +10 h +4 h +1 h +1 h +10 h +181 h +83 h +10 h +4 h +10 h +4 h +10 h +82 h +10 h +10 h +83 h +82 h +3170 m +14815 m +1 h +8 h +14816 m +14817 m +1 h +1 h +7395 m +4 h +270 h +307 h +14818 m +4 h +6599 m +4 h +59 h +1 h +10 h +4 h +11 h +1 h +64 h +10 h +4 h +14819 m +83 h +4 h +10 h +10 h +109 h +1 h +371 h +7592 m +4 h +4 h +14820 m +14821 m +278 h +41 h +1 h +4 h +82 h +11 h +124 h +125 h +14822 m +167 h +10 h +10 h +14823 m +4 h +4 h +10 h +1 h +266 h +83 h +1 h +4 h +4 h +14824 m +1330 m +31 h +14825 m +10 h +4 h +124 h +1574 m +11 h +763 m +41 h +14826 m +4 h +14827 m +1 h +13 h +4 h +14828 m +4 h +10 h +12637 m +10 h +1 h +4 h +14829 m +14830 m +14831 m +4 h +13944 m +642 h +14832 m +4 h +11 h +1 h +10 h +14833 m +10 h +4 h +10 h +1725 m +14834 m +10 h +4 h +4 h +14835 m +82 h +4 h +14836 m +25 h +14837 m +14838 m +10 h +4 h +1 h +68 m +4 h +10 h +1 h +4 h +167 h +144 h +4 h +14839 m 
+10 h +4 h +4 h +4 h +4 h +4 h +10 h +538 h +10 h +10 h +4 h +10 h +2845 m +36 h +1 h +408 m +14840 m +124 h +330 h +114 h +13 h +1 h +74 h +10 h +3 h +14841 m +11486 m +4 h +10 h +987 m +14842 m +1 h +10 h +14843 m +14844 m +4 h +2885 m +112 h +4 h +146 h +10 h +4 h +10 h +10 h +41 h +14845 m +1 h +57 h +2184 m +1 h +10 h +4 h +14846 m +79 h +1 h +4 h +144 h +27 h +4 h +4 h +10 h +14847 m +4 h +570 h +14848 m +1 h +3 h +4 h +124 h +4 h +14849 m +14850 m +10 h +14851 m +10 h +1 h +125 h +14852 m +1 h +14853 m +1 h +36 h +73 h +69 h +170 h +8 h +83 h +10 h +14854 m +25 h +14855 m +4 h +4 h +1 h +10 h +1 h +57 h +4 h +1 h +10 h +4 h +1 h +4 h +14856 m +4 h +14857 m +164 h +14858 m +1 h +10 h +4 h +1 h +14859 m +869 h +14860 m +82 h +4 h +10 h +266 h +10 h +330 h +1 h +10 h +10 h +83 h +14861 m +10 h +73 h +4 h +45 h +3396 m +1 h +74 h +124 h +4 h +64 h +5544 m +1 h +10 h +196 h +4 h +14862 m +14863 m +59 h +4103 m +14689 m +2699 m +146 h +147 h +299 h +14864 m +147 h +4 h +1 h +1 h +10 h +10 h +82 h +4 h +4 h +4 h +1 h +447 h +299 h +10 h +10 h +83 h +83 h +307 h +279 h +14865 m +1 h +65 h +10 h +1 h +1 h +119 h +8 h +1016 h +14866 m +1 h +238 h +65 h +1 h +1780 m +4 h +74 h +125 h +14867 m +4 h +986 h +4 h +14868 m +14869 m +57 h +4 h +10 h +114 h +4 h +1105 h +4 h +14870 m +14871 m +1 h +1 h +4 h +10 h +4 h +57 h +14872 m +14873 m +10 h +4 h +4 h +10 h +7300 m +14874 m +6668 m +488 h +59 h +14875 m +10 h +10 h +14876 m +59 h +1 h +10 h +196 h +1 h +10 h +14877 m +25 h +123 h +4 h +4 h +4 h +4 h +265 h +1 h +173 h +14878 m +10 h +4 h +1281 m +1 h +10 h +14879 m +147 h +10 h +4 h +4 h +4 h +196 h +2275 m +57 h +14880 m +3 h +2022 m +1 h +3048 m +986 h +8643 m +11 h +7872 m +1642 h +1 h +196 h +14881 m +10 h +10 h +1 h +1 h +14882 m +83 h +4 h +4 h +4 h +258 h +4 h +1 h +4 h +1 h +1 h +4 h +4 h +578 h +167 h +4 h +4 h +1 h +1137 h +1 h +10 h +109 h +10 h +4 h +4 h +265 h +172 h +1 h +1 h +196 h +1 h +73 h +1 h +36 h +57 h +4 h +14883 m +4 h +1 h +4 h +1 h +4 h +885 h 
+256 m +563 m +4 h +14884 m +14885 m +4 h +10 h +1 h +10 h +14886 m +1 h +27 h +158 h +1 h +1070 m +57 h +10 h +4 h +4 h +196 h +82 h +10 h +4 h +274 h +4 h +74 h +12884 m +4 h +10 h +4 h +156 h +4 h +14887 m +443 h +11 h +14888 m +14889 m +181 h +258 h +14890 m +4 h +4 h +14891 m +4 h +4 h +4 h +14892 m +2444 m +1 h +4 h +4 h +10 h +1 h +1454 h +4 h +135 h +11 h +59 h +4 h +1 h +10 h +124 h +4 h +124 h +11 h +3 h +4 h +1 h +12755 m +10 h +12 h +104 h +11 h +14893 m +5557 m +25 h +10 h +10 h +2846 m +14894 m +10 h +82 h +1 h +4 h +135 h +10 h +4 h +10 h +1 h +27 h +41 h +1 h +4 h +14895 m +1 h +4 h +164 h +569 h +1 h +4253 m +14896 m +57 h +57 h +1 h +4 h +82 h +10 h +10 h +1 h +1 h +1 h +82 h +4 h +1 h +1 h +14897 m +14898 m +1 h +4 h +59 h +10 h +1 h +4 h +146 h +4 h +94 h +14899 m +1 h +41 h +1 h +11 h +83 h +443 h +4 h +82 h +119 h +94 h +1 h +1 h +65 h +10 h +4 h +1 h +10062 m +4844 m +14900 m +10 h +55 h +4 h +10 h +4486 m +1 h +1 h +13349 m +464 h +4 h +1199 m +1 h +4 h +14901 m +14902 m +1 h +7479 m +4 h +1 h +4 h +4 h +4 h +14903 m +4 h +1 h +1 h +14904 m +10 h +125 h +31 h +10 h +1 h +1 h +11 h +10 h +258 h +41 h +3293 m +14905 m +14906 m +4 h +14907 m +195 h +14908 m +1 h +1 h +1 h +3 h +186 h +14909 m +11 h +10 h +10 h +4 h +1 h +1403 h +97 h +4 h +1 h +1 h +113 h +4 h +1 h +1 h +109 h +4 h +4 h +403 h +10 h +31 h +31 h +10 h +1 h +25 h +1 h +12 h +10 h +4 h +10 h +1 h +82 h +83 h +1 h +4 h +1 h +14910 m +14911 m +14912 m +278 h +1 h +14913 m +14914 m +4 h +4 h +1 h +11 h +358 h +65 h +1822 h +143 h +4 h +25 h +14915 m +172 h +2379 h +14916 m +276 h +125 h +4 h +4 h +10 h +14917 m +1 h +196 h +4 h +27 h +4 h +1250 h +1 h +14918 m +185 h +10 h +1 h +4 h +14919 m +4 h +4 h +4 h +14920 m +10 h +4 h +14921 m +109 h +14922 m +270 h +10 h +4 h +1 h +1 h +57 h +270 h +14923 m +14924 m +14925 m +307 h +14926 m +83 h +4 h +1 h +123 h +2594 m +10 h +12131 m +55 h +14927 m +3095 m +3 h +192 h +11 h +10 h +10 h +4 h +4 h +190 h +79 h +1 h +4 h +14928 m +1 h +10 h 
+1 h +14929 m +279 h +195 h +124 h +4240 m +4 h +4 h +14930 m +10 h +124 h +1 h +386 h +41 h +14931 m +1 h +11 h +4 h +4 h +83 h +4 h +4 h +11766 m +4 h +64 h +1 h +4 h +157 h +4 h +1 h +13 h +14932 m +125 h +1 h +4 h +195 h +4 h +125 h +10 h +94 h +14933 m +4 h +57 h +31 h +14934 m +4 h +10 h +14935 m +4 h +64 h +82 h +11 h +4 h +97 h +14936 m +4 h +73 h +46 m +1 h +57 h +1454 h +1 h +1 h +14937 m +4 h +13 h +1 h +112 h +14938 m +10 h +14939 m +1 h +14940 m +4 h +25 h +4 h +109 h +10 h +14941 m +1 h +169 h +14942 m +10 h +4 h +488 h +4 h +27 h +1 h +4 h +10 h +1 h +14943 m +158 h +10 h +10 h +22 h +124 h +14944 m +2721 m +1 h +143 h +14945 m +14946 m +10 h +73 h +1 h +1697 h +14947 m +41 h +1 h +10 h +4 h +11 h +14948 m +1 h +14949 m +4538 m +10 h +11 h +10 h +4 h +279 h +4 h +8 h +147 h +4 h +14950 m +167 h +10 h +106 m +1 h +1 h +14951 m +1650 h +10 h +1 h +10 h +14952 m +1 h +10 h +1 h +14953 m +195 h +173 h +11 h +3 h +1 h +14954 m +4 h +1 h +4 h +1 h +332 h +10 h +4 h +4 h +10 h +307 h +1284 m +2887 m +2928 m +10 h +2163 m +3 h +196 h +14955 m +10 h +1 h +14838 m +11 h +1 h +146 h +4567 m +4 h +14956 m +3 h +14957 m +4 h +1 h +10 h +73 h +1 h +4 h +4 h +4 h +14958 m +1 h +14959 m +4 h +12 h +14960 m +125 h +4 h +14961 m +14962 m +1915 m +1 h +14963 m +1 h +1 h +640 h +258 h +4 h +14964 m +14965 m +11 h +181 h +4 h +6663 m +14966 m +996 m +4 h +4 h +11 h +4 h +1 h +14967 m +15 m +11 h +278 h +4 h +4 h +14968 m +14969 m +192 h +195 h +14970 m +82 h +31 h +13 h +27 h +8 h +25 h +14971 m +14972 m +1 h +3 h +124 h +57 h +14973 m +1 h +1 h +11 h +4 h +4 h +4 h +4 h +14974 m +1 h +14975 m +9411 m +4 h +10 h +266 h +97 h +1 h +55 h +125 h +266 h +14976 m +14977 m +4 h +14978 m +4 h +10 h +1 h +274 h +57 h +14979 m +840 m +3161 m +14980 m +4 h +1 h +146 h +59 h +73 h +1677 m +124 h +1775 m +196 h +4 h +4 h +14981 m +147 h +779 h +1374 m +4 h +5562 m +1 h +4 h +4 h +1 h +4 h +1 h +65 h +4 h +195 h +4 h +11 h +4 h +14982 m +14983 m +4 h +1 h +4 h +79 h +4 h +1 h +125 h 
+631 m +14984 m +14985 m +4 h +4 h +8324 m +10 h +14986 m +1 h +10 h +82 h +1 h +1 h +1 h +1 h +1 h +4 h +97 h +4 h +14987 m +10 h +10 h +9176 m +14988 m +190 h +4 h +64 h +31 h +9400 m +10 h +1 h +10 h +14989 m +4 h +1 h +14990 m +538 h +14991 m +4 h +14992 m +1 h +1 h +1714 h +14993 m +109 h +1 h +4 h +4 h +11 h +14994 m +4 h +2064 m +1 h +57 h +1 h +4 h +8188 m +4 h +1 h +10 h +4 h +14995 m +125 h +1685 h +11 h +10 h +1 h +14996 m +538 h +1 h +5541 m +14997 m +10 h +14998 m +10 h +4 h +124 h +14999 m +146 h +15000 m +1 h +2002 m +4 h +4 h +10 h +4 h +10 h +4127 m +83 h +687 h +3 h +4 h +144 h +4 h +10 h +15001 m +10 h +4 h +4 h +481 m +8610 m +15002 m +11 h +5125 m +10 h +15003 m +15004 m +15005 m +10 h +15006 m +1 h +15007 m +15008 m +12 h +332 h +4 h +4 h +65 h +15009 m +3141 m +1 h +4 h +4 h +8 h +15010 m +4 h +15011 m +65 h +6963 m +10 h +10 h +563 m +1 h +15012 m +15013 m +10 h +15014 m +15015 m +1 h +4 h +172 h +4 h +1293 m +94 h +1 h +15016 m +57 h +15017 m +10 h +97 h +4 h +15018 m +1 h +13980 m +4 h +332 h +156 h +4 h +6260 m +4 h +1 h +238 h +55 h +229 h +92 h +4 h +4 h +1 h +4 h +601 h +109 h +229 h +196 h +15019 m +15020 m +1 h +11 h +1 h +11 h +124 h +15021 m +4 h +2054 m +4 h +1 h +4 h +11 h +10 h +15022 m +110 h +113 h +69 h +135 h +15023 m +1359 h +4 h +156 h +15024 m +15025 m +1 h +15026 m +1 h +10 h +15027 m +4089 m +2891 m +10 h +1 h +5422 m +536 h +15028 m +1 h +8 h +57 h +15029 m +1 h +15030 m +4 h +4 h +4 h +4 h +383 h +4 h +363 m +1 h +4 h +10 h +4 h +11 h +1 h +285 m +885 h +3 h +59 h +10 h +15031 m +185 h +41 h +4 h +1 h +9691 m +4 h +4 h +10 h +4 h +10 h +3 h +1 h +170 h +147 h +59 h +15032 m +1070 m +4 h +10 h +82 h +6783 m +4 h +15033 m +15034 m +1 h +15035 m +135 h +1 h +15036 m +1 h +1 h +1 h +48 h +4 h +83 h +1 h +146 h +10 h +1 h +1 h +4 h +10 h +135 h +1953 m +15037 m +1409 m +27 h +10 h +56 h +4 h +10 h +10 h +147 h +857 h +124 h +94 h +4 h +15038 m +10 h +10 h +359 h +4 h +12 h +4 h +1 h +10 h +14570 m +15039 m +15040 m +2303 m 
+10 h +83 h +10 h +1 h +4 h +64 h +4 h +1 h +10 h +2928 m +10 h +4 h +1 h +196 h +15041 m +1 h +15042 m +10 h +4 h +4 h +1261 h +4 h +57 h +4 h +15043 m +459 h +124 h +15044 m +1 h +55 h +1 h +45 h +10 h +15045 m +10 h +1645 m +4 h +4 h +1 h +1 h +4 h +1 h +124 h +83 h +55 h +649 m +65 h +10 h +4 h +1198 m +15046 m +575 h +1 h +15047 m +195 h +338 h +1 h +1 h +124 h +1205 m +10 h +15048 m +4 h +124 h +147 h +4 h +15049 m +4 h +1 h +15050 m +4 h +82 h +147 h +1 h +31 h +4718 m +186 h +4 h +11 h +3 h +1 h +82 h +15051 m +316 m +278 h +3 h +4 h +888 m +278 h +73 h +1 h +10 h +82 h +15052 m +164 h +15053 m +10 h +146 h +10 h +1 h +82 h +10 h +4 h +1 h +1 h +718 h +6296 m +15054 m +4 h +1 h +4 h +1 h +4 h +578 h +139 h +6501 m +4 h +4 h +3 h +4 h +10 h +3 h +4 h +4 h +1 h +119 h +10 h +1 h +4 h +10 h +10 h +1 h +2495 m +1116 m +1 h +4 h +12 h +4 h +15055 m +1 h +4 h +124 h +11 h +1 h +1 h +425 m +15056 m +4 h +1 h +15057 m +4 h +15058 m +3 h +1 h +59 h +10 h +1 h +1 h +10 h +195 h +15059 m +15060 m +15061 m +15062 m +139 h +4 h +3 h +1 h +4 h +4 h +4 h +4 h +97 h +4 h +10 h +15063 m +15064 m +270 h +1 h +4 h +74 h +10 h +15065 m +5809 m +1 h +15066 m +4 h +10 h +1470 h +1 h +1 h +1 h +4 h +4 h +82 h +4905 m +7 m +4 h +3 h +4 h +15067 m +1 h +181 h +4 h +307 h +1 h +5387 m +124 h +31 h +15068 m +147 h +4372 m +10 h +82 h +196 h +1953 m +15069 m +1 h +109 h +15070 m +104 h +169 h +10 h +4 h +4 h +15071 m +1 h +172 h +10 h +4 h +15072 m +15073 m +1 h +1470 h +4905 m +1 h +15074 m +82 h +10 h +10 h +15075 m +10 h +10 h +13 h +4 h +386 h +10 h +15076 m +4 h +15077 m +1 h +10 h +1 h +181 h +4 h +1 h +10 h +386 h +4 h +4 h +15078 m +10 h +10 h +15079 m +15080 m +4 h +6549 m +164 h +266 h +10 h +83 h +10 h +4 h +59 h +12 h +15081 m +10 h +82 h +15082 m +124 h +15083 m +12 h +4 h +10 h +15084 m +4 h +15085 m +1 h +15086 m +1 h +15087 m +1 h +1 h +1 h +25 h +15088 m +97 h +124 h +10 h +124 h +57 h +124 h +10 h +10 h +10 h +1685 h +4 h +10 h +10 h +146 h +15089 m +10 h +45 h +258 
h +276 h +15090 m +15091 m +15092 m +1 h +1 h +1 h +4 h +1 h +10 h +124 h +4 h +3216 m +15093 m +258 h +10 h +15094 m +4 h +4 h +74 h +4 h +1 h +104 h +1835 m +4 h +1137 h +15095 m +82 h +79 h +15096 m +10 h +83 h +10 h +4 h +4 h +4 h +10 h +1 h +12 h +1 h +124 h +124 h +322 h +319 h +332 h +4 h +4 h +15097 m +4857 m +15098 m +15099 m +10 h +15100 m +181 h +10 h +4 h +4 h +4 h +10 h +4 h +22 h +82 h +15101 m +4 h +1 h +4 h +10 h +10 h +15102 m +10 h +15103 m +698 m +15104 m +15105 m +74 h +4 h +10 h +31 h +4 h +4 h +15106 m +139 h +4 h +11 h +4 h +1 h +1 h +15107 m +94 h +15108 m +538 h +15109 m +1 h +15110 m +4132 m +15111 m +1 h +10 h +4 h +10 h +4 h +10 h +1 h +1027 h +109 h +3 h +15112 m +15113 m +4 h +108 h +10 h +12700 m +15114 m +15115 m +65 h +358 h +4 h +15116 m +4 h +10 h +4 h +4 h +4 h +1 h +4 h +4 h +276 h +10 h +10 h +15117 m +15118 m +1 h +533 h +256 m +3841 m +15119 m +10 h +1 h +4 h +135 h +15120 m +190 h +1556 m +97 h +91 h +10 h +15121 m +119 h +15122 m +15123 m +15124 m +4 h +4 h +57 h +4 h +15125 m +3 h +92 h +4 h +172 h +15126 m +15127 m +15128 m +94 h +15129 m +15130 m +1 h +104 h +1137 h +1 h +3 h +15131 m +4 h +5348 m +4 h +11 h +4 h +4 h +10 h +14308 m +15132 m +10 h +23 h +112 h +1 h +135 h +4 h +601 h +4 h +5632 m +10 h +4 h +15133 m +73 h +4 h +14 m +146 h +10 h +15134 m +1 h +195 h +11 h +4 h +31 h +10 h +4 h +109 h +15135 m +10 h +4 h +1 h +15136 m +1 h +15137 m +124 h +15138 m +15139 m +195 h +10 h +4 h +536 h +1576 m +10 h +15140 m +3 h +15141 m +1 h +4 h +1 h +278 h +1817 m +1 h +82 h +112 h +15142 m +195 h +143 h +15143 m +1309 h +169 h +4 h +4 h +4 h +25 h +10 h +4 h +1 h +196 h +15144 m +4 h +1 h +1 h +15145 m +82 h +1 h +4 h +15146 m +124 h +15147 m +15148 m +383 h +4 h +15149 m +1 h +104 h +1 h +238 h +1 h +1 h +4 h +4350 m +83 h +11 h +10 h +10 h +82 h +10 h +1 h +10 h +113 h +15150 m +4 h +74 h +4 h +4 h +125 h +1 h +27 h +4 h +4 h +367 h +15151 m +11 h +1 h +15152 m +139 h +4 h +976 h +124 h +1 h +195 h +15153 m +10 h +15154 
m +10 h +59 h +10 h +15155 m +808 m +238 h +10 h +872 m +4 h +36 h +15156 m +4 h +156 h +4 h +10 h +307 h +33 m +15157 m +15158 m +57 h +55 h +1 h +79 h +59 h +297 h +15159 m +31 h +4 h +1 h +10 h +4 h +15160 m +1 h +4 h +1 h +8 h +186 h +1574 m +15161 m +447 h +15162 m +15163 m +15164 m +147 h +79 h +186 h +4 h +3 h +4 h +15165 m +45 h +1 h +1725 m +3702 m +10 h +4 h +1 h +1403 h +15166 m +108 h +1 h +1 h +4 h +11 h +31 h +15167 m +10 h +15168 m +4 h +4 h +1 h +10 h +935 h +15169 m +4 h +10 h +4 h +4 h +15170 m +57 h +4 h +4 h +10 h +4 h +15171 m +3 h +10 h +97 h +4 h +10 h +112 h +10 h +48 h +1 h +1 h +1 h +114 h +83 h +82 h +164 h +15172 m +10 h +114 h +4 h +1116 m +4 h +4 h +10 h +4 h +1 h +4 h +74 h +1 h +4 h +1 h +4 h +4 h +1 h +1 h +15173 m +10 h +10 h +4 h +15174 m +82 h +435 m +59 h +4 h +83 h +10 h +2720 m +15175 m +11 h +1 h +1 h +123 h +13 h +15176 m +83 h +31 h +5545 m +15177 m +82 h +4 h +1766 h +41 h +15178 m +5982 m +15179 m +10 h +1 h +169 h +4 h +4 h +15180 m +443 h +123 h +15181 v +13854 m +11 h +1861 m +1 h +1 h +15182 m +4 h +1 h +36 h +97 h +124 h +10 h +990 h +1 h +195 h +1 h +11 h +4 h +1 h +3 h +1441 m +955 m +1 h +4 h +10 h +12 h +123 h +1261 h +15183 m +91 h +109 h +4 h +4 h +10 h +65 h +124 h +169 h +717 m +10 h +124 h +4 h +4 h +31 h +94 h +359 h +11 h +113 h +2937 m +2285 m +4 h +8 h +15184 m +104 h +4 h +4 h +297 h +10 h +4 h +4 h +15185 m +82 h +15186 m +1 h +1 h +11 h +10 h +45 h +4 h +15187 m +4 h +1 h +4 h +273 m +4 h +15188 m +1045 m +1 h +1089 h +4 h +15189 m +1 h +1 h +10 h +10 h +4301 m +4 h +45 h +83 h +4 h +195 h +64 h +146 h +10 h +4 h +1 h +10 h +10 h +15190 m +2183 m +299 h +4 h +97 h +15191 m +1261 h +1 h +454 m +1 h +15192 m +15193 m +135 h +4 h +195 h +15194 m +10 h +4 h +4 h +1 h +15195 m +13 h +278 h +4 h +5249 m +986 h +82 h +1 h +114 h +10 h +358 h +10 h +4 h +74 h +4 h +15196 m +4 h +15197 m +1 h +15198 m +1 h +4 h +4857 m +15199 m +4 h +1 h +279 h +31 h +10 h +15200 m +1 h +15201 m +1 h +4 h +4 h +4 h +156 h +11 
h +1 h +4 h +11 h +10 h +27 h +4 h +3405 m +10 h +3 h +15202 m +4 h +1 h +4 h +1089 h +4 h +1 h +15203 m +4 h +31 h +124 h +57 h +15204 m +1 h +15205 m +1 h +4 h +10 h +41 h +15206 m +15207 m +4 h +4 h +4 h +110 h +79 h +15208 m +15209 m +4 h +4 h +1 h +10 h +4 h +4 h +204 h +4 h +4520 m +4 h +135 h +4 h +10 h +129 h +82 h +15210 m +307 h +10 h +1 h +1015 m +911 h +4 h +125 h +238 h +59 h +11 h +10 h +4 h +238 h +10 h +195 h +4 h +14112 m +170 h +1 h +6290 m +195 h +11 h +1 h +1714 h +97 h +1137 h +10 h +4 h +857 h +56 h +4 h +11 h +4 h +601 h +4 h +15211 m +82 h +4 h +2788 h +15212 m +125 h +31 h +1 h +4 h +65 h +258 h +64 h +4 h +15213 m +10 h +358 h +57 h +1 h +15214 m +10 h +11 h +15215 m +15216 m +1074 m +278 h +4 h +11 h +297 h +4 h +4 h +10 h +256 m +10 h +41 h +146 h +15217 m +15218 m +1 h +4 h +307 h +3 h +15219 m +11 h +97 h +1 h +4 h +4 h +1 h +10 h +289 h +83 h +15220 m +4 h +1 h +36 h +733 m +196 h +1 h +4 h +15221 m +15222 m +1 h +10 h +108 h +4 h +92 h +4 h +258 h +15223 m +15224 m +41 h +1 h +195 h +65 h +316 m +695 m +3287 m +11 h +97 h +15225 m +1 h +4 h +1 h +10 h +1 h +1 h +15226 m +1535 m +1030 h +307 h +4 h +15227 m +358 h +10 h +10 h +124 h +1 h +15228 m +59 h +4 h +1898 m +25 h +4 h +4 h +15229 m +11 h +15230 m +4 h +4 h +10 h +4 h +31 h +15231 m +57 h +4 h +4 h +10 h +156 h +1 h +8 h +3 h +125 h +119 h +443 h +4 h +2418 h +1 h +15232 m +4 h +4 h +4 h +15233 m +56 h +802 m +11 h +1309 h +4 h +59 h +11 h +82 h +4 h +15234 m +4 h +12543 m +41 h +1 h +10 h +10 h +94 h +1 h +4 h +4 h +10 h +4 h +1 h +170 h +4 h +15235 m +10 h +4 h +15236 m +15237 m +1772 m +1 h +10 h +15238 m +1 h +4 h +1016 h +41 h +10 h +1 h +1 h +1822 h +10 h +10 h +41 h +4 h +10 h +57 h +4 h +4 h +10 h +15239 m +10 h +15240 m +4 h +15241 m +4 h +109 h +4 h +11 h +94 h +10 h +4 h +1 h +10 h +15242 m +238 h +4 h +4 h +4 h +4 h +5814 m +144 h +1083 m +3141 m +4 h +10 h +4 h +9691 m +4 h +57 h +4 h +15243 m +114 h +15244 m +10 h +15245 m +1 h +4 h +10 h +15246 m +1 h +4 h +3 h 
+285 m +10 h +15247 m +447 h +12 h +15248 m +1137 h +4 h +4 h +4 h +4 h +4 h +157 h +59 h +4 h +110 h +10 h +15249 m +266 h +25 h +10 h +45 h +59 h +10 h +4 h +15250 m +15251 m +109 h +15252 m +3 h +4 h +124 h +4 h +4 h +15253 m +31 h +1 h +15254 m +4 h +2923 h +109 h +4966 m +4 h +1260 m +27 h +74 h +332 h +15255 m +386 h +195 h +65 h +124 h +4 h +15256 m +31 h +1 h +332 h +4 h +4 h +4 h +15257 m +10 h +1386 m +15258 m +1 h +6214 m +15259 m +4 h +74 h +7400 m +1403 h +5145 m +4 h +83 h +4 h +1 h +10 h +4 h +289 h +4 h +8477 m +139 h +109 h +4 h +4524 m +15260 m +1 h +4 h +4 h +4030 m +11 h +157 h +1027 h +143 h +10 h +4 h +4 h +4 h +279 h +1 h +15261 m +15262 m +12 h +4 h +59 h +1 h +15263 m +400 m +74 h +1619 h +4 h +10 h +4 h +1 h +125 h +4 h +1 h +114 h +1 h +4 h +1 h +15264 m +15265 m +25 h +10 h +4 h +15266 m +443 h +10 h +25 h +10 h +1 h +330 h +146 h +1 h +15267 m +1 h +4 h +11 h +11 h +45 h +146 h +59 h +10 h +4 h +10 h +3 h +4 h +15268 m +4 h +125 h +74 h +59 h +4 h +10 h +295 h +79 h +1 h +125 h +4 h +4 h +11654 m +10 h +2627 m +10 h +1 h +97 h +4 h +4 h +9300 m +14 m +4 h +59 h +172 h +59 h +4 h +2887 m +1 h +94 h +190 h +103 h +10 h +59 h +15269 m +15270 m +4 h +15271 m +1 h +1 h +25 h +4 h +1 h +1 h +1 h +4 h +4 h +4 h +104 h +4 h +1 h +109 h +3 h +3523 m +169 h +11 h +10 h +27 h +493 m +79 h +4240 m +1 h +1650 h +15272 m +119 h +1751 m +4 h +4 h +15273 m +4 h +412 h +435 m +912 m +15274 m +1 h +1 h +1 h +143 h +27 h +1 h +27 h +4 h +289 h +4 h +10 h +10 h +114 h +10 h +70 m +640 h +15275 m +97 h +15276 m +1 h +1 h +119 h +4 h +28 h +4 h +4 h +11 h +4 h +4 h +104 h +15277 m +15278 m +386 h +15279 m +4 h +15280 m +15281 m +4 h +10 h +10 h +1 h +10 h +10 h +164 h +109 h +1 h +109 h +172 h +4 h +238 h +1 h +83 h +10 h +10 h +10 h +4 h +4 h +1685 h +4 h +1 h +1 h +124 h +1 h +64 h +10 h +11 h +8 h +295 h +10 h +1 h +4 h +1089 h +57 h +10 h +15282 m +109 h +10 h +11109 m +1 h +509 m +15283 m +15284 m +41 h +952 m +15285 m +15286 m +1 h +2733 h +4 h +15287 
m +15288 m +15289 m +1 h +1677 m +1 h +4 h +22 h +1 h +10 h +10062 m +307 h +83 h +3477 m +59 h +31 h +97 h +13 h +11 h +135 h +11 h +10 h +11 h +1 h +1 h +10 h +12 h +124 h +10 h +4 h +4 h +4 h +170 h +1137 h +307 h +41 h +113 h +83 h +4 h +4 h +4 h +1 h +10 h +15290 m +270 h +15291 m +1 h +1 h +4 h +1 h +692 h +4 h +109 h +4 h +110 h +10 h +1 h +10 h +10 h +79 h +36 h +15292 m +1 h +1 h +10 h +1 h +443 h +1 h +4 h +15293 m +10 h +1 h +1642 h +1016 h +238 h +4 h +1 h +11 h +4 h +368 h +1 h +1766 h +1 h +399 h +11 h +10 h +4 h +15294 m +1 h +11 h +258 h +72 m +1 h +10 h +31 h +1893 m +74 h +11 h +15295 m +7646 m +1 h +4 h +1 h +4 h +295 h +15296 m +1 h +135 h +10 h +4 h +443 h +11 h +319 h +1 h +12047 m +15297 m +170 h +15298 m +135 h +1 h +1 h +4 h +55 h +9175 m +15299 m +15300 m +11 h +10 h +4 h +15301 m +353 h +478 h +143 h +266 h +15302 m +15303 m +1 h +57 h +124 h +15304 m +10 h +338 h +10 h +15305 m +4 h +536 h +4 h +15306 m +59 h +1 h +1 h +2475 m +1 h +91 h +10 h +1 h +4 h +15307 m +10 h +15308 m +1 h +1 h +10 h +5613 m +10 h +4 h +15309 m +330 h +4 h +10 h +15310 m +31 h +15311 m +1 h +1 h +15312 m +4 h +10 h +15313 m +57 h +4 h +4 h +1 h +15314 m +4 h +59 h +55 h +92 h +15315 m +15316 m +4 h +4 h +4 h +4 h +15317 m +4 h +276 h +929 m +403 h +15318 m +2041 m +4 h +15319 m +4 h +13 h +10 h +83 h +12389 m +4 h +125 h +94 h +185 h +184 h +4 h +10 h +104 h +256 h +15320 m +15321 m +656 m +82 h +1 h +82 h +25 h +1 h +4 h +15322 m +56 h +10 h +1 h +1016 h +10 h +11 h +10 h +4 h +1 h +8346 m +147 h +12 h +295 h +82 h +169 h +1 h +10 h +1 h +1 h +10 h +36 h +3622 m +1 h +4 h +109 h +15323 m +59 h +1 h +4 h +1 h +15324 m +3679 m +31 h +1 h +170 h +4 h +172 h +4 h +10 h +4 h +1 h +4 h +10 h +1881 m +15325 m +1 h +10 h +1650 h +1 h +10 h +83 h +4 h +1 h +704 m +114 h +4 h +1 h +1 h +10 h +15326 m +4 h +15327 m +10 h +4 h +295 h +694 m +14098 m +1 h +4 h +1 h +129 h +3155 m +377 h +15328 m +56 h +1 h +4 h +1 h +1914 m +1595 m +10 h +15329 m +125 h +10 h +1 h +4 h 
+15154 m +4 h +4 h +208 m +4 h +91 h +196 h +4 h +4 h +77 m +15330 m +1 h +1 h +1 h +4 h +779 h +4 h +297 h +258 h +1 h +4 h +82 h +3344 m +56 h +368 h +11 h +15331 m +1105 h +4 h +1 h +4 h +82 h +4 h +4 h +11 h +15332 m +1137 h +4 h +15333 m +147 h +4 h +124 h +10 h +1 h +4 h +15334 m +1 h +15335 m +195 h +10 h +4 h +15336 m +11 h +15337 m +4 h +4 h +124 h +10 h +1 h +935 h +15338 m +15339 m +25 h +4 h +4 h +4 h +1 h +2865 m +123 h +4 h +167 h +82 h +1 h +119 h +4 h +1 h +512 m +4 h +4 h +124 h +15340 m +92 h +59 h +1 h +857 h +10 h +25 h +4 h +1 h +13 h +4 h +10 h +1 h +10 h +1 h +430 m +15341 m +4 h +5065 m +591 m +15342 m +10 h +157 h +15343 m +4 h +4 h +1 h +11 h +28 h +59 h +10 h +15344 m +10 h +10 h +1 h +264 h +41 h +10 h +15345 m +15346 m +11 h +10 h +146 h +15347 m +4 h +4 h +164 h +97 h +1 h +83 h +4 h +10 h +15348 m +125 h +399 h +4 h +104 h +1 h +1 h +196 h +57 h +10 h +1 h +15349 m +4 h +1 h +1 h +10 h +59 h +1 h +195 h +11 h +15350 m +4 h +65 h +1840 m +15351 m +15352 m +124 h +10 h +104 h +112 h +4 h +15353 m +4 h +15354 m +4 h +5348 m +1 h +1 h +367 h +123 h +4 h +10 h +15355 m +1 h +36 h +11 h +11 h +15356 m +10 h +15357 m +15358 m +15359 m +1 h +1250 h +250 h +15360 m +4 h +48 h +4 h +630 m +11 h +1 h +65 h +1 h +147 h +10 h +4 h +143 h +1 h +6133 m +15361 m +15362 m +1 h +10 h +12 h +1 h +15363 m +4 h +4 h +25 h +5017 m +4 h +4 h +15364 m +1 h +15365 m +92 h +10 h +15366 m +36 h +103 h +4 h +15367 m +4 h +4 h +10 h +4 h +92 h +7 m +358 h +15368 m +4 h +642 h +13099 m +15369 m +1 h +83 h +15370 m +1322 m +15371 m +124 h +4 h +69 h +4 h +1 h +10 h +15372 m +4 h +10 h +1 h +4 h +15373 m +443 h +4441 h +15374 m +279 h +1764 m +10 h +10 h +124 h +15375 m +92 h +97 h +1 h +124 h +4 h +4 h +4 h +10 h +1685 h +1 h +687 h +90 m +4 h +15376 m +4 h +10 h +15377 m +10 h +15378 m +3 h +109 h +25 h +4 h +1 h +139 h +4 h +181 h +11 h +15379 m +4 h +4 h +10 h +258 h +1759 m +4 h +4 h +15380 m +4 h +15381 m +1 h +31 h +15382 m +229 h +10 h +1 h +146 h +11 h +57 
h +1 h +4542 m +125 h +15383 m +258 h +4 h +82 h +1 h +10 h +15384 m +109 h +114 h +10 h +10 h +1 h +1 h +15385 m +4 h +41 h +169 h +4 h +124 h +83 h +4 h +25 h +15386 m +15387 m +4 h +935 h +10 h +7381 m +15388 m +10 h +15389 m +125 h +158 h +4 h +1470 h +125 h +10 h +11427 m +538 h +15390 m +4 h +10 h +11381 m +1 h +1 h +146 h +15391 m +4 h +1 h +4 h +157 h +2535 m +1 h +10 h +4 h +4 h +4 h +4 h +12 h +14473 m +704 m +3 h +12 h +966 m +15392 m +13 h +1 h +332 h +15393 m +4 h +15394 m +41 h +1 h +59 h +1 h +15395 m +10564 m +10 h +4 h +4 h +3028 m +11 h +4 h +4 h +2733 h +15396 m +1 h +1074 m +5047 m +124 h +10 h +1 h +104 h +4 h +4 h +10 h +15397 m +4 h +15398 m +1 h +41 h +129 h +4 h +4 h +478 h +1 h +92 h +1 h +5911 m +73 h +4 h +15399 m +4 h +10177 m +4 h +4 h +229 h +74 h +11 h +31 h +1 h +1 h +4 h +1 h +1 h +1 h +757 h +1 h +15400 m +1 h +172 h +15401 m +4 h +1 h +14529 m +15402 m +10 h +4 h +1 h +4 h +15403 m +4 h +4513 m +15404 m +4 h +4 h +10 h +4 h +4 h +1 h +4 h +15405 m +367 h +1137 h +1 h +4 h +6882 m +11 h +1 h +10 h +1 h +83 h +15406 m +383 h +4 h +4 h +250 h +109 h +15407 m +181 h +4 h +1 h +15408 m +4 h +1 h +319 h +138 m +1 h +10 h +10 h +125 h +4 h +1 h +4 h +15409 m +25 h +4 h +332 h +4 h +11778 m +1 h +4 h +15410 m +15411 m +1 h +1 h +4 h +4464 m +15412 m +4 h +4 h +1914 m +11 h +4 h +4 h +4 h +4 h +10 h +1 h +4 h +22 h +83 h +10 h +56 h +1 h +805 m +238 h +15413 m +911 h +4 h +10 h +15414 m +4 h +36 h +1 h +1 h +1 h +976 h +11 h +10 h +57 h +125 h +4 h +1 h +15415 m +15416 m +114 h +4 h +10532 m +1454 h +15417 m +65 h +125 h +15418 m +4 h +1 h +48 h +65 h +97 h +4 h +15419 m +4 h +56 h +97 h +1 h +3657 m +4 h +368 h +4 h +82 h +4 h +10 h +11 h +93 h +4 h +4 h +15420 m +4 h +15421 m +72 m +1 h +1 h +229 h +1 h +569 h +1 h +4 h +15422 m +4815 m +109 h +3 h +1 h +10 h +4 h +4 h +1 h +11 h +4 h +15423 m +15424 m +11 h +4 h +10028 m +104 h +4 h +94 h +4 h +4 h +999 m +73 h +82 h +1 h +59 h +307 h +885 h +238 h +15425 m +4 h +10 h +3 h +4 h +11 h 
+135 h +4 h +4 h +124 h +31 h +508 m +4 h +82 h +1 h +65 h +82 h +11 h +4 h +1 h +1 h +4 h +15426 m +5377 m +83 h +59 h +15427 m +1 h +186 h +10 h +2002 m +97 h +1 h +15428 m +195 h +4 h +196 h +15429 m +4 h +4 h +1 h +156 h +11 h +4 h +4 h +1403 h +1 h +3 h +4 h +156 h +1 h +10 h +11 h +1 h +4 h +4 h +1 h +4 h +57 h +4 h +31 h +4 h +1838 m +1454 h +4 h +164 h +4 h +4 h +1 h +1 h +25 h +4 h +196 h +10 h +15430 m +15431 m +82 h +4 h +403 h +1 h +4 h +15432 m +4 h +11 h +10 h +1 h +4 h +164 h +114 h +10 h +4 h +229 h +1 h +4 h +4 h +11 h +135 h +1 h +135 h +1 h +15433 m +15434 m +185 h +15435 m +10 h +15436 m +4 h +15437 m +10 h +12873 m +1 h +124 h +15438 m +4 h +4 h +10 h +10 h +10 h +966 m +1 h +1 h +1835 m +3 h +4 h +1893 m +4 h +124 h +125 h +4 h +10 h +57 h +4 h +4 h +15026 m +3 h +10 h +144 h +15439 m +4 h +15440 m +1 h +289 h +4 h +1 h +4 h +10 h +15441 m +4 h +10 h +1 h +11 h +10 h +1 h +4 h +9692 m +11 h +15442 m +353 h +109 h +10 h +1 h +65 h +195 h +4 h +1 h +10 h +4 h +10 h +4 h +4 h +4 h +1 h +1 h +147 h +538 h +4 h +1 h +15443 m +260 m +15444 m +4 h +1 h +15445 m +124 h +1 h +59 h +1 h +10 h +4 h +1 h +10 h +4 h +4 h +15446 m +104 h +1 h +601 h +1 h +4 h +10 h +4 h +169 h +15447 m +6528 m +1278 m +104 h +15448 m +82 h +59 h +15449 m +386 h +1 h +4 h +10 h +1 h +1 h +4 h +10 h +59 h +10 h +4 h +4 h +1 h +1 h +190 h +185 h +10 h +1 h +1 h +143 h +4 h +1 h +1 h +1 h +1074 m +4 h +1 h +11 h +15450 m +15451 m +77 h +1 h +1993 m +4 h +15452 m +82 h +1403 h +45 h +3 h +109 h +10 h +1 h +1989 m +4 h +146 h +169 h +278 h +4 h +104 h +4 h +533 h +1 h +118 h +4 h +368 h +10 h +1 h +143 h +10 h +15453 m +125 h +59 h +469 m +4 h +10 h +1 h +10 h +83 h +15454 m +92 h +57 h +3 h +1198 m +15455 m +22 h +1269 m +4 h +10 h +82 h +59 h +15456 m +4 h +3453 m +278 h +3 h +1 h +109 h +8206 m +113 h +10 h +65 h +12576 m +1 h +15457 m +73 h +15458 m +10 h +3 h +15459 m +4 h +4 h +1 h +4 h +82 h +1 h +10 h +307 h +15460 m +15461 m +10 h +1 h +4 h +10 h +4 h +4 h +4 h +4 h +4 h 
+4 h +10 h +630 m +4 h +10 h +1382 m +15462 m +4 h +4 h +1 h +10 h +10 h +59 h +1 h +1304 m +4 h +10 h +41 h +15463 m +1 h +2840 m +533 h +4 h +15464 m +1 h +15465 m +4 h +4 h +59 h +31 h +250 h +1 h +1 h +119 h +4 h +4 h +10 h +15466 m +1 h +146 h +25 h +10 h +124 h +4 h +1 h +15467 m +195 h +15468 m +10 h +10 h +10 h +1 h +79 h +10 h +1 h +1 h +4 h +11 h +1 h +156 h +65 h +10 h +15469 m +1595 m +10 h +1 h +1 h +15470 m +10 h +2314 m +1 h +167 h +4 h +1 h +1772 m +5475 m +10 h +15471 m +15472 m +15473 m +10 h +1 h +4 h +25 h +10 h +4 h +11 h +4 h +4 h +4 h +15474 m +31 h +575 h +12 h +4 h +1 h +15475 m +57 h +59 h +4 h +15476 m +4 h +3 h +11 h +12 h +1 h +1 h +113 h +15477 m +4 h +1 h +1 h +10 h +15478 m +181 h +82 h +185 h +15479 m +10 h +15480 m +15481 m +11 h +15482 m +94 h +4 h +538 h +15483 m +4 h +156 h +15484 m +10 h +10 h +1308 m +15485 m +15486 m +986 h +1 h +10 h +7938 m +1 h +1 h +1 h +15487 m +12 h +15488 m +15489 m +124 h +10 h +1 h +15490 m +1 h +15491 m +4 h +190 h +10 h +65 h +4 h +1 h +15492 m +4608 m +10 h +45 h +57 h +1 h +10 h +4 h +15493 m +12 h +3398 m +4 h +10 h +1 h +109 h +6545 m +195 h +31 h +1 h +59 h +1564 m +1 h +1 h +1 h +15494 m +109 h +1 h +10 h +1 h +2041 m +1 h +4 h +10 h +276 h +1 h +4 h +15495 m +15496 m +10 h +1 h +10 h +1 h +4 h +1 h +10 h +1 h +10 h +1 h +15497 m +15498 m +4 h +65 h +4 h +1 h +82 h +4 h +15499 m +15500 m +10 h +1 h +11 h +135 h +15501 m +57 h +195 h +15502 m +4 h +1 h +1 h +10 h +2556 m +4 h +10 h +11128 m +4 h +15503 m +25 h +123 h +4 h +4 h +4 h +15504 m +265 h +195 h +1 h +15505 m +10 h +10 h +1 h +10 h +4 h +118 h +1 h +4 h +15506 m +57 h +73 h +108 h +15507 m +1 h +1 h +1 h +3 h +1 h +57 h +10 h +4 h +11 h +15508 m +4 h +15509 m +4 h +4 h +97 h +1 h +15510 m +109 h +15511 m +27 h +10 h +1 h +1 h +4 h +1 h +4 h +4 h +65 h +59 h +15512 m +4 h +124 h +479 m +4 h +11 h +1 h +661 m +1 h +4 h +15513 m +15514 m +8 h +5407 m +4 h +190 h +12 h +10 h +1 h +91 h +74 h +110 h +4 h +4 h +79 h +4 h +15515 m +1 h +10 h 
+11 h +1 h +15516 m +185 h +368 h +4 h +4 h +15517 m +15518 m +4 h +1 h +124 h +83 h +3322 m +15519 m +157 h +1 h +299 h +15520 m +4 h +4 h +250 h +57 h +13 h +190 h +4 h +15521 m +119 h +109 h +4 h +4 h +15522 m +4 h +4 h +1 h +4 h +4 h +11 h +4 h +15523 m +1 h +10 h +10 h +10 h +4 h +83 h +4 h +10 h +10 h +15524 m +10 h +97 h +4 h +15525 m +8 h +190 h +1 h +41 h +94 h +13326 m +996 m +1 h +94 h +123 h +1 h +1 h +4 h +59 h +1 h +15526 m +1 h +3 h +911 h +10 h +36 h +447 h +4 h +15527 m +15528 m +6583 m +74 h +15529 m +15530 m +4 h +1 h +65 h +10 h +3 h +5483 m +1 h +358 h +4 h +330 h +15531 m +10 h +15532 m +15533 m +123 h +10 h +4 h +4 h +82 h +108 h +278 h +520 m +15534 m +4 h +1 h +8 h +10 h +15535 m +297 h +15536 m +97 h +25 h +770 m +15537 m +4 h +15538 m +82 h +1 h +10 h +1 h +4 h +1 h +55 h +15539 m +274 h +4 h +65 h +4 h +4 h +10 h +15540 m +1 h +10 h +10 h +15541 m +109 h +10 h +4 h +4 h +15542 m +1 h +1 h +1 h +4 h +1 h +10 h +4 h +1 h +73 h +4 h +59 h +55 h +1 h +1 h +15543 m +1 h +1 h +82 h +1 h +4 h +4 h +4 h +1 h +4 h +4 h +15544 m +4 h +10 h +10 h +10 h +15545 m +4849 m +190 h +4 h +15546 m +4 h +520 h +1 h +1 h +15547 m +4 h +1 h +83 h +4896 m +4 h +15548 m +4 h +57 h +125 h +15549 m +15550 m +1 h +15551 m +10 h +15552 m +1 h +4 h +1030 h +4 h +1737 m +74 h +109 h +1 h +15553 m +10 h +1 h +1 h +1 h +4 h +97 h +15554 m +1 h +4 h +1 h +1 h +59 h +1 h +15555 m +1 h +195 h +15556 m +10 h +195 h +11 h +114 h +1 h +82 h +3 h +1 h +15557 m +4 h +140 h +4 h +125 h +4 h +1678 m +8882 m +368 h +386 h +10 h +15558 m +15559 m +4 h +15560 m +4 h +4 h +15561 m +10 h +10 h +104 h +11 h +15562 m +10 h +4 h +4 h +2308 h +104 h +1 h +15563 m +1 h +79 h +10 h +763 m +4 h +4 h +15564 m +10 h +59 h +15565 m +173 h +4 h +15566 m +158 h +1271 m +4 h +57 h +536 h +4 h +4 h +11 h +15567 m +4 h +170 h +1 h +1 h +6158 m +4 h +1 h +1 h +4 h +1 h +196 h +104 h +82 h +266 h +15568 m +15569 m +15570 m +147 h +15571 m +11 h +10 h +1 h +15572 m +1 h +10 h +15573 m +184 h +258 h +1 
h +15574 m +15575 m +4 h +10 h +57 h +1 h +15576 m +10 h +489 m +10 h +1 h +4 h +186 h +15577 m +4 h +1 h +12 h +146 h +15578 m +871 m +757 h +4 h +15579 m +4 h +15580 m +4 h +4 h +15581 m +4 h +15582 m +1 h +10 h +15583 m +4 h +31 h +8767 m +10 h +10 h +13 h +4 h +4 h +15584 m +10 h +15585 m +124 h +82 h +4 h +4 h +4 h +1 h +15586 m +1260 m +10 h +15587 m +110 h +4 h +65 h +12489 m +56 h +10 h +1 h +1 h +57 h +15588 m +10 h +3 h +4 h +4 h +687 h +15589 m +1 h +10 h +15590 m +7839 m +4 h +65 h +1 h +3 h +125 h +10 h +74 h +15591 m +15592 m +15593 m +779 h +15594 m +4 h +10 h +74 h +10 h +808 m +4 h +1308 m +2625 m +1 h +22 h +25 h +1 h +965 h +59 h +15595 m +74 h +15596 m +4 h +10 h +10 h +1 h +11 h +4 h +143 h +1 h +15597 m +5330 m +1 h +125 h +10 h +15598 m +15599 m +1 h +4 h +11 h +1 h +1 h +3 h +125 h +10 h +15600 m +15601 m +3 h +83 h +15602 m +1 h +22 h +167 h +15603 m +687 h +4 h +1 h +55 h +15604 m +1 h +13 h +2931 m +4 h +186 h +1 h +818 m +4 h +1 h +15605 m +1 h +1 h +15606 m +15607 m +4 h +181 h +1685 h +1 h +479 m +1 h +1 h +3 h +15608 m +4 h +11 h +4 h +10 h +15377 m +295 h +4 h +11 h +4 h +353 h +15609 m +4 h +10 h +1 h +10 h +4 h +1027 h +10 h +1 h +10 h +1 h +196 h +3558 m +4 h +10 h +123 h +15610 m +5225 m +109 h +4 h +2591 m +15611 m +82 h +91 h +4 h +1 h +250 h +1478 m +13 h +6941 m +1 h +723 m +70 m +4 h +15612 m +1 h +4 h +4 h +1 h +15613 m +77 h +116 m +4 h +4 h +104 h +10 h +73 h +10 h +10 h +15614 m +10 h +4 h +616 m +6129 m +4 h +10 h +59 h +55 h +82 h +4229 m +11 h +4 h +4 h +10 h +1 h +10 h +10 h +57 h +109 h +15615 m +4 h +1 h +3 h +4 h +258 h +104 h +1220 m +4 h +15616 m +15617 m +65 h +108 h +1 h +1 h +4 h +15618 m +10 h +1 h +1 h +172 h +4 h +10 h +82 h +276 h +10 h +1 h +4 h +10 h +15619 m +1 h +4 h +15620 m +139 h +266 h +109 h +1 h +1 h +92 h +1 h +106 m +15621 m +15622 m +10 h +1780 h +2851 m +45 h +146 h +4 h +1 h +147 h +1 h +1 h +15623 m +125 h +1016 h +4 h +1 h +15624 m +4 h +4 h +64 h +97 h +22 h +1 h +64 h +15625 m +15626 m 
+10 h +1 h +4 h +15627 m +15628 m +4 h +59 h +4 h +10 h +4 h +11 h +1 h +1796 h +15629 m +1981 m +386 h +15630 m +114 h +15631 m +4 h +9482 m +4 h +15632 m +279 h +1 h +10 h +1 h +448 m +4 h +1 h +36 h +104 h +15633 m +4 h +119 h +41 h +10 h +4 h +1 h +4 h +1 h +4 h +10 h +123 h +4 h +4 h +10 h +10 h +15634 m +4 h +10 h +307 h +10 h +15635 m +1642 h +4 h +1 h +583 m +11 h +15636 m +15637 m +77 h +15638 m +4 h +4 h +55 h +15639 m +1 h +1 h +332 h +15640 m +15641 m +10 h +4 h +25 h +874 m +15642 m +4 h +10 h +15643 m +15644 m +10 h +83 h +10 h +15645 m +4 h +11 h +10 h +10 h +15646 m +4 h +3177 m +1 h +1642 h +10 h +12 h +1444 m +27 h +15647 m +1 h +4 h +4 h +119 h +4 h +10 h +10 h +4 h +4 h +15648 m +15649 m +332 h +124 h +1 h +10 h +224 m +1 h +1 h +208 m +1 h +167 h +15650 m +97 h +1 h +4 h +1 h +4 h +4 h +4 h +4 h +15651 m +9293 m +15652 m +1815 m +4 h +10 h +295 h +4 h +10 h +15653 m +15654 m +1 h +4 h +1 h +626 m +15655 m +10 h +15656 m +15657 m +15658 m +10 h +65 h +169 h +195 h +10 h +4 h +15659 m +45 h +15660 m +4 h +4 h +15661 m +15662 m +10640 m +10 h +4 h +4 h +45 h +1 h +74 h +15663 m +185 h +1822 h +73 h +4 h +10 h +4 h +124 h +1 h +15664 m +1 h +307 h +2172 m +4 h +4 h +538 h +4 h +4 h +2474 m +1 h +15665 m +10 h +1 h +109 h +5809 m +1 h +15666 m +4 h +10 h +25 h +1 h +109 h +167 h +119 h +4 h +11869 m +1 h +4 h +15667 m +15668 m +15669 m +285 m +124 h +1 h +4 h +124 h +94 h +4 h +3 h +1 h +10 h +4 h +4 h +10 h +1 h +15670 m +4 h +4 h +10 h +15671 m +4 h +4 h +4 h +897 m +1 h +10 h +11 h +31 h +10 h +1 h +15672 m +4 h +15673 m +4 h +10 h +587 m +3 h +82 h +10 h +59 h +4 h +4 h +11 h +1 h +10 h +74 h +1 h +15674 m +10 h +1 h +41 h +10 h +15675 m +4 h +15676 m +31 h +4 h +10 h +1 h +224 m +15677 m +238 h +15678 m +10 h +119 h +2438 m +125 h +15679 m +538 h +15680 m +4 h +15681 m +15682 m +22 h +13140 m +1 h +15683 m +1 h +185 h +1 h +11 h +74 h +1 h +11 h +1 h +15684 m +1 h +1 h +1 h +10 h +1 h +15685 m +1 h +1 h +4 h +109 h +82 h +4 h +1 h +10 h +4 h 
+1030 h +15686 m +2126 m +4 h +15687 m +4 h +10 h +15688 m +56 h +1 h +4 h +36 h +1 h +15689 m +4 h +15690 m +4 h +10 h +10 h +97 h +1 h +1478 m +4 h +10 h +4 h +3 h +15691 m +10 h +4 h +1 h +10 h +97 h +1 h +1 h +15692 m +36 h +4 h +4 h +1 h +27 h +10 h +10 h +10 h +15693 m +1 h +147 h +73 h +10 h +74 h +13 h +15694 m +169 h +4 h +15695 m +297 h +1 h +4 h +4 h +8734 m +1 h +11 h +59 h +10 h +1 h +10 h +1 h +4 h +10 h +4 h +15696 m +258 h +114 h +3562 m +1 h +1 h +10 h +4 h +1948 m +258 h +4 h +3 h +10 h +1 h +15697 m +83 h +10 h +386 h +4 h +10 h +140 h +15698 m +4 h +371 h +8383 m +1 h +4 h +1220 m +359 h +10 h +12372 m +11 h +1038 m +94 h +3732 m +41 h +4 h +15699 m +10 h +3 h +15700 m +10 h +2309 m +15701 m +1 h +1 h +57 h +5863 m +2965 m +25 h +1 h +15702 m +1 h +4 h +4 h +1 h +4 h +1 h +4 h +11779 m +10 h +4 h +119 h +92 h +4 h +15703 m +27 h +15704 m +4524 m +1454 h +15705 m +1 h +4 h +15706 m +15707 m +258 h +4 h +4 h +1 h +15708 m +4 h +109 h +1083 m +229 h +912 m +4 h +4 h +4 h +1955 m +15709 m +1 h +1 h +2851 m +15710 m +10 h +258 h +45 h +4359 m +10 h +4 h +4 h +4 h +15711 m +4 h +2720 m +10 h +278 h +15712 m +1 h +4 h +1 h +4 h +4 h +219 m +1 h +4 h +57 h +15713 m +109 h +4 h +15714 m +82 h +4 h +11 h +31 h +9940 m +10 h +4 h +4 h +4 h +11 h +15715 m +109 h +15716 m +1 h +15717 m +10 h +4 h +4 h +139 h +1 h +4 h +147 h +15718 m +55 h +4 h +10 h +1 h +3 h +3845 m +15719 m +4 h +28 h +15720 m +1 h +22 h +94 h +1 h +4 h +10 h +4 h +4 h +59 h +4 h +11 h +4 h +4 h +1 h +4 h +4 h +1 h +124 h +1 h +4 h +3089 m +435 h +278 h +15721 m +4 h +4 h +1 h +15722 m +15723 m +11 h +976 h +15724 m +1 h +15725 m +3748 m +10 h +11 h +57 h +10 h +1 h +15726 m +1 h +1137 h +1 h +11 h +265 h +15727 m +1722 m +1 h +10 h +172 h +10 h +3 h +10 h +241 m +11 h +4 h +167 h +1 h +123 h +7215 m +4 h +238 h +8 h +15728 m +4 h +4 h +1 h +3 h +4 h +4 h +238 h +1 h +10 h +15729 m +97 h +195 h +83 h +10 h +4 h +4 h +143 h +15730 m +65 h +4 h +278 h +4 h +10 h +10 h +4 h +4 h +3 h +5933 m 
+4 h +57 h +5 h +4 h +4 h +15731 m +1 h +1 h +238 h +278 h +15732 m +4 h +4 h +11 h +4 h +1 h +82 h +10 h +10 h +158 h +4 h +10 h +4 h +15733 m +25 h +4 h +156 h +15734 m +15735 m +59 h +15736 m +1 h +1492 m +15737 m +1 h +10 h +4 h +4 h +10 h +1 h +1 h +4 h +11 h +4 h +4 h +4 h +15738 m +447 h +15739 m +4 h +114 h +4 h +25 h +15740 m +2494 m +6001 m +1 h +31 h +15741 m +15742 m +4 h +1 h +15743 m +2022 m +4 h +192 h +15744 m +10 h +15745 m +119 h +4 h +4 h +4 h +4 h +1 h +1 h +4 h +4 h +55 h +1 h +55 h +2002 m +15746 m +1 h +4 h +1714 h +4 h +10 h +195 h +1 h +4 h +1 h +1 h +1914 h +4 h +10 h +25 h +57 h +4 h +4 h +36 h +4 h +15747 m +4 h +15748 m +640 h +2183 m +82 h +64 h +4 h +1 h +10 h +11 h +279 h +536 h +10 h +371 h +1 h +11 h +64 h +538 h +3702 m +1 h +1 h +10 h +10 h +1 h +15749 m +270 h +79 h +10 h +10 h +1 h +10 h +4 h +164 h +64 h +64 h +124 h +14050 m +1 h +1 h +520 h +1 h +172 h +10 h +4 h +4 h +15750 m +1780 h +4 h +10 h +15751 m +2116 m +10 h +4 h +10 h +15752 m +15753 m +15754 m +687 h +1 h +15755 m +358 h +15756 m +4714 m +10 h +1 h +4 h +4 h +1 h +1 h +15757 m +10 h +1 h +12 h +5296 m +12005 m +15758 m +83 h +4 h +15759 m +4 h +11 h +4542 m +156 h +757 h +4 h +1 h +10958 m +15760 m +4 h +10 h +4 h +13392 m +10 h +167 h +15761 m +656 m +15762 m +4 h +1948 m +1 h +10 h +4 h +45 h +36 h +172 h +1 h +15763 m +4 h +1 h +4 h +15764 m +57 h +4 h +4 h +55 h +146 h +10 h +36 h +258 h +4 h +109 h +2459 m +15765 m +4 h +15766 m +4 h +1 h +15767 m +15768 m +1 h +4 h +1 h +82 h +4 h +10 h +266 h +4 h +4 h +15769 m +10 h +10 h +15770 m +4 h +15771 m +10 h +31 h +10 h +4 h +135 h +10 h +31 h +1 h +1 h +4 h +4 h +10 h +1 h +1 h +383 h +10 h +1 h +1062 m +4 h +1 h +1677 m +15772 m +15773 m +4 h +146 h +125 h +4 h +10 h +195 h +4 h +4 h +15774 m +15775 m +109 h +1 h +15776 m +15777 m +73 h +156 h +4 h +10 h +25 h +15778 m +3177 m +11 h +10 h +10 h +1 h +10 h +4 h +15779 m +1198 m +1 h +10 h +1 h +172 h +1 h +146 h +15780 m +27 h +4 h +1201 m +1 h +11 h +4 h +15781 
m +939 m +125 h +112 h +10 h +25 h +15782 m +4 h +1 h +170 h +10 h +4 h +10 h +4 h +3 h +181 h +10 h +4 h +4308 m +278 h +1 h +15783 m +355 m +109 h +15784 m +59 h +1 h +1 h +10 h +15785 m +1 h +278 h +11 h +1 h +1003 m +4 h +10 h +110 h +4 h +15786 m +4 h +4 h +83 h +10 h +4 h +10 h +1 h +15787 m +1 h +10 h +4 h +591 m +15788 m +138 m +15789 m +4 h +11 h +15790 m +79 h +15791 m +15792 m +6124 m +1 h +4 h +59 h +10 h +4 h +3 h +13 h +10 h +57 h +15793 m +15794 m +1137 h +109 h +15795 m +4 h +1 h +3143 m +1 h +10 h +135 h +82 h +224 h +15796 m +10 h +190 h +65 h +11 h +15797 m +15798 m +10 h +15799 m +10 h +15800 m +1 h +169 h +10 h +41 h +170 h +221 m +15801 m +338 h +10 h +15802 m +1 h +15803 m +56 h +1 h +250 h +4 h +82 h +4 h +97 h +4 h +10 h +11 h +11 h +4 h +15804 m +10 h +1 h +82 h +83 h +1 h +15805 m +4 h +10 h +15806 m +4 h +10 h +4 h +10 h +15807 m +4 h +1 h +10 h +1 h +15808 m +4 h +124 h +4 h +4 h +1 h +4 h +4 h +966 h +1 h +10 h +4 h +11 h +1 h +1 h +31 h +12 h +2148 m +79 h +4 h +59 h +4 h +143 h +1 h +1 h +15809 m +10 h +10 h +15810 m +59 h +10 h +15811 m +1 h +1 h +15812 m +25 h +15813 m +124 h +4 h +1 h +1 h +4 h +3847 m +10 h +10 h +15814 m +3 h +4 h +59 h +104 h +1 h +11 h +15815 m +1 h +347 m +4 h +4 h +1 h +27 h +15816 m +169 h +156 h +10 h +4 h +83 h +1 h +1 h +36 h +10 h +1 h +10 h +124 h +1027 h +15817 m +15818 m +143 h +4 h +2887 m +1 h +1 h +15819 m +1 h +15820 m +1 h +124 h +114 h +1 h +297 h +4 h +6726 m +4 h +3398 m +307 h +15821 m +12 h +1 h +10 h +1 h +4 h +15822 m +4 h +138 m +2096 m +15823 m +4 h +1 h +4 h +10 h +4 h +359 h +3539 m +15824 m +2002 m +4 h +1 h +4 h +59 h +123 h +10 h +1 h +10 h +10 h +7135 m +4 h +15825 m +4 h +1 h +10 h +1 h +10 h +11 h +1 h +15826 m +1 h +1 h +15827 m +13 h +2694 m +6422 m +113 h +15828 m +464 h +1 h +15829 m +83 h +1 h +1 h +4 h +11 h +10 h +15830 m +1122 m +59 h +1 h +13468 m +976 h +15831 m +1 h +15832 m +10 h +1 h +10 h +1 h +4 h +12 h +4 h +15833 m +10 h +4 h +4 h +36 h +65 h +169 h +195 h +1 h 
+1137 h +11 h +1 h +1359 h +77 h +677 m +15834 m +4 h +15835 m +15836 m +4 h +238 h +2308 h +1 h +27 h +10 h +15837 m +3 h +1 h +3 h +10 h +1 h +4 h +4 h +4 h +15838 m +14345 m +15839 m +109 h +1 h +11 h +4 h +15840 m +12301 m +169 h +1 h +15 m +1117 m +15841 m +15842 m +15843 m +10 h +10 h +15844 m +1 h +397 m +10089 m +4 h +434 m +110 h +13886 m +4 h +4 h +241 m +10 h +1 h +1 h +106 m +10 h +9536 m +1822 h +1 h +4 h +684 m +10 h +5 h +1650 h +1 h +15845 m +15846 m +1 h +15847 m +109 h +15848 m +1 h +109 h +3 h +15849 m +767 m +4 h +1 h +82 h +4 h +1 h +338 h +1 h +10 h +114 h +4 h +25 h +113 h +15850 m +228 m +31 h +4 h +2920 m +386 h +10 h +15851 m +4 h +57 h +15852 m +11 h +1137 h +57 h +73 h +64 h +1 h +4 h +1981 m +15853 m +4 h +4 h +15854 m +15855 m +10 h +15856 m +92 h +10 h +109 h +4 h +2379 h +569 h +10 h +11 h +258 h +41 h +15857 m +4576 m +114 h +1 h +10 h +4 h +10 h +146 h +4 h +27 h +1 h +10 h +10 h +15858 m +4 h +55 h +3 h +4 h +10 h +83 h +10 h +1 h +267 m +55 h +4 h +10 h +4 h +536 h +73 h +12 h +82 h +15859 m +140 h +1 h +4 h +1 h +48 h +4 h +124 h +10 h +4 h +4 h +1 h +1 h +113 h +25 h +15860 m +97 h +15861 m +1 h +1 h +15862 m +4 h +10 h +10 h +36 h +10 h +1 h +74 h +1 h +386 h +1 h +15863 m +4 h +1 h +3025 m +10 h +1 h +1953 h +1 h +31 h +196 h +15864 m +718 h +8809 m +10 h +5863 m +45 h +31 h +4 h +5689 m +1 h +15865 m +10 h +25 h +10 h +15866 m +83 h +15867 m +4 h +31 h +4 h +11 h +10 h +4 h +15868 m +106 m +4 h +97 h +4 h +1 h +718 h +15869 m +386 h +15870 m +15871 m +10 h +4 h +10 h +1 h +57 h +1 h +110 h +1 h +10 h +4 h +447 h +4 h +119 h +15872 m +11 h +15873 m +4 h +4 h +4 h +1 h +367 h +15874 m +15875 m +185 h +1 h +1 h +10 h +15876 m +15877 m +31 h +45 h +4 h +4 h +15878 m +4 h +11 h +110 h +4 h +1 h +4 h +15879 m +36 h +270 h +4 h +25 h +4 h +15880 m +1 h +11 h +15881 m +15882 m +4830 m +114 h +279 h +3707 m +1 h +15883 m +10 h +1650 h +4 h +92 h +82 h +4 h +4 h +10 h +92 h +10 h +4 h +1 h +1 h +15884 m +332 h +4905 h +1 h +15885 m +1 
h +1 h +10 h +10 h +15886 m +15887 m +15888 m +119 h +10 h +1 h +4 h +4 h +10 h +56 h +10 h +1 h +15889 m +1 h +56 h +15890 m +1006 m +4 h +10 h +4 h +538 h +4 h +109 h +1 h +1 h +1 h +4 h +15891 m +104 h +10 h +1 h +10 h +15892 m +10 h +73 h +4 h +1 h +15893 m +57 h +1 h +15894 m +10 h +15895 m +15896 m +1 h +954 m +238 h +1 h +135 h +1 h +1666 m +1 h +15897 m +443 h +4 h +4 h +15898 m +82 h +10 h +1 h +1 h +4 h +12363 m +1714 h +15899 m +1 h +4 h +15900 m +15901 m +509 m +10464 m +15902 m +15903 m +15904 m +4 h +146 h +4 h +276 h +15905 m +15906 m +10 h +1 h +1 h +10 h +11 h +164 h +4 h +15907 m +536 h +4 h +15908 m +15909 m +4 h +10 h +4 h +12 h +1 h +1 h +25 h +4 h +4 h +4 h +10 h +4 h +27 h +73 h +36 h +4 h +4 h +1 h +3704 m +15910 m +4 h +15911 m +3 h +12 h +11 h +368 h +4 h +15912 m +11 h +15913 m +1 h +2265 m +1 h +11 h +94 h +10 h +15914 m +3 h +15915 m +124 h +1 h +1 h +31 h +4 h +1 h +4 h +3 h +138 h +4 h +15916 m +4 h +65 h +1 h +4 h +2128 m +103 h +4 h +73 h +10 h +15917 m +4 h +447 h +1 h +4 h +4 h +10 h +4 h +13481 m +1 h +83 h +25 h +11 h +1 h +4 h +1 h +1016 h +1 h +15918 m +4 h +10 h +83 h +1 h +11 h +1374 m +1 h +56 h +74 h +114 h +4 h +4 h +4 h +15919 m +2754 m +10 h +114 h +15920 m +15921 m +15922 m +15923 m +185 h +4 h +172 h +7532 m +1027 h +1822 h +15924 m +1 h +1 h +10 h +15925 m +10 h +82 h +15926 m +4 h +195 h +4 h +1 h +1 h +4 h +1 h +464 h +1 h +4 h +1 h +4 h +3 h +10 h +1 h +1 h +15927 m +4 h +15928 m +7727 m +1 h +4 h +15929 m +4 h +15930 m +11 h +10 h +10 h +10 h +4 h +10 h +1 h +1 h +10 h +15931 m +15932 m +64 h +15933 m +1 h +4 h +92 h +307 h +10 h +4 h +10391 m +4 h +4 h +25 h +25 h +4 h +4 h +109 h +307 h +4 h +79 h +4 h +1 h +1 h +4 h +4 h +1 h +82 h +59 h +1 h +582 m +10 h +616 m +1 h +15934 m +4 h +1359 h +11 h +1321 m +73 h +110 h +11 h +15935 m +15936 m +4 h +1619 h +15937 m +1 h +22 h +4 h +15938 m +15939 m +11 h +1 h +10 h +1 h +13 h +92 h +3 h +3 h +10685 m +1 h +15940 m +10 h +10324 m +4 h +124 h +181 h +4 h +15941 m 
+843 m +1 h +15942 m +10 h +13662 m +4 h +15943 m +3 h +15944 m +266 h +15945 m +15946 m +15947 m +1 h +15948 m +146 h +10 h +4 h +15949 m +15950 m +10 h +10 h +10 h +1 h +112 h +1713 m +4 h +464 h +10 h +332 h +15951 m +359 h +15952 m +10 h +10 h +10 h +4 h +15953 m +147 h +15954 m +4 h +486 m +10 h +238 h +59 h +74 h +4 h +4 h +3068 m +10 h +57 h +15955 m +4229 m +10 h +4 h +10 h +10 h +4 h +10 h +14050 m +11353 m +10 h +1 h +1074 h +1 h +1 h +10 h +15956 m +4 h +15957 m +640 h +10 h +1 h +15958 m +1 h +1 h +15959 m +4 h +27 h +4 h +1 h +124 h +477 m +4 h +1083 m +1 h +4 h +15960 m +4 h +15961 m +1 h +109 h +15962 m +4 h +4 h +82 h +10 h +109 h +124 h +15963 m +15964 m +15965 m +41 h +11 h +125 h +888 m +4 h +4 h +1053 m +4 h +15966 m +4 h +31 h +15967 m +4 h +10 h +1 h +15968 m +736 m +15969 m +31 h +1 h +10 h +11 h +4 h +4 h +15970 m +4 h +15971 m +1 h +10 h +4 h +4 h +1340 m +3 h +190 h +583 m +4 h +15972 m +10 h +1 h +15973 m +4 h +83 h +15974 m +135 h +13 h +13544 m +15975 m +4 h +15976 m +1 h +4 h +15977 m +4 h +4 h +4 h +4 h +114 h +15978 m +82 h +1 h +15979 m +12526 m +15980 m +4030 m +1 h +2733 h +41 h +860 m +114 h +1250 h +15981 m +15982 m +4 h +15983 m +1089 h +4 h +110 h +15984 m +13 h +399 h +15985 m +15986 m +196 h +10 h +4 h +147 h +10 h +888 m +11 h +15987 m +5141 m +109 h +1 h +229 h +97 h +4 h +677 m +464 h +359 h +4 h +15988 m +4 h +15989 m +104 h +1 h +10 h +12 h +4 h +10 h +820 m +15990 m +5357 m +4 h +15263 m +12 h +3 h +1 h +1 h +94 h +4 h +1 h +11344 m +4 h +1 h +1780 h +15991 m +10 h +1 h +74 h +15992 m +10 h +4 h +4 h +238 h +1 h +4 h +1 h +4 h +82 h +15993 m +1 h +10 h +4 h +15994 m +4 h +12 h +4 h +25 h +10 h +12635 m +4 h +4 h +4 h +147 h +4 h +270 h +267 m +1 h +412 h +10 h +4 h +1 h +279 h +15995 m +65 h +12 h +15996 m +1 h +1 h +1 h +10 h +1 h +4 h +15997 m +25 h +8040 m +1 h +1 h +1 h +1 h +7702 m +10 h +808 m +4 h +1 h +4 h +15998 m +15999 m +10 h +92 h +12 h +195 h +11 h +16000 m +16001 m +92 h +718 h +1 h +16002 m +55 h +10 h 
+123 h +10 h +4 h +4 h +10 h +13 h +3112 m +16003 m +3 h +562 m +663 m +172 h +10 h +31 h +10 h +10 h +1 h +1 h +4 h +135 h +1 h +16004 m +16005 m +4 h +16006 m +16007 m +10 h +31 h +16008 m +1 h +16009 m +1 h +16010 m +10 h +1083 m +4 h +1 h +4 h +16011 m +4 h +4 h +264 h +10 h +16012 m +16013 m +4 h +4 h +2733 h +57 h +1 h +4 h +16014 m +16015 m +250 h +185 h +4 h +4 h +16016 m +185 h +1 h +1 h +1 h +339 m +10 h +307 h +10 h +10 h +55 h +4 h +12264 m +4 h +3 h +4 h +4 h +4 h +11 h +264 h +16017 m +36 h +5720 m +687 h +167 h +10 h +109 h +4 h +4 h +1 h +4 h +1 h +276 h +146 h +4 h +4 h +1 h +9182 m +10 h +11 h +10 h +5254 m +1 h +4 h +4 h +1 h +73 h +74 h +1 h +1 h +10 h +16018 m +10 h +65 h +258 h +4 h +4 h +10 h +61 m +1 h +1 h +10 h +110 h +16019 m +16020 m +4 h +4 h +10 h +4 h +10 h +27 h +536 h +10 h +16021 m +16022 m +16023 m +40 m +1 h +281 m +4 h +82 h +11621 m +1535 m +16024 m +82 h +16025 m +11 h +229 h +82 h +195 h +143 h +203 m +83 h +4 h +16026 m +11 h +11 h +1 h +16027 m +16028 m +1261 h +59 h +16029 m +16030 m +4 h +10 h +12 h +143 h +4 h +169 h +10 h +1 h +12 h +1 h +104 h +10 h +10 h +10 h +4 h +4 h +16031 m +1 h +59 h +57 h +4 h +16032 m +16033 m +195 h +224 h +41 h +443 h +16034 m +843 m +195 h +297 h +74 h +31 h +1 h +16035 m +10 h +4 h +10 h +471 m +10 h +1 h +10 h +181 h +74 h +1 h +10 h +4 h +12225 m +1 h +4 h +4 h +169 h +2148 m +11810 m +1 h +4 h +11 h +16036 m +4 h +16037 m +1 h +425 m +4 h +11 h +16038 m +16039 m +285 m +16040 m +278 h +16041 m +10 h +1 h +83 h +10 h +16042 m +4 h +16043 m +1 h +104 h +4 h +1 h +3 h +506 m +4 h +10 h +1 h +10 h +4 h +307 h +1 h +4 h +1 h +73 h +16044 m +104 h +169 h +22 h +1 h +1 h +1 h +1 h +79 h +16045 m +10 h +4 h +16046 m +10 h +59 h +16047 m +11 h +16048 m +3 h +4 h +10 h +16049 m +83 h +48 h +16050 m +4 h +10 h +1 h +4 h +1 h +1 h +57 h +82 h +1 h +59 h +16051 m +4 h +1 h +65 h +104 h +104 h +939 m +1 h +1 h +16052 m +1 h +16053 m +1 h +228 m +1 h +8070 m +412 h +16054 m +16055 m +10 h +55 h +1 h 
+16056 m +10 h +1 h +104 h +16057 m +1 h +443 h +3 h +1 h +4 h +16058 m +16059 m +92 h +16060 m +16061 m +74 h +4 h +4 h +238 h +11 h +2040 m +1 h +82 h +140 h +82 h +73 h +4 h +55 h +1 h +3170 m +10 h +4 h +10 h +578 h +4 h +16062 m +4 h +2308 h +986 h +1 h +4 h +1 h +10 h +1 h +4 h +12372 m +16063 m +4 h +10 h +4 h +16064 m +10 h +196 h +4 h +16065 m +1 h +4 h +4 h +1 h +1642 h +16066 m +4 h +16067 m +208 m +338 h +110 h +97 h +9757 m +16068 m +10 h +10 h +10 h +10 h +4 h +16069 m +16070 m +16071 m +16072 m +1 h +297 h +2851 h +114 h +4 h +16073 m +195 h +97 h +1 h +10 h +109 h +443 h +16074 m +16075 m +1 h +1 h +1 h +83 h +642 h +4 h +10 h +1 h +124 h +4 h +33 m +1 h +16076 m +4 h +4 h +692 h +1 h +59 h +2054 m +10 h +4 h +104 h +13980 m +10 h +806 m +4 h +4 h +16077 m +10 h +4 h +10 h +16078 m +16079 m +16080 m +147 h +4592 m +65 h +16081 m +11 h +16082 m +12 h +1 h +4 h +10 h +1 h +601 h +10 h +73 h +16083 m +1955 m +16084 m +5809 m +10 h +11 h +41 h +4 h +16085 m +1 h +4 h +57 h +2751 m +36 h +16086 m +3435 m +4 h +4 h +181 h +1 h +119 h +4 h +16087 m +4 h +4 h +104 h +143 h +157 h +5387 m +185 h +74 h +4 h +45 h +4 h +10 h +16088 m +464 h +1 h +2625 m +10 h +16089 m +1 h +16090 m +2840 m +10 h +10 h +41 h +4 h +1 h +4 h +1 h +4 h +4 h +16091 m +1 h +1 h +92 h +10 h +10 h +1 h +10 h +16092 m +4 h +4911 m +4 h +358 h +124 h +10 h +4 h +1 h +16093 m +16094 m +146 h +11 h +4 h +106 h +5 h +1 h +143 h +4 h +59 h +174 m +16095 m +1359 h +966 h +16096 m +4 h +156 h +139 h +146 h +4 h +1 h +4 h +4 h +10 h +10 h +3 h +124 h +4 h +82 h +184 h +16097 m +10 h +16098 m +804 m +4 h +1 h +6869 m +4 h +16099 m +59 h +4 h +1 h +2607 m +4 h +16100 m +11 h +4 h +16101 m +1 h +4 h +4 h +25 h +10 h +16102 m +82 h +4 h +16103 m +1 h +1 h +4 h +4 h +109 h +1 h +16104 m +10 h +16105 m +4 h +4 h +1 h +16106 m +28 h +16107 m +2025 m +1 h +10 h +1 h +229 h +13426 m +77 h +1 h +278 h +16108 m +172 h +4 h +10 h +307 h +125 h +4 h +16109 m +11 h +1 h +16110 m +4 h +4 h +4 h +16111 m +4 h 
+3 h +4 h +25 h +12 h +41 h +297 h +124 h +4 h +10 h +16112 m +16113 m +16114 m +4 h +1 h +16115 m +4 h +57 h +83 h +4 h +1 h +10 h +4 h +74 h +4 h +1 h +41 h +10 h +4 h +16116 m +1 h +1 h +1 h +10 h +10 h +4 h +297 h +459 h +196 h +601 h +4 h +157 h +16117 m +83 h +16118 m +1 h +97 h +4 h +16119 m +10177 m +4 h +181 h +11 h +16120 m +16121 m +4 h +11 h +4 h +10 h +4 h +59 h +41 h +4 h +16122 m +146 h +4 h +16123 m +10 h +4 h +16124 m +7401 m +172 h +109 h +1 h +16125 m +181 h +687 h +4 h +1 h +1 h +16126 m +82 h +4 h +258 h +4 h +4 h +16127 m +10 h +16128 m +147 h +4 h +164 h +1445 m +4 h +1 h +11948 m +10 h +4 h +272 m +124 h +1 h +57 h +59 h +1 h +3 h +403 h +16129 m +83 h +7839 m +4 h +16130 m +4 h +181 h +186 h +150 m +172 h +65 h +10 h +59 h +230 m +4 h +119 h +4 h +59 h +10 h +10 h +4 h +16131 m +16132 m +10 h +4 h +16133 m +1 h +27 h +10958 m +124 h +16134 m +16135 m +16136 m +10 h +82 h +4 h +1 h +1 h +737 m +1 h +3307 m +185 h +97 h +16137 m +57 h +16138 m +4 h +16139 m +10 h +41 h +4 h +124 h +1 h +386 h +203 m +601 h +3988 m +845 m +10 h +4 h +59 h +16140 m +16141 m +4 h +146 h +167 h +124 h +119 h +1 h +10 h +1470 h +4 h +4 h +4 h +156 h +10 h +16142 m +31 h +1 h +4 h +10 h +195 h +16143 m +82 h +966 h +4 h +16144 m +4 h +16145 m +4 h +536 h +1 h +10 h +10 h +16146 m +82 h +1 h +10 h +5348 m +1 h +4 h +693 m +11 h +16147 m +147 h +11 h +1 h +16148 m +10 h +4 h +229 h +10 h +4 h +10 h +147 h +16149 m +4 h +92 h +16150 m +10539 m +10 h +10 h +4 h +4 h +28 h +1 h +10 h +16151 m +10 h +4 h +1 h +1 h +10 h +1478 h +1 h +1 h +16152 m +4 h +4 h +10 h +4 h +55 h +16153 m +10 h +4 h +4 h +4 h +10 h +57 h +16154 m +857 h +4 h +383 h +27 h +16155 m +4 h +10 h +146 h +16156 m +1 h +16157 m +1 h +10 h +10 h +10 h +4 h +4 h +16158 m +124 h +82 h +125 h +97 h +4 h +1 h +4 h +11 h +57 h +16159 m +10 h +332 h +147 h +4 h +3048 m +10 h +185 h +4 h +57 h +10 h +4 h +538 h +1 h +704 h +4 h +276 h +184 h +10 h +4 h +4 h +57 h +4 h +351 m +55 h +4 h +1 h +16160 m +16161 m 
+16162 m +1 h +4 h +4 h +4 h +11 h +10 h +173 h +4 h +295 h +16163 m +4 h +4 h +16164 m +55 h +4 h +10 h +16165 m +16166 m +11940 m +1 h +11 h +4 h +4 h +12 h +3 h +4 h +1 h +16167 m +10 h +7444 m +97 h +109 h +1 h +10 h +146 h +4 h +4 h +10 h +4 h +1 h +16168 m +266 h +59 h +4 h +16169 m +386 h +1 h +4 h +16170 m +10 h +4 h +1 h +1 h +167 h +4 h +10 h +64 h +10 h +4 h +10414 m +1 h +10 h +73 h +4 h +1 h +190 h +4 h +185 h +156 h +16171 m +4 h +4 h +147 h +16172 m +125 h +4 h +4 h +4 h +143 h +109 h +16173 m +108 h +147 h +13157 m +124 h +1 h +1016 h +4 h +16174 m +3 h +4 h +358 h +16175 m +10 h +4 h +57 h +4 h +4 h +359 h +2923 h +16176 m +73 h +16177 m +1 h +10 h +16178 m +196 h +16179 m +1 h +16180 m +4 h +4 h +11 h +1 h +10 h +276 h +82 h +1 h +59 h +10 h +97 h +3533 m +10 h +10 h +1 h +16181 m +83 h +4 h +16182 m +4 h +4 h +16183 m +16184 m +16185 m +4 h +1201 m +3558 m +10 h +4 h +4 h +16186 m +16187 m +1 h +27 h +118 h +10 h +16188 m +59 h +10 h +16189 m +10 h +536 h +266 h +10 h +16190 m +1 h +16191 m +16192 m +10 h +1 h +4 h +16193 m +295 h +6469 m +16194 m +16195 m +10 h +1 h +109 h +274 h +16196 m +4 h +10 h +10 h +16197 m +4 h +1 h +16198 m +16199 m +4 h +16200 m +16201 m +1 h +4 h +4 h +1 h +4 h +10 h +278 h +4 h +97 h +16202 m +169 h +164 h +4 h +1 h +3 h +1 h +36 h +10 h +10 h +16203 m +11 h +4 h +1 h +82 h +4 h +4 h +4 h +1 h +83 h +4 h +10 h +16204 m +4 h +41 h +16205 m +4 h +1 h +2887 m +954 m +4 h +146 h +4 h +1 h +266 h +10 h +16206 m +4 h +10 h +1 h +1250 h +1470 h +4 h +10 h +109 h +1 h +4 h +1 h +1 h +1 h +16207 m +109 h +11 h +10 h +358 h +1 h +4 h +4 h +3 h +4 h +64 h +1 h +59 h +16208 m +10 h +124 h +1 h +1 h +4 h +1 h +10418 m +4 h +109 h +1 h +16209 m +1 h +10 h +140 h +74 h +4 h +1 h +4 h +8324 m +4 h +11 h +83 h +10 h +10 h +1 h +7541 m +9027 m +10 h +10 h +1 h +16210 m +4 h +1261 h +1 h +4 h +10 h +57 h +1 h +16211 m +16212 m +4 h +1 h +4 h +4 h +4 h +1 h +4 h +1 h +4 h +27 h +1 h +10 h +1 h +10 h +4 h +1 h +10 h +16213 m +10 h +16214 
m +4 h +16215 m +1 h +79 h +16216 m +4 h +16217 m +11 h +4 h +10 h +1 h +10 h +1 h +16218 m +10 h +4 h +65 h +1 h +4 h +113 h +10 h +4 h +10 h +59 h +114 h +4 h +25 h +16219 m +4 h +124 h +1 h +109 h +12 h +2788 h +1 h +4 h +1 h +322 h +10 h +16220 m +4 h +10 h +4 h +10 h +1 h +4 h +4 h +4 h +12 h +1 h +4 h +16221 m +110 h +279 h +4 h +10 h +4 h +1 h +41 h +6066 m +125 h +16222 m +16223 m +4 h +955 m +16224 m +493 m +16225 m +4 h +10 h +11 h +4 h +4 h +16226 m +4 h +1 h +1309 h +4 h +918 m +4 h +4 h +1 h +1 h +4535 m +1 h +4 h +1 h +57 h +10 h +124 h +1 h +147 h +4 h +10 h +112 h +10 h +11 h +4 h +16227 m +1 h +4 h +10 h +16228 m +4 h +1 h +147 h +1 h +10 h +16229 m +104 h +9450 m +83 h +1 h +676 m +1 h +4 h +104 h +4 h +4 h +1 h +10 h +4 h +1 h +16230 m +16231 m +10 h +16232 m +16233 m +4 h +16234 m +1535 m +11 h +74 h +11 h +57 h +4 h +0 m +124 h +11 h +10 h +10 h +1 h +10 h +4 h +4 h +11 h +16235 m +10 h +4 h +4 h +59 h +16236 m +4 h +114 h +4 h +4 h +16237 m +16238 m +4 h +1959 m +5567 m +403 h +10 h +1030 h +146 h +16239 m +1 h +59 h +57 h +146 h +1 h +16240 m +74 h +1 h +4 h +1 h +857 h +10 h +10 h +48 h +4 h +83 h +190 h +1 h +10 h +16241 m +1 h +4 h +4 h +1 h +2788 h +1569 m +10 h +10 h +4 h +4 h +4 h +123 h +4 h +10 h +10 h +174 m +1 h +279 h +135 h +1 h +16242 m +10 h +4 h +4 h +16243 m +371 h +1 h +4 h +1 h +1 h +1 h +1 h +11 h +1 h +16244 m +94 h +11 h +1 h +1 h +74 h +267 h +1 h +10 h +464 h +16245 m +94 h +10 h +55 h +4 h +10 h +4 h +16246 m +57 h +4 h +16247 m +16248 m +92 h +4 h +1 h +1 h +147 h +14345 m +25 h +10 h +1 h +3 h +10 h +16249 m +196 h +10 h +4 h +79 h +56 h +1 h +16250 m +11 h +1128 m +4 h +4 h +1 h +195 h +41 h +10 h +4 h +1 h +692 h +4 h +11 h +10 h +1 h +59 h +4 h +4 h +238 h +16251 m +10 h +4 h +1 h +4256 m +4 h +65 h +1 h +16252 m +92 h +1 h +41 h +4 h +1725 m +65 h +1 h +16253 m +1 h +82 h +238 h +4 h +3558 m +1 h +11 h +10 h +1 h +1 h +4 h +2435 m +16254 m +1 h +779 h +16255 m +11 h +11691 m +322 h +4 h +104 h +16256 m +4 h +124 
h +4 h +1 h +4 h +45 h +135 h +1 h +104 h +4 h +125 h +16257 m +10 h +10 h +4 h +11 h +16258 m +10 h +238 h +4 h +371 h +718 h +843 h +10 h +16259 m +4 h +82 h +1 h +4 h +4 h +276 h +10 h +1 h +10 h +1713 m +79 h +1 h +1 h +1 h +59 h +2002 h +1 h +1 h +1 h +31 h +57 h +11 h +109 h +3 h +45 h +10 h +16260 m +57 h +332 h +1 h +3 h +57 h +16261 m +2022 m +16262 m +1 h +1 h +8114 m +1 h +10 h +16263 m +16264 m +4 h +16265 m +4 h +10 h +4 h +1 h +224 h +16266 m +1 h +4 h +25 h +16267 m +1 h +270 h +1 h +4 h +4 h +4 h +692 h +12 h +4 h +55 h +140 h +1 h +11 h +1 h +56 h +16268 m +4 h +59 h +57 h +13043 m +16269 m +6784 m +9323 m +297 h +250 h +2928 m +170 h +10 h +1 h +1 h +10 h +1 h +10 h +57 h +16270 m +692 h +4 h +2490 m +4 h +31 h +4 h +181 h +1 h +10 h +11 h +82 h +16271 m +112 h +4 h +16272 m +279 h +1 h +1 h +4 h +4 h +41 h +6296 m +4 h +1 h +59 h +4714 m +538 h +27 h +97 h +4 h +4 h +16273 m +10 h +11 h +45 h +1 h +13 h +4 h +16274 m +1 h +276 h +4 h +16275 m +114 h +1 h +5065 m +195 h +1 h +368 h +12 h +5584 m +4 h +10 h +1 h +10 h +124 h +65 h +16276 m +4 h +2433 m +295 h +4 h +7900 m +1 h +4 h +22 h +203 h +114 h +1 h +4 h +698 m +16277 m +143 h +2865 m +48 h +83 h +16278 m +3 h +3 h +4 h +10 h +1 h +4 h +4 h +4 h +10 h +4 h +192 h +10 h +1 h +4 h +4 h +124 h +16279 m +4 h +16280 m +4 h +135 h +16281 m +16282 m +443 h +16283 m +16284 m +1 h +10 h +16285 m +4 h +1 h +13 h +10 h +16286 m +4 h +6107 m +12041 m +16287 m +1 h +4 h +11 h +1 h +10 h +1 h +16288 m +1 h +1 h +278 h +16289 m +1 h +4 h +195 h +1 h +1 h +147 h +488 m +299 h +4 h +16290 m +16291 m +3344 m +16292 m +464 h +4 h +1 h +16293 m +16294 m +169 h +55 h +4 h +1 h +464 h +10 h +4 h +16295 m +124 h +1 h +16296 m +4 h +1 h +11 h +4 h +11 h +4 h +1780 h +1685 h +4 h +41 h +143 h +186 h +97 h +190 h +4 h +10 h +4 h +1 h +4 h +459 h +3 h +1 h +11 h +16297 m +16298 m +4 h +109 h +1 h +11 h +16299 m +11 h +10 h +1 h +13 h +4 h +1 h +4 h +10 h +4 h +10 h +4 h +1 h +83 h +10 h +4 h +16300 m +31 h +2924 m 
+16301 m +16302 m +4 h +10 h +4111 m +94 h +4 h +16303 m +1261 h +10 h +1980 m +10 h +4 h +203 h +16304 m +1 h +65 h +16305 m +146 h +25 h +16306 m +112 h +16307 m +16308 m +10 h +36 h +16309 m +10 h +238 h +258 h +16310 m +16311 m +4 h +347 m +578 h +1 h +4 h +16312 m +10 h +1 h +16313 m +74 h +82 h +4 h +16314 m +3 h +170 h +5125 m +1 h +1 h +11 h +1123 m +172 h +109 h +16315 m +1 h +16316 m +297 h +1 h +16317 m +4 h +119 h +1 h +4 h +4 h +10 h +1 h +10 h +1 h +16318 m +17 m +4 h +16319 m +4 h +1 h +196 h +4 h +368 h +16320 m +196 h +10 h +4 h +2494 m +10 h +10 h +10 h +1 h +167 h +10 h +185 h +10 h +36 h +4 h +16321 m +1 h +10 h +1 h +110 h +4 h +16322 m +4 h +1 h +12 h +4 h +1 h +1409 m +1 h +16323 m +10 h +57 h +10 h +4 h +4 h +10 h +16324 m +139 h +1 h +4 h +4 h +1 h +25 h +92 h +83 h +4 h +147 h +3 h +16325 m +16326 m +1 h +4 h +1 h +10 h +16327 m +114 h +1 h +4 h +10 h +4 h +10 h +10 h +4 h +5008 m +25 h +1 h +10 h +16328 m +167 h +4 h +1016 h +1 h +104 h +10 h +3 h +124 h +4 h +1 h +4 h +1 h +4 h +563 m +4 h +10 h +10 h +16329 m +41 h +25 h +113 h +16330 m +124 h +10 h +4 h +238 h +1646 m +4 h +4 h +1 h +4 h +4 h +1 h +3 h +1967 m +16331 m +16332 m +10 h +10 h +1 h +10 h +1 h +124 h +1 h +172 h +16333 m +25 h +4 h +4 h +16334 m +16335 m +1 h +1 h +196 h +4 h +4 h +1 h +16336 m +10 h +4 h +1027 h +16337 m +4 h +1 h +16338 m +1 h +16339 m +4 h +11948 m +4 h +9467 m +16340 m +4 h +1 h +4 h +1 h +25 h +10 h +4 h +1 h +82 h +31 h +1386 m +114 h +64 h +10 h +157 h +143 h +1 h +10 h +1 h +1 h +16341 m +4 h +1 h +1 h +278 h +1 h +10 h +82 h +10 h +4030 m +10 h +146 h +125 h +4 h +16342 m +4 h +447 h +10 h +16343 m +10 h +10 h +16344 m +16345 m +4 h +2931 m +16346 m +10 h +4 h +16347 m +16348 m +196 h +109 h +16349 m +10 h +4 h +1470 h +4 h +16350 m +16351 m +55 h +36 h +172 h +10062 m +11 h +1 h +125 h +1278 m +16352 m +16353 m +4 h +147 h +16354 m +16355 m +1 h +10 h +143 h +1 h +1 h +4 h +4 h +25 h +16356 m +170 h +1 h +11 h +82 h +4 h +170 h +25 h +45 h +4 h 
+16357 m +10 h +4 h +464 h +2163 m +4 h +10 h +4 h +1 h +82 h +1 h +4 h +147 h +16358 m +16359 m +4 h +16360 m +10 h +16361 m +4 h +109 h +16362 m +4 h +31 h +124 h +16363 m +4 h +4 h +1 h +4 h +10 h +4 h +143 h +16364 m +266 h +82 h +1 h +16365 m +1 h +4 h +83 h +4 h +1279 m +36 h +250 h +143 h +615 m +307 h +92 h +125 h +4 h +16366 m +10 h +11 h +4 h +4 h +12066 m +16367 m +1 h +464 h +9077 m +1 h +4 h +4 h +16368 m +16369 m +10 h +16370 m +825 m +1 h +10 h +1 h +113 h +1 h +1 h +1 h +4 h +12 h +4 h +4 h +1 h +73 h +16371 m +1478 h +976 h +10 h +109 h +125 h +11 h +4 h +94 h +104 h +16372 m +172 h +238 h +258 h +109 h +4 h +4 h +1 h +4 h +16373 m +1 h +16374 m +59 h +10 h +45 h +10 h +41 h +16375 m +1 h +1 h +250 h +57 h +55 h +4 h +73 h +976 h +11 h +1886 m +10 h +4 h +16376 m +164 h +16377 m +11 h +10 h +1 h +190 h +408 m +83 h +10 h +10 h +118 h +10 h +3 h +238 h +4 h +16378 m +307 h +278 h +4 h +16379 m +4 h +11 h +40 m +332 h +16380 m +1 h +1 h +1 h +1470 h +169 h +4 h +1 h +1677 m +4 h +10 h +41 h +156 h +4 h +16381 m +913 m +1 h +6963 m +124 h +10 h +1 h +16382 m +1261 h +16383 m +10 h +4 h +10 h +4 h +1 h +1 h +4 h +330 h +266 h +55 h +94 h +41 h +4 h +16384 m +147 h +3558 h +4 h +10 h +1 h +536 h +1 h +190 h +313 m +125 h +1198 m +1 h +1 h +11 h +10 h +31 h +1 h +297 h +4 h +4 h +16385 m +4 h +190 h +1478 h +358 h +332 h +4 h +1 h +16386 m +4 h +4 h +4 h +4 h +1772 m +25 h +1 h +16387 m +911 h +10 h +109 h +4 h +443 h +10 h +16388 m +4 h +36 h +82 h +4 h +10 h +109 h +4 h +1 h +1 h +16389 m +55 h +435 h +10 h +1957 m +4 h +22 h +1 h +1 h +79 h +147 h +1542 m +1 h +10 h +4 h +256 h +1 h +1576 m +16390 m +1 h +4 h +4030 m +10 h +262 m +4 h +10 h +4 h +4 h +5505 m +11 h +1 h +10 h +14083 m +16391 m +10 h +4 h +4 h +16392 m +1 h +10 h +16393 m +11 h +3680 m +16394 m +1 h +16395 m +4 h +4 h +1 h +10 h +94 h +1 h +16396 m +22 h +10 h +2788 h +143 h +10 h +383 h +4 h +16397 m +103 h +73 h +4 h +1766 h +10 h +4 h +1 h +10 h +1 h +16398 m +11 h +82 h +10 h +3025 
m +4 h +16399 m +4 h +4 h +83 h +31 h +109 h +1535 m +4 h +4 h +4 h +10 h +16400 m +10 h +238 h +1 h +16401 m +10 h +4 h +1 h +4 h +1 h +10 h +74 h +1 h +16402 m +16403 m +4 h +10 h +4 h +12 h +10 h +11 h +74 h +124 h +4 h +16404 m +4 h +172 h +1137 h +16405 m +4 h +10 h +4 h +4 h +238 h +1137 h +4 h +4 h +4 h +1 h +4 h +16406 m +29 m +1 h +358 h +10 h +4 h +16407 m +383 h +10 h +4 h +74 h +1 h +16408 m +16409 m +1685 h +224 h +4 h +109 h +16410 m +447 h +73 h +430 m +169 h +16411 m +16412 m +1 h +16413 m +13 h +4 h +1 h +1198 m +16414 m +10 h +1 h +4240 m +4 h +10 h +82 h +16415 m +16416 m +82 h +143 h +8555 m +16417 m +1 h +4 h +965 h +4 h +4 h +109 h +1 h +109 h +16418 m +146 h +4 h +4 h +4 h +4 h +16419 m +57 h +4 h +4 h +15 m +16420 m +1 h +16421 m +4 h +4 h +109 h +25 h +1 h +1 h +10 h +4 h +109 h +1 h +4 h +56 h +4 h +4 h +4 h +25 h +16422 m +4 h +358 h +16423 m +4 h +1403 h +59 h +10 h +4 h +12 h +94 h +10 h +1936 m +4 h +16424 m +1016 h +1 h +1957 m +1936 m +16425 m +536 h +16426 m +11 h +103 h +278 h +10 h +4 h +1 h +1 h +82 h +11 h +4 h +1 h +4 h +144 m +4 h +4 h +10 h +4 h +4 h +4 h +4 h +144 h +4 h +147 h +1470 h +57 h +1 h +10 h +1 h +16427 m +4 h +135 h +82 h +1 h +4 h +129 h +1 h +4 h +11 h +1 h +4 h +10 h +258 h +1 h +1 h +7479 m +28 h +4 h +16428 m +112 h +10 h +1128 m +4 h +181 h +1 h +10 h +4 h +4457 m +135 h +65 h +4 h +347 m +16429 m +1 h +1 h +10 h +4 h +16430 m +4 h +10 h +16431 m +16432 m +83 h +172 h +4 h +4 h +16433 m +14829 m +65 h +1016 h +1 h +57 h +4 h +10 h +10 h +16434 m +16435 m +1 h +1 h +16436 m +4 h +1 h +16437 m +181 h +10 h +16438 m +4 h +10 h +125 h +135 h +1 h +16439 m +185 h +16440 m +4 h +258 h +642 h +11 h +11 h +16441 m +97 h +143 h +172 h +16442 m +195 h +16443 m +16444 m +97 h +146 h +4 h +4 h +94 h +4 h +16445 m +11 h +1 h +4 h +8 h +4 h +4 h +10 h +16446 m +4 h +16447 m +1 h +16448 m +4 h +278 h +16449 m +4 h +270 h +1 h +57 h +82 h +114 h +1 h +1 h +31 h +4 h +1 h +16450 m +74 h +1 h +4 h +4 h +124 h +238 h +1685 h 
+4 h +1 h +10 h +97 h +1 h +1 h +1 h +4 h +4 h +10 h +97 h +57 h +16451 m +16452 m +119 h +41 h +16453 m +4 h +4 h +1 h +82 h +10 h +4 h +359 h +1 h +1 h +1 h +170 h +266 h +16454 m +4 h +1 h +4 h +109 h +16455 m +10 h +16456 m +16457 m +147 h +169 h +319 h +1 h +16458 m +4 h +1 h +4 h +16459 m +1 h +16460 m +25 h +158 h +10 h +16461 m +55 h +112 h +4 h +48 h +65 h +109 h +109 h +10 h +16462 m +4 h +14569 m +124 h +16463 m +1 h +1770 m +195 h +262 m +16464 m +10 h +8332 m +173 h +687 h +4 h +2788 h +1 h +4 h +307 h +10177 m +10 h +279 h +31 h +16465 m +1 h +10 h +601 h +59 h +4 h +4 h +41 h +94 h +1 h +4 h +1 h +4 h +4 h +4 h +10 h +186 h +1 h +16466 m +124 h +79 h +109 h +10 h +4 h +4 h +10 h +10 h +16467 m +10 h +4 h +4 h +94 h +3720 m +57 h +10 h +4 h +1 h +4 h +10 h +25 h +1 h +4 h +1 h +1 h +181 h +1 h +425 m +4 h +4 h +569 h +10 h +12 h +16468 m +1 h +10 h +4 h +10 h +4 h +31 h +1 h +10 h +114 h +10 h +57 h +10 h +97 h +4 h +4 h +41 h +16469 m +10 h +4 h +7388 m +536 h +1 h +16470 m +1347 m +4 h +4 h +16471 m +1 h +16472 m +4 h +41 h +1 h +4 h +1 h +82 h +4 h +1 h +4 h +10 h +1 h +181 h +4 h +10 h +16473 m +10 h +4 h +6545 m +1 h +92 h +1504 m +57 h +1 h +443 h +4 h +4 h +10 h +12 h +4 h +1 h +4 h +16474 m +1 h +16475 m +1 h +55 h +1105 h +1006 m +59 h +10 h +1 h +4 h +104 h +16476 m +1 h +4895 m +16477 m +468 m +1 h +135 h +1 h +196 h +57 h +16478 m +11 h +4 h +258 h +16479 m +569 h +1 h +10 h +1 h +16480 m +112 h +9912 m +332 h +4089 m +16481 m +195 h +31 h +4 h +1807 m +276 h +79 h +16482 m +1 h +578 h +36 h +4 h +5080 m +1 h +4 h +4 h +840 m +16483 m +10 h +1 h +16484 m +7839 m +1 h +1 h +16485 m +10 h +1 h +1 h +16486 m +3 h +16487 m +172 h +4 h +4 h +10 h +1 h +1511 m +82 h +4 h +4 h +83 h +16488 m +1198 h +16489 m +10 h +10 h +1650 h +4 h +3539 m +4 h +27 h +16490 m +258 h +4 h +10 h +1 h +16491 m +129 h +10 h +4 h +1 h +569 h +1 h +1 h +1 h +10 h +157 h +16492 m +16493 m +1 h +1 h +10 h +1 h +16494 m +4 h +3143 m +4 h +16495 m +4 h +1 h +16496 m 
+16497 m +9701 m +181 h +4 h +4 h +114 h +1 h +16498 m +1 h +4 h +16499 m +1 h +479 h +10 h +11 h +1 h +4 h +16500 m +10 h +3600 m +4 h +1 h +443 h +16501 m +11 h +10 h +4564 m +4 h +104 h +97 h +10 h +4 h +1 h +16502 m +1 h +7253 m +16503 m +45 h +4 h +4 h +104 h +11 h +10 h +4 h +10 h +1 h +4 h +10 h +4 h +4 h +16504 m +16505 m +1 h +10 h +14388 m +4 h +10 h +16506 m +16507 m +124 h +1 h +109 h +195 h +10 h +16508 m +109 h +1 h +4 h +10 h +692 h +2002 h +16509 m +4 h +10 h +16510 m +41 h +4 h +10 h +41 h +10 h +4 h +31 h +74 h +119 h +65 h +10 h +59 h +10 h +55 h +4 h +45 h +4 h +10 h +196 h +16511 m +11 h +1 h +10 h +4 h +1 h +1 h +4 h +1 h +5230 m +124 h +4 h +16512 m +16513 m +114 h +10 h +1 h +4 h +119 h +4 h +4 h +109 h +4 h +169 h +16514 m +16515 m +1 h +4 h +10 h +4 h +1714 h +1 h +1 h +297 h +31 h +16516 m +73 h +65 h +16517 m +640 h +147 h +3 h +4 h +16518 m +230 m +4 h +195 h +16519 m +4 h +1 h +4 h +40 m +4 h +4 h +10 h +1 h +4 h +4 h +648 m +4 h +69 h +16520 m +114 h +4 h +16521 m +4 h +11 h +13 h +146 h +1714 h +3 h +16522 m +16523 m +16524 m +3 h +10 h +270 h +1 h +10 h +1 h +10 h +1685 h +256 h +1 h +79 h +1 h +4372 m +16525 m +4 h +270 h +10 h +4 h +124 h +4 h +1 h +4 h +109 h +1 h +1 h +16526 m +4 h +16527 m +16528 m +1 h +238 h +228 m +59 h +10 h +1 h +1 h +4 h +65 h +40 h +1 h +629 m +16529 m +4 h +10 h +1 h +1 h +55 h +289 h +12 h +1714 h +157 h +10 h +8497 m +11 h +16530 m +1 h +3398 m +16531 m +13 h +5695 m +10 h +1 h +1 h +4 h +258 h +10 h +11 h +16532 m +4 h +10 h +16533 m +4 h +10 h +16534 m +10 h +125 h +16535 m +82 h +4 h +13 h +1 h +1016 h +10 h +4 h +4 h +4 h +4 h +1 h +10 h +16536 m +16537 m +779 h +1 h +3070 m +10 h +16538 m +16539 m +256 h +4 h +1 h +4 h +10 h +82 h +10 h +16540 m +4 h +1 h +3 h +41 h +1828 m +1 h +1 h +1 h +1 h +170 h +82 h +16541 m +10 h +16542 m +181 h +73 h +3 h +10 h +5 h +16543 m +16544 m +119 h +10942 m +10958 m +4 h +10 h +74 h +1 h +4 h +4 h +16545 m +16546 m +4 h +1 h +4 h +4 h +4 h +125 h +129 h +16547 m 
+139 h +1 h +4 h +4 h +59 h +4 h +10 h +4 h +57 h +4 h +14642 m +65 h +195 h +10 h +16548 m +10 h +4 h +83 h +4 h +4 h +65 h +1 h +10 h +16549 m +4 h +129 h +10 h +939 m +1939 m +16550 m +4 h +4 h +1 h +11 h +1 h +4 h +1 h +4 h +5348 m +22 h +45 h +10 h +4 h +139 h +114 h +4 h +4 h +1114 m +258 h +4 h +258 h +10 h +16551 m +1 h +157 h +172 h +55 h +4 h +2474 m +75 m +1 h +1 h +468 m +4 h +1 h +4 h +125 h +10 h +16552 m +59 h +1 h +1 h +10 h +4177 m +1 h +57 h +4 h +16553 m +7535 m +16554 m +4 h +16555 m +2265 m +16556 m +966 h +276 h +16147 m +10 h +13084 m +195 h +10 h +125 h +11 h +1 h +4 h +1 h +1 h +1 h +4 h +16557 m +10 h +16558 m +185 h +1 h +11 h +25 h +64 h +23 h +500 m +10 h +10 h +16559 m +1 h +10 h +10 h +1 h +16560 m +601 h +16561 m +64 h +1 h +45 h +16562 m +1 h +10 h +1 h +278 h +10378 m +10 h +27 h +10 h +11 h +10464 m +16563 m +1 h +1 h +3028 m +109 h +4 h +54 m +4 h +109 h +4 h +10 h +31 h +386 h +2923 h +4 h +140 h +10 h +4 h +17 m +27 h +10 h +1 h +1 h +172 h +4 h +97 h +4 h +10 h +10 h +16564 m +97 h +4 h +11141 m +59 h +4 h +1 h +4 h +4 h +16565 m +4 h +13 h +57 h +4 h +10 h +112 h +1766 h +1 h +31 h +1 h +173 h +4 h +1 h +266 h +4 h +25 h +12 h +4 h +82 h +4 h +1 h +36 h +4 h +12 h +16566 m +16567 m +94 h +7135 m +4 h +16568 m +489 m +4 h +82 h +16569 m +104 h +10 h +10 h +10 h +4 h +581 m +1 h +4 h +4 h +1 h +8 h +124 h +10 h +31 h +16570 m +4 h +332 h +4 h +4 h +4 h +359 h +16571 m +41 h +48 h +16572 m +4 h +16573 m +1 h +11 h +10682 m +4 h +4 h +16574 m +10 h +10 h +4 h +16575 m +10 h +1 h +16576 m +4 h +1 h +1 h +94 h +4 h +1 h +16577 m +16578 m +16579 m +16580 m +10 h +1 h +3673 m +4 h +1 h +10 h +4 h +4 h +41 h +113 h +297 h +65 h +16581 m +4 h +4 h +1470 h +4 h +10 h +10 h +1 h +16582 m +1 h +1 h +16583 m +16584 m +143 h +4 h +10 h +3 h +4 h +16585 m +1 h +1 h +4 h +10 h +138 h +10 h +4 h +13958 m +124 h +10 h +16586 m +25 h +57 h +16587 m +4522 m +16588 m +1 h +4 h +1 h +16589 m +1 h +146 h +4 h +1 h +16590 m +16591 m +224 h +181 h 
+195 h +10 h +4 h +10 h +59 h +10 h +4 h +4 h +1 h +4 h +4 h +4 h +5567 m +2391 m +10 h +16592 m +4 h +1 h +4 h +16593 m +4 h +16594 m +1 h +57 h +4 h +10 h +112 h +10 h +16595 m +4 h +10 h +57 h +4 h +1 h +109 h +1470 h +10 h +16596 m +114 h +1 h +11 h +443 h +10 h +911 h +1 h +10 h +1 h +1 h +16597 m +4 h +1 h +16598 m +16599 m +16600 m +16601 m +1 h +4 h +4 h +4 h +16602 m +16603 m +4 h +16604 m +12 h +6558 m +82 h +155 m +16605 m +11 h +4 h +16606 m +1 h +4 h +1 h +4 h +11 h +10 h +94 h +1 h +1 h +1 h +1 h +16607 m +16608 m +4 h +1249 m +1 h +10 h +4 h +10 h +1 h +9482 m +1 h +57 h +1 h +59 h +1 h +4 h +10 h +3 h +12 h +4 h +10 h +4 h +195 h +11 h +10 h +104 h +158 h +16609 m +4 h +4 h +1 h +16610 m +1 h +97 h +16611 m +10 h +4 h +16612 m +109 h +97 h +976 h +10 h +4 h +147 h +4 h +4 h +4 h +4 h +4 h +258 h +717 m +4 h +190 h +4 h +61 m +1 h +3606 m +1 h +4 h +6473 m +4 h +2480 m +8 h +59 h +4 h +146 h +986 h +10 h +3533 m +10 h +4 h +285 m +1 h +16613 m +16614 m +59 h +10 h +100 m +1 h +5003 m +11 h +7087 m +13 h +4 h +1 h +2887 m +10 h +16615 m +4 h +10 h +1 h +1 h +16616 m +1 h +110 h +590 m +57 h +22 h +12990 m +1642 h +143 h +1 h +4 h +16617 m +16618 m +4 h +4 h +10 h +4 h +1 h +16619 m +8 h +16620 m +45 h +79 h +10 h +16621 m +4 h +4 h +10 h +7382 m +1 h +1 h +16622 m +4 h +1 h +16623 m +10 h +11 h +1 h +16624 m +97 h +443 h +124 h +16625 m +57 h +83 h +55 h +4 h +3025 m +6678 m +4 h +82 h +1 h +1 h +266 h +16626 m +4 h +4 h +10 h +10 h +119 h +10 h +10 h +195 h +4 h +57 h +124 h +825 m +1 h +143 h +1 h +16627 m +16628 m +4 h +109 h +10 h +4 h +4 h +25 h +10 h +4 h +10 h +1 h +82 h +3 h +5230 m +1 h +196 h +4 h +16629 m +16630 m +11334 m +1 h +1 h +4 h +104 h +167 h +1 h +146 h +65 h +1 h +16631 m +4 h +10 h +4 h +10 h +16632 m +10 h +1 h +1 h +16633 m +10 h +16634 m +4 h +16635 m +16636 m +1 h +25 h +1 h +1 h +1766 h +4 h +5 h +73 h +16637 m +4 h +16638 m +195 h +1 h +1 h +1 h +10 h +4 h +104 h +4561 m +4 h +1 h +82 h +16639 m +10 h +1975 m +10 h +123 h 
+16640 m +25 h +16641 m +57 h +4 h +16642 m +16643 m +16644 m +82 h +124 h +82 h +164 h +4 h +16645 m +11 h +45 h +41 h +4 h +16646 m +4 h +10 h +8511 m +4 h +10 h +4 h +1 h +779 h +11 h +10 h +1 h +976 h +104 h +4 h +4 h +1 h +82 h +1 h +7661 m +4 h +1 h +4 h +10 h +278 h +82 h +41 h +4 h +16647 m +16648 m +65 h +3 h +770 m +339 m +2530 m +1 h +10 h +10 h +1 h +4 h +10 h +4 h +278 h +1 h +1 h +1 h +1 h +2184 m +4 h +1 h +11 h +4 h +4 h +4 h +16649 m +16650 m +4 h +4 h +16651 m +104 h +4 h +10 h +4 h +4 h +1 h +190 h +16652 m +4 h +4 h +4 h +4 h +125 h +45 h +1 h +16653 m +16654 m +1 h +16655 m +10 h +16656 m +109 h +4 h +4 h +4 h +10 h +11 h +10 h +85 m +10 h +536 h +4 h +25 h +16657 m +16658 m +1 h +10 h +4 h +16659 m +27 h +353 h +258 h +82 h +4 h +59 h +4 h +1 h +4 h +4 h +16660 m +83 h +190 h +250 h +4 h +601 h +976 h +4 h +4 h +83 h +1 h +10 h +16661 m +79 h +1 h +4 h +4 h +10 h +1 h +10 h +10 h +113 h +4 h +4 h +10 h +4 h +1 h +16662 m +14476 m +4 h +10 h +1 h +1 h +10 h +40 h +10 h +92 h +307 h +14 m +1685 h +64 h +16663 m +1 h +16664 m +16665 m +1 h +10 h +16666 m +1 h +10 h +10 h +57 h +124 h +1 h +16667 m +156 h +195 h +10 h +10 h +104 h +16668 m +557 m +274 h +3 h +10 h +468 h +4 h +1 h +16669 m +615 m +4 h +307 h +16670 m +386 h +4 h +1 h +1714 h +16671 m +16672 m +4 h +157 h +4 h +16673 m +278 h +1714 h +91 h +857 h +10 h +4 h +4 h +11 h +1 h +1 h +1 h +79 h +4 h +10 h +16674 m +36 h +16675 m +10 h +16676 m +10 h +16677 m +1 h +1 h +16678 m +146 h +10 h +4 h +125 h +45 h +4 h +1366 m +16679 m +4 h +1 h +4 h +16680 m +3 h +143 h +56 h +147 h +16681 m +1 h +190 h +16682 m +383 h +16683 m +477 m +1915 m +135 h +1 h +258 h +16684 m +16685 m +109 h +11 h +1 h +4 h +64 h +4 h +10 h +16686 m +11 h +41 h +1403 h +1 h +45 h +12585 m +1 h +10 h +16687 m +73 h +16688 m +83 h +265 h +258 h +4 h +74 h +10 h +55 h +4 h +10 h +16689 m +1 h +4 h +1003 m +10 h +4 h +10 h +1 h +4 h +16690 m +4 h +16691 m +4 h +16692 m +16693 m +4 h +1 h +36 h +1 h +16694 m +4 h +146 h 
+157 h +1 h +10 h +16695 m +16696 m +1105 h +11 h +2172 m +16697 m +1 h +22 h +4 h +1 h +1 h +601 h +16698 m +1 h +10 h +1 h +16699 m +1 h +1 h +16700 m +1 h +1 h +1 h +16701 m +1 h +16702 m +10 h +1 h +4 h +185 h +4 h +10 h +82 h +169 h +16703 m +1 h +4 h +41 h +4 h +57 h +16704 m +1 h +10 h +146 h +463 m +16705 m +1 h +1 h +11 h +4 h +1 h +104 h +4 h +103 h +56 h +1 h +16706 m +16707 m +109 h +10 h +16708 m +4 h +4 h +4 h +16709 m +1 h +4 h +1 h +10 h +109 h +11 h +10 h +4 h +1 h +4 h +1 h +800 m +266 h +4 h +493 m +16710 m +4 h +16711 m +3 h +59 h +1 h +16712 m +16713 m +3 h +4 h +10 h +3 h +4 h +83 h +25 h +146 h +4 h +40 h +4 h +16714 m +1 h +124 h +4 h +1 h +113 h +4 h +219 h +1 h +195 h +4 h +1470 h +55 h +16715 m +8 h +4 h +1 h +1 h +1 h +65 h +104 h +157 h +16716 m +16717 m +11 h +16718 m +57 h +4 h +83 h +3 h +16719 m +1 h +307 h +16720 m +4 h +11 h +16721 m +4861 m +16722 m +16723 m +10 h +4297 m +97 h +4 h +104 h +16724 m +1 h +16725 m +4101 m +1766 h +4 h +4 h +169 h +97 h +4 h +59 h +4 h +4 h +11 h +16726 m +250 h +10 h +1 h +4 h +16727 m +1 h +4 h +1 h +16728 m +615 m +16729 m +4 h +478 m +10 h +16730 m +1 h +10 h +16731 m +299 h +4 h +31 h +10 h +1545 m +1 h +4 h +1 h +4 h +114 h +11 h +4 h +1 h +16732 m +4 h +4 h +16733 m +4 h +1 h +10 h +2625 m +82 h +16734 m +16735 m +1 h +332 h +10 h +186 h +4 h +11 h +57 h +195 h +1 h +4 h +25 h +16736 m +57 h +10 h +64 h +1 h +10 h +4 h +1791 m +4 h +1 h +1 h +16737 m +1 h +10 h +1 h +10 h +1 h +1 h +16738 m +82 h +4 h +11 h +4 h +1 h +16739 m +16740 m +16741 m +1 h +65 h +10 h +10 h +4359 m +1 h +1 h +16742 m +45 h +4 h +16743 m +16744 m +10 h +16745 m +16746 m +4 h +3 h +1 h +16747 m +16748 m +10 h +11 h +1 h +1 h +16749 m +4 h +1083 h +16750 m +4 h +31 h +1 h +4 h +11 h +10 h +16751 m +16752 m +10 h +4 h +10 h +181 h +16753 m +2710 m +307 h +16754 m +16755 m +82 h +125 h +4 h +385 m +16756 m +1185 m +10 h +3 h +10 h +4 h +4 h +4 h +4 h +1766 h +16757 m +278 h +4 h +1 h +330 h +1 h +4 h +1 h +4 h +4 h +16758 
m +16759 m +16760 m +135 h +10 h +25 h +124 h +601 h +390 m +2339 m +4 h +11 h +16761 m +4 h +355 m +146 h +10 h +16762 m +265 h +1 h +146 h +3533 m +1 h +64 h +10 h +82 h +16763 m +16764 m +4 h +4 h +13 h +59 h +16765 m +46 m +4 h +59 h +4 h +4 h +10 h +2733 h +10 h +16766 m +55 h +2971 m +1 h +10 h +1 h +10 h +109 h +16767 m +1 h +112 h +22 h +1 h +1 h +11 h +4 h +4 h +11 h +16768 m +125 h +1 h +4 h +1 h +10 h +1 h +13 h +16769 m +41 h +109 h +10 h +536 h +1 h +10 h +45 h +1 h +10 h +4 h +1 h +1 h +4 h +4 h +517 m +10 h +4 h +4 h +10 h +16770 m +16771 m +4 h +4 h +1 h +16772 m +10 h +4 h +4 h +2887 m +4441 m +41 h +10 h +16773 m +10 h +2710 m +4 h +109 h +10 h +4 h +1 h +59 h +322 h +16774 m +976 h +16775 m +238 h +4 h +1 h +4 h +1 h +4 h +16776 m +1 h +10 h +10 h +16777 m +10 h +31 h +10 h +4 h +4 h +241 h +10 h +1 h +124 h +55 h +12 h +10 h +1 h +10 h +3 h +4 h +506 m +4 h +16778 m +4 h +16779 m +4 h +1835 m +4 h +4 h +109 h +16780 m +16781 m +11 h +435 h +1 h +4 h +1 h +16782 m +2300 m +16783 m +1 h +976 h +1 h +1 h +1 h +16784 m +2379 h +16785 m +4 h +16786 m +6461 m +1 h +25 h +10 h +11 h +4 h +1 h +1 h +11 h +10 h +82 h +4 h +11 h +278 h +687 h +186 h +73 h +97 h +97 h +469 m +10 h +45 h +195 h +11 h +16787 m +16788 m +1321 m +16789 m +1 h +16790 m +1 h +10 h +4 h +270 h +10 h +4 h +65 h +31 h +10 h +114 h +4 h +125 h +4 h +1 h +8140 m +4 h +4 h +11707 m +4 h +16791 m +10 h +4 h +1 h +1 h +4 h +358 h +1 h +109 h +4 h +16792 m +1 h +4 h +158 h +16793 m +4 h +10 h +10 h +124 h +16794 m +964 m +4 h +1 h +119 h +25 h +16795 m +55 h +109 h +383 h +4 h +82 h +857 h +4724 m +104 h +4 h +16796 m +16797 m +16798 m +31 h +1 h +56 h +4 h +196 h +143 h +4 h +16799 m +82 h +1 h +10 h +4 h +10 h +4 h +11346 m +1 h +16800 m +1 h +16801 m +1 h +4 h +16802 m +10 h +10 h +57 h +10 h +4 h +229 h +16803 m +1 h +16804 m +1403 h +5308 m +16805 m +2851 h +16806 m +10 h +104 h +4 h +4 h +16514 m +358 h +157 h +4 h +10 h +1478 h +94 h +4 h +1 h +74 h +124 h +10 h +8497 m +16807 m 
+4 h +2040 m +4 h +359 h +31 h +1 h +10 h +1 h +10 h +157 h +4 h +16808 m +16809 m +1 h +1 h +1 h +464 h +1 h +16810 m +10 h +190 h +10 h +4 h +10 h +112 h +57 h +1 h +4905 h +2087 m +16811 m +83 h +55 h +83 h +10 h +99 m +4 h +10 h +146 h +4 h +1478 h +1016 h +16812 m +319 h +262 h +4 h +4 h +4 h +4 h +1 h +4 h +25 h +4 h +16813 m +278 h +4 h +4 h +16814 m +104 h +181 h +1 h +41 h +1714 h +359 h +82 h +4 h +10 h +1 h +16815 m +16816 m +16817 m +4 h +16748 m +1 h +70 m +172 h +16818 m +4 h +124 h +16819 m +4 h +2788 h +4 h +4 h +16820 m +16821 m +1083 h +10 h +4 h +4 h +297 h +2002 h +1666 m +1492 m +97 h +169 h +146 h +4 h +16822 m +1 h +4 h +570 m +4 h +1 h +16823 m +1 h +16824 m +1 h +10 h +16825 m +10 h +16826 m +10 h +75 m +12 h +10 h +4 h +10 h +1261 h +10 h +10 h +238 h +10 h +1 h +16827 m +173 h +16828 m +4 h +1137 h +9912 m +4 h +27 h +73 h +4 h +16829 m +59 h +10901 m +113 h +4 h +4 h +4 h +10 h +4 h +10 h +10 h +124 h +16830 m +10 h +1 h +190 h +16831 m +4 h +4 h +125 h +4 h +4 h +1 h +181 h +4 h +16832 m +82 h +12 h +124 h +1 h +11 h +1 h +1250 h +6266 m +16833 m +1766 h +1 h +74 h +10 h +11 h +13 h +65 h +4 h +1 h +4 h +16834 m +109 h +195 h +4 h +1 h +10 h +4 h +10 h +10 h +4 h +11 h +1 h +4 h +10 h +4 h +4 h +2192 m +1 h +1 h +1 h +4 h +16835 m +266 h +4 h +16836 m +10 h +4 h +4 h +4 h +10 h +4 h +4 h +1 h +55 h +16837 m +332 h +10 h +10 h +16838 m +4 h +4 h +1 h +399 h +16839 m +59 h +10 h +443 h +64 h +4 h +4 h +976 h +10 h +10 h +238 h +4 h +2495 m +11 h +2265 m +4 h +16840 m +1 h +1 h +16841 m +295 h +1 h +16842 m +1 h +10 h +6185 m +57 h +16843 m +4 h +25 h +16844 m +4 h +1 h +4 h +16845 m +124 h +10 h +1 h +16846 m +16847 m +16848 m +4 h +36 h +16849 m +1 h +59 h +1875 m +4 h +1 h +1886 m +11 h +16850 m +40 h +266 h +16851 m +94 h +16852 m +74 h +4 h +10 h +16853 m +4 h +109 h +204 m +4 h +1 h +172 h +1 h +1 h +1 h +4 h +10 h +10 h +794 m +4 h +4 h +1 h +4 h +31 h +4 h +10 h +10 h +4 h +2002 h +1 h +196 h +196 h +4 h +4 h +83 h +16854 m +4 h 
+125 h +1 h +16855 m +4 h +83 h +1 h +109 h +1 h +1 h +10 h +109 h +16856 m +1 h +10 h +1 h +11 h +4 h +4 h +31 h +16857 m +1 h +16858 m +10 h +4 h +3 h +59 h +14814 m +1 h +4 h +10 h +4 h +16859 m +12 h +1 h +278 h +10 h +4 h +135 h +1 h +16860 m +3 h +16861 m +4 h +4 h +114 h +1 h +1 h +10 h +1 h +7802 m +11 h +4 h +4 h +16862 m +56 h +338 h +59 h +10 h +4 h +4 h +4 h +114 h +888 h +1 h +181 h +1 h +146 h +1105 h +82 h +10 h +16863 m +4 h +5610 m +1564 m +10 h +16864 m +190 h +1 h +2914 m +109 h +10 h +1 h +1137 h +16865 m +16866 m +4 h +8 h +278 h +10 h +276 h +10 h +355 m +1 h +4 h +1 h +16867 m +11 h +118 h +1 h +1 h +4 h +16868 m +4 h +16869 m +4 h +1 h +16870 m +1 h +10 h +16871 m +13392 m +313 m +4 h +4 h +16872 m +4 h +4 h +135 h +10 h +16873 m +16874 m +4 h +195 h +4 h +1 h +16875 m +16876 m +4 h +4 h +4 h +4 h +16877 m +10 h +397 m +65 h +109 h +1437 m +5526 m +1 h +1 h +16878 m +12 h +1 h +10 h +1089 h +185 h +10 h +358 h +16879 m +110 h +4 h +4 h +4 h +10 h +156 h +164 h +190 h +3 h +190 h +41 h +4 h +16880 m +16881 m +4 h +1 h +1 h +16882 m +125 h +1 h +16883 m +737 m +4 h +3155 m +41 h +1 h +1 h +4 h +4 h +1 h +11 h +11 h +1 h +59 h +4 h +10 h +36 h +25 h +108 h +11 h +4 h +7 m +10 h +10 h +16884 m +16885 m +2194 m +124 h +4 h +16886 m +123 h +25 h +1 h +10 h +125 h +4 h +9933 m +718 h +56 h +4 h +4 h +109 h +1 h +279 h +1 h +16887 m +83 h +1137 h +3 h +1 h +4 h +1 h +4 h +1 h +25 h +4 h +4 h +4 h +332 h +16888 m +1 h +4 h +4 h +16889 m +1 h +1 h +4 h +10 h +10 h +4 h +332 h +10 h +4 h +11 h +124 h +147 h +4 h +4 h +16890 m +10 h +1 h +4 h +16891 m +5224 m +5709 m +10 h +1 h +203 h +4 h +16892 m +1 h +16893 m +150 m +16894 m +10 h +4 h +1 h +5357 m +16895 m +79 h +16896 m +56 h +31 h +4 h +4 h +10 h +11 h +170 h +4 h +4 h +1 h +82 h +10 h +4 h +4 h +289 h +444 m +10 h +16897 m +386 h +10 h +1083 h +1 h +16898 m +1 h +11731 m +1 h +10 h +4 h +8 h +16899 m +10 h +10 h +4 h +2233 m +4 h +147 h +4 h +4 h +45 h +10 h +1 h +12 h +5053 m +10 h +16900 m +258 
h +16901 m +4 h +1 h +1 h +1886 m +4 h +16902 m +10 h +4 h +83 h +10 h +4 h +16903 m +1 h +1 h +1 h +135 h +1056 m +16904 m +1 h +109 h +16905 m +1 h +10 h +11 h +97 h +16906 m +10 h +104 h +74 h +1 h +16907 m +10 h +16908 m +2846 m +16909 m +10 h +123 h +4 h +4 h +4 h +158 h +74 h +4 h +10 h +1 h +4 h +11715 m +447 h +4848 m +16910 m +124 h +4 h +10 h +16911 m +167 h +10 h +1 h +4 h +1 h +10 h +16912 m +16913 m +16914 m +10 h +16915 m +1 h +620 m +124 h +1 h +1 h +1 h +1 h +36 h +10 h +173 h +11 h +10 h +57 h +16916 m +4 h +124 h +4 h +1 h +10 h +10 h +13 h +4 h +4 h +1 h +1 h +10 h +1 h +4 h +4 h +1 h +16917 m +4 h +4 h +16918 m +16919 m +355 h +4 h +16920 m +4 h +1 h +204 h +4 h +4 h +11 h +1 h +109 h +10 h +4 h +332 h +4608 m +82 h +55 h +16921 m +4 h +279 h +12 h +1406 m +990 m +4 h +3 h +1 h +330 h +1 h +4 h +57 h +41 h +1 h +1 h +1 h +1 h +10 h +338 h +3 h +125 h +36 h +1650 h +4 h +10 h +3 h +16922 m +4 h +353 h +1 h +4 h +169 h +1 h +1 h +11 h +359 h +41 h +16923 m +1 h +4 h +4 h +1504 m +1 h +11 h +4 h +16924 m +1 h +41 h +11825 m +1 h +16925 m +423 m +2436 m +16926 m +16927 m +4 h +10 h +4 h +1 h +10 h +1 h +10 h +16928 m +4 h +757 h +16929 m +1 h +4 h +4 h +7214 m +230 m +1250 h +1 h +1 h +59 h +16930 m +4 h +10 h +4 h +10 h +1337 m +4 h +4 h +10 h +65 h +10 h +2865 m +3 h +16931 m +4538 m +656 m +16932 m +4 h +266 h +16933 m +16934 m +16935 m +12 h +10 h +10 h +4590 m +1 h +16936 m +1 h +1 h +4 h +1 h +1772 m +1 h +228 m +1 h +11 h +16937 m +1642 h +57 h +124 h +16938 m +10 h +16939 m +140 h +74 h +4 h +16940 m +3 h +1 h +1 h +143 h +1 h +4 h +16941 m +0 m +109 h +238 h +94 h +1 h +1 h +16942 m +1 h +124 h +84 m +9411 m +4 h +1 h +55 h +4 h +16943 m +4 h +224 h +15156 m +4 h +16944 m +1 h +196 h +4 h +4 h +10 h +1 h +11 h +1975 m +173 h +1 h +59 h +1 h +10 h +1 h +16945 m +109 h +4 h +16946 m +59 h +601 h +1847 m +16947 m +74 h +10 h +1 h +1650 h +1137 h +1 h +4 h +1 h +125 h +4 h +4 h +16948 m +83 h +1 h +4 h +10 h +4 h +10 h +4 h +16949 m +11 h +3 h 
+1 h +10 h +4 h +196 h +16950 m +4 h +4 h +169 h +4 h +22 h +119 h +10 h +3236 m +10 h +181 h +1 h +4 h +297 h +10 h +1 h +57 h +114 h +4 h +4 h +10 h +104 h +1114 m +123 h +79 h +238 h +4 h +16951 m +5514 m +4 h +4 h +123 h +1 h +1 h +10 h +135 h +241 h +12 h +4 h +16952 m +4 h +1 h +10 h +2887 m +16953 m +4 h +976 h +65 h +278 h +1 h +1 h +16954 m +104 h +10 h +16955 m +1 h +112 h +41 h +59 h +265 h +59 h +10 h +1 h +4 h +16956 m +1 h +16957 m +59 h +157 h +4 h +10 h +10 h +12131 m +4 h +16958 m +10 h +13 h +276 h +10 h +147 h +4 h +1321 m +4 h +31 h +16959 m +4 h +4 h +1 h +204 h +4 h +16960 m +278 h +4 h +16961 m +16962 m +10 h +4 h +16963 m +73 h +4 h +144 h +16964 m +1 h +73 h +8 h +16965 m +22 h +10 h +1 h +4 h +119 h +1 h +1 h +266 h +976 h +167 h +16966 m +10 h +10 h +4 h +4030 h +17 m +630 m +10 h +109 h +10 h +1 h +1 h +16967 m +1 h +1 h +10 h +224 h +82 h +16968 m +4 h +4 h +1 h +1 h +4 h +1 h +92 h +59 h +4 h +31 h +16969 m +4 h +1 h +1 h +4 h +1 h +10 h +4 h +16970 m +1 h +83 h +447 h +16971 m +1 h +224 h +11 h +4 h +2379 h +16972 m +4 h +167 h +4 h +36 h +4 h +3 h +77 h +10 h +1 h +4 h +16973 m +10 h +4 h +10 h +11 h +55 h +112 h +4 h +146 h +124 h +10 h +16974 m +13833 m +10 h +11 h +10 h +1 h +16975 m +13 h +16976 m +1089 h +1 h +10 h +10 h +4 h +265 h +16977 m +16978 m +1 h +1 h +16979 m +10 h +4 h +57 h +10 h +16980 m +16981 m +4 h +4 h +4 h +557 m +332 h +2054 m +10 h +10 h +10 h +808 m +4 h +569 h +10 h +16982 m +1 h +4 h +4 h +40 h +10 h +41 h +109 h +104 h +10 h +10 h +629 m +45 h +4 h +16983 m +9396 m +229 h +11 h +10 h +1 h +1 h +10 h +16984 m +1 h +13 h +1016 h +4 h +7236 m +4 h +91 h +1 h +11 h +2358 m +230 m +447 h +33 m +10 h +4 h +16985 m +10 h +316 m +241 h +4 h +3 h +4 h +3987 m +143 h +55 h +4 h +463 m +1 h +16986 m +4 h +1 h +1260 m +10 h +1 h +4 h +2314 m +1 h +59 h +13324 m +278 h +1 h +92 h +332 h +2438 m +16987 m +1 h +103 h +1 h +74 h +371 h +684 m +16988 m +10 h +3 h +10 h +16989 m +16990 m +10 h +11 h +164 h +4 h +1 h +399 h 
+10 h +1 h +16991 m +4 h +169 h +119 h +4 h +4057 m +10 h +16992 m +4 h +1027 h +109 h +1 h +82 h +1 h +4 h +11 h +4 h +4 h +1470 h +266 h +16993 m +4 h +10 h +4 h +16994 m +1 h +4 h +190 h +16995 m +2951 m +1 h +1 h +25 h +1 h +4 h +125 h +4 h +1 h +265 h +4 h +10 h +64 h +1 h +203 h +4 h +10 h +3 h +297 h +16996 m +4 h +1 h +4 h +59 h +16997 m +1 h +10 h +10 h +1 h +1 h +16998 m +16999 m +17000 m +8040 m +82 h +4 h +17001 m +17002 m +11 h +888 h +10 h +169 h +119 h +57 h +4 h +17003 m +17004 m +4 h +1 h +4 h +4 h +31 h +10 h +170 h +4 h +1 h +10 h +10 h +1 h +4 h +41 h +4 h +10 h +112 h +1 h +13907 m +1 h +4 h +1 h +45 h +4 h +167 h +17005 m +135 h +17006 m +4 h +1 h +17007 m +10 h +10 h +82 h +1 h +64 h +57 h +10 h +17008 m +270 h +65 h +124 h +4 h +17009 m +4 h +1 h +169 h +17010 m +1016 h +1 h +196 h +4 h +4 h +4 h +31 h +17011 m +8486 m +1 h +258 h +4 h +17012 m +843 h +10 h +4 h +11 h +1370 m +4 h +45 h +74 h +1 h +1 h +578 h +1 h +55 h +443 h +10 h +97 h +83 h +4 h +17013 m +1 h +1 h +4 h +4 h +17014 m +4 h +17015 m +4 h +17016 m +97 h +1 h +11 h +4 h +17017 m +10 h +10 h +4 h +4 h +4 h +4 h +10 h +164 h +104 h +17018 m +4 h +4 h +779 h +400 m +4 h +10 h +17019 m +48 h +41 h +17020 m +17021 m +10 h +4 h +60 m +17022 m +4 h +347 h +1 h +4 h +17023 m +10 h +4 h +1 h +82 h +316 m +12 h +4 h +1083 h +4 h +1 h +4 h +4 h +4 h +169 h +4 h +10 h +4 h +17024 m +4 h +4240 m +181 h +4 h +17025 m +10 h +4 h +1 h +717 m +196 h +1 h +17026 m +4 h +65 h +64 h +10 h +1 h +6558 m +4 h +1 h +4 h +4 h +10 h +4 h +56 h +17027 m +17028 m +10 h +10 h +425 m +13 h +4 h +31 h +125 h +17029 m +1 h +4 h +4 h +1 h +3286 m +4 h +17030 m +10 h +10 h +109 h +184 h +4332 m +1 h +4 h +2340 m +1 h +185 h +17031 m +125 h +17032 m +17033 m +10 h +4 h +4 h +1 h +17034 m +17035 m +4 h +17036 m +4 h +4 h +4 h +4 h +10 h +17037 m +2887 m +65 h +11 h +6010 m +31 h +22 h +25 h +17038 m +1 h +10 h +1 h +770 m +4 h +4 h +10 h +4 h +195 h +4 h +4 h +17039 m +4 h +4 h +124 h +17040 m +4 h +82 h +17041 
m +1 h +17042 m +17043 m +59 h +114 h +9396 m +17044 m +10 h +181 h +4 h +4 h +1 h +4 h +17045 m +1 h +4 h +229 h +9831 m +17046 m +11910 m +928 m +10 h +10 h +10 h +17047 m +1 h +41 h +11 h +1 h +17048 m +10 h +17049 m +17050 m +10 h +11 h +10 h +10 h +4 h +1 h +156 h +4 h +10 h +10 h +976 h +10 h +1 h +17051 m +17052 m +10 h +1 h +11 h +1 h +17053 m +1 h +1 h +1 h +976 h +10 h +4 h +17054 m +250 h +1 h +1 h +17055 m +3 h +195 h +4 h +17056 m +10 h +1105 h +1 h +10 h +36 h +3 h +1027 h +10 h +757 h +17057 m +25 h +10 h +1 h +1 h +368 h +4 h +11 h +125 h +185 h +17058 m +196 h +1 h +79 h +4 h +2172 m +299 h +59 h +17059 m +307 h +13 h +1 h +11 h +173 h +4 h +10 h +181 h +1 h +17060 m +17061 m +17062 m +371 h +1 h +4 h +262 h +10 h +10 h +408 m +17063 m +4 h +17064 m +10 h +1 h +10 h +1 h +4 h +2459 m +140 h +10 h +10 h +4 h +238 h +1 h +4 h +4 h +10 h +1 h +17065 m +4 h +147 h +125 h +4 h +17066 m +10 h +3322 m +17067 m +4 h +1 h +22 h +164 h +31 h +4 h +1 h +8 h +10 h +443 h +649 m +4 h +4 h +274 h +1 h +4 h +10 h +1 h +1 h +1 h +17068 m +4 h +3 h +74 h +4 h +4 h +4 h +1 h +17069 m +4 h +10 h +258 h +4 h +4 h +3 h +82 h +79 h +17070 m +104 h +1 h +195 h +692 h +11 h +11 h +17071 m +10 h +17072 m +1 h +10 h +10 h +17073 m +146 h +1 h +4 h +767 m +4 h +104 h +104 h +1 h +17074 m +4177 m +1 h +17075 m +17076 m +11 h +1 h +1620 m +4 h +17077 m +229 h +857 h +10062 m +170 h +17078 m +17079 m +17080 m +1 h +1 h +46 m +17081 m +45 h +17082 m +4 h +3 h +1 h +124 h +10 h +17083 m +41 h +1 h +4 h +10 h +17084 m +59 h +17085 m +3557 m +195 h +17086 m +17087 m +10 h +17088 m +17089 m +10 h +55 h +82 h +10 h +297 h +17090 m +569 h +17091 m +10 h +1 h +10 h +17092 m +10 h +6946 m +9800 m +1 h +31 h +4 h +124 h +82 h +1 h +17093 m +265 h +10 h +1 h +184 h +55 h +1 h +1 h +4 h +3 h +4 h +31 h +10 h +158 h +10 h +10 h +4 h +1 h +10 h +1 h +12 h +3704 m +1 h +297 h +25 h +57 h +4 h +59 h +1 h +28 h +11 h +36 h +1 h +4 h +5125 m +36 h +4 h +4 h +57 h +1 h +1 h +10 h +10 h +10 h +230 
h +1 h +4 h +17094 m +1 h +73 h +358 h +64 h +278 h +4 h +5 h +1 h +4 h +10 h +4 h +10 h +1 h +10 h +17095 m +10 h +1 h +1454 h +17096 m +36 h +3707 m +2017 m +1 h +4 h +4919 m +4 h +10 h +4 h +172 h +11 h +10 h +196 h +10 h +10 h +10 h +1137 h +1677 m +10 h +157 h +4 h +109 h +4 h +109 h +17097 m +65 h +17098 m +124 h +17099 m +4 h +17100 m +4 h +10 h +4 h +124 h +112 h +17101 m +4 h +4 h +192 h +17102 m +4 h +45 h +403 h +1 h +238 h +4 h +1 h +17103 m +4 h +17104 m +4 h +1074 h +4 h +22 h +185 h +3025 m +10 h +125 h +156 h +1 h +92 h +4 h +10 h +5470 m +1 h +4 h +4 h +170 h +17105 m +83 h +5 h +17106 m +97 h +716 m +1062 m +4 h +59 h +110 h +1642 h +1 h +17107 m +1 h +10 h +17108 m +4 h +4 h +4 h +17109 m +124 h +4 h +4 h +31 h +1 h +282 m +1074 h +4 h +164 h +3 h +31 h +4 h +1 h +4 h +1 h +4 h +112 h +10 h +4 h +10 h +17110 m +125 h +11 h +17111 m +1 h +1 h +10 h +1 h +17112 m +17113 m +10 h +10 h +185 h +195 h +83 h +4 h +10 h +368 h +987 m +10 h +359 h +1710 m +17114 m +73 h +4 h +17115 m +17116 m +4 h +10 h +1 h +83 h +17117 m +17118 m +109 h +10 h +4 h +4 h +17119 m +1 h +338 h +10 h +4 h +12131 m +4 h +17120 m +17121 m +587 m +2041 m +4 h +124 h +11 h +954 m +4 h +79 h +4 h +10 h +9585 m +801 m +1 h +11 h +1 h +10 h +10 h +3 h +10 h +11 h +17122 m +13821 m +17123 m +55 h +13 h +238 h +4 h +1 h +520 h +4 h +1 h +17124 m +116 m +1 h +3 h +1 h +1 h +3 h +4 h +941 m +4 h +4 h +4 h +4 h +10 h +4 h +17125 m +17126 m +918 m +601 h +10 h +4 h +4 h +5967 m +4 h +966 h +17127 m +4 h +4 h +17128 m +1 h +1737 m +1 h +1 h +3562 m +17129 m +1 h +65 h +17130 m +4 h +1 h +104 h +10 h +10 h +17131 m +4 h +11 h +109 h +10 h +17132 m +1 h +4 h +1725 m +443 h +17133 m +425 m +17134 m +332 h +124 h +4 h +10 h +17135 m +17136 m +59 h +31 h +17137 m +10 h +172 h +31 h +10 h +157 h +17138 m +4 h +1137 h +17139 m +4 h +4 h +17140 m +276 h +1 h +4 h +10 h +17141 m +4 h +4 h +10739 m +4 h +4 h +17142 m +146 h +1 h +10 h +59 h +1 h +4 h +4 h +1337 m +4 h +3 h +17143 m +1 h +1 h +7900 
m +258 h +17144 m +74 h +1 h +4 h +266 h +4 h +10 h +4 h +82 h +124 h +17145 m +1 h +1 h +1 h +4 h +55 h +276 h +31 h +17146 m +1 h +10 h +266 h +143 h +10 h +204 h +13 h +59 h +41 h +17147 m +4 h +10 h +4 h +10 h +124 h +119 h +2617 m +1 h +1 h +45 h +17148 m +10 h +195 h +4 h +10 h +10 h +1 h +17149 m +4 h +74 h +295 h +147 h +41 h +1 h +17150 m +1 h +219 h +4 h +4 h +4 h +1 h +25 h +506 m +73 h +10 h +447 h +150 m +4 h +10 h +1 h +17151 m +4 h +25 h +10 h +1 h +10 h +124 h +4 h +36 h +17152 m +1 h +10 h +17153 m +1 h +4 h +17154 m +4 h +1 h +4 h +167 h +4 h +79 h +8486 m +11 h +17155 m +488 m +97 h +1 h +4 h +57 h +13334 m +4 h +3 h +1 h +184 h +1 h +57 h +4 h +41 h +1 h +1 h +17156 m +276 h +114 h +1 h +10 h +1 h +4 h +195 h +17157 m +1 h +10 h +59 h +124 h +307 h +10 h +10 h +10 h +810 m +1 h +10 h +10 h +27 h +1 h +1 h +4 h +1470 h +4 h +12898 m +4 h +274 h +195 h +1 h +36 h +10 h +119 h +27 h +1 h +403 h +10 h +10 h +1 h +59 h +4 h +10 h +16395 m +10 h +4 h +1 h +17158 m +383 h +387 m +4 h +10 h +1 h +4 h +17159 m +1796 m +1 h +17160 m +11 h +10 h +17161 m +10 h +5 h +1 h +1 h +17162 m +274 h +17163 m +113 h +488 m +4 h +3 h +4 h +196 h +147 h +1 h +56 h +17164 m +270 h +31 h +4 h +3 h +1 h +7 m +3 h +124 h +17165 m +4 h +2788 h +403 h +12 h +1 h +1053 m +17166 m +17167 m +433 m +4 h +1 h +97 h +4 h +1 h +74 h +8 h +17168 m +55 h +25 h +4 h +17169 m +10556 m +17170 m +1 h +13 h +10 h +1304 m +104 h +97 h +10 h +10 h +17171 m +10 h +1 h +4 h +10 h +687 h +4 h +4 h +4 h +82 h +17172 m +17173 m +488 h +10 h +1 h +45 h +11 h +3 h +17174 m +1 h +4 h +4 h +1 h +276 h +10 h +4 h +1 h +59 h +17175 m +113 h +10 h +196 h +164 h +195 h +4 h +17176 m +1 h +3601 m +10 h +135 h +10 h +11 h +59 h +17177 m +10 h +17178 m +17179 m +4 h +114 h +17180 m +888 h +10 h +1 h +17181 m +2558 m +4 h +17182 m +10 h +41 h +10 h +10 h +17183 m +1 h +10 h +17184 m +17185 m +12993 m +10 h +114 h +1 h +2438 m +1 h +1 h +14570 m +763 m +10 h +1 h +872 m +1 h +4256 m +1 h +17186 m +17187 m 
+1 h +4 h +1556 m +1 h +17188 m +17189 m +195 h +17190 m +17191 m +10 h +1 h +1542 m +1 h +1 h +265 h +4 h +10 h +10 h +125 h +258 h +17192 m +13 h +1 h +1 h +601 h +10 h +114 h +3 h +935 m +1 h +10 h +124 h +1 h +1 h +17193 m +1 h +12244 m +41 h +10 h +17194 m +41 h +4 h +1 h +4 h +84 m +17195 m +17196 m +1 h +4 h +4 h +1322 m +4 h +4 h +4 h +25 h +10 h +74 h +10 h +1 h +17197 m +4 h +82 h +12911 m +10 h +4 h +17198 m +17199 m +147 h +41 h +10 h +4 h +10 h +17200 m +4 h +10 h +146 h +1 h +4 h +174 m +4 h +57 h +25 h +4 h +17201 m +1 h +10 h +3 h +4 h +4 h +1 h +125 h +3 h +4 h +27 h +104 h +1 h +10 h +10 h +4 h +1 h +124 h +31 h +4 h +74 h +1 h +1 h +204 h +83 h +57 h +17202 m +4747 m +124 h +224 h +4 h +10 h +17203 m +10 h +65 h +10 h +4 h +65 h +4 h +79 h +10 h +4 h +10 h +10 h +578 h +17204 m +17205 m +1 h +1 h +265 h +4 h +11 h +478 m +6505 m +1 h +3276 m +4 h +12 h +17206 m +6304 m +10 h +4 h +4 h +4 h +265 h +4 h +1 h +1 h +769 m +17207 m +170 h +17208 m +10 h +10 h +4 h +1 h +17209 m +17210 m +1 h +10 h +1 h +10 h +1 h +17211 m +10 h +10 h +124 h +10 h +17212 m +17213 m +4 h +1 h +91 h +17214 m +11 h +4 h +17215 m +4 h +1 h +17216 m +10 h +10 h +4 h +1 h +1 h +4 h +25 h +10 h +59 h +1 h +17217 m +17218 m +17219 m +10 h +10 h +59 h +1 h +109 h +10 h +4409 m +4 h +4 h +4 h +10 h +4240 m +1 h +10 h +45 h +4 h +10 h +10 h +124 h +4 h +4 h +17220 m +146 h +17221 m +262 h +4 h +17222 m +1 h +10 h +196 h +135 h +12655 m +4240 m +17223 m +1 h +1 h +4 h +4 h +1 h +97 h +74 h +1 h +295 h +4 h +4 h +1 h +22 h +1 h +55 h +779 h +1 h +1 h +4 h +4 h +73 h +4 h +10 h +1 h +17224 m +17225 m +65 h +10 h +4 h +17226 m +1 h +10 h +1 h +3188 m +17227 m +59 h +4 h +181 h +56 h +17228 m +493 m +566 m +17229 m +17230 m +1 h +256 h +10 h +4 h +10 h +3 h +1 h +1 h +10 h +4 h +36 h +570 m +1 h +10 h +10 h +17231 m +17232 m +10 h +31 h +10 h +97 h +5760 m +4 h +12 h +4 h +4 h +3 h +4 h +1 h +4 h +1403 h +10 h +10 h +1 h +17233 m +640 h +4 h +4 h +17234 m +31 h +802 m +17235 m +1 h 
+25 h +17236 m +1 h +17237 m +1835 m +17238 m +1089 h +1 h +79 h +1 h +124 h +17239 m +4 h +17240 m +4 h +146 h +135 h +319 h +109 h +12 h +295 h +3 h +1 h +4 h +15377 m +11 h +4 h +10 h +10 h +10 h +10 h +1045 m +17241 m +1 h +17242 m +147 h +1 h +31 h +11 h +17243 m +1 h +4 h +1 h +1 h +10 h +10 h +13 h +17244 m +104 h +4 h +10 h +4576 m +4 h +31 h +4 h +124 h +17245 m +687 h +181 h +17246 m +17247 m +196 h +195 h +17248 m +4 h +82 h +25 h +17249 m +17250 m +4 h +4292 m +17251 m +17252 m +11 h +10 h +1 h +13 h +17253 m +10 h +1 h +10 h +1 h +10 h +17254 m +5917 m +601 h +4 h +97 h +4 h +1403 h +17255 m +114 h +82 h +114 h +73 h +17256 m +17257 m +10 h +1 h +7125 m +12 h +10 h +14723 m +10 h +1 h +238 h +108 h +4576 m +4 h +10 h +1955 m +1 h +4 h +4 h +1 h +10 h +10 h +17258 m +109 h +1105 h +17259 m +7938 m +1 h +4 h +11 h +3 h +4 h +36 h +1 h +4 h +17260 m +104 h +17261 m +10 h +4 h +12 h +4 h +1 h +124 h +10 h +82 h +17262 m +17263 m +4 h +41 h +113 h +4 h +10 h +355 h +295 h +17264 m +17265 m +11 h +11 h +17266 m +1 h +412 h +1 h +57 h +11 h +109 h +17267 m +1 h +874 m +195 h +123 h +69 h +4 h +276 h +186 h +4 h +41 h +82 h +1 h +1 h +4 h +1 h +4 h +737 m +172 h +443 h +27 h +8 h +1 h +4 h +17268 m +4 h +1 h +538 h +1 h +1 h +4 h +17269 m +36 h +124 h +4 h +11 h +84 m +195 h +520 h +17270 m +17271 m +371 h +10 h +4 h +17272 m +17273 m +119 h +1 h +17274 m +4 h +258 h +114 h +31 h +4 h +3779 m +1 h +4 h +92 h +17275 m +4 h +5522 m +147 h +1 h +4 h +17276 m +59 h +4 h +1886 h +10 h +4 h +17277 m +4 h +1 h +4229 m +2172 m +11 h +1406 m +17278 m +4 h +17279 m +4 h +4 h +17280 m +10 h +17281 m +1835 m +10 h +83 h +4 h +1 h +10 h +10 h +1 h +1 h +2025 m +843 h +17282 m +1 h +17283 m +3 h +10 h +10 h +4 h +4 h +17284 m +10 h +1 h +17285 m +1 h +1 h +1 h +4 h +4 h +10 h +1 h +4 h +17286 m +4 h +4 h +4 h +4 h +11 h +4 h +1 h +1 h +1 h +4 h +447 h +82 h +4 h +4 h +17287 m +3768 m +17288 m +1 h +4 h +238 h +17289 m +64 h +64 h +10 h +4 h +11 h +1790 m +17290 m +1868 m +4 
h +146 h +17291 m +276 h +125 h +10 h +10 h +17292 m +83 h +190 h +1 h +1 h +4 h +109 h +4 h +17293 m +4 h +1 h +1 h +10 h +10 h +1 h +3 h +11 h +4 h +17294 m +1771 m +266 h +1 h +1 h +4 h +4 h +1 h +31 h +4 h +11 h +17295 m +59 h +10 h +10 h +10 h +1 h +124 h +4 h +10 h +41 h +10 h +6135 m +1 h +1 h +125 h +17296 m +57 h +11 h +11 h +94 h +17297 m +4 h +1 h +10 h +46 m +536 h +82 h +4 h +4 h +1 h +10 h +4 h +4 h +358 h +17298 m +17299 m +4 h +17300 m +17301 m +4 h +1 h +1 h +307 h +82 h +4 h +10 h +17302 m +1 h +109 h +17303 m +17304 m +1 h +1 h +1 h +59 h +1 h +10 h +1 h +4 h +1 h +12 h +2627 m +10900 m +124 h +4 h +4 h +36 h +124 h +1 h +4 h +10 h +82 h +66 m +299 h +1 h +1 h +4 h +1796 m +4 h +167 h +10 h +1337 m +135 h +1 h +1 h +17305 m +94 h +4 h +79 h +1 h +10 h +1 h +4 h +190 h +10 h +112 h +4 h +16338 m +65 h +1 h +10 h +17306 m +17307 m +10 h +4 h +10 h +181 h +1 h +11 h +8511 m +4 h +10 h +10 h +146 h +4 h +143 h +10 h +17308 m +1 h +17309 m +17310 m +4 h +41 h +10 h +83 h +25 h +10 h +17311 m +140 h +17312 m +1 h +1 h +17313 m +1 h +83 h +106 h +276 h +10 h +45 h +1 h +59 h +1 h +3 h +10 h +10 h +17314 m +17315 m +1 h +4 h +41 h +1574 m +1027 h +4 h +1 h +17316 m +10 h +17317 m +10 h +10 h +4 h +4 h +4 h +123 h +17318 m +13 h +2002 h +1 h +17319 m +3 h +4 h +1 h +4 h +109 h +125 h +74 h +17320 m +17321 m +4 h +10 h +10 h +1 h +10 h +258 h +17322 m +4 h +10 h +17323 m +4 h +4 h +17324 m +4 h +82 h +59 h +82 h +4 h +59 h +17325 m +17326 m +124 h +1975 m +17327 m +4 h +1 h +10 h +4 h +94 h +79 h +69 h +1 h +17328 m +4 h +1 h +17329 m +1 h +10 h +1 h +10 h +12 h +48 h +124 h +124 h +17330 m +17331 m +1 h +5537 m +113 h +338 h +4 h +1 h +17332 m +4 h +110 h +10 h +17333 m +359 h +17334 m +1 h +1 h +4 h +4 h +10 h +1 h +4 h +17335 m +4 h +10 h +1 h +1 h +10 h +17336 m +1 h +17337 m +4 h +10 h +4 h +17338 m +10 h +170 h +17339 m +17340 m +17341 m +104 h +17342 m +11 h +1 h +1 h +1 h +4 h +1 h +10 h +124 h +10 h +4 h +295 h +4 h +1 h +11 h +4 h +10 h +4 h +278 
h +59 h +123 h +1 h +4 h +10 h +10 h +278 h +59 h +17343 m +65 h +1 h +224 h +17344 m +17345 m +536 h +4 h +1 h +10 h +124 h +13 h +258 h +4 h +45 h +17346 m +17347 m +4 h +10 h +11 h +94 h +73 h +4 h +56 h +8767 m +10 h +4 h +73 h +1 h +17348 m +4 h +55 h +17349 m +1 h +15790 m +17350 m +10 h +17351 m +1 h +1 h +270 h +17352 m +65 h +10 h +17353 m +10 h +1 h +105 m +4 h +10 h +4 h +1 h +10 h +1725 m +83 h +59 h +1 h +45 h +266 h +124 h +97 h +4 h +1 h +104 h +4 h +17354 m +195 h +17355 m +190 h +4 h +10 h +106 h +888 h +45 h +1 h +4 h +17356 m +4 h +4 h +41 h +1 h +82 h +110 h +266 h +4 h +276 h +17357 m +170 h +11 h +10 h +10 h +41 h +10 h +1 h +4 h +823 m +1 h +4 h +10 h +10 h +135 h +1 h +229 h +119 h +173 h +1 h +4 h +167 h +10 h +10 h +4 h +800 m +4 h +17358 m +17359 m +17360 m +55 h +4 h +65 h +10 h +4 h +1 h +97 h +17361 m +17362 m +172 h +156 h +4 h +1 h +383 h +17363 m +4 h +1 h +4 h +2720 m +4 h +169 h +10 h +82 h +4 h +4 h +1 h +33 m +6132 m +17364 m +3 h +1 h +4 h +4 h +4 h +11 h +17365 m +82 h +17366 m +4 h +1 h +10 h +4 h +4 h +1 h +918 m +1 h +11 h +10 h +17367 m +41 h +1 h +65 h +1 h +1362 m +17368 m +74 h +1249 m +17369 m +1 h +82 h +57 h +4 h +3 h +1 h +4 h +59 h +125 h +10 h +22 h +83 h +1 h +4 h +4350 m +10 h +1 h +2925 m +10 h +17370 m +59 h +4 h +1 h +986 h +82 h +4 h +1309 h +17371 m +275 m +1 h +55 h +17372 m +170 h +1 h +10 h +1 h +17373 m +17374 m +1 h +4 h +1822 h +1 h +4 h +1 h +17375 m +4 h +17376 m +2418 m +57 h +4 h +17377 m +13879 m +10 h +45 h +4 h +17378 m +1772 m +17379 m +17380 m +4 h +4 h +4 h +763 m +3 h +4 h +31 h +125 h +1 h +10 h +265 h +4 h +57 h +10 h +10 h +1 h +1 h +173 h +10 h +4 h +27 h +4 h +17381 m +113 h +1 h +10 h +4 h +25 h +17382 m +989 m +3 h +4 h +10 h +17383 m +17384 m +156 h +3622 m +4 h +4 h +17385 m +12 h +4 h +10365 m +17386 m +169 h +1 h +4 h +94 h +4 h +368 h +109 h +229 h +17387 m +17388 m +10 h +57 h +17389 m +4 h +1 h +82 h +1 h +1 h +17390 m +82 h +1981 m +10 h +1100 m +1 h +105 m +17391 m +1 h 
+17392 m +146 h +1 h +11 h +55 h +10 h +64 h +17393 m +17394 m +11 h +4 h +65 h +266 h +17395 m +4 h +92 h +1 h +358 h +262 h +57 h +1 h +2786 m +4 h +1 h +1 h +10099 m +4 h +4 h +4 h +109 h +4 h +4 h +11 h +17396 m +4 h +112 h +17397 m +4 h +1 h +17398 m +82 h +17399 m +17400 m +1 h +4 h +10 h +17401 m +4 h +17402 m +332 h +1 h +1 h +10 h +17403 m +1 h +1 h +986 h +73 h +4 h +17404 m +4 h +125 h +4 h +3 h +73 h +4 h +10 h +1 h +12 h +1 h +59 h +1 h +4 h +17405 m +493 m +4 h +4 h +10 h +73 h +17406 m +17407 m +82 h +17408 m +10937 m +10 h +1 h +10 h +10 h +196 h +7535 m +12 h +4 h +1 h +10 h +4 h +1 h +1 h +17409 m +82 h +1 h +17410 m +124 h +17411 m +1 h +4 h +1 h +10 h +1 h +10 h +3089 m +1 h +11 h +1 h +1 h +1 h +4 h +57 h +1 h +4 h +4 h +4 h +17412 m +1 h +1 h +17413 m +17414 m +94 h +17415 m +4 h +4 h +4 h +40 h +17416 m +265 h +4 h +73 h +601 h +1 h +4145 m +10 h +2887 h +1 h +4 h +3 h +17417 m +10 h +4 h +986 h +10 h +10 h +10 h +464 h +4 h +10 h +17418 m +4 h +17419 m +10 h +123 h +17420 m +17421 m +10 h +17422 m +11 h +17423 m +4 h +4 h +91 h +4 h +4 h +4 h +1 h +10 h +10 h +82 h +17424 m +13 h +144 h +4 h +1 h +17425 m +93 m +12 h +1 h +1 h +338 h +278 h +4 h +190 h +4 h +295 h +642 h +4 h +17426 m +119 h +4 h +10 h +17427 m +1 h +17428 m +10 h +17429 m +59 h +17430 m +1 h +82 h +1 h +4 h +1 h +25 h +4 h +4 h +17431 m +4 h +10 h +17432 m +10 h +4 h +17433 m +45 h +10 h +17434 m +4 h +1 h +17435 m +1 h +17436 m +17437 m +1 h +17438 m +258 h +1 h +1 h +319 h +4 h +4 h +10 h +1 h +17439 m +17440 m +4 h +1 h +17441 m +4 h +10 h +146 h +17442 m +82 h +10 h +4 h +157 h +4 h +25 h +11 h +4 h +10 h +11 h +4 h +692 h +36 h +17443 m +4 h +4 h +1 h +17444 m +1 h +4 h +125 h +1 h +10 h +1337 m +4 h +1 h +4 h +10 h +1 h +1 h +10 h +124 h +10 h +2379 h +4 h +1389 m +4 h +10 h +28 h +17445 m +59 h +1 h +3161 m +17446 m +4 h +4 h +124 h +10 h +83 h +4 h +11 h +82 h +11 h +17447 m +135 h +316 h +196 h +10 h +4 h +1 h +825 m +4 h +74 h +17448 m +10 h +10 h +59 h +447 h +10 
h +17449 m +13811 m +45 h +17450 m +4 h +17451 m +17452 m +1 h +4 h +73 h +1 h +184 h +173 h +4 h +276 h +17453 m +1 h +4 h +1 h +900 m +4 h +4 h +4 h +4 h +10 h +1 h +4 h +31 h +4297 m +13 h +270 h +170 h +17454 m +1 h +4 h +4 h +4 h +17455 m +1 h +16871 m +4 h +1 h +698 m +4 h +59 h +79 h +17456 m +4 h +147 h +11 h +358 h +10 h +147 h +109 h +1 h +172 h +10 h +10 h +1 h +4932 m +11 h +1 h +17457 m +2558 m +229 h +238 h +583 m +25 h +195 h +27 h +1 h +17458 m +4 h +1 h +1 h +4 h +10 h +1 h +10 h +4 h +4 h +8318 m +4 h +109 h +4 h +17459 m +4 h +1 h +10 h +4 h +10 h +57 h +7553 m +10 h +94 h +1 h +17460 m +1 h +1185 m +4 h +27 h +11 h +4 h +91 h +4 h +41 h +10 h +1 h +10 h +4 h +17461 m +295 h +181 h +17462 m +4 h +124 h +1 h +17463 m +17464 m +4 h +1 h +285 m +4 h +524 m +195 h +4384 m +4 h +17465 m +1 h +4 h +4 h +4 h +17466 m +17467 m +262 h +17468 m +4 h +17469 m +64 h +195 h +109 h +17470 m +8 h +4 h +4 h +447 h +4 h +59 h +1 h +4 h +1772 m +83 h +10 h +4 h +10 h +1 h +4 h +2205 m +17471 m +13 h +31 h +77 h +1 h +4 h +17472 m +4 h +1 h +1 h +4 h +4 h +59 h +1 h +4 h +1 h +31 h +16512 m +17473 m +1 h +1 h +2865 m +1 h +10 h +4 h +718 h +10 h +1 h +4 h +4 h +17474 m +4359 m +64 h +698 m +45 h +17475 m +4 h +17476 m +1 h +322 h +4 h +36 h +56 h +4 h +307 h +17477 m +59 h +10 h +147 h +1 h +1 h +125 h +5162 m +10 h +10 h +1 h +4 h +4 h +10 h +4 h +4 h +12 h +4 h +1 h +4 h +17478 m +10 h +1 h +17479 m +4 h +17480 m +10 h +11654 m +135 h +4 h +4 h +4 h +1 h +4 h +4 h +4 h +17481 m +10 h +4 h +45 h +17482 m +3 h +1 h +203 h +1 h +1 h +4 h +1 h +1 h +17483 m +4 h +11 h +4 h +10 h +10 h +1 h +10 h +4 h +124 h +11 h +17484 m +4 h +353 h +1 h +55 h +195 h +64 h +1322 m +124 h +4 h +4 h +125 h +17485 m +274 h +1 h +17486 m +3 h +4 h +4 h +1 h +10 h +57 h +10 h +238 h +4 h +6371 m +1 h +11 h +17487 m +17488 m +1556 m +196 h +1 h +316 h +4 h +17489 m +4 h +169 h +17490 m +25 h +17491 m +4 h +1 h +17492 m +10 h +327 m +1 h +383 h +17493 m +1 h +270 h +4 h +17494 m +737 m +10 
h +124 h +4 h +109 h +359 h +10 h +143 h +17495 m +12956 m +10 h +3159 m +1 h +17496 m +649 m +4 h +4 h +109 h +4538 m +17497 m +59 h +1 h +4 h +4 h +4 h +1 h +1 h +7243 m +4 h +1 h +82 h +1 h +1 h +1 h +3188 m +17498 m +17499 m +4 h +1 h +4 h +10 h +4 h +4 h +10 h +4 h +17500 m +41 h +4 h +1667 m +1 h +444 m +10 h +4 h +17501 m +17502 m +10 h +4 h +125 h +4 h +3 h +17503 m +1 h +1 h +1 h +17504 m +124 h +4349 m +17505 m +4 h +17506 m +8 h +17507 m +55 h +57 h +1665 m +185 h +10 h +17508 m +1 h +258 h +4 h +1 h +57 h +2733 h +104 h +4 h +170 h +10 h +4 h +1 h +1 h +147 h +1 h +124 h +11 h +17509 m +4 h +17510 m +10 h +10 h +17511 m +25 h +258 h +17512 m +79 h +97 h +3908 m +1 h +1 h +40 h +10 h +10 h +1 h +118 h +2022 m +1 h +27 h +1 h +4 h +1 h +146 h +10 h +17513 m +295 h +17514 m +1 h +4 h +1 h +1 h +4 h +1 h +1 h +17515 m +124 h +2438 m +17516 m +230 h +25 h +17517 m +1 h +1 h +17518 m +25 h +10 h +17519 m +570 m +10 h +4 h +390 m +4 h +11 h +1 h +4 h +4 h +1 h +59 h +4 h +4 h +17520 m +4 h +109 h +4 h +17521 m +4 h +17522 m +10 h +4 h +146 h +935 m +17523 m +1 h +1 h +10 h +1772 h +17524 m +4 h +1 h +4 h +11 h +104 h +64 h +83 h +36 h +1 h +74 h +82 h +4 h +265 h +109 h +338 h +784 m +4 h +5387 m +156 h +4 h +17525 m +4 h +1 h +17526 m +601 h +15455 m +4 h +4 h +17527 m +10 h +4 h +1 h +170 h +8 h +4 h +57 h +1 h +17528 m +17529 m +1619 h +4 h +4 h +17530 m +3 h +185 h +17531 m +12192 m +17532 m +114 h +4 h +125 h +192 h +4 h +1 h +73 h +69 h +82 h +1 h +4 h +11 h +10 h +10 h +11 h +10 h +4 h +4 h +4 h +57 h +1 h +1 h +17533 m +17534 m +17535 m +9256 m +4 h +17536 m +1030 m +167 h +83 h +1 h +1 h +1 h +1 h +56 h +4 h +59 h +4 h +1 h +4 h +10 h +1 h +11 h +1372 m +10 h +124 h +112 h +1 h +4 h +4 h +266 h +1 h +4 h +79 h +181 h +73 h +4 h +17537 m +17538 m +124 h +10 h +4 h +17539 m +1 h +4 h +17540 m +332 h +10383 m +266 h +1 h +1 h +1 h +4 h +4 h +1 h +447 h +4 h +1 h +17541 m +4 h +109 h +17542 m +17543 m +110 h +4 h +4 h +4 h +97 h +10 h +10 h +13 h +10 h +4 
h +1 h +59 h +79 h +27 h +10 h +17544 m +10 h +17545 m +258 h +4 h +2607 m +57 h +1 h +157 h +1 h +4 h +10 h +4 h +185 h +10 h +17546 m +4 h +17547 m +17548 m +10 h +17549 m +1 h +17550 m +17551 m +11 h +1 h +4 h +17552 m +1403 h +4 h +17553 m +10 h +4 h +17554 m +307 h +1 h +99 m +4 h +10 h +104 h +164 h +14814 m +119 h +9912 m +4 h +10 h +196 h +25 h +479 h +1 h +536 h +1 h +1 h +4 h +1 h +17555 m +11 h +885 m +97 h +109 h +4 h +4 h +1 h +139 h +4 h +10 h +687 h +12 h +97 h +10 h +4 h +45 h +1 h +1 h +8711 m +4 h +4 h +33 m +17556 m +82 h +4 h +10 h +83 h +4 h +1 h +169 h +10 h +73 h +4 h +11334 m +10 h +10 h +1 h +82 h +1 h +57 h +278 h +17557 m +4 h +5141 m +1 h +17558 m +17559 m +17560 m +10 h +1 h +17561 m +16841 m +630 m +2617 m +9397 m +65 h +83 h +4 h +4 h +4 h +79 h +4 h +4 h +196 h +1 h +17562 m +4 h +4 h +4 h +1691 m +17563 m +110 h +172 h +125 h +83 h +55 h +13 h +10 h +83 h +1 h +195 h +1 h +17564 m +10 h +8503 m +17565 m +3 h +10 h +10 h +10 h +1 h +295 h +1 h +170 h +4 h +4 h +5505 m +1 h +4 h +4 h +17566 m +17567 m +124 h +10 h +1027 h +1 h +17568 m +104 h +10 h +468 h +4 h +2733 h +124 h +4867 m +4 h +2172 m +4 h +4 h +1 h +4 h +8 h +17569 m +146 h +4441 h +41 h +17570 m +4 h +4 h +4 h +1 h +11 h +83 h +4 h +17571 m +4 h +17572 m +10 h +17573 m +10 h +22 h +1 h +10 h +1 h +17574 m +97 h +4350 m +14316 m +794 m +4 h +12020 m +4 h +17575 m +17576 m +10 h +4 h +4 h +17577 m +10 h +17578 m +4 h +230 h +4 h +4 h +109 h +7800 m +17579 m +4 h +10 h +143 h +4 h +1 h +10 h +31 h +17580 m +4 h +4 h +1 h +4 h +10 h +4 h +5348 m +77 h +4 h +1 h +186 h +119 h +4 h +4 h +17581 m +4 h +17582 m +10 h +17583 m +65 h +331 m +4 h +48 h +17584 m +125 h +185 h +170 h +17585 m +17586 m +17587 m +467 m +10 h +1 h +278 h +1074 h +25 h +17588 m +17589 m +17590 m +48 h +17591 m +74 h +464 h +94 h +4 h +1 h +4 h +2532 m +57 h +4 h +4 h +1 h +17592 m +4 h +97 h +17593 m +125 h +1027 h +17594 m +3 h +17595 m +1 h +10 h +692 h +238 h +156 h +11 h +10 h +10 h +4 h +1 h +1 h +1 
h +172 h +1 h +109 h +17596 m +4 h +17597 m +1 h +57 h +13790 m +1768 m +4 h +17598 m +4 h +8741 m +17599 m +73 h +1 h +4 h +1 h +167 h +17600 m +185 h +1 h +1 h +1 h +17601 m +10 h +1 h +1 h +10 h +1 h +4 h +1 h +173 h +4 h +4 h +55 h +4 h +1619 h +10 h +1 h +278 h +195 h +1 h +11 h +59 h +10 h +4 h +4904 m +1 h +48 h +4 h +10 h +4 h +17602 m +4 h +59 h +1 h +8 h +4 h +1 h +11 h +4 h +110 h +4 h +4 h +1 h +1045 m +17603 m +97 h +1 h +8 h +1 h +3657 m +1 h +367 m +17604 m +1 h +1 h +2585 m +156 h +4 h +64 h +4 h +17605 m +4 h +10 h +4 h +17606 m +17607 m +17608 m +10 h +4 h +17609 m +4 h +25 h +4 h +332 h +4 h +4 h +12 h +17610 m +3 h +4 h +10 h +17611 m +57 h +1791 m +17612 m +4 h +8 h +1 h +4 h +1 h +10 h +17613 m +1 h +4 h +4 h +1 h +10 h +17614 m +146 h +4 h +1 h +1 h +4 h +4 h +28 h +4 h +57 h +4 h +307 h +112 h +11 h +97 h +11 h +1 h +10 h +22 h +10 h +3 h +17615 m +119 h +806 m +10 h +17616 m +135 h +4 h +79 h +4 h +1261 h +11 h +1 h +4 h +157 h +4 h +82 h +3 h +59 h +45 h +97 h +17617 m +17618 m +1 h +31 h +65 h +11 h +41 h +266 h +1 h +4 h +4 h +4 h +17619 m +10 h +4 h +229 h +1 h +1 h +10 h +1 h +276 h +10 h +27 h +10 h +17620 m +4 h +383 h +59 h +17621 m +581 m +4 h +4 h +1250 h +41 h +92 h +17622 m +990 m +10 h +1 h +17623 m +4 h +1 h +4 h +279 h +601 h +4 h +1 h +4 h +57 h +10 h +1 h +4 h +4 h +6851 m +4 h +157 h +135 h +10 h +1 h +1 h +4 h +1 h +4 h +258 h +14708 m +3 h +17624 m +1 h +146 h +55 h +262 h +158 h +1 h +4 h +1 h +1 h +13007 m +124 h +169 h +17625 m +170 h +4 h +17626 m +911 h +4 h +156 h +4 h +1 h +143 h +17627 m +4 h +41 h +10 h +4 h +17628 m +4 h +56 h +4 h +125 h +11 h +124 h +4292 m +17629 m +97 h +124 h +2733 h +36 h +4 h +17630 m +1 h +1260 m +4 h +10 h +17631 m diff --git a/pebble/internal/cache/value.go b/pebble/internal/cache/value.go new file mode 100644 index 0000000..6d2cae1 --- /dev/null +++ b/pebble/internal/cache/value.go @@ -0,0 +1,46 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. 
Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package cache + +// Value holds a reference counted immutable value. +type Value struct { + buf []byte + // Reference count for the value. The value is freed when the reference count + // drops to zero. + ref refcnt +} + +// Buf returns the buffer associated with the value. The contents of the buffer +// should not be changed once the value has been added to the cache. Instead, a +// new Value should be created and added to the cache to replace the existing +// value. +func (v *Value) Buf() []byte { + if v == nil { + return nil + } + return v.buf +} + +// Truncate the buffer to the specified length. The buffer length should not be +// changed once the value has been added to the cache as there may be +// concurrent readers of the Value. Instead, a new Value should be created and +// added to the cache to replace the existing value. +func (v *Value) Truncate(n int) { + v.buf = v.buf[:n] +} + +func (v *Value) refs() int32 { + return v.ref.refs() +} + +func (v *Value) acquire() { + v.ref.acquire() +} + +func (v *Value) release() { + if v != nil && v.ref.release() { + v.free() + } +} diff --git a/pebble/internal/cache/value_invariants.go b/pebble/internal/cache/value_invariants.go new file mode 100644 index 0000000..1e30d27 --- /dev/null +++ b/pebble/internal/cache/value_invariants.go @@ -0,0 +1,55 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +//go:build (invariants && !race) || (tracing && !race) +// +build invariants,!race tracing,!race + +package cache + +import ( + "fmt" + "os" + + "github.com/cockroachdb/pebble/internal/invariants" + "github.com/cockroachdb/pebble/internal/manual" +) + +// newValue creates a Value with a manually managed buffer of size n. 
+// +// This definition of newValue is used when either the "invariants" or +// "tracing" build tags are specified. It hooks up a finalizer to the returned +// Value that checks for memory leaks when the GC determines the Value is no +// longer reachable. +func newValue(n int) *Value { + if n == 0 { + return nil + } + b := manual.New(n) + v := &Value{buf: b} + v.ref.init(1) + // Note: this is a no-op if invariants and tracing are disabled or race is + // enabled. + invariants.SetFinalizer(v, func(obj interface{}) { + v := obj.(*Value) + if v.buf != nil { + fmt.Fprintf(os.Stderr, "%p: cache value was not freed: refs=%d\n%s", + v, v.refs(), v.ref.traces()) + os.Exit(1) + } + }) + return v +} + +func (v *Value) free() { + // When "invariants" are enabled set the value contents to 0xff in order to + // cache use-after-free bugs. + for i := range v.buf { + v.buf[i] = 0xff + } + manual.Free(v.buf) + // Setting Value.buf to nil is needed for correctness of the leak checking + // that is performed when the "invariants" or "tracing" build tags are + // enabled. + v.buf = nil +} diff --git a/pebble/internal/cache/value_normal.go b/pebble/internal/cache/value_normal.go new file mode 100644 index 0000000..e03379d --- /dev/null +++ b/pebble/internal/cache/value_normal.go @@ -0,0 +1,57 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +//go:build (!invariants && !tracing) || race +// +build !invariants,!tracing race + +package cache + +import ( + "unsafe" + + "github.com/cockroachdb/pebble/internal/manual" +) + +const valueSize = int(unsafe.Sizeof(Value{})) + +func newValue(n int) *Value { + if n == 0 { + return nil + } + + if !cgoEnabled { + // If Cgo is disabled then all memory is allocated from the Go heap and we + // can't play the trick below to combine the Value and buffer allocation. 
+ v := &Value{buf: make([]byte, n)} + v.ref.init(1) + return v + } + + // When we're not performing leak detection, the lifetime of the returned + // Value is exactly the lifetime of the backing buffer and we can manually + // allocate both. + // + // TODO(peter): It may be better to separate the allocation of the value and + // the buffer in order to reduce internal fragmentation in malloc. If the + // buffer is right at a power of 2, adding valueSize might push the + // allocation over into the next larger size. + b := manual.New(valueSize + n) + v := (*Value)(unsafe.Pointer(&b[0])) + v.buf = b[valueSize:] + v.ref.init(1) + return v +} + +func (v *Value) free() { + if !cgoEnabled { + return + } + + // When we're not performing leak detection, the Value and buffer were + // allocated contiguously. + n := valueSize + cap(v.buf) + buf := (*[manual.MaxArrayLen]byte)(unsafe.Pointer(v))[:n:n] + v.buf = nil + manual.Free(buf) +} diff --git a/pebble/internal/constants/constants.go b/pebble/internal/constants/constants.go new file mode 100644 index 0000000..8d9198c --- /dev/null +++ b/pebble/internal/constants/constants.go @@ -0,0 +1,17 @@ +// Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package constants + +const ( + // oneIf64Bit is 1 on 64-bit platforms and 0 on 32-bit platforms. + oneIf64Bit = ^uint(0) >> 63 + + // MaxUint32OrInt returns min(MaxUint32, MaxInt), i.e + // - MaxUint32 on 64-bit platforms; + // - MaxInt on 32-bit platforms. + // It is used when slices are limited to Uint32 on 64-bit platforms (the + // length limit for slices is naturally MaxInt on 32-bit platforms). 
+ MaxUint32OrInt = (1<<31)<<oneIf64Bit - 1 +) [...] + return uint32(c>>15|c<<17) + 0xa282ead8 +} diff --git a/pebble/internal/datatest/datatest.go b/pebble/internal/datatest/datatest.go new file mode 100644 index 0000000..40f78d5 --- /dev/null +++ b/pebble/internal/datatest/datatest.go @@ -0,0 +1,140 @@ +// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +// Package datatest provides common datadriven test commands for use outside of +// the root Pebble package. +package datatest + +import ( + "strings" + "sync" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble" +) + +// TODO(jackson): Consider a refactoring that can consolidate this package and +// the datadriven commands defined in pebble/data_test.go. + +// DefineBatch interprets the provided datadriven command as a sequence of write +// operations, one-per-line, to apply to the provided batch.
+func DefineBatch(d *datadriven.TestData, b *pebble.Batch) error { + for _, line := range strings.Split(d.Input, "\n") { + parts := strings.Fields(line) + if len(parts) == 0 { + continue + } + if parts[1] == `<nil>` { + parts[1] = "" + } + var err error + switch parts[0] { + case "set": + if len(parts) != 3 { + return errors.Errorf("%s expects 2 arguments", parts[0]) + } + err = b.Set([]byte(parts[1]), []byte(parts[2]), nil) + case "del": + if len(parts) != 2 { + return errors.Errorf("%s expects 1 argument", parts[0]) + } + err = b.Delete([]byte(parts[1]), nil) + case "singledel": + if len(parts) != 2 { + return errors.Errorf("%s expects 1 argument", parts[0]) + } + err = b.SingleDelete([]byte(parts[1]), nil) + case "del-range": + if len(parts) != 3 { + return errors.Errorf("%s expects 2 arguments", parts[0]) + } + err = b.DeleteRange([]byte(parts[1]), []byte(parts[2]), nil) + case "merge": + if len(parts) != 3 { + return errors.Errorf("%s expects 2 arguments", parts[0]) + } + err = b.Merge([]byte(parts[1]), []byte(parts[2]), nil) + case "range-key-set": + if len(parts) != 5 { + return errors.Errorf("%s expects 4 arguments", parts[0]) + } + err = b.RangeKeySet( + []byte(parts[1]), + []byte(parts[2]), + []byte(parts[3]), + []byte(parts[4]), + nil) + case "range-key-unset": + if len(parts) != 4 { + return errors.Errorf("%s expects 3 arguments", parts[0]) + } + err = b.RangeKeyUnset( + []byte(parts[1]), + []byte(parts[2]), + []byte(parts[3]), + nil) + case "range-key-del": + if len(parts) != 3 { + return errors.Errorf("%s expects 2 arguments", parts[0]) + } + err = b.RangeKeyDelete( + []byte(parts[1]), + []byte(parts[2]), + nil) + default: + return errors.Errorf("unknown op: %s", parts[0]) + } + if err != nil { + return err + } + } + return nil +} + +// CompactionTracker is a listener that tracks the number of compactions.
+type CompactionTracker struct { + sync.Cond + count int + attached bool +} + +// NewCompactionTracker setups the necessary options to keep track of the +// compactions that are in flight. +func NewCompactionTracker(options *pebble.Options) *CompactionTracker { + ct := CompactionTracker{} + ct.Cond = sync.Cond{ + L: &sync.Mutex{}, + } + ct.attached = true + el := pebble.EventListener{ + CompactionEnd: func(info pebble.CompactionInfo) { + ct.L.Lock() + ct.count-- + ct.Broadcast() + ct.L.Unlock() + }, + CompactionBegin: func(info pebble.CompactionInfo) { + ct.L.Lock() + ct.count++ + ct.Broadcast() + ct.L.Unlock() + }, + } + + options.AddEventListener(el) + return &ct +} + +// WaitForInflightCompactionsToEqual waits until compactions meet the specified target. +func (cql *CompactionTracker) WaitForInflightCompactionsToEqual(target int) { + cql.L.Lock() + if !cql.attached { + panic("Cannot wait for compactions if listener has not been attached") + } + for cql.count != target { + cql.Wait() + } + cql.L.Unlock() +} diff --git a/pebble/internal/dsl/dsl.go b/pebble/internal/dsl/dsl.go new file mode 100644 index 0000000..ef546fd --- /dev/null +++ b/pebble/internal/dsl/dsl.go @@ -0,0 +1,160 @@ +// Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +// Package dsl provides facilities for parsing lisp-like domain-specific +// languages (DSL). +package dsl + +import ( + "fmt" + "go/scanner" + "go/token" + "strconv" + "strings" + + "github.com/cockroachdb/errors" +) + +// NewParser constructs a new Parser of a lisp-like DSL. +func NewParser[T any]() *Parser[T] { + p := new(Parser[T]) + p.constants = make(map[string]func() T) + p.funcs = make(map[string]func(*Parser[T], *Scanner) T) + return p +} + +// NewPredicateParser constructs a new Parser of a Lisp-like DSL, where the +// resulting type implements Predicate[E]. 
NewPredicateParser predefines a few +// useful functions: Not, And, Or, OnIndex. +func NewPredicateParser[E any]() *Parser[Predicate[E]] { + p := NewParser[Predicate[E]]() + p.DefineFunc("Not", parseNot[E]) + p.DefineFunc("And", parseAnd[E]) + p.DefineFunc("Or", parseOr[E]) + p.DefineFunc("OnIndex", parseOnIndex[E]) + return p +} + +// A Parser holds the rules and logic for parsing a DSL. +type Parser[T any] struct { + constants map[string]func() T + funcs map[string]func(*Parser[T], *Scanner) T +} + +// DefineConstant adds a new constant to the Parser's supported DSL. Whenever +// the provided identifier is used within a constant context, the provided +// closure is invoked to instantiate an appropriate AST value. +func (p *Parser[T]) DefineConstant(identifier string, instantiate func() T) { + p.constants[identifier] = instantiate +} + +// DefineFunc adds a new func to the Parser's supported DSL. Whenever the +// provided identifier is used within a function invocation context, the +// provided closure is invoked to instantiate an appropriate AST value. +func (p *Parser[T]) DefineFunc(identifier string, parseFunc func(*Parser[T], *Scanner) T) { + p.funcs[identifier] = parseFunc +} + +// Parse parses the provided input string. +func (p *Parser[T]) Parse(d string) (ret T, err error) { + defer func() { + if r := recover(); r != nil { + var ok bool + err, ok = r.(error) + if !ok { + panic(r) + } + } + }() + + fset := token.NewFileSet() + file := fset.AddFile("", -1, len(d)) + var s Scanner + s.Init(file, []byte(strings.TrimSpace(d)), nil /* no error handler */, 0) + tok := s.Scan() + ret = p.ParseFromPos(&s, tok) + tok = s.Scan() + if tok.Kind == token.SEMICOLON { + tok = s.Scan() + } + assertTok(tok, token.EOF) + return ret, err +} + +// ParseFromPos parses from the provided current position and associated +// scanner. If the parser fails to parse, it panics. This function is intended +// to be used when composing Parsers of various types. 
+func (p *Parser[T]) ParseFromPos(s *Scanner, tok Token) T { + switch tok.Kind { + case token.IDENT: + // A constant without any parens, eg. `Reads`. + p, ok := p.constants[tok.Lit] + if !ok { + panic(errors.Errorf("dsl: unknown constant %q", tok.Lit)) + } + return p() + case token.LPAREN: + // Otherwise it's an expression, eg: (OnIndex 1) + tok = s.Consume(token.IDENT) + fp, ok := p.funcs[tok.Lit] + if !ok { + panic(errors.Errorf("dsl: unknown func %q", tok.Lit)) + } + return fp(p, s) + default: + panic(errors.Errorf("dsl: unexpected token %s; expected IDENT or LPAREN", tok.String())) + } +} + +// A Scanner holds the scanner's internal state while processing a given text. +type Scanner struct { + scanner.Scanner +} + +// Scan scans the next token and returns it. +func (s *Scanner) Scan() Token { + pos, tok, lit := s.Scanner.Scan() + return Token{pos, tok, lit} +} + +// Consume scans the next token. If the token is not of the provided token, it +// panics. It returns the token itself. +func (s *Scanner) Consume(expect token.Token) Token { + t := s.Scan() + assertTok(t, expect) + return t +} + +// ConsumeString scans the next token. It panics if the next token is not a +// string, or if unable to unquote the string. It returns the unquoted string +// contents. +func (s *Scanner) ConsumeString() string { + lit := s.Consume(token.STRING).Lit + str, err := strconv.Unquote(lit) + if err != nil { + panic(errors.Newf("dsl: unquoting %q: %v", lit, err)) + } + return str +} + +// Token is a lexical token scanned from an input text. +type Token struct { + pos token.Pos + Kind token.Token + Lit string +} + +// String implements fmt.Stringer. 
+func (t *Token) String() string { + if t.Lit != "" { + return fmt.Sprintf("(%s, %q) at pos %v", t.Kind, t.Lit, t.pos) + } + return fmt.Sprintf("%s at pos %v", t.Kind, t.pos) +} + +func assertTok(tok Token, expect token.Token) { + if tok.Kind != expect { + panic(errors.Errorf("dsl: unexpected token %s; expected %s", tok.String(), expect)) + } +} diff --git a/pebble/internal/dsl/predicates.go b/pebble/internal/dsl/predicates.go new file mode 100644 index 0000000..fff0fcd --- /dev/null +++ b/pebble/internal/dsl/predicates.go @@ -0,0 +1,136 @@ +// Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package dsl + +import ( + "fmt" + "go/token" + "strconv" + "strings" + "sync/atomic" + + "github.com/cockroachdb/errors" +) + +// Predicate encodes conditional logic that yields a boolean. +type Predicate[E any] interface { + Evaluate(E) bool + String() string +} + +// Not returns a Predicate that negates the provided predicate. +func Not[E any](p Predicate[E]) Predicate[E] { return not[E]{Predicate: p} } + +// And returns a Predicate that evaluates to true if all its operands evaluate +// to true. +func And[E any](preds ...Predicate[E]) Predicate[E] { return and[E](preds) } + +// Or returns a Predicate that evaluates to true if any of its operands evaluate +// true. +func Or[E any](preds ...Predicate[E]) Predicate[E] { return or[E](preds) } + +// OnIndex returns a Predicate that evaluates to true on its N-th call. +func OnIndex[E any](n int32) *Index[E] { + p := new(Index[E]) + p.Int32.Store(n) + return p +} + +// Index is a Predicate that evaluates to true only on its N-th invocation. +type Index[E any] struct { + atomic.Int32 +} + +// String implements fmt.Stringer. +func (p *Index[E]) String() string { + return fmt.Sprintf("(OnIndex %d)", p.Int32.Load()) +} + +// Evaluate implements Predicate. 
+func (p *Index[E]) Evaluate(E) bool { return p.Int32.Add(-1) == -1 } + +type not[E any] struct { + Predicate[E] +} + +func (p not[E]) String() string { return fmt.Sprintf("(Not %s)", p.Predicate.String()) } +func (p not[E]) Evaluate(e E) bool { return !p.Predicate.Evaluate(e) } + +type and[E any] []Predicate[E] + +func (p and[E]) String() string { + var sb strings.Builder + sb.WriteString("(And") + for i := 0; i < len(p); i++ { + sb.WriteRune(' ') + sb.WriteString(p[i].String()) + } + sb.WriteRune(')') + return sb.String() +} + +func (p and[E]) Evaluate(e E) bool { + ok := true + for i := range p { + ok = ok && p[i].Evaluate(e) + } + return ok +} + +type or[E any] []Predicate[E] + +func (p or[E]) String() string { + var sb strings.Builder + sb.WriteString("(Or") + for i := 0; i < len(p); i++ { + sb.WriteRune(' ') + sb.WriteString(p[i].String()) + } + sb.WriteRune(')') + return sb.String() +} + +func (p or[E]) Evaluate(e E) bool { + ok := false + for i := range p { + ok = ok || p[i].Evaluate(e) + } + return ok +} + +func parseNot[E any](p *Parser[Predicate[E]], s *Scanner) Predicate[E] { + preds := parseVariadicPredicate(p, s) + if len(preds) != 1 { + panic(errors.Newf("dsl: not accepts exactly 1 argument, given %d", len(preds))) + } + return not[E]{Predicate: preds[0]} +} + +func parseAnd[E any](p *Parser[Predicate[E]], s *Scanner) Predicate[E] { + return And[E](parseVariadicPredicate[E](p, s)...) +} + +func parseOr[E any](p *Parser[Predicate[E]], s *Scanner) Predicate[E] { + return Or[E](parseVariadicPredicate[E](p, s)...) 
+} + +func parseOnIndex[E any](p *Parser[Predicate[E]], s *Scanner) Predicate[E] { + i, err := strconv.ParseInt(s.Consume(token.INT).Lit, 10, 32) + if err != nil { + panic(err) + } + s.Consume(token.RPAREN) + return OnIndex[E](int32(i)) +} + +func parseVariadicPredicate[E any](p *Parser[Predicate[E]], s *Scanner) (ret []Predicate[E]) { + tok := s.Scan() + for tok.Kind == token.LPAREN || tok.Kind == token.IDENT { + ret = append(ret, p.ParseFromPos(s, tok)) + tok = s.Scan() + } + assertTok(tok, token.RPAREN) + return ret +} diff --git a/pebble/internal/fastrand/fastrand.go b/pebble/internal/fastrand/fastrand.go new file mode 100644 index 0000000..dd3ec9c --- /dev/null +++ b/pebble/internal/fastrand/fastrand.go @@ -0,0 +1,17 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package fastrand + +import _ "unsafe" // required by go:linkname + +// Uint32 returns a lock free uint32 value. +// +//go:linkname Uint32 runtime.fastrand +func Uint32() uint32 + +// Uint32n returns a lock free uint32 value in the interval [0, n). +// +//go:linkname Uint32n runtime.fastrandn +func Uint32n(n uint32) uint32 diff --git a/pebble/internal/fastrand/fastrand_test.go b/pebble/internal/fastrand/fastrand_test.go new file mode 100644 index 0000000..581c056 --- /dev/null +++ b/pebble/internal/fastrand/fastrand_test.go @@ -0,0 +1,86 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package fastrand + +import ( + "fmt" + "sync" + "testing" + "time" + + "golang.org/x/exp/rand" +) + +type defaultRand struct { + mu sync.Mutex + src rand.PCGSource +} + +func newDefaultRand() *defaultRand { + r := &defaultRand{} + r.src.Seed(uint64(time.Now().UnixNano())) + return r +} + +func (r *defaultRand) Uint32() uint32 { + r.mu.Lock() + i := uint32(r.src.Uint64()) + r.mu.Unlock() + return i +} + +func BenchmarkFastRand(b *testing.B) { + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + Uint32() + } + }) +} + +func BenchmarkDefaultRand(b *testing.B) { + r := newDefaultRand() + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + r.Uint32() + } + }) +} + +// Benchmarks for single-threaded (ST) use of fastrand compared to +// constructing a Rand, which can have heap allocation overhead. + +// Global state to disable elision of benchmark code. +var xg uint32 + +func BenchmarkSTFastRand(b *testing.B) { + var x uint32 + for i := 0; i < b.N; i++ { + // Arbitrary constant. + x = Uint32n(2097152) + } + xg = x +} + +func BenchmarkSTDefaultRand(b *testing.B) { + for _, newPeriod := range []int{0, 10, 100, 1000} { + name := "no-new" + if newPeriod > 0 { + name = fmt.Sprintf("new-period=%d", newPeriod) + } + b.Run(name, func(b *testing.B) { + r := rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) + b.ResetTimer() + var x uint32 + for i := 0; i < b.N; i++ { + if newPeriod > 0 && i%newPeriod == 0 { + r = rand.New(rand.NewSource(uint64(time.Now().UnixNano()))) + } + // Arbitrary constant. + x = uint32(r.Uint64n(2097152)) + } + xg = x + }) + } +} diff --git a/pebble/internal/humanize/humanize.go b/pebble/internal/humanize/humanize.go new file mode 100644 index 0000000..cb82343 --- /dev/null +++ b/pebble/internal/humanize/humanize.go @@ -0,0 +1,68 @@ +// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package humanize + +import ( + "fmt" + "math" + + "github.com/cockroachdb/redact" +) + +func logn(n, b float64) float64 { + return math.Log(n) / math.Log(b) +} + +func humanate(s uint64, base float64, suffixes []string) string { + if s < 10 { + return fmt.Sprintf("%d%s", s, suffixes[0]) + } + e := math.Floor(logn(float64(s), base)) + suffix := suffixes[int(e)] + val := math.Floor(float64(s)/math.Pow(base, e)*10+0.5) / 10 + f := "%.0f%s" + if val < 10 { + f = "%.1f%s" + } + + return fmt.Sprintf(f, val, suffix) +} + +type config struct { + base float64 + suffix []string +} + +// Bytes produces human readable representations of byte values in IEC units. +var Bytes = config{1024, []string{"B", "KB", "MB", "GB", "TB", "PB", "EB"}} + +// Count produces human readable representations of unitless values in SI units. +var Count = config{1000, []string{"", "K", "M", "G", "T", "P", "E"}} + +// Int64 produces a human readable representation of the value. +func (c *config) Int64(s int64) FormattedString { + if s < 0 { + return FormattedString("-" + humanate(uint64(-s), c.base, c.suffix)) + } + return FormattedString(humanate(uint64(s), c.base, c.suffix)) +} + +// Uint64 produces a human readable representation of the value. +func (c *config) Uint64(s uint64) FormattedString { + return FormattedString(humanate(s, c.base, c.suffix)) +} + +// FormattedString represents a human readable representation of a value. It +// implements the redact.SafeValue interface to signal that it represents a +// string that does not need to be redacted. +type FormattedString string + +var _ redact.SafeValue = FormattedString("") + +// SafeValue implements redact.SafeValue. +func (fs FormattedString) SafeValue() {} + +// String implements fmt.Stringer.
+func (fs FormattedString) String() string { return string(fs) } diff --git a/pebble/internal/humanize/humanize_test.go b/pebble/internal/humanize/humanize_test.go new file mode 100644 index 0000000..a4a42c3 --- /dev/null +++ b/pebble/internal/humanize/humanize_test.go @@ -0,0 +1,38 @@ +// Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package humanize + +import ( + "bytes" + "fmt" + "strconv" + "strings" + "testing" + + "github.com/cockroachdb/datadriven" +) + +func TestHumanize(t *testing.T) { + datadriven.RunTest(t, "testdata/humanize", func(t *testing.T, td *datadriven.TestData) string { + var c config + switch td.Cmd { + case "bytes": + c = Bytes + case "count": + c = Count + default: + td.Fatalf(t, "invalid command %q", td.Cmd) + } + var buf bytes.Buffer + for _, row := range strings.Split(td.Input, "\n") { + val, err := strconv.ParseInt(row, 10, 64) + if err != nil { + td.Fatalf(t, "error parsing %q: %v", row, err) + } + fmt.Fprintf(&buf, "%s\n", c.Int64(val)) + } + return buf.String() + }) +} diff --git a/pebble/internal/humanize/testdata/humanize b/pebble/internal/humanize/testdata/humanize new file mode 100644 index 0000000..27f554a --- /dev/null +++ b/pebble/internal/humanize/testdata/humanize @@ -0,0 +1,49 @@ +bytes +0 +1 +9 +99 +123 +123456 +12345678 +1234567890 +1234567890123 +123456789012345 +123456789012345678 +---- +0B +1B +9B +99B +123B +121KB +12MB +1.1GB +1.1TB +112TB +110PB + +count +0 +1 +9 +99 +123 +123456 +12345678 +1234567890 +1234567890123 +123456789012345 +123456789012345678 +---- +0 +1 +9 +99 +123 +124K +12M +1.2G +1.2T +124T +124P diff --git a/pebble/internal/intern/intern.go b/pebble/internal/intern/intern.go new file mode 100644 index 0000000..9f8bad5 --- /dev/null +++ b/pebble/internal/intern/intern.go @@ -0,0 +1,27 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. 
Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package intern + +import "sync" + +var pool = sync.Pool{ + New: func() interface{} { + return make(map[string]string) + }, +} + +// Bytes returns b converted to a string, interned. +func Bytes(b []byte) string { + m := pool.Get().(map[string]string) + c, ok := m[string(b)] + if ok { + pool.Put(m) + return c + } + s := string(b) + m[s] = s + pool.Put(m) + return s +} diff --git a/pebble/internal/intern/intern_test.go b/pebble/internal/intern/intern_test.go new file mode 100644 index 0000000..1db6581 --- /dev/null +++ b/pebble/internal/intern/intern_test.go @@ -0,0 +1,30 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package intern + +import ( + "bytes" + "testing" + + "github.com/cockroachdb/pebble/internal/invariants" +) + +func TestBytes(t *testing.T) { + if invariants.RaceEnabled { + // sync.Pool is a no-op under -race, making this test fail. + t.Skip("not supported under -race") + } + + const abc = "abc" + s := bytes.Repeat([]byte(abc), 100) + n := testing.AllocsPerRun(100, func() { + for i := 0; i < 100; i++ { + _ = Bytes(s[i*len(abc) : (i+1)*len(abc)]) + } + }) + if n > 0 { + t.Fatalf("Bytes allocated %d, want 0", int(n)) + } +} diff --git a/pebble/internal/invalidating/iter.go b/pebble/internal/invalidating/iter.go new file mode 100644 index 0000000..e27db58 --- /dev/null +++ b/pebble/internal/invalidating/iter.go @@ -0,0 +1,168 @@ +// Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package invalidating + +import ( + "context" + + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/fastrand" + "github.com/cockroachdb/pebble/internal/invariants" +) + +// MaybeWrapIfInvariants wraps some iterators with an invalidating iterator. +// MaybeWrapIfInvariants does nothing in non-invariant builds. +func MaybeWrapIfInvariants(iter base.InternalIterator) base.InternalIterator { + if invariants.Enabled { + if fastrand.Uint32n(10) == 1 { + return NewIter(iter) + } + } + return iter +} + +// iter tests unsafe key/value slice reuse by modifying the last +// returned key/value to all 1s. +type iter struct { + iter base.InternalIterator + lastKey *base.InternalKey + lastValue base.LazyValue + ignoreKinds [base.InternalKeyKindMax + 1]bool + err error +} + +// Option configures the behavior of an invalidating iterator. +type Option interface { + apply(*iter) +} + +type funcOpt func(*iter) + +func (f funcOpt) apply(i *iter) { f(i) } + +// IgnoreKinds constructs an Option that configures an invalidating iterator to +// skip trashing k/v pairs with the provided key kinds. Some iterators provided +// key stability guarantees for specific key kinds. +func IgnoreKinds(kinds ...base.InternalKeyKind) Option { + return funcOpt(func(i *iter) { + for _, kind := range kinds { + i.ignoreKinds[kind] = true + } + }) +} + +// NewIter constructs a new invalidating iterator that wraps the provided +// iterator, trashing buffers for previously returned keys. 
+func NewIter(originalIterator base.InternalIterator, opts ...Option) base.InternalIterator { + i := &iter{iter: originalIterator} + for _, opt := range opts { + opt.apply(i) + } + return i +} + +func (i *iter) update( + key *base.InternalKey, value base.LazyValue, +) (*base.InternalKey, base.LazyValue) { + i.trashLastKV() + if key == nil { + i.lastKey = nil + i.lastValue = base.LazyValue{} + return nil, base.LazyValue{} + } + + i.lastKey = &base.InternalKey{} + *i.lastKey = key.Clone() + i.lastValue = base.LazyValue{ + ValueOrHandle: append(make([]byte, 0, len(value.ValueOrHandle)), value.ValueOrHandle...), + } + if value.Fetcher != nil { + fetcher := new(base.LazyFetcher) + *fetcher = *value.Fetcher + i.lastValue.Fetcher = fetcher + } + return i.lastKey, i.lastValue +} + +func (i *iter) trashLastKV() { + if i.lastKey == nil { + return + } + if i.ignoreKinds[i.lastKey.Kind()] { + return + } + + if i.lastKey != nil { + for j := range i.lastKey.UserKey { + i.lastKey.UserKey[j] = 0xff + } + i.lastKey.Trailer = 0xffffffffffffffff + } + for j := range i.lastValue.ValueOrHandle { + i.lastValue.ValueOrHandle[j] = 0xff + } + if i.lastValue.Fetcher != nil { + // Not all the LazyFetcher fields are visible, so we zero out the last + // value's Fetcher struct entirely. 
+ *i.lastValue.Fetcher = base.LazyFetcher{} + } +} + +func (i *iter) SeekGE(key []byte, flags base.SeekGEFlags) (*base.InternalKey, base.LazyValue) { + return i.update(i.iter.SeekGE(key, flags)) +} + +func (i *iter) SeekPrefixGE( + prefix, key []byte, flags base.SeekGEFlags, +) (*base.InternalKey, base.LazyValue) { + return i.update(i.iter.SeekPrefixGE(prefix, key, flags)) +} + +func (i *iter) SeekLT(key []byte, flags base.SeekLTFlags) (*base.InternalKey, base.LazyValue) { + return i.update(i.iter.SeekLT(key, flags)) +} + +func (i *iter) First() (*base.InternalKey, base.LazyValue) { + return i.update(i.iter.First()) +} + +func (i *iter) Last() (*base.InternalKey, base.LazyValue) { + return i.update(i.iter.Last()) +} + +func (i *iter) Next() (*base.InternalKey, base.LazyValue) { + return i.update(i.iter.Next()) +} + +func (i *iter) Prev() (*base.InternalKey, base.LazyValue) { + return i.update(i.iter.Prev()) +} + +func (i *iter) NextPrefix(succKey []byte) (*base.InternalKey, base.LazyValue) { + return i.update(i.iter.NextPrefix(succKey)) +} + +func (i *iter) Error() error { + if err := i.iter.Error(); err != nil { + return err + } + return i.err +} + +func (i *iter) Close() error { + return i.iter.Close() +} + +func (i *iter) SetBounds(lower, upper []byte) { + i.iter.SetBounds(lower, upper) +} + +func (i *iter) SetContext(ctx context.Context) { + i.iter.SetContext(ctx) +} + +func (i *iter) String() string { + return i.iter.String() +} diff --git a/pebble/internal/invariants/finalizer_off.go b/pebble/internal/invariants/finalizer_off.go new file mode 100644 index 0000000..d2c600a --- /dev/null +++ b/pebble/internal/invariants/finalizer_off.go @@ -0,0 +1,14 @@ +// Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +//go:build (!invariants && !tracing) || race +// +build !invariants,!tracing race + +package invariants + +// SetFinalizer is a wrapper around runtime.SetFinalizer that is a no-op under +// race builds or if neither the invariants or tracing build tags are +// specified. +func SetFinalizer(obj, finalizer interface{}) { +} diff --git a/pebble/internal/invariants/finalizer_on.go b/pebble/internal/invariants/finalizer_on.go new file mode 100644 index 0000000..da4e307 --- /dev/null +++ b/pebble/internal/invariants/finalizer_on.go @@ -0,0 +1,17 @@ +// Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +//go:build (invariants && !race) || (tracing && !race) +// +build invariants,!race tracing,!race + +package invariants + +import "runtime" + +// SetFinalizer is a wrapper around runtime.SetFinalizer that is a no-op under +// race builds or if neither the invariants or tracing build tags are +// specified. +func SetFinalizer(obj, finalizer interface{}) { + runtime.SetFinalizer(obj, finalizer) +} diff --git a/pebble/internal/invariants/off.go b/pebble/internal/invariants/off.go new file mode 100644 index 0000000..01513f2 --- /dev/null +++ b/pebble/internal/invariants/off.go @@ -0,0 +1,11 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +//go:build !invariants && !race +// +build !invariants,!race + +package invariants + +// Enabled is true if we were built with the "invariants" or "race" build tags. +const Enabled = false diff --git a/pebble/internal/invariants/on.go b/pebble/internal/invariants/on.go new file mode 100644 index 0000000..b418680 --- /dev/null +++ b/pebble/internal/invariants/on.go @@ -0,0 +1,11 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. 
Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +//go:build invariants || race +// +build invariants race + +package invariants + +// Enabled is true if we were built with the "invariants" or "race" build tags. +const Enabled = true diff --git a/pebble/internal/invariants/race_off.go b/pebble/internal/invariants/race_off.go new file mode 100644 index 0000000..b2b8c5e --- /dev/null +++ b/pebble/internal/invariants/race_off.go @@ -0,0 +1,11 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +//go:build !race +// +build !race + +package invariants + +// RaceEnabled is true if we were built with the "race" build tag. +const RaceEnabled = false diff --git a/pebble/internal/invariants/race_on.go b/pebble/internal/invariants/race_on.go new file mode 100644 index 0000000..46613f7 --- /dev/null +++ b/pebble/internal/invariants/race_on.go @@ -0,0 +1,11 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +//go:build race +// +build race + +package invariants + +// RaceEnabled is true if we were built with the "race" build tag. +const RaceEnabled = true diff --git a/pebble/internal/itertest/datadriven.go b/pebble/internal/itertest/datadriven.go new file mode 100644 index 0000000..6c2feef --- /dev/null +++ b/pebble/internal/itertest/datadriven.go @@ -0,0 +1,196 @@ +// Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +// Package itertest provides facilities for testing internal iterators. 
+package itertest + +import ( + "bytes" + "fmt" + "io" + "strconv" + "strings" + "testing" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/pebble/internal/base" + "github.com/stretchr/testify/require" +) + +type iterCmdOpts struct { + fmtKV func(io.Writer, *base.InternalKey, []byte, base.InternalIterator) + stats *base.InternalIteratorStats +} + +// An IterOpt configures the behavior of RunInternalIterCmd. +type IterOpt func(*iterCmdOpts) + +// Verbose configures RunInternalIterCmd to output verbose results. +func Verbose(opts *iterCmdOpts) { opts.fmtKV = verboseFmt } + +// Condensed configures RunInternalIterCmd to output condensed results without +// values. +func Condensed(opts *iterCmdOpts) { opts.fmtKV = condensedFmt } + +// WithStats configures RunInternalIterCmd to collect iterator stats in the +// struct pointed to by stats. +func WithStats(stats *base.InternalIteratorStats) IterOpt { + return func(opts *iterCmdOpts) { + opts.stats = stats + } +} + +func defaultFmt(w io.Writer, key *base.InternalKey, v []byte, iter base.InternalIterator) { + if key != nil { + fmt.Fprintf(w, "%s:%s\n", key.UserKey, v) + } else if err := iter.Error(); err != nil { + fmt.Fprintf(w, "err=%v\n", err) + } else { + fmt.Fprintf(w, ".\n") + } +} + +func condensedFmt(w io.Writer, key *base.InternalKey, v []byte, iter base.InternalIterator) { + if key != nil { + fmt.Fprintf(w, "<%s:%d>", key.UserKey, key.SeqNum()) + } else if err := iter.Error(); err != nil { + fmt.Fprintf(w, "err=%v", err) + } else { + fmt.Fprint(w, ".") + } +} + +func verboseFmt(w io.Writer, key *base.InternalKey, v []byte, iter base.InternalIterator) { + if key != nil { + fmt.Fprintf(w, "%s:%s\n", key, v) + return + } + defaultFmt(w, key, v, iter) +} + +// RunInternalIterCmd evaluates a datadriven command controlling an internal +// iterator, returning a string with the results of the iterator operations. 
+func RunInternalIterCmd( + t *testing.T, d *datadriven.TestData, iter base.InternalIterator, opts ...IterOpt, +) string { + var buf bytes.Buffer + RunInternalIterCmdWriter(t, &buf, d, iter, opts...) + return buf.String() +} + +// RunInternalIterCmdWriter evaluates a datadriven command controlling an +// internal iterator, writing the results of the iterator operations to the +// provided Writer. +func RunInternalIterCmdWriter( + t *testing.T, w io.Writer, d *datadriven.TestData, iter base.InternalIterator, opts ...IterOpt, +) { + o := iterCmdOpts{fmtKV: defaultFmt} + for _, opt := range opts { + opt(&o) + } + + getKV := func(key *base.InternalKey, val base.LazyValue) (*base.InternalKey, []byte) { + v, _, err := val.Value(nil) + require.NoError(t, err) + return key, v + } + var prefix []byte + for _, line := range strings.Split(d.Input, "\n") { + parts := strings.Fields(line) + if len(parts) == 0 { + continue + } + var key *base.InternalKey + var value []byte + switch parts[0] { + case "seek-ge": + if len(parts) < 2 || len(parts) > 3 { + fmt.Fprint(w, "seek-ge []\n") + return + } + prefix = nil + var flags base.SeekGEFlags + if len(parts) == 3 { + if trySeekUsingNext, err := strconv.ParseBool(parts[2]); err != nil { + fmt.Fprintf(w, "%s", err.Error()) + return + } else if trySeekUsingNext { + flags = flags.EnableTrySeekUsingNext() + } + } + key, value = getKV(iter.SeekGE([]byte(strings.TrimSpace(parts[1])), flags)) + case "seek-prefix-ge": + if len(parts) != 2 && len(parts) != 3 { + fmt.Fprint(w, "seek-prefix-ge []\n") + return + } + prefix = []byte(strings.TrimSpace(parts[1])) + var flags base.SeekGEFlags + if len(parts) == 3 { + if trySeekUsingNext, err := strconv.ParseBool(parts[2]); err != nil { + fmt.Fprintf(w, "%s", err.Error()) + return + } else if trySeekUsingNext { + flags = flags.EnableTrySeekUsingNext() + } + } + key, value = getKV(iter.SeekPrefixGE(prefix, prefix /* key */, flags)) + case "seek-lt": + if len(parts) != 2 { + fmt.Fprint(w, "seek-lt \n") + 
return + } + prefix = nil + key, value = getKV(iter.SeekLT([]byte(strings.TrimSpace(parts[1])), base.SeekLTFlagsNone)) + case "first": + prefix = nil + key, value = getKV(iter.First()) + case "last": + prefix = nil + key, value = getKV(iter.Last()) + case "next": + key, value = getKV(iter.Next()) + case "prev": + key, value = getKV(iter.Prev()) + case "set-bounds": + if len(parts) <= 1 || len(parts) > 3 { + fmt.Fprint(w, "set-bounds lower=<lower> upper=<upper>\n") + return + } + var lower []byte + var upper []byte + for _, part := range parts[1:] { + arg := strings.Split(strings.TrimSpace(part), "=") + switch arg[0] { + case "lower": + lower = []byte(arg[1]) + case "upper": + upper = []byte(arg[1]) + default: + fmt.Fprintf(w, "set-bounds: unknown arg: %s", arg) + return + } + } + iter.SetBounds(lower, upper) + continue + case "stats": + if o.stats != nil { + // The timing is non-deterministic, so set to 0. + o.stats.BlockReadDuration = 0 + fmt.Fprintf(w, "%+v\n", *o.stats) + } + continue + case "reset-stats": + if o.stats != nil { + *o.stats = base.InternalIteratorStats{} + } + continue + default: + fmt.Fprintf(w, "unknown op: %s", parts[0]) + return + } + o.fmtKV(w, key, value, iter) + + } +} diff --git a/pebble/internal/keyspan/bounded.go b/pebble/internal/keyspan/bounded.go new file mode 100644 index 0000000..70dd395 --- /dev/null +++ b/pebble/internal/keyspan/bounded.go @@ -0,0 +1,268 @@ +// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. +// +package keyspan + +import "github.com/cockroachdb/pebble/internal/base" + +// TODO(jackson): Consider removing this type and adding bounds enforcement +// directly to the MergingIter. This type is probably too lightweight to warrant +// its own type, but for now we implement it separately for expediency. 
+ +// boundedIterPos records the position of the BoundedIter relative to the +// underlying iterator's position. It's used to avoid Next/Prev-ing the iterator +// if there can't possibly be another span within bounds, because the current +// span overlaps the bound. +// +// Imagine bounds [a,c) and an iterator that seeks to a span [b,d). The span +// [b,d) overlaps some portion of the iterator bounds, so the iterator must +// return it. If the iterator is subsequently Nexted, Next can tell that the +// iterator is exhausted without advancing the underlying iterator because the +// current span's end bound of d is ≥ the upper bound of c. In this case, the +// bounded iterator returns nil and records i.pos as posAtUpperLimit to remember +// that the underlying iterator position does not match the current BoundedIter +// position. +type boundedIterPos int8 + +const ( + posAtLowerLimit boundedIterPos = -1 + posAtIterSpan boundedIterPos = 0 + posAtUpperLimit boundedIterPos = +1 +) + +// BoundedIter implements FragmentIterator and enforces bounds. +// +// Like the point InternalIterator interface, the bounded iterator's forward +// positioning routines (SeekGE, First, and Next) only check the upper bound. +// The reverse positioning routines (SeekLT, Last, and Prev) only check the +// lower bound. It is up to the caller to ensure that the forward positioning +// routines respect the lower bound and the reverse positioning routines respect +// the upper bound (i.e. calling SeekGE instead of First if there is a lower +// bound, and SeekLT instead of Last if there is an upper bound). +// +// When the hasPrefix parameter indicates that the iterator is in prefix +// iteration mode, BoundedIter elides any spans that do not overlap with the +// prefix's keyspace. In prefix iteration mode, reverse iteration is disallowed, +// except for an initial SeekLT with a seek key greater than or equal to the +// prefix. 
In prefix iteration mode, the first seek must position the iterator +// at or immediately before the first fragment covering a key greater than or +// equal to the prefix. +type BoundedIter struct { + iter FragmentIterator + iterSpan *Span + cmp base.Compare + split base.Split + lower []byte + upper []byte + hasPrefix *bool + prefix *[]byte + pos boundedIterPos +} + +// Init initializes the bounded iterator. +// +// In addition to the iterator bounds, Init takes pointers to a boolean +// indicating whether the iterator is in prefix iteration mode and the prefix +// key if it is. This is used to exclude spans that are outside the iteration +// prefix. +// +// hasPrefix and prefix are allowed to be nil, however if hasPrefix != nil, +// prefix must also not be nil. +func (i *BoundedIter) Init( + cmp base.Compare, + split base.Split, + iter FragmentIterator, + lower, upper []byte, + hasPrefix *bool, + prefix *[]byte, +) { + *i = BoundedIter{ + iter: iter, + cmp: cmp, + split: split, + lower: lower, + upper: upper, + hasPrefix: hasPrefix, + prefix: prefix, + } +} + +var _ FragmentIterator = (*BoundedIter)(nil) + +// Seek calls. +// +// Seek calls check iterator bounds in the direction of the seek. Additionally, +// if the iterator is in prefix iteration mode, seek calls check both start and +// end bounds against the prefix's bounds. We check both bounds for defense in +// depth. This optimization has been a source of various bugs due to various +// other prefix iteration optimizations that can result in seek keys that don't +// respect the prefix bounds. + +// SeekGE implements FragmentIterator. +func (i *BoundedIter) SeekGE(key []byte) *Span { + s := i.iter.SeekGE(key) + s = i.checkPrefixSpanStart(s) + s = i.checkPrefixSpanEnd(s) + return i.checkForwardBound(s) +} + +// SeekLT implements FragmentIterator. 
+func (i *BoundedIter) SeekLT(key []byte) *Span { + s := i.iter.SeekLT(key) + s = i.checkPrefixSpanStart(s) + s = i.checkPrefixSpanEnd(s) + return i.checkBackwardBound(s) +} + +// First implements FragmentIterator. +func (i *BoundedIter) First() *Span { + s := i.iter.First() + s = i.checkPrefixSpanStart(s) + return i.checkForwardBound(s) +} + +// Last implements FragmentIterator. +func (i *BoundedIter) Last() *Span { + s := i.iter.Last() + s = i.checkPrefixSpanEnd(s) + return i.checkBackwardBound(s) +} + +// Next implements FragmentIterator. +func (i *BoundedIter) Next() *Span { + switch i.pos { + case posAtLowerLimit: + // The BoundedIter had previously returned nil, because it knew from + // i.iterSpan's bounds that there was no previous span. To Next, we only + // need to return the current iter span and reset i.pos to reflect that + // we're no longer positioned at the limit. + i.pos = posAtIterSpan + return i.iterSpan + case posAtIterSpan: + // If the span at the underlying iterator position extends to or beyond the + // upper bound, we can avoid advancing because the next span is necessarily + // out of bounds. + if i.iterSpan != nil && i.upper != nil && i.cmp(i.iterSpan.End, i.upper) >= 0 { + i.pos = posAtUpperLimit + return nil + } + // Similarly, if the span extends to the next prefix and we're in prefix + // iteration mode, we can avoid advancing. + if i.iterSpan != nil && i.hasPrefix != nil && *i.hasPrefix { + ei := i.split(i.iterSpan.End) + if i.cmp(i.iterSpan.End[:ei], *i.prefix) > 0 { + i.pos = posAtUpperLimit + return nil + } + } + return i.checkForwardBound(i.checkPrefixSpanStart(i.iter.Next())) + case posAtUpperLimit: + // Already exhausted. + return nil + default: + panic("unreachable") + } +} + +// Prev implements FragmentIterator. +func (i *BoundedIter) Prev() *Span { + switch i.pos { + case posAtLowerLimit: + // Already exhausted. 
+ return nil + case posAtIterSpan: + // If the span at the underlying iterator position extends to or beyond + // the lower bound, we can avoid advancing because the previous span is + // necessarily out of bounds. + if i.iterSpan != nil && i.lower != nil && i.cmp(i.iterSpan.Start, i.lower) <= 0 { + i.pos = posAtLowerLimit + return nil + } + // Similarly, if the span extends to or beyond the current prefix and + // we're in prefix iteration mode, we can avoid advancing. + if i.iterSpan != nil && i.hasPrefix != nil && *i.hasPrefix { + si := i.split(i.iterSpan.Start) + if i.cmp(i.iterSpan.Start[:si], *i.prefix) < 0 { + i.pos = posAtLowerLimit + return nil + } + } + return i.checkBackwardBound(i.checkPrefixSpanEnd(i.iter.Prev())) + case posAtUpperLimit: + // The BoundedIter had previously returned nil, because it knew from + // i.iterSpan's bounds that there was no next span. To Prev, we only + // need to return the current iter span and reset i.pos to reflect that + // we're no longer positioned at the limit. + i.pos = posAtIterSpan + return i.iterSpan + default: + panic("unreachable") + } +} + +// Error implements FragmentIterator. +func (i *BoundedIter) Error() error { + return i.iter.Error() +} + +// Close implements FragmentIterator. +func (i *BoundedIter) Close() error { + return i.iter.Close() +} + +// SetBounds modifies the FragmentIterator's bounds. +func (i *BoundedIter) SetBounds(lower, upper []byte) { + i.lower, i.upper = lower, upper +} + +func (i *BoundedIter) checkPrefixSpanStart(span *Span) *Span { + // Compare to the prefix's bounds, if in prefix iteration mode. + if span != nil && i.hasPrefix != nil && *i.hasPrefix { + si := i.split(span.Start) + if i.cmp(span.Start[:si], *i.prefix) > 0 { + // This span starts at a prefix that sorts after our current prefix. + span = nil + } + } + return span +} + +// checkForwardBound enforces the upper bound, returning nil if the provided +// span is wholly outside the upper bound. 
It also updates i.pos and i.iterSpan +// to reflect the new iterator position. +func (i *BoundedIter) checkForwardBound(span *Span) *Span { + // Compare to the upper bound. + if span != nil && i.upper != nil && i.cmp(span.Start, i.upper) >= 0 { + span = nil + } + i.iterSpan = span + if i.pos != posAtIterSpan { + i.pos = posAtIterSpan + } + return span +} + +func (i *BoundedIter) checkPrefixSpanEnd(span *Span) *Span { + // Compare to the prefix's bounds, if in prefix iteration mode. + if span != nil && i.hasPrefix != nil && *i.hasPrefix && i.cmp(span.End, *i.prefix) <= 0 { + // This span ends before the current prefix. + span = nil + } + return span +} + +// checkBackwardBound enforces the lower bound, returning nil if the provided span is +// wholly outside the lower bound. It also updates i.pos and i.iterSpan to +// reflect the new iterator position. +func (i *BoundedIter) checkBackwardBound(span *Span) *Span { + // Compare to the lower bound. + if span != nil && i.lower != nil && i.cmp(span.End, i.lower) <= 0 { + span = nil + } + i.iterSpan = span + if i.pos != posAtIterSpan { + i.pos = posAtIterSpan + } + return span +} diff --git a/pebble/internal/keyspan/bounded_test.go b/pebble/internal/keyspan/bounded_test.go new file mode 100644 index 0000000..edb3b5a --- /dev/null +++ b/pebble/internal/keyspan/bounded_test.go @@ -0,0 +1,69 @@ +// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package keyspan + +import ( + "bytes" + "fmt" + "strings" + "testing" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/pebble/internal/testkeys" +) + +func TestBoundedIter(t *testing.T) { + getBounds := func(td *datadriven.TestData) (lower, upper []byte) { + for _, cmdArg := range td.CmdArgs { + switch cmdArg.Key { + case "lower": + if len(cmdArg.Vals[0]) > 0 { + lower = []byte(cmdArg.Vals[0]) + } + case "upper": + if len(cmdArg.Vals[0]) > 0 { + upper = []byte(cmdArg.Vals[0]) + } + } + } + return lower, upper + } + + cmp := testkeys.Comparer.Compare + split := testkeys.Comparer.Split + var buf bytes.Buffer + var iter BoundedIter + var hasPrefix bool + var prefix []byte + datadriven.RunTest(t, "testdata/bounded_iter", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "define": + var spans []Span + lines := strings.Split(strings.TrimSpace(td.Input), "\n") + for _, line := range lines { + spans = append(spans, ParseSpan(line)) + } + inner := &invalidatingIter{iter: NewIter(cmp, spans)} + lower, upper := getBounds(td) + iter.Init(cmp, split, inner, lower, upper, &hasPrefix, &prefix) + return "" + case "set-prefix": + hasPrefix = len(td.CmdArgs) > 0 + if hasPrefix { + prefix = []byte(td.CmdArgs[0].String()) + return fmt.Sprintf("set prefix to %q\n", prefix) + } + return "cleared prefix" + case "iter": + buf.Reset() + lower, upper := getBounds(td) + iter.SetBounds(lower, upper) + runIterCmd(t, td, &iter, &buf) + return buf.String() + default: + return fmt.Sprintf("unrecognized command %q", td.Cmd) + } + }) +} diff --git a/pebble/internal/keyspan/datadriven_test.go b/pebble/internal/keyspan/datadriven_test.go new file mode 100644 index 0000000..5b1d7aa --- /dev/null +++ b/pebble/internal/keyspan/datadriven_test.go @@ -0,0 +1,432 @@ +// Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package keyspan + +import ( + "fmt" + "go/token" + "io" + "reflect" + "strconv" + "strings" + "testing" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/dsl" +) + +// This file contains testing facilities for Spans and FragmentIterators. It's +// defined here so that it may be used by the keyspan package to test its +// various FragmentIterator implementations. +// +// TODO(jackson): Move keyspan.{Span,Key,FragmentIterator} into internal/base, +// and then move the testing facilities to an independent package, eg +// internal/itertest. + +// probe defines an interface for probes that may inspect or mutate internal +// span iterator behavior. +type probe interface { + // probe inspects, and possibly manipulates, iterator operations' results. + probe(*probeContext) +} + +func parseProbes(probeDSLs ...string) []probe { + probes := make([]probe, len(probeDSLs)) + var err error + for i := range probeDSLs { + probes[i], err = probeParser.Parse(probeDSLs[i]) + if err != nil { + panic(err) + } + } + return probes +} + +func attachProbes(iter FragmentIterator, pctx probeContext, probes ...probe) FragmentIterator { + if pctx.log == nil { + pctx.log = io.Discard + } + for i := range probes { + iter = &probeIterator{ + iter: iter, + probe: probes[i], + probeCtx: pctx, + } + } + return iter +} + +// probeContext provides the context within which a probe is run. It includes +// information about the iterator operation in progress. +type probeContext struct { + op + log io.Writer +} + +type op struct { + Kind OpKind + SeekKey []byte + Span *Span + Err error +} + +// ErrInjected is an error artificially injected for testing. 
+var ErrInjected = &errorProbe{name: "ErrInjected", err: errors.New("injected error")} + +var probeParser = func() *dsl.Parser[probe] { + valuerParser := dsl.NewParser[valuer]() + valuerParser.DefineConstant("StartKey", func() valuer { return startKey{} }) + valuerParser.DefineFunc("Bytes", + func(p *dsl.Parser[valuer], s *dsl.Scanner) valuer { + v := bytesConstant{bytes: []byte(s.ConsumeString())} + s.Consume(token.RPAREN) + return v + }) + + predicateParser := dsl.NewPredicateParser[*probeContext]() + predicateParser.DefineFunc("Equal", + func(p *dsl.Parser[dsl.Predicate[*probeContext]], s *dsl.Scanner) dsl.Predicate[*probeContext] { + eq := equal{ + valuerParser.ParseFromPos(s, s.Scan()), + valuerParser.ParseFromPos(s, s.Scan()), + } + s.Consume(token.RPAREN) + return eq + }) + for i, name := range opNames { + opKind := OpKind(i) + predicateParser.DefineConstant(name, func() dsl.Predicate[*probeContext] { + // An OpKind implements dsl.Predicate[*probeContext]. + return opKind + }) + } + probeParser := dsl.NewParser[probe]() + probeParser.DefineConstant("ErrInjected", func() probe { return ErrInjected }) + probeParser.DefineConstant("noop", func() probe { return noop{} }) + probeParser.DefineFunc("If", + func(p *dsl.Parser[probe], s *dsl.Scanner) probe { + probe := ifProbe{ + predicateParser.ParseFromPos(s, s.Scan()), + probeParser.ParseFromPos(s, s.Scan()), + probeParser.ParseFromPos(s, s.Scan()), + } + s.Consume(token.RPAREN) + return probe + }) + probeParser.DefineFunc("Return", + func(p *dsl.Parser[probe], s *dsl.Scanner) (ret probe) { + switch tok := s.Scan(); tok.Kind { + case token.STRING: + str, err := strconv.Unquote(tok.Lit) + if err != nil { + panic(err) + } + span := ParseSpan(str) + ret = returnSpan{s: &span} + case token.IDENT: + switch tok.Lit { + case "nil": + ret = returnSpan{s: nil} + default: + panic(errors.Newf("unrecognized return value %q", tok.Lit)) + } + } + s.Consume(token.RPAREN) + return ret + }) + probeParser.DefineFunc("Log", + func(p 
*dsl.Parser[probe], s *dsl.Scanner) (ret probe) { + ret = loggingProbe{prefix: s.ConsumeString()} + s.Consume(token.RPAREN) + return ret + }) + return probeParser +}() + +// probe implementations + +type errorProbe struct { + name string + err error +} + +func (p *errorProbe) String() string { return p.name } +func (p *errorProbe) Error() error { return p.err } +func (p *errorProbe) probe(pctx *probeContext) { + pctx.op.Err = p.err + pctx.op.Span = nil +} + +// ifProbe is a conditional probe. If its predicate evaluates to true, it probes +// using its Then probe. If its predicate evaluates to false, it probes using its +// Else probe. +type ifProbe struct { + Predicate dsl.Predicate[*probeContext] + Then probe + Else probe +} + +func (p ifProbe) String() string { return fmt.Sprintf("(If %s %s %s)", p.Predicate, p.Then, p.Else) } +func (p ifProbe) probe(pctx *probeContext) { + if p.Predicate.Evaluate(pctx) { + p.Then.probe(pctx) + } else { + p.Else.probe(pctx) + } +} + +type returnSpan struct { + s *Span +} + +func (p returnSpan) String() string { + if p.s == nil { + return "(Return nil)" + } + return fmt.Sprintf("(Return %q)", p.s.String()) +} + +func (p returnSpan) probe(pctx *probeContext) { + pctx.op.Span = p.s + pctx.op.Err = nil +} + +type noop struct{} + +func (noop) String() string { return "Noop" } +func (noop) probe(pctx *probeContext) {} + +type loggingProbe struct { + prefix string +} + +func (lp loggingProbe) String() string { return fmt.Sprintf("(Log %q)", lp.prefix) } +func (lp loggingProbe) probe(pctx *probeContext) { + opStr := strings.TrimPrefix(pctx.op.Kind.String(), "Op") + fmt.Fprintf(pctx.log, "%s%s(", lp.prefix, opStr) + if pctx.op.SeekKey != nil { + fmt.Fprintf(pctx.log, "%q", pctx.op.SeekKey) + } + fmt.Fprint(pctx.log, ") = ") + if pctx.op.Span == nil { + fmt.Fprint(pctx.log, "nil") + if pctx.op.Err != nil { + fmt.Fprintf(pctx.log, " <err=%q>", pctx.op.Err) + } + } else { + fmt.Fprint(pctx.log, pctx.op.Span.String()) + } + fmt.Fprintln(pctx.log) +} + 
+// dsl.Predicate[*probeContext] implementations. + +type equal struct { + a, b valuer +} + +func (e equal) String() string { return fmt.Sprintf("(Equal %s %s)", e.a, e.b) } +func (e equal) Evaluate(pctx *probeContext) bool { + return reflect.DeepEqual(e.a.value(pctx), e.b.value(pctx)) +} + +// OpKind indicates the type of iterator operation being performed. +type OpKind int8 + +const ( + OpSeekGE OpKind = iota + OpSeekLT + OpFirst + OpLast + OpNext + OpPrev + OpClose + numOpKinds +) + +func (o OpKind) String() string { return opNames[o] } +func (o OpKind) Evaluate(pctx *probeContext) bool { return pctx.op.Kind == o } + +var opNames = [numOpKinds]string{ + OpSeekGE: "OpSeekGE", + OpSeekLT: "OpSeekLT", + OpFirst: "OpFirst", + OpLast: "OpLast", + OpNext: "OpNext", + OpPrev: "OpPrev", + OpClose: "OpClose", +} + +// valuer implementations + +type valuer interface { + fmt.Stringer + value(pctx *probeContext) any +} + +type bytesConstant struct { + bytes []byte +} + +func (b bytesConstant) String() string { return fmt.Sprintf("%q", string(b.bytes)) } +func (b bytesConstant) value(pctx *probeContext) any { return b.bytes } + +type startKey struct{} + +func (s startKey) String() string { return "StartKey" } +func (s startKey) value(pctx *probeContext) any { + if pctx.op.Span == nil { + return nil + } + return pctx.op.Span.Start +} + +type probeIterator struct { + iter FragmentIterator + err error + probe probe + probeCtx probeContext +} + +// Assert that probeIterator implements the fragment iterator interface. 
+var _ FragmentIterator = (*probeIterator)(nil) + +func (p *probeIterator) handleOp(preProbeOp op) *Span { + p.probeCtx.op = preProbeOp + if preProbeOp.Span == nil && p.iter != nil { + p.probeCtx.op.Err = p.iter.Error() + } + + p.probe.probe(&p.probeCtx) + p.err = p.probeCtx.op.Err + return p.probeCtx.op.Span +} + +func (p *probeIterator) SeekGE(key []byte) *Span { + op := op{ + Kind: OpSeekGE, + SeekKey: key, + } + if p.iter != nil { + op.Span = p.iter.SeekGE(key) + } + return p.handleOp(op) +} + +func (p *probeIterator) SeekLT(key []byte) *Span { + op := op{ + Kind: OpSeekLT, + SeekKey: key, + } + if p.iter != nil { + op.Span = p.iter.SeekLT(key) + } + return p.handleOp(op) +} + +func (p *probeIterator) First() *Span { + op := op{Kind: OpFirst} + if p.iter != nil { + op.Span = p.iter.First() + } + return p.handleOp(op) +} + +func (p *probeIterator) Last() *Span { + op := op{Kind: OpLast} + if p.iter != nil { + op.Span = p.iter.Last() + } + return p.handleOp(op) +} + +func (p *probeIterator) Next() *Span { + op := op{Kind: OpNext} + if p.iter != nil { + op.Span = p.iter.Next() + } + return p.handleOp(op) +} + +func (p *probeIterator) Prev() *Span { + op := op{Kind: OpPrev} + if p.iter != nil { + op.Span = p.iter.Prev() + } + return p.handleOp(op) +} + +func (p *probeIterator) Error() error { + return p.err +} + +func (p *probeIterator) Close() error { + op := op{Kind: OpClose} + if p.iter != nil { + op.Err = p.iter.Close() + } + + p.probeCtx.op = op + p.probe.probe(&p.probeCtx) + p.err = p.probeCtx.op.Err + return p.err +} + +// runIterCmd evaluates a datadriven command controlling an internal +// keyspan.FragmentIterator, writing the results of the iterator operations to +// the provided writer. 
+func runIterCmd(t *testing.T, td *datadriven.TestData, iter FragmentIterator, w io.Writer) { + lines := strings.Split(strings.TrimSpace(td.Input), "\n") + for i, line := range lines { + if i > 0 { + fmt.Fprintln(w) + } + line = strings.TrimSpace(line) + i := strings.IndexByte(line, '#') + iterCmd := line + if i > 0 { + iterCmd = string(line[:i]) + } + runIterOp(w, iter, iterCmd) + } +} + +var iterDelim = map[rune]bool{',': true, ' ': true, '(': true, ')': true, '"': true} + +func runIterOp(w io.Writer, it FragmentIterator, op string) { + fields := strings.FieldsFunc(op, func(r rune) bool { return iterDelim[r] }) + var s *Span + switch strings.ToLower(fields[0]) { + case "first": + s = it.First() + case "last": + s = it.Last() + case "seekge", "seek-ge": + if len(fields) == 1 { + panic(fmt.Sprintf("unable to parse iter op %q", op)) + } + s = it.SeekGE([]byte(fields[1])) + case "seeklt", "seek-lt": + if len(fields) == 1 { + panic(fmt.Sprintf("unable to parse iter op %q", op)) + } + s = it.SeekLT([]byte(fields[1])) + case "next": + s = it.Next() + case "prev": + s = it.Prev() + default: + panic(fmt.Sprintf("unrecognized iter op %q", fields[0])) + } + if s == nil { + fmt.Fprint(w, "<nil>") + if err := it.Error(); err != nil { + fmt.Fprintf(w, " err=<%s>", it.Error()) + } + return + } + fmt.Fprint(w, s) +} diff --git a/pebble/internal/keyspan/defragment.go b/pebble/internal/keyspan/defragment.go new file mode 100644 index 0000000..d056ef0 --- /dev/null +++ b/pebble/internal/keyspan/defragment.go @@ -0,0 +1,539 @@ +// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package keyspan + +import ( + "bytes" + + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/bytealloc" + "github.com/cockroachdb/pebble/internal/invariants" +) + +// bufferReuseMaxCapacity is the maximum capacity of a DefragmentingIter buffer +// that DefragmentingIter will reuse. Buffers larger than this will be +// discarded and reallocated as necessary. +const bufferReuseMaxCapacity = 10 << 10 // 10 KB + +// keysReuseMaxCapacity is the maximum capacity of a []keyspan.Key buffer that +// DefragmentingIter will reuse. Buffers larger than this will be discarded and +// reallocated as necessary. +const keysReuseMaxCapacity = 100 + +// DefragmentMethod configures the defragmentation performed by the +// DefragmentingIter. +type DefragmentMethod interface { + // ShouldDefragment takes two abutting spans and returns whether the two + // spans should be combined into a single, defragmented Span. + ShouldDefragment(equal base.Equal, left, right *Span) bool +} + +// The DefragmentMethodFunc type is an adapter to allow the use of ordinary +// functions as DefragmentMethods. If f is a function with the appropriate +// signature, DefragmentMethodFunc(f) is a DefragmentMethod that calls f. +type DefragmentMethodFunc func(equal base.Equal, left, right *Span) bool + +// ShouldDefragment calls f(equal, left, right). +func (f DefragmentMethodFunc) ShouldDefragment(equal base.Equal, left, right *Span) bool { + return f(equal, left, right) +} + +// DefragmentInternal configures a DefragmentingIter to defragment spans +// only if they have identical keys. It requires spans' keys to be sorted in +// trailer descending order. +// +// This defragmenting method is intended for use in compactions that may see +// internal range keys fragments that may now be joined, because the state that +// required their fragmentation has been dropped. 
+var DefragmentInternal DefragmentMethod = DefragmentMethodFunc(func(equal base.Equal, a, b *Span) bool { + if a.KeysOrder != ByTrailerDesc || b.KeysOrder != ByTrailerDesc { + panic("pebble: span keys unexpectedly not in trailer descending order") + } + if len(a.Keys) != len(b.Keys) { + return false + } + for i := range a.Keys { + if a.Keys[i].Trailer != b.Keys[i].Trailer { + return false + } + if !equal(a.Keys[i].Suffix, b.Keys[i].Suffix) { + return false + } + if !bytes.Equal(a.Keys[i].Value, b.Keys[i].Value) { + return false + } + } + return true +}) + +// DefragmentReducer merges the current and next Key slices, returning a new Key +// slice. +// +// Implementations should modify and return `cur` to save on allocations, or +// consider allocating a new slice, as the `cur` slice may be retained by the +// DefragmentingIter and mutated. The `next` slice must not be mutated. +// +// The incoming slices are sorted by (SeqNum, Kind) descending. The output slice +// must also have this sort order. +type DefragmentReducer func(cur, next []Key) []Key + +// StaticDefragmentReducer is a no-op DefragmentReducer that simply returns the +// current key slice, effectively retaining the first set of keys encountered +// for a defragmented span. +// +// This reducer can be used, for example, when the set of Keys for each Span +// being reduced is not expected to change, and therefore the keys from the +// first span encountered can be used without considering keys in subsequent +// spans. +var StaticDefragmentReducer DefragmentReducer = func(cur, _ []Key) []Key { + return cur +} + +// iterPos is an enum indicating the position of the defragmenting iter's +// wrapped iter. The defragmenting iter must look ahead or behind when +// defragmenting forward or backwards respectively, and this enum records that +// current position. 
+type iterPos int8 + +const ( + iterPosPrev iterPos = -1 + iterPosCurr iterPos = 0 + iterPosNext iterPos = +1 +) + +// DefragmentingIter wraps a key span iterator, defragmenting physical +// fragmentation during iteration. +// +// During flushes and compactions, keys applied over a span may be split at +// sstable boundaries. This fragmentation can produce internal key bounds that +// do not match any of the bounds ever supplied to a user operation. This +// physical fragmentation is necessary to avoid excessively wide sstables. +// +// The defragmenting iterator undoes this physical fragmentation, joining spans +// with abutting bounds and equal state. The defragmenting iterator takes a +// DefragmentMethod to determine what is "equal state" for a span. The +// DefragmentMethod is a function type, allowing arbitrary comparisons between +// Span keys. +// +// Seeking (SeekGE, SeekLT) poses an obstacle to defragmentation. A seek may +// land on a physical fragment in the middle of several fragments that must be +// defragmented. A seek that lands in a fragment straddling the seek key must +// first degfragment in the opposite direction of iteration to find the +// beginning of the defragmented span, and then defragments in the iteration +// direction, ensuring it's found a whole defragmented span. +type DefragmentingIter struct { + // DefragmentingBuffers holds buffers used for copying iterator state. + *DefragmentingBuffers + comparer *base.Comparer + equal base.Equal + iter FragmentIterator + iterSpan *Span + iterPos iterPos + + // curr holds the span at the current iterator position. + curr Span + + // method is a comparison function for two spans. method is called when two + // spans are abutting to determine whether they may be defragmented. + // method does not itself check for adjacency for the two spans. + method DefragmentMethod + + // reduce is the reducer function used to collect Keys across all spans that + // constitute a defragmented span. 
+ reduce DefragmentReducer +} + +// DefragmentingBuffers holds buffers used for copying iterator state. +type DefragmentingBuffers struct { + // currBuf is a buffer for use when copying user keys for curr. currBuf is + // cleared between positioning methods. + currBuf bytealloc.A + // keysBuf is a buffer for use when copying Keys for DefragmentingIter.curr. + keysBuf []Key + // keyBuf is a buffer specifically for the defragmented start key when + // defragmenting backwards or the defragmented end key when defragmenting + // forwards. These bounds are overwritten repeatedly during defragmentation, + // and the defragmentation routines overwrite keyBuf repeatedly to store + // these extended bounds. + keyBuf []byte +} + +// PrepareForReuse discards any excessively large buffers. +func (bufs *DefragmentingBuffers) PrepareForReuse() { + if cap(bufs.currBuf) > bufferReuseMaxCapacity { + bufs.currBuf = nil + } + if cap(bufs.keyBuf) > bufferReuseMaxCapacity { + bufs.keyBuf = nil + } + if cap(bufs.keysBuf) > keysReuseMaxCapacity { + bufs.keysBuf = nil + } +} + +// Assert that *DefragmentingIter implements the FragmentIterator interface. +var _ FragmentIterator = (*DefragmentingIter)(nil) + +// Init initializes the defragmenting iter using the provided defragment +// method. +func (i *DefragmentingIter) Init( + comparer *base.Comparer, + iter FragmentIterator, + equal DefragmentMethod, + reducer DefragmentReducer, + bufs *DefragmentingBuffers, +) { + *i = DefragmentingIter{ + DefragmentingBuffers: bufs, + comparer: comparer, + equal: comparer.Equal, + iter: iter, + method: equal, + reduce: reducer, + } +} + +// Error returns any accumulated error. +func (i *DefragmentingIter) Error() error { + return i.iter.Error() +} + +// Close closes the underlying iterators. +func (i *DefragmentingIter) Close() error { + return i.iter.Close() +} + +// SeekGE moves the iterator to the first span covering a key greater than or +// equal to the given key. 
This is equivalent to seeking to the first span with +// an end key greater than the given key. +func (i *DefragmentingIter) SeekGE(key []byte) *Span { + i.iterSpan = i.iter.SeekGE(key) + if i.iterSpan == nil { + i.iterPos = iterPosCurr + return nil + } else if i.iterSpan.Empty() { + i.iterPos = iterPosCurr + return i.iterSpan + } + // If the span starts strictly after key, we know there mustn't be an + // earlier span that ends at i.iterSpan.Start, otherwise i.iter would've + // returned that span instead. + if i.comparer.Compare(i.iterSpan.Start, key) > 0 { + return i.defragmentForward() + } + + // The span we landed on has a Start bound ≤ key. There may be additional + // fragments before this span. Defragment backward to find the start of the + // defragmented span. + i.defragmentBackward() + + // Defragmenting backward may have stopped because it encountered an error. + // If so, we must not continue so that i.iter.Error() (and thus i.Error()) + // yields the error. + if i.iterSpan == nil && i.iter.Error() != nil { + return nil + } + + if i.iterPos == iterPosPrev { + // Next once back onto the span. + i.iterSpan = i.iter.Next() + } + // Defragment the full span from its start. + return i.defragmentForward() +} + +// SeekLT moves the iterator to the last span covering a key less than the +// given key. This is equivalent to seeking to the last span with a start +// key less than the given key. +func (i *DefragmentingIter) SeekLT(key []byte) *Span { + i.iterSpan = i.iter.SeekLT(key) + if i.iterSpan == nil { + i.iterPos = iterPosCurr + return nil + } else if i.iterSpan.Empty() { + i.iterPos = iterPosCurr + return i.iterSpan + } + // If the span ends strictly before key, we know there mustn't be a later + // span that starts at i.iterSpan.End, otherwise i.iter would've returned + // that span instead. + if i.comparer.Compare(i.iterSpan.End, key) < 0 { + return i.defragmentBackward() + } + + // The span we landed on has a End bound ≥ key. 
There may be additional + // fragments after this span. Defragment forward to find the end of the + // defragmented span. + i.defragmentForward() + + // Defragmenting forward may have stopped because it encountered an error. + // If so, we must not continue so that i.iter.Error() (and thus i.Error()) + // yields the error. + if i.iterSpan == nil && i.iter.Error() != nil { + return nil + } + + if i.iterPos == iterPosNext { + // Prev once back onto the span. + i.iterSpan = i.iter.Prev() + } + // Defragment the full span from its end. + return i.defragmentBackward() +} + +// First seeks the iterator to the first span and returns it. +func (i *DefragmentingIter) First() *Span { + i.iterSpan = i.iter.First() + if i.iterSpan == nil { + i.iterPos = iterPosCurr + return nil + } + return i.defragmentForward() +} + +// Last seeks the iterator to the last span and returns it. +func (i *DefragmentingIter) Last() *Span { + i.iterSpan = i.iter.Last() + if i.iterSpan == nil { + i.iterPos = iterPosCurr + return nil + } + return i.defragmentBackward() +} + +// Next advances to the next span and returns it. +func (i *DefragmentingIter) Next() *Span { + switch i.iterPos { + case iterPosPrev: + // Switching directions; The iterator is currently positioned over the + // last span of the previous set of fragments. In the below diagram, + // the iterator is positioned over the last span that contributes to + // the defragmented x position. We want to be positioned over the first + // span that contributes to the z position. + // + // x x x y y y z z z + // ^ ^ + // old new + // + // Next once to move onto y, defragment forward to land on the first z + // position. + i.iterSpan = i.iter.Next() + if invariants.Enabled && i.iterSpan == nil && i.iter.Error() == nil { + panic("pebble: invariant violation: no next span while switching directions") + } + // We're now positioned on the first span that was defragmented into the + // current iterator position. 
Skip over the rest of the current iterator
+		// position's constituent fragments. In the above example, this would
+		// land on the first 'z'.
+		i.defragmentForward()
+		if i.iterSpan == nil {
+			i.iterPos = iterPosCurr
+			return nil
+		}
+
+		// Now that we're positioned over the first of the next set of
+		// fragments, defragment forward.
+		return i.defragmentForward()
+	case iterPosCurr:
+		// iterPosCurr is only used when the iter is exhausted or when the iterator
+		// is at an empty span.
+		if invariants.Enabled && i.iterSpan != nil && !i.iterSpan.Empty() {
+			panic("pebble: invariant violation: iterPosCurr with valid iterSpan")
+		}
+
+		i.iterSpan = i.iter.Next()
+		if i.iterSpan == nil {
+			return nil
+		}
+		return i.defragmentForward()
+	case iterPosNext:
+		// Already at the next span.
+		if i.iterSpan == nil {
+			i.iterPos = iterPosCurr
+			return nil
+		}
+		return i.defragmentForward()
+	default:
+		panic("unreachable")
+	}
+}
+
+// Prev steps back to the previous span and returns it.
+func (i *DefragmentingIter) Prev() *Span {
+	switch i.iterPos {
+	case iterPosPrev:
+		// Already at the previous span.
+		if i.iterSpan == nil {
+			i.iterPos = iterPosCurr
+			return nil
+		}
+		return i.defragmentBackward()
+	case iterPosCurr:
+		// iterPosCurr is only used when the iter is exhausted or when the iterator
+		// is at an empty span.
+		if invariants.Enabled && i.iterSpan != nil && !i.iterSpan.Empty() {
+			panic("pebble: invariant violation: iterPosCurr with valid iterSpan")
+		}
+
+		i.iterSpan = i.iter.Prev()
+		if i.iterSpan == nil {
+			return nil
+		}
+		return i.defragmentBackward()
+	case iterPosNext:
+		// Switching directions; The iterator is currently positioned over the
+		// first fragment of the next set of fragments. In the below diagram,
+		// the iterator is positioned over the first span that contributes to
+		// the defragmented z position. We want to be positioned over the last
+		// span that contributes to the x position.
+ // + // x x x y y y z z z + // ^ ^ + // new old + // + // Prev once to move onto y, defragment backward to land on the last x + // position. + i.iterSpan = i.iter.Prev() + if invariants.Enabled && i.iterSpan == nil && i.iter.Error() == nil { + panic("pebble: invariant violation: no previous span while switching directions") + } + // We're now positioned on the last span that was defragmented into the + // current iterator position. Skip over the rest of the current iterator + // position's constitutent fragments. In the above example, this would + // land on the last 'x'. + i.defragmentBackward() + + // Now that we're positioned over the last of the prev set of + // fragments, defragment backward. + if i.iterSpan == nil { + i.iterPos = iterPosCurr + return nil + } + return i.defragmentBackward() + default: + panic("unreachable") + } +} + +// checkEqual checks the two spans for logical equivalence. It uses the passed-in +// DefragmentMethod and ensures both spans are NOT empty; not defragmenting empty +// spans is an optimization that lets us load fewer sstable blocks. +func (i *DefragmentingIter) checkEqual(left, right *Span) bool { + return (!left.Empty() && !right.Empty()) && i.method.ShouldDefragment(i.equal, i.iterSpan, &i.curr) +} + +// defragmentForward defragments spans in the forward direction, starting from +// i.iter's current position. The span at the current position must be non-nil, +// but may be Empty(). +func (i *DefragmentingIter) defragmentForward() *Span { + if i.iterSpan.Empty() { + // An empty span will never be equal to another span; see checkEqual for + // why. To avoid loading non-empty range keys further ahead by calling Next, + // return early. + i.iterPos = iterPosCurr + return i.iterSpan + } + i.saveCurrent() + + i.iterPos = iterPosNext + i.iterSpan = i.iter.Next() + for i.iterSpan != nil { + if !i.equal(i.curr.End, i.iterSpan.Start) { + // Not a continuation. 
+ break + } + if !i.checkEqual(i.iterSpan, &i.curr) { + // Not a continuation. + break + } + i.keyBuf = append(i.keyBuf[:0], i.iterSpan.End...) + i.curr.End = i.keyBuf + i.keysBuf = i.reduce(i.keysBuf, i.iterSpan.Keys) + i.iterSpan = i.iter.Next() + } + // i.iterSpan == nil + // + // The inner iterator may return nil when it encounters an error. If there + // was an error, we don't know whether there is another span we should + // defragment or not. Return nil so that the caller knows they should check + // Error(). + if i.iter.Error() != nil { + return nil + } + i.curr.Keys = i.keysBuf + return &i.curr +} + +// defragmentBackward defragments spans in the backward direction, starting from +// i.iter's current position. The span at the current position must be non-nil, +// but may be Empty(). +func (i *DefragmentingIter) defragmentBackward() *Span { + if i.iterSpan.Empty() { + // An empty span will never be equal to another span; see checkEqual for + // why. To avoid loading non-empty range keys further ahead by calling Next, + // return early. + i.iterPos = iterPosCurr + return i.iterSpan + } + i.saveCurrent() + + i.iterPos = iterPosPrev + i.iterSpan = i.iter.Prev() + for i.iterSpan != nil { + if !i.equal(i.curr.Start, i.iterSpan.End) { + // Not a continuation. + break + } + if !i.checkEqual(i.iterSpan, &i.curr) { + // Not a continuation. + break + } + i.keyBuf = append(i.keyBuf[:0], i.iterSpan.Start...) + i.curr.Start = i.keyBuf + i.keysBuf = i.reduce(i.keysBuf, i.iterSpan.Keys) + i.iterSpan = i.iter.Prev() + } + // i.iterSpan == nil + // + // The inner iterator may return nil when it encounters an error. If there + // was an error, we don't know whether there is another span we should + // defragment or not. Return nil so that the caller knows they should check + // Error(). 
+ if i.iter.Error() != nil { + return nil + } + i.curr.Keys = i.keysBuf + return &i.curr +} + +func (i *DefragmentingIter) saveCurrent() { + i.currBuf.Reset() + i.keysBuf = i.keysBuf[:0] + i.keyBuf = i.keyBuf[:0] + if i.iterSpan == nil { + return + } + i.curr = Span{ + Start: i.saveBytes(i.iterSpan.Start), + End: i.saveBytes(i.iterSpan.End), + KeysOrder: i.iterSpan.KeysOrder, + } + for j := range i.iterSpan.Keys { + i.keysBuf = append(i.keysBuf, Key{ + Trailer: i.iterSpan.Keys[j].Trailer, + Suffix: i.saveBytes(i.iterSpan.Keys[j].Suffix), + Value: i.saveBytes(i.iterSpan.Keys[j].Value), + }) + } + i.curr.Keys = i.keysBuf +} + +func (i *DefragmentingIter) saveBytes(b []byte) []byte { + if b == nil { + return nil + } + i.currBuf, b = i.currBuf.Copy(b) + return b +} diff --git a/pebble/internal/keyspan/defragment_test.go b/pebble/internal/keyspan/defragment_test.go new file mode 100644 index 0000000..b9856da --- /dev/null +++ b/pebble/internal/keyspan/defragment_test.go @@ -0,0 +1,271 @@ +// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package keyspan + +import ( + "bytes" + "fmt" + "math/rand" + "sort" + "strings" + "testing" + "time" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/testkeys" + "github.com/pmezard/go-difflib/difflib" +) + +func TestDefragmentingIter(t *testing.T) { + comparer := testkeys.Comparer + cmp := comparer.Compare + internalEqual := DefragmentInternal + alwaysEqual := DefragmentMethodFunc(func(_ base.Equal, _, _ *Span) bool { return true }) + staticReducer := StaticDefragmentReducer + collectReducer := func(cur, next []Key) []Key { + c := keysBySeqNumKind(append(cur, next...)) + sort.Sort(&c) + return c + } + + var buf bytes.Buffer + var spans []Span + datadriven.RunTest(t, "testdata/defragmenting_iter", func(t *testing.T, td *datadriven.TestData) string { + buf.Reset() + switch td.Cmd { + case "define": + spans = spans[:0] + lines := strings.Split(strings.TrimSpace(td.Input), "\n") + for _, line := range lines { + spans = append(spans, ParseSpan(line)) + } + return "" + case "iter": + equal := internalEqual + reducer := staticReducer + var probes []probe + for _, cmdArg := range td.CmdArgs { + switch cmd := cmdArg.Key; cmd { + case "equal": + if len(cmdArg.Vals) != 1 { + return fmt.Sprintf("only one equal func expected; got %d", len(cmdArg.Vals)) + } + switch val := cmdArg.Vals[0]; val { + case "internal": + equal = internalEqual + case "always": + equal = alwaysEqual + default: + return fmt.Sprintf("unknown reducer %s", val) + } + case "reducer": + if len(cmdArg.Vals) != 1 { + return fmt.Sprintf("only one reducer expected; got %d", len(cmdArg.Vals)) + } + switch val := cmdArg.Vals[0]; val { + case "collect": + reducer = collectReducer + case "static": + reducer = staticReducer + default: + return fmt.Sprintf("unknown reducer %s", val) + } + case "probes": + probes = parseProbes(cmdArg.Vals...) 
+ default: + return fmt.Sprintf("unknown command: %s", cmd) + } + } + var miter MergingIter + miter.Init(cmp, noopTransform, new(MergingBuffers), NewIter(cmp, spans)) + innerIter := attachProbes(&miter, probeContext{log: &buf}, probes...) + var iter DefragmentingIter + iter.Init(comparer, innerIter, equal, reducer, new(DefragmentingBuffers)) + for _, line := range strings.Split(td.Input, "\n") { + runIterOp(&buf, &iter, line) + fmt.Fprintln(&buf) + } + return strings.TrimSpace(buf.String()) + default: + return fmt.Sprintf("unrecognized command %q", td.Cmd) + } + }) +} + +func TestDefragmentingIter_Randomized(t *testing.T) { + seed := time.Now().UnixNano() + for i := int64(0); i < 100; i++ { + testDefragmentingIteRandomizedOnce(t, seed+i) + } +} + +func TestDefragmentingIter_RandomizedFixedSeed(t *testing.T) { + const seed = 1648173101214881000 + testDefragmentingIteRandomizedOnce(t, seed) +} + +func testDefragmentingIteRandomizedOnce(t *testing.T, seed int64) { + comparer := testkeys.Comparer + cmp := comparer.Compare + formatKey := comparer.FormatKey + + rng := rand.New(rand.NewSource(seed)) + t.Logf("seed = %d", seed) + + // Use a key space of alphanumeric strings, with a random max length between + // 1-2. Repeat keys are more common at the lower max lengths. + ks := testkeys.Alpha(rng.Intn(2) + 1) + + // Generate between 1-15 range keys. + const maxRangeKeys = 15 + var original, fragmented []Span + numRangeKeys := 1 + rng.Intn(maxRangeKeys) + for i := 0; i < numRangeKeys; i++ { + startIdx := rng.Int63n(ks.Count()) + endIdx := rng.Int63n(ks.Count()) + for startIdx == endIdx { + endIdx = rng.Int63n(ks.Count()) + } + if startIdx > endIdx { + startIdx, endIdx = endIdx, startIdx + } + + key := Key{ + Trailer: base.MakeTrailer(uint64(i), base.InternalKeyKindRangeKeySet), + Value: []byte(fmt.Sprintf("v%d", rng.Intn(3))), + } + // Generate suffixes 0, 1, 2, or 3 with 0 indicating none. 
+ if suffix := rng.Int63n(4); suffix > 0 { + key.Suffix = testkeys.Suffix(suffix) + } + original = append(original, Span{ + Start: testkeys.Key(ks, startIdx), + End: testkeys.Key(ks, endIdx), + Keys: []Key{key}, + }) + + for startIdx < endIdx { + width := rng.Int63n(endIdx-startIdx) + 1 + fragmented = append(fragmented, Span{ + Start: testkeys.Key(ks, startIdx), + End: testkeys.Key(ks, startIdx+width), + Keys: []Key{key}, + }) + startIdx += width + } + } + + // Both the original and the deliberately fragmented spans may contain + // overlaps, so we need to sort and fragment them. + original = fragment(cmp, formatKey, original) + fragmented = fragment(cmp, formatKey, fragmented) + + var originalInner MergingIter + originalInner.Init(cmp, noopTransform, new(MergingBuffers), NewIter(cmp, original)) + var fragmentedInner MergingIter + fragmentedInner.Init(cmp, noopTransform, new(MergingBuffers), NewIter(cmp, fragmented)) + + var referenceIter, fragmentedIter DefragmentingIter + referenceIter.Init(comparer, &originalInner, DefragmentInternal, StaticDefragmentReducer, new(DefragmentingBuffers)) + fragmentedIter.Init(comparer, &fragmentedInner, DefragmentInternal, StaticDefragmentReducer, new(DefragmentingBuffers)) + + // Generate 100 random operations and run them against both iterators. 
+ const numIterOps = 100 + type opKind struct { + weight int + fn func() string + } + ops := []opKind{ + {weight: 2, fn: func() string { return "first" }}, + {weight: 2, fn: func() string { return "last" }}, + {weight: 50, fn: func() string { return "next" }}, + {weight: 50, fn: func() string { return "prev" }}, + {weight: 5, fn: func() string { + k := testkeys.Key(ks, rng.Int63n(ks.Count())) + return fmt.Sprintf("seekge(%s)", k) + }}, + {weight: 5, fn: func() string { + k := testkeys.Key(ks, rng.Int63n(ks.Count())) + return fmt.Sprintf("seeklt(%s)", k) + }}, + } + var totalWeight int + for _, op := range ops { + totalWeight += op.weight + } + var referenceHistory, fragmentedHistory bytes.Buffer + for i := 0; i < numIterOps; i++ { + p := rng.Intn(totalWeight) + opIndex := 0 + if i == 0 { + // First op is always a First(). + } else { + for i, op := range ops { + if p < op.weight { + opIndex = i + break + } + p -= op.weight + } + } + op := ops[opIndex].fn() + runIterOp(&referenceHistory, &referenceIter, op) + runIterOp(&fragmentedHistory, &fragmentedIter, op) + if !bytes.Equal(referenceHistory.Bytes(), fragmentedHistory.Bytes()) { + t.Fatal(debugContext(cmp, formatKey, original, fragmented, + referenceHistory.String(), fragmentedHistory.String())) + } + fmt.Fprintln(&referenceHistory) + fmt.Fprintln(&fragmentedHistory) + } +} + +func fragment(cmp base.Compare, formatKey base.FormatKey, spans []Span) []Span { + Sort(cmp, spans) + var fragments []Span + f := Fragmenter{ + Cmp: cmp, + Format: formatKey, + Emit: func(f Span) { + fragments = append(fragments, f) + }, + } + for _, s := range spans { + f.Add(s) + } + f.Finish() + return fragments +} + +func debugContext( + cmp base.Compare, + formatKey base.FormatKey, + original, fragmented []Span, + refHistory, fragHistory string, +) string { + var buf bytes.Buffer + fmt.Fprintln(&buf, "Reference:") + for _, s := range original { + fmt.Fprintln(&buf, s) + } + fmt.Fprintln(&buf) + fmt.Fprintln(&buf, "Fragmented:") + for _, 
s := range fragmented { + fmt.Fprintln(&buf, s) + } + fmt.Fprintln(&buf) + fmt.Fprintln(&buf, "\nOperations diff:") + diff, err := difflib.GetUnifiedDiffString(difflib.UnifiedDiff{ + A: difflib.SplitLines(refHistory), + B: difflib.SplitLines(fragHistory), + Context: 5, + }) + if err != nil { + panic(err) + } + fmt.Fprintln(&buf, diff) + return buf.String() +} diff --git a/pebble/internal/keyspan/doc.go b/pebble/internal/keyspan/doc.go new file mode 100644 index 0000000..e05aad2 --- /dev/null +++ b/pebble/internal/keyspan/doc.go @@ -0,0 +1,13 @@ +// Package keyspan provides facilities for sorting, fragmenting and +// iterating over spans of user keys. +// +// A Span represents a range of user key space with an inclusive start +// key and exclusive end key. A span may hold any number of Keys which are +// applied over the entirety of the span's keyspace. +// +// Spans are used within Pebble as an in-memory representation of range +// deletion tombstones, and range key sets, unsets and deletes. Spans +// are fragmented at overlapping key boundaries by the Fragmenter type. +// This package's various iteration facilities require these +// non-overlapping fragmented spans. +package keyspan diff --git a/pebble/internal/keyspan/filter.go b/pebble/internal/keyspan/filter.go new file mode 100644 index 0000000..a63a43c --- /dev/null +++ b/pebble/internal/keyspan/filter.go @@ -0,0 +1,115 @@ +// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package keyspan + +import "github.com/cockroachdb/pebble/internal/base" + +// FilterFunc defines a transform from the input Span into the output Span. The +// function returns true if the Span should be returned by the iterator, and +// false if the Span should be skipped. 
The FilterFunc is permitted to mutate
+// the output Span, for example, to elide certain keys, or update the Span's
+// bounds if so desired. The output Span's Keys slice may be reused to reduce
+// allocations.
+type FilterFunc func(in *Span, out *Span) (keep bool)
+
+// filteringIter is a FragmentIterator that uses a FilterFunc to select which
+// Spans from the input iterator are returned in the output.
+//
+// A note on Span lifetimes: as the FilterFunc reuses a Span with a mutable
+// slice of Keys to reduce allocations, Spans returned by this iterator are only
+// valid until the next relative or absolute positioning method is called.
+type filteringIter struct {
+	iter     FragmentIterator
+	filterFn FilterFunc
+	cmp      base.Compare
+
+	// span is a mutable Span passed to the filterFn. The filterFn is free to
+	// mutate this Span. The slice of Keys in the Span is reused with every call
+	// to the filterFn.
+	span Span
+}
+
+var _ FragmentIterator = (*filteringIter)(nil)
+
+// Filter returns a new filteringIter that will filter the Spans from the
+// provided child iterator using the provided FilterFunc.
+func Filter(iter FragmentIterator, filter FilterFunc, cmp base.Compare) FragmentIterator {
+	return &filteringIter{iter: iter, filterFn: filter, cmp: cmp}
+}
+
+// SeekGE implements FragmentIterator.
+func (i *filteringIter) SeekGE(key []byte) *Span {
+	span := i.filter(i.iter.SeekGE(key), +1)
+	// i.filter could return a span that's less than key, _if_ the filterFunc
+	// (which has no knowledge of the seek key) mutated the span to end at a key
+	// less than or equal to `key`. Detect this case and next/invalidate the iter.
+	if span != nil && i.cmp(span.End, key) <= 0 {
+		return i.Next()
+	}
+	return span
+}
+
+// SeekLT implements FragmentIterator.
+func (i *filteringIter) SeekLT(key []byte) *Span { + span := i.filter(i.iter.SeekLT(key), -1) + // i.filter could return a span that's >= key, _if_ the filterFunc (which has + // no knowledge of the seek key) mutated the span to start at a key greater + // than or equal to `key`. Detect this case and prev/invalidate the iter. + if span != nil && i.cmp(span.Start, key) >= 0 { + return i.Prev() + } + return span +} + +// First implements FragmentIterator. +func (i *filteringIter) First() *Span { + return i.filter(i.iter.First(), +1) +} + +// Last implements FragmentIterator. +func (i *filteringIter) Last() *Span { + return i.filter(i.iter.Last(), -1) +} + +// Next implements FragmentIterator. +func (i *filteringIter) Next() *Span { + return i.filter(i.iter.Next(), +1) +} + +// Prev implements FragmentIterator. +func (i *filteringIter) Prev() *Span { + return i.filter(i.iter.Prev(), -1) +} + +// Error implements FragmentIterator. +func (i *filteringIter) Error() error { + return i.iter.Error() +} + +// Close implements FragmentIterator. +func (i *filteringIter) Close() error { + return i.iter.Close() +} + +// filter uses the filterFn (if configured) to filter and possibly mutate the +// given Span. If the current Span is to be skipped, the iterator continues +// iterating in the given direction until it lands on a Span that should be +// returned, or the iterator becomes invalid. +func (i *filteringIter) filter(span *Span, dir int8) *Span { + if i.filterFn == nil { + return span + } + for i.Error() == nil && span != nil { + if keep := i.filterFn(span, &i.span); keep { + return &i.span + } + if dir == +1 { + span = i.iter.Next() + } else { + span = i.iter.Prev() + } + } + return span +} diff --git a/pebble/internal/keyspan/filter_test.go b/pebble/internal/keyspan/filter_test.go new file mode 100644 index 0000000..beb4de8 --- /dev/null +++ b/pebble/internal/keyspan/filter_test.go @@ -0,0 +1,79 @@ +// Copyright 2022 The LevelDB-Go and Pebble Authors. 
All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package keyspan + +import ( + "fmt" + "strings" + "testing" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/testkeys" +) + +func TestFilteringIter(t *testing.T) { + // makeFilter returns a FilterFunc that will filter out all keys in a Span + // that are not of the given kind. Empty spans are skipped. + makeFilter := func(kind base.InternalKeyKind) FilterFunc { + return func(in *Span, out *Span) (keep bool) { + out.Start, out.End = in.Start, in.End + out.Keys = out.Keys[:0] + for _, k := range in.Keys { + if k.Kind() != kind { + continue + } + out.Keys = append(out.Keys, k) + } + return len(out.Keys) > 0 + } + } + + cmp := testkeys.Comparer.Compare + var spans []Span + datadriven.RunTest(t, "testdata/filtering_iter", func(t *testing.T, td *datadriven.TestData) string { + switch cmd := td.Cmd; cmd { + case "define": + spans = spans[:0] + lines := strings.Split(strings.TrimSpace(td.Input), "\n") + for _, line := range lines { + spans = append(spans, ParseSpan(line)) + } + return "" + + case "iter": + var filter FilterFunc + for _, cmdArg := range td.CmdArgs { + switch cmdArg.Key { + case "filter": + for _, s := range cmdArg.Vals { + switch s { + case "no-op": + filter = nil + case "key-kind-set": + filter = makeFilter(base.InternalKeyKindRangeKeySet) + case "key-kind-unset": + filter = makeFilter(base.InternalKeyKindRangeKeyUnset) + case "key-kind-del": + filter = makeFilter(base.InternalKeyKindRangeKeyDelete) + default: + return fmt.Sprintf("unknown filter: %s", s) + } + } + default: + return fmt.Sprintf("unknown command: %s", cmdArg.Key) + } + } + innerIter := NewIter(cmp, spans) + iter := Filter(innerIter, filter, cmp) + defer iter.Close() + s := runFragmentIteratorCmd(iter, td.Input, nil) + return s + + default: + return fmt.Sprintf("unknown command: %s", 
cmd) + } + }) +} diff --git a/pebble/internal/keyspan/fragmenter.go b/pebble/internal/keyspan/fragmenter.go new file mode 100644 index 0000000..d4a410d --- /dev/null +++ b/pebble/internal/keyspan/fragmenter.go @@ -0,0 +1,483 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package keyspan + +import ( + "fmt" + "sort" + + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/invariants" +) + +type spansByStartKey struct { + cmp base.Compare + buf []Span +} + +func (v *spansByStartKey) Len() int { return len(v.buf) } +func (v *spansByStartKey) Less(i, j int) bool { + return v.cmp(v.buf[i].Start, v.buf[j].Start) < 0 +} +func (v *spansByStartKey) Swap(i, j int) { + v.buf[i], v.buf[j] = v.buf[j], v.buf[i] +} + +type spansByEndKey struct { + cmp base.Compare + buf []Span +} + +func (v *spansByEndKey) Len() int { return len(v.buf) } +func (v *spansByEndKey) Less(i, j int) bool { + return v.cmp(v.buf[i].End, v.buf[j].End) < 0 +} +func (v *spansByEndKey) Swap(i, j int) { + v.buf[i], v.buf[j] = v.buf[j], v.buf[i] +} + +// keysBySeqNumKind sorts spans by the start key's sequence number in +// descending order. If two spans have equal sequence number, they're compared +// by key kind in descending order. This ordering matches the ordering of +// base.InternalCompare among keys with matching user keys. +type keysBySeqNumKind []Key + +func (v *keysBySeqNumKind) Len() int { return len(*v) } +func (v *keysBySeqNumKind) Less(i, j int) bool { return (*v)[i].Trailer > (*v)[j].Trailer } +func (v *keysBySeqNumKind) Swap(i, j int) { (*v)[i], (*v)[j] = (*v)[j], (*v)[i] } + +// Sort the spans by start key. This is the ordering required by the +// Fragmenter. Usually spans are naturally sorted by their start key, +// but that isn't true for range deletion tombstones in the legacy +// range-del-v1 block format. 
+func Sort(cmp base.Compare, spans []Span) { + sorter := spansByStartKey{ + cmp: cmp, + buf: spans, + } + sort.Sort(&sorter) +} + +// Fragmenter fragments a set of spans such that overlapping spans are +// split at their overlap points. The fragmented spans are output to the +// supplied Output function. +type Fragmenter struct { + Cmp base.Compare + Format base.FormatKey + // Emit is called to emit a fragmented span and its keys. Every key defined + // within the emitted Span applies to the entirety of the Span's key span. + // Keys are ordered in decreasing order of their sequence numbers, and if + // equal, decreasing order of key kind. + Emit func(Span) + // pending contains the list of pending fragments that have not been + // flushed to the block writer. Note that the spans have not been + // fragmented on the end keys yet. That happens as the spans are + // flushed. All pending spans have the same Start. + pending []Span + // doneBuf is used to buffer completed span fragments when flushing to a + // specific key (e.g. TruncateAndFlushTo). It is cached in the Fragmenter to + // allow reuse. + doneBuf []Span + // sortBuf is used to sort fragments by end key when flushing. + sortBuf spansByEndKey + // flushBuf is used to sort keys by (seqnum,kind) before emitting. + flushBuf keysBySeqNumKind + // flushedKey is the key that fragments have been flushed up to. Any + // additional spans added to the fragmenter must have a start key >= + // flushedKey. A nil value indicates flushedKey has not been set. 
+ flushedKey []byte + finished bool +} + +func (f *Fragmenter) checkInvariants(buf []Span) { + for i := 1; i < len(buf); i++ { + if f.Cmp(buf[i].Start, buf[i].End) >= 0 { + panic(fmt.Sprintf("pebble: empty pending span invariant violated: %s", buf[i])) + } + if f.Cmp(buf[i-1].Start, buf[i].Start) != 0 { + panic(fmt.Sprintf("pebble: pending span invariant violated: %s %s", + f.Format(buf[i-1].Start), f.Format(buf[i].Start))) + } + } +} + +// Add adds a span to the fragmenter. Spans may overlap and the +// fragmenter will internally split them. The spans must be presented in +// increasing start key order. That is, Add must be called with a series +// of spans like: +// +// a---e +// c---g +// c-----i +// j---n +// j-l +// +// We need to fragment the spans at overlap points. In the above +// example, we'd create: +// +// a-c-e +// c-e-g +// c-e-g-i +// j-l-n +// j-l +// +// The fragments need to be output sorted by start key, and for equal start +// keys, sorted by descending sequence number. This last part requires a mild +// bit of care as the fragments are not created in descending sequence number +// order. +// +// Once a start key has been seen, we know that we'll never see a smaller +// start key and can thus flush all of the fragments that lie before that +// start key. +// +// Walking through the example above, we start with: +// +// a---e +// +// Next we add [c,g) resulting in: +// +// a-c-e +// c---g +// +// The fragment [a,c) is flushed leaving the pending spans as: +// +// c-e +// c---g +// +// The next span is [c,i): +// +// c-e +// c---g +// c-----i +// +// No fragments are flushed. The next span is [j,n): +// +// c-e +// c---g +// c-----i +// j---n +// +// The fragments [c,e), [c,g) and [c,i) are flushed. 
We sort these fragments +// by their end key, then split the fragments on the end keys: +// +// c-e +// c-e-g +// c-e---i +// +// The [c,e) fragments all get flushed leaving: +// +// e-g +// e---i +// +// This process continues until there are no more fragments to flush. +// +// WARNING: the slices backing Start, End, Keys, Key.Suffix and Key.Value are +// all retained after this method returns and should not be modified. This is +// safe for spans that are added from a memtable or batch. It is partially +// unsafe for a span read from an sstable. Specifically, the Keys slice of a +// Span returned during sstable iteration is only valid until the next iterator +// operation. The stability of the user keys depend on whether the block is +// prefix compressed, and in practice Pebble never prefix compresses range +// deletion and range key blocks, so these keys are stable. Because of this key +// stability, typically callers only need to perform a shallow clone of the Span +// before Add-ing it to the fragmenter. +// +// Add requires the provided span's keys are sorted in Trailer descending order. +func (f *Fragmenter) Add(s Span) { + if f.finished { + panic("pebble: span fragmenter already finished") + } else if s.KeysOrder != ByTrailerDesc { + panic("pebble: span keys unexpectedly not in trailer descending order") + } + if f.flushedKey != nil { + switch c := f.Cmp(s.Start, f.flushedKey); { + case c < 0: + panic(fmt.Sprintf("pebble: start key (%s) < flushed key (%s)", + f.Format(s.Start), f.Format(f.flushedKey))) + } + } + if f.Cmp(s.Start, s.End) >= 0 { + // An empty span, we can ignore it. + return + } + if invariants.RaceEnabled { + f.checkInvariants(f.pending) + defer func() { f.checkInvariants(f.pending) }() + } + + if len(f.pending) > 0 { + // Since all of the pending spans have the same start key, we only need + // to compare against the first one. 
+		switch c := f.Cmp(f.pending[0].Start, s.Start); {
+		case c > 0:
+			panic(fmt.Sprintf("pebble: keys must be added in order: %s > %s",
+				f.Format(f.pending[0].Start), f.Format(s.Start)))
+		case c == 0:
+			// The new span has the same start key as the existing pending
+			// spans. Add it to the pending buffer.
+			f.pending = append(f.pending, s)
+			return
+		}
+
+		// At this point we know that the new start key is greater than the pending
+		// spans start keys.
+		f.truncateAndFlush(s.Start)
+	}
+
+	f.pending = append(f.pending, s)
+}
+
+// Cover is returned by Fragmenter.Covers and describes a span's relationship to
+// a key at a particular snapshot.
+type Cover int8
+
+const (
+	// NoCover indicates the tested key does not fall within the span's bounds,
+	// or the span contains no keys with sequence numbers higher than the key's.
+	NoCover Cover = iota
+	// CoversInvisibly indicates the tested key does fall within the span's
+	// bounds and the span contains at least one key with a higher sequence
+	// number, but none visible at the provided snapshot.
+	CoversInvisibly
+	// CoversVisibly indicates the tested key does fall within the span's
+	// bounds, and the span contains at least one key with a sequence number
+	// higher than the key's sequence number that is visible at the provided
+	// snapshot.
+	CoversVisibly
+)
+
+// Covers returns an enum indicating whether the specified key is covered by one
+// of the pending keys. The provided key must be consistent with the ordering of
+// the spans. That is, it is invalid to specify a key here that is out of order
+// with the span start keys passed to Add.
+func (f *Fragmenter) Covers(key base.InternalKey, snapshot uint64) Cover { + if f.finished { + panic("pebble: span fragmenter already finished") + } + if len(f.pending) == 0 { + return NoCover + } + + if f.Cmp(f.pending[0].Start, key.UserKey) > 0 { + panic(fmt.Sprintf("pebble: keys must be in order: %s > %s", + f.Format(f.pending[0].Start), key.Pretty(f.Format))) + } + + cover := NoCover + seqNum := key.SeqNum() + for _, s := range f.pending { + if f.Cmp(key.UserKey, s.End) < 0 { + // NB: A range deletion tombstone does not delete a point operation + // at the same sequence number, and broadly a span is not considered + // to cover a point operation at the same sequence number. + + for i := range s.Keys { + if kseq := s.Keys[i].SeqNum(); kseq > seqNum { + // This key from the span has a higher sequence number than + // `key`. It covers `key`, although the span's key might not + // be visible if its snapshot is too high. + // + // Batch keys are always visible. + if kseq < snapshot || kseq&base.InternalKeySeqNumBatch != 0 { + return CoversVisibly + } + // s.Keys[i] is not visible. + cover = CoversInvisibly + } + } + } + } + return cover +} + +// Empty returns true if all fragments added so far have finished flushing. +func (f *Fragmenter) Empty() bool { + return f.finished || len(f.pending) == 0 +} + +// TruncateAndFlushTo flushes all of the fragments with a start key <= key, +// truncating spans to the specified end key. Used during compaction to force +// emitting of spans which straddle an sstable boundary. Consider +// the scenario: +// +// a---------k#10 +// f#8 +// f#7 +// +// Let's say the next user key after f is g. Calling TruncateAndFlushTo(g) will +// flush this span: +// +// a-------g#10 +// f#8 +// f#7 +// +// And leave this one in f.pending: +// +// g----k#10 +// +// WARNING: The fragmenter could hold on to the specified end key. 
Ensure it's +// a safe byte slice that could outlast the current sstable output, and one +// that will never be modified. +func (f *Fragmenter) TruncateAndFlushTo(key []byte) { + if f.finished { + panic("pebble: span fragmenter already finished") + } + if f.flushedKey != nil { + switch c := f.Cmp(key, f.flushedKey); { + case c < 0: + panic(fmt.Sprintf("pebble: start key (%s) < flushed key (%s)", + f.Format(key), f.Format(f.flushedKey))) + } + } + if invariants.RaceEnabled { + f.checkInvariants(f.pending) + defer func() { f.checkInvariants(f.pending) }() + } + if len(f.pending) > 0 { + // Since all of the pending spans have the same start key, we only need + // to compare against the first one. + switch c := f.Cmp(f.pending[0].Start, key); { + case c > 0: + panic(fmt.Sprintf("pebble: keys must be added in order: %s > %s", + f.Format(f.pending[0].Start), f.Format(key))) + case c == 0: + return + } + } + f.truncateAndFlush(key) +} + +// Start returns the start key of the first span in the pending buffer, or nil +// if there are no pending spans. The start key of all pending spans is the same +// as that of the first one. +func (f *Fragmenter) Start() []byte { + if len(f.pending) > 0 { + return f.pending[0].Start + } + return nil +} + +// Flushes all pending spans up to key (exclusive). +// +// WARNING: The specified key is stored without making a copy, so all callers +// must ensure it is safe. +func (f *Fragmenter) truncateAndFlush(key []byte) { + f.flushedKey = append(f.flushedKey[:0], key...) + done := f.doneBuf[:0] + pending := f.pending + f.pending = f.pending[:0] + + // pending and f.pending share the same underlying storage. As we iterate + // over pending we append to f.pending, but only one entry is appended in + // each iteration, after we have read the entry being overwritten. 
+ for _, s := range pending { + if f.Cmp(key, s.End) < 0 { + // s: a--+--e + // new: c------ + if f.Cmp(s.Start, key) < 0 { + done = append(done, Span{ + Start: s.Start, + End: key, + Keys: s.Keys, + }) + } + f.pending = append(f.pending, Span{ + Start: key, + End: s.End, + Keys: s.Keys, + }) + } else { + // s: a-----e + // new: e---- + done = append(done, s) + } + } + + f.doneBuf = done[:0] + f.flush(done, nil) +} + +// flush a group of range spans to the block. The spans are required to all have +// the same start key. We flush all span fragments until startKey > lastKey. If +// lastKey is nil, all span fragments are flushed. The specification of a +// non-nil lastKey occurs for range deletion tombstones during compaction where +// we want to flush (but not truncate) all range tombstones that start at or +// before the first key in the next sstable. Consider: +// +// a---e#10 +// a------h#9 +// +// If a compaction splits the sstables at key c we want the first sstable to +// contain the tombstones [a,e)#10 and [a,e)#9. Fragmentation would naturally +// produce a tombstone [e,h)#9, but we don't need to output that tombstone to +// the first sstable. +func (f *Fragmenter) flush(buf []Span, lastKey []byte) { + if invariants.RaceEnabled { + f.checkInvariants(buf) + } + + // Sort the spans by end key. This will allow us to walk over the spans and + // easily determine the next split point (the smallest end-key). + f.sortBuf.cmp = f.Cmp + f.sortBuf.buf = buf + sort.Sort(&f.sortBuf) + + // Loop over the spans, splitting by end key. + for len(buf) > 0 { + // A prefix of spans will end at split. remove represents the count of + // that prefix. + remove := 1 + split := buf[0].End + f.flushBuf = append(f.flushBuf[:0], buf[0].Keys...) + + for i := 1; i < len(buf); i++ { + if f.Cmp(split, buf[i].End) == 0 { + remove++ + } + f.flushBuf = append(f.flushBuf, buf[i].Keys...) 
+ } + + sort.Sort(&f.flushBuf) + + f.Emit(Span{ + Start: buf[0].Start, + End: split, + // Copy the sorted keys to a new slice. + // + // This allocation is an unfortunate side effect of the Fragmenter and + // the expectation that the spans it produces are available in-memory + // indefinitely. + // + // Eventually, we should be able to replace the fragmenter with the + // keyspan.MergingIter which will perform just-in-time + // fragmentation, and only guaranteeing the memory lifetime for the + // current span. The MergingIter fragments while only needing to + // access one Span per level. It only accesses the Span at the + // current position for each level. During compactions, we can write + // these spans to sstables without retaining previous Spans. + Keys: append([]Key(nil), f.flushBuf...), + }) + + if lastKey != nil && f.Cmp(split, lastKey) > 0 { + break + } + + // Adjust the start key for every remaining span. + buf = buf[remove:] + for i := range buf { + buf[i].Start = split + } + } +} + +// Finish flushes any remaining fragments to the output. It is an error to call +// this if any other spans will be added. +func (f *Fragmenter) Finish() { + if f.finished { + panic("pebble: span fragmenter already finished") + } + f.flush(f.pending, nil) + f.finished = true +} diff --git a/pebble/internal/keyspan/fragmenter_test.go b/pebble/internal/keyspan/fragmenter_test.go new file mode 100644 index 0000000..6916f15 --- /dev/null +++ b/pebble/internal/keyspan/fragmenter_test.go @@ -0,0 +1,320 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package keyspan + +import ( + "bytes" + "fmt" + "regexp" + "strconv" + "strings" + "testing" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/pebble/internal/base" + "github.com/stretchr/testify/require" +) + +var spanRe = regexp.MustCompile(`(\d+):\s*(\w+)-*(\w+)\w*([^\n]*)`) + +func parseSpanSingleKey(t *testing.T, s string, kind base.InternalKeyKind) Span { + m := spanRe.FindStringSubmatch(s) + if len(m) != 5 { + t.Fatalf("expected 5 components, but found %d: %s", len(m), s) + } + seqNum, err := strconv.Atoi(m[1]) + require.NoError(t, err) + return Span{ + Start: []byte(m[2]), + End: []byte(m[3]), + Keys: []Key{ + { + Trailer: base.MakeTrailer(uint64(seqNum), kind), + Value: []byte(strings.TrimSpace(m[4])), + }, + }, + } +} + +func buildSpans( + t *testing.T, cmp base.Compare, formatKey base.FormatKey, s string, kind base.InternalKeyKind, +) []Span { + var spans []Span + f := &Fragmenter{ + Cmp: cmp, + Format: formatKey, + Emit: func(fragmented Span) { + spans = append(spans, fragmented) + }, + } + for _, line := range strings.Split(s, "\n") { + if strings.HasPrefix(line, "truncate-and-flush-to ") { + parts := strings.Split(line, " ") + if len(parts) != 2 { + t.Fatalf("expected 2 components, but found %d: %s", len(parts), line) + } + f.TruncateAndFlushTo([]byte(parts[1])) + continue + } + + f.Add(parseSpanSingleKey(t, line, kind)) + } + f.Finish() + return spans +} + +func formatAlphabeticSpans(spans []Span) string { + isLetter := func(b []byte) bool { + if len(b) != 1 { + return false + } + return b[0] >= 'a' && b[0] <= 'z' + } + + var buf bytes.Buffer + for _, v := range spans { + switch { + case !v.Valid(): + fmt.Fprintf(&buf, "\n") + case v.Empty(): + fmt.Fprintf(&buf, "\n") + case !isLetter(v.Start) || !isLetter(v.End) || v.Start[0] == v.End[0]: + for _, k := range v.Keys { + fmt.Fprintf(&buf, "%d: %s-%s", k.SeqNum(), v.Start, v.End) + if len(k.Value) > 0 { + buf.WriteString(strings.Repeat(" ", int('z'-v.End[0]+1))) + 
buf.WriteString(string(k.Value)) + } + fmt.Fprintln(&buf) + } + default: + for _, k := range v.Keys { + fmt.Fprintf(&buf, "%d: %s%s%s%s", + k.SeqNum(), + strings.Repeat(" ", int(v.Start[0]-'a')), + v.Start, + strings.Repeat("-", int(v.End[0]-v.Start[0]-1)), + v.End) + if len(k.Value) > 0 { + buf.WriteString(strings.Repeat(" ", int('z'-v.End[0]+1))) + buf.WriteString(string(k.Value)) + } + fmt.Fprintln(&buf) + } + } + } + return buf.String() +} + +func TestFragmenter(t *testing.T) { + cmp := base.DefaultComparer.Compare + fmtKey := base.DefaultComparer.FormatKey + + var getRe = regexp.MustCompile(`(\w+)#(\d+)`) + + parseGet := func(t *testing.T, s string) (string, int) { + m := getRe.FindStringSubmatch(s) + if len(m) != 3 { + t.Fatalf("expected 3 components, but found %d", len(m)) + } + seq, err := strconv.Atoi(m[2]) + require.NoError(t, err) + return m[1], seq + } + + var iter FragmentIterator + + // Returns true if the specified pair is deleted at the specified + // read sequence number. Get ignores spans newer than the read sequence + // number. This is a simple version of what full processing of range + // tombstones looks like. 
+ deleted := func(key []byte, seq, readSeq uint64) bool { + s := Get(cmp, iter, key) + return s != nil && s.CoversAt(readSeq, seq) + } + + datadriven.RunTest(t, "testdata/fragmenter", func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "build": + return func() (result string) { + defer func() { + if r := recover(); r != nil { + result = fmt.Sprint(r) + } + }() + + spans := buildSpans(t, cmp, fmtKey, d.Input, base.InternalKeyKindRangeDelete) + iter = NewIter(cmp, spans) + return formatAlphabeticSpans(spans) + }() + + case "get": + if len(d.CmdArgs) != 1 { + return fmt.Sprintf("expected 1 argument, but found %s", d.CmdArgs) + } + if d.CmdArgs[0].Key != "t" { + return fmt.Sprintf("expected timestamp argument, but found %s", d.CmdArgs[0]) + } + readSeq, err := strconv.Atoi(d.CmdArgs[0].Vals[0]) + require.NoError(t, err) + + var results []string + for _, p := range strings.Split(d.Input, " ") { + key, seq := parseGet(t, p) + if deleted([]byte(key), uint64(seq), uint64(readSeq)) { + results = append(results, "deleted") + } else { + results = append(results, "alive") + } + } + return strings.Join(results, " ") + + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} + +func TestFragmenterCovers(t *testing.T) { + datadriven.RunTest(t, "testdata/fragmenter_covers", func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "build": + f := &Fragmenter{ + Cmp: base.DefaultComparer.Compare, + Format: base.DefaultComparer.FormatKey, + Emit: func(fragmented Span) { + }, + } + var buf bytes.Buffer + for _, line := range strings.Split(d.Input, "\n") { + switch { + case strings.HasPrefix(line, "add "): + t := parseSpanSingleKey(t, strings.TrimPrefix(line, "add "), base.InternalKeyKindRangeDelete) + f.Add(t) + case strings.HasPrefix(line, "deleted "): + fields := strings.Fields(strings.TrimPrefix(line, "deleted ")) + key := base.ParseInternalKey(fields[0]) + snapshot, err := strconv.ParseUint(fields[1], 10, 64) + if err != 
nil { + return err.Error() + } + func() { + defer func() { + if r := recover(); r != nil { + fmt.Fprintf(&buf, "%s: %s\n", key, r) + } + }() + switch f.Covers(key, snapshot) { + case NoCover: + fmt.Fprintf(&buf, "%s: none\n", key) + case CoversInvisibly: + fmt.Fprintf(&buf, "%s: invisibly\n", key) + case CoversVisibly: + fmt.Fprintf(&buf, "%s: visibly\n", key) + } + }() + } + } + return buf.String() + + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} + +func TestFragmenterTruncateAndFlushTo(t *testing.T) { + cmp := base.DefaultComparer.Compare + fmtKey := base.DefaultComparer.FormatKey + + datadriven.RunTest(t, "testdata/fragmenter_truncate_and_flush_to", func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "build": + return func() (result string) { + defer func() { + if r := recover(); r != nil { + result = fmt.Sprint(r) + } + }() + + spans := buildSpans(t, cmp, fmtKey, d.Input, base.InternalKeyKindRangeDelete) + return formatAlphabeticSpans(spans) + }() + + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} + +func TestFragmenter_Values(t *testing.T) { + cmp := base.DefaultComparer.Compare + fmtKey := base.DefaultComparer.FormatKey + + datadriven.RunTest(t, "testdata/fragmenter_values", func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "build": + return func() (result string) { + defer func() { + if r := recover(); r != nil { + result = fmt.Sprint(r) + } + }() + + spans := buildSpans(t, cmp, fmtKey, d.Input, base.InternalKeyKindRangeKeySet) + return formatAlphabeticSpans(spans) + }() + + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} + +func TestFragmenter_EmitOrder(t *testing.T) { + var buf bytes.Buffer + + datadriven.RunTest(t, "testdata/fragmenter_emit_order", func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "build": + buf.Reset() + f := Fragmenter{ + Cmp: base.DefaultComparer.Compare, + Format: 
base.DefaultComparer.FormatKey, + Emit: func(span Span) { + fmt.Fprintf(&buf, "%s %s:", + base.DefaultComparer.FormatKey(span.Start), + base.DefaultComparer.FormatKey(span.End)) + for i, k := range span.Keys { + if i == 0 { + fmt.Fprint(&buf, " ") + } else { + fmt.Fprint(&buf, ", ") + } + fmt.Fprintf(&buf, "#%d,%s", k.SeqNum(), k.Kind()) + } + fmt.Fprintln(&buf, "\n-") + }, + } + for _, line := range strings.Split(d.Input, "\n") { + fields := strings.Fields(line) + if len(fields) != 2 { + panic(fmt.Sprintf("datadriven test: expect 2 fields, found %d", len(fields))) + } + k := base.ParseInternalKey(fields[0]) + f.Add(Span{ + Start: k.UserKey, + End: []byte(fields[1]), + Keys: []Key{{Trailer: k.Trailer}}, + }) + } + + f.Finish() + return buf.String() + default: + panic(fmt.Sprintf("unrecognized command %q", d.Cmd)) + } + }) +} diff --git a/pebble/internal/keyspan/get.go b/pebble/internal/keyspan/get.go new file mode 100644 index 0000000..c07f8c8 --- /dev/null +++ b/pebble/internal/keyspan/get.go @@ -0,0 +1,53 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package keyspan + +import "github.com/cockroachdb/pebble/internal/base" + +// Get returns the newest span that contains the target key. If no span +// contains the target key, an empty span is returned. The snapshot +// parameter controls the visibility of spans (only spans older than the +// snapshot sequence number are visible). The iterator must contain +// fragmented spans: no span may overlap another. +func Get(cmp base.Compare, iter FragmentIterator, key []byte) *Span { + // NB: We use SeekLT in order to land on the proper span for a search + // key that resides in the middle of a span. Consider the scenario: + // + // a---e + // e---i + // + // The spans are indexed by their start keys `a` and `e`. If the + // search key is `c` we want to land on the span [a,e). 
If we were + // to use SeekGE then the search key `c` would land on the span + // [e,i) and we'd have to backtrack. The one complexity here is what + // happens for the search key `e`. In that case SeekLT will land us + // on the span [a,e) and we'll have to move forward. + iterSpan := iter.SeekLT(key) + if iterSpan == nil { + iterSpan = iter.Next() + if iterSpan == nil { + // The iterator is empty. + return nil + } + if cmp(key, iterSpan.Start) < 0 { + // The search key lies before the first span. + return nil + } + } + + // Invariant: key > iterSpan.Start + if cmp(key, iterSpan.End) >= 0 { + // The current span lies before the search key. Advance the iterator + // once to potentially land on a key with a start key exactly equal to + // key. (See the comment at the beginning of this function.) + iterSpan = iter.Next() + if iterSpan == nil || cmp(key, iterSpan.Start) < 0 { + // We've run out of spans or we've moved on to a span which + // starts after our search key. + return nil + } + } + return iterSpan +} diff --git a/pebble/internal/keyspan/interleaving_iter.go b/pebble/internal/keyspan/interleaving_iter.go new file mode 100644 index 0000000..e1fd600 --- /dev/null +++ b/pebble/internal/keyspan/interleaving_iter.go @@ -0,0 +1,1149 @@ +// Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package keyspan + +import ( + "context" + "fmt" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/invariants" +) + +// A SpanMask may be used to configure an interleaving iterator to skip point +// keys that fall within the bounds of some spans. +type SpanMask interface { + // SpanChanged is invoked by an interleaving iterator whenever the current + // span changes. As the iterator passes into or out of a Span, it invokes + // SpanChanged, passing the new Span. 
When the iterator passes out of a + // span's boundaries and is no longer covered by any span, SpanChanged is + // invoked with a nil span. + // + // SpanChanged is invoked before SkipPoint, and callers may use SpanChanged + // to recalculate state used by SkipPoint for masking. + // + // SpanChanged may be invoked consecutively with identical spans under some + // circumstances, such as repeatedly absolutely positioning an iterator to + // positions covered by the same span, or while changing directions. + SpanChanged(*Span) + // SkipPoint is invoked by the interleaving iterator whenever the iterator + // encounters a point key covered by a Span. If SkipPoint returns true, the + // interleaving iterator skips the point key and all larger keys with the + // same prefix. This is used during range key iteration to skip over point + // keys 'masked' by range keys. + SkipPoint(userKey []byte) bool +} + +// InterleavingIter combines an iterator over point keys with an iterator over +// key spans. +// +// Throughout Pebble, some keys apply at single discrete points within the user +// keyspace. Other keys apply over continuous spans of the user key space. +// Internally, iterators over point keys adhere to the base.InternalIterator +// interface, and iterators over spans adhere to the keyspan.FragmentIterator +// interface. The InterleavingIterator wraps a point iterator and span iterator, +// providing access to all the elements of both iterators. +// +// The InterleavingIterator implements the point base.InternalIterator +// interface. After any of the iterator's methods return a key, a caller may +// call Span to retrieve the span covering the returned key, if any. A span is +// considered to 'cover' a returned key if the span's [start, end) bounds +// include the key's user key. +// +// In addition to tracking the current covering span, InterleavingIter returns a +// special InternalKey at span start boundaries. 
Start boundaries are surfaced +// as a synthetic span marker: an InternalKey with the boundary as the user key, +// the infinite sequence number and a key kind selected from an arbitrary key +// contained within the span (an arbitrary contained key's kind). Since +// which of the Span's key's kind is surfaced is undefined, the caller should +// not use the InternalKey's kind. The caller should only rely on the `Span` +// method for retrieving information about spanning keys. The interleaved +// synthetic keys have the infinite sequence number so that they're interleaved +// before any point keys with the same user key when iterating forward and after +// when iterating backward. +// +// Interleaving the synthetic start key boundaries at the maximum sequence +// number provides an opportunity for the higher-level, public Iterator to +// observe the Span, even if no live point keys exist within the bounds of the +// Span. +// +// When returning a synthetic marker key for a start boundary, InterleavingIter +// will truncate the span's start bound to the SeekGE or SeekPrefixGE search +// key. For example, a SeekGE("d") that finds a span [a, z) may return a +// synthetic span marker key `d#72057594037927935,21`. +// +// If bounds have been applied to the iterator through SetBounds, +// InterleavingIter will truncate the bounds of spans returned through Span to +// the set bounds. The bounds returned through Span are not truncated by a +// SeekGE or SeekPrefixGE search key. Consider, for example SetBounds('c', 'e'), +// with an iterator containing the Span [a,z): +// +// First() = `c#72057594037927935,21` Span() = [c,e) +// SeekGE('d') = `d#72057594037927935,21` Span() = [c,e) +// +// InterleavingIter does not interleave synthetic markers for spans that do not +// contain any keys. +// +// # SpanMask +// +// InterleavingIter takes a SpanMask parameter that may be used to configure the +// behavior of the iterator. See the documentation on the SpanMask type.
+// +// All spans containing keys are exposed during iteration. +type InterleavingIter struct { + cmp base.Compare + comparer *base.Comparer + pointIter base.InternalIterator + keyspanIter FragmentIterator + mask SpanMask + + // lower and upper hold the iteration bounds set through SetBounds. + lower, upper []byte + // keyBuf is used to copy SeekGE or SeekPrefixGE arguments when they're used + // to truncate a span. The byte slices backing a SeekGE/SeekPrefixGE search + // keys can come directly from the end user, so they're copied into keyBuf + // to ensure key stability. + keyBuf []byte + // nextPrefixBuf is used during SeekPrefixGE calls to store the truncated + // upper bound of the returned spans. SeekPrefixGE truncates the returned + // spans to an upper bound of the seeked prefix's immediate successor. + nextPrefixBuf []byte + pointKey *base.InternalKey + pointVal base.LazyValue + // err holds an iterator error from either pointIter or keyspanIter. It's + // reset to nil on seeks. An overview of error-handling mechanics: + // + // Whenever either pointIter or keyspanIter is respositioned and a nil + // key/span is returned, the code performing the positioning is responsible + // for checking the iterator's Error() value. This happens in savePoint and + // saveSpan[Forward,Backward]. + // + // Once i.err is non-nil, the computation of i.pos must set i.pos = + // posExhausted. This happens in compute[Smallest|Largest]Pos and + // [next|prev]Pos. Setting i.pos to posExhausted ensures we'll yield nil to + // the caller, which they'll interpret as a signal they must check Error(). + // + // INVARIANTS: + // i.err != nil => i.pos = posExhausted + err error + // prefix records the iterator's current prefix if the iterator is in prefix + // mode. During prefix mode, Pebble will truncate spans to the next prefix. 
+ // If the iterator subsequently leaves prefix mode, the existing span cached + // in i.span must be invalidated because its bounds do not reflect the + // original span's true bounds. + prefix []byte + // span holds the span at the keyspanIter's current position. If the span is + // wholly contained within the iterator bounds, this span is directly + // returned to the iterator consumer through Span(). If either bound needed + // to be truncated to the iterator bounds, then truncated is set to true and + // Span() must return a pointer to truncatedSpan. + span *Span + // spanMarker holds the synthetic key that is returned when the iterator + // passes over a key span's start bound. + spanMarker base.InternalKey + // truncated indicates whether or not the span at the current position + // needed to be truncated. If it did, truncatedSpan holds the truncated + // span that should be returned. + truncatedSpan Span + truncated bool + + // Keeping all of the bools/uint8s together reduces the sizeof the struct. + + // pos encodes the current position of the iterator: exhausted, on the point + // key, on a keyspan start, or on a keyspan end. + pos interleavePos + // withinSpan indicates whether the iterator is currently positioned within + // the bounds of the current span (i.span). withinSpan must be updated + // whenever the interleaving iterator's position enters or exits the bounds + // of a span. + withinSpan bool + // spanMarkerTruncated is set by SeekGE/SeekPrefixGE calls that truncate a + // span's start bound marker to the search key. It's returned to false on + // the next repositioning of the keyspan iterator. + spanMarkerTruncated bool + // maskSpanChangedCalled records whether or not the last call to + // SpanMask.SpanChanged provided the current span (i.span) or not. + maskSpanChangedCalled bool + // dir indicates the direction of iteration: forward (+1) or backward (-1) + dir int8 +} + +// interleavePos indicates the iterator's current position. 
Note that both +// keyspanStart and keyspanEnd positions correspond to their user key boundaries +// with maximal sequence numbers. This means in the forward direction +// posKeyspanStart and posKeyspanEnd are always interleaved before a posPointKey +// with the same user key. +type interleavePos int8 + +const ( + posUninitialized interleavePos = iota + posExhausted + posPointKey + posKeyspanStart + posKeyspanEnd +) + +// Assert that *InterleavingIter implements the InternalIterator interface. +var _ base.InternalIterator = &InterleavingIter{} + +// InterleavingIterOpts holds options configuring the behavior of a +// InterleavingIter. +type InterleavingIterOpts struct { + Mask SpanMask + LowerBound, UpperBound []byte +} + +// Init initializes the InterleavingIter to interleave point keys from pointIter +// with key spans from keyspanIter. +// +// The point iterator must already have the bounds provided on opts. Init does +// not propagate the bounds down the iterator stack. +func (i *InterleavingIter) Init( + comparer *base.Comparer, + pointIter base.InternalIterator, + keyspanIter FragmentIterator, + opts InterleavingIterOpts, +) { + *i = InterleavingIter{ + cmp: comparer.Compare, + comparer: comparer, + pointIter: pointIter, + keyspanIter: keyspanIter, + mask: opts.Mask, + lower: opts.LowerBound, + upper: opts.UpperBound, + } +} + +// InitSeekGE may be called after Init but before any positioning method. +// InitSeekGE initializes the current position of the point iterator and then +// performs a SeekGE on the keyspan iterator using the provided key. InitSeekGE +// returns whichever point or keyspan key is smaller. After InitSeekGE, the +// iterator is positioned and may be repositioned using relative positioning +// methods. +// +// This method is used specifically for lazily constructing combined iterators. +// It allows for seeding the iterator with the current position of the point +// iterator. 
+func (i *InterleavingIter) InitSeekGE( + prefix, key []byte, pointKey *base.InternalKey, pointValue base.LazyValue, +) (*base.InternalKey, base.LazyValue) { + i.dir = +1 + i.clearMask() + i.prefix = prefix + i.savePoint(pointKey, pointValue) + // NB: This keyspanSeekGE call will truncate the span to the seek key if + // necessary. This truncation is important for cases where a switch to + // combined iteration is made during a user-initiated SeekGE. + i.keyspanSeekGE(key, prefix) + i.computeSmallestPos() + return i.yieldPosition(key, i.nextPos) +} + +// InitSeekLT may be called after Init but before any positioning method. +// InitSeekLT initializes the current position of the point iterator and then +// performs a SeekLT on the keyspan iterator using the provided key. InitSeekLT +// returns whichever point or keyspan key is larger. After InitSeekLT, the +// iterator is positioned and may be repositioned using relative positioning +// methods. +// +// This method is used specifically for lazily constructing combined iterators. +// It allows for seeding the iterator with the current position of the point +// iterator. +func (i *InterleavingIter) InitSeekLT( + key []byte, pointKey *base.InternalKey, pointValue base.LazyValue, +) (*base.InternalKey, base.LazyValue) { + i.dir = -1 + i.clearMask() + i.savePoint(pointKey, pointValue) + i.keyspanSeekLT(key) + i.computeLargestPos() + return i.yieldPosition(i.lower, i.prevPos) +} + +// SeekGE implements (base.InternalIterator).SeekGE. +// +// If there exists a span with a start key ≤ the first matching point key, +// SeekGE will return a synthetic span marker key for the span. If this span's +// start key is less than key, the returned marker will be truncated to key. +// Note that this search-key truncation of the marker's key is not applied to +// the span returned by Span. 
//
// NB: In accordance with the base.InternalIterator contract:
//
//	i.lower ≤ key
func (i *InterleavingIter) SeekGE(
	key []byte, flags base.SeekGEFlags,
) (*base.InternalKey, base.LazyValue) {
	i.err = nil
	i.clearMask()
	i.disablePrefixMode()
	i.savePoint(i.pointIter.SeekGE(key, flags))

	// We need to seek the keyspan iterator too. If the keyspan iterator was
	// already positioned at a span, we might be able to avoid the seek if the
	// seek key falls within the existing span's bounds.
	if i.span != nil && i.cmp(key, i.span.End) < 0 && i.cmp(key, i.span.Start) >= 0 {
		// We're seeking within the existing span's bounds. We still might need
		// to truncate the span to the iterator's bounds.
		i.saveSpanForward(i.span)
		i.savedKeyspan()
	} else {
		i.keyspanSeekGE(key, nil /* prefix */)
	}

	i.dir = +1
	i.computeSmallestPos()
	return i.yieldPosition(key, i.nextPos)
}

// SeekPrefixGE implements (base.InternalIterator).SeekPrefixGE.
//
// If there exists a span with a start key ≤ the first matching point key,
// SeekPrefixGE will return a synthetic span marker key for the span. If this
// span's start key is less than key, the returned marker will be truncated to
// key. Note that this search-key truncation of the marker's key is not applied
// to the span returned by Span.
//
// NB: In accordance with the base.InternalIterator contract:
//
//	i.lower ≤ key
func (i *InterleavingIter) SeekPrefixGE(
	prefix, key []byte, flags base.SeekGEFlags,
) (*base.InternalKey, base.LazyValue) {
	i.err = nil
	i.clearMask()
	i.prefix = prefix
	i.savePoint(i.pointIter.SeekPrefixGE(prefix, key, flags))

	// We need to seek the keyspan iterator too. If the keyspan iterator was
	// already positioned at a span, we might be able to avoid the seek if the
	// entire seek prefix key falls within the existing span's bounds.
	//
	// During a SeekPrefixGE, Pebble defragments range keys within the bounds of
	// the prefix. For example, a SeekPrefixGE('c', 'c@8') must defragment any
	// overlapping range keys within the bounds of [c,c\00).
	//
	// If range keys are fragmented within a prefix (eg, because a version
	// within a prefix was chosen as an sstable boundary), then it's possible
	// the seek key falls into the current i.span, but the current i.span does
	// not wholly cover the seek prefix.
	//
	// For example, a SeekPrefixGE('d@5') may only defragment a range key to
	// the bounds of [c@2,e). A subsequent SeekPrefixGE('c@0') must re-seek the
	// keyspan iterator, because although 'c@0' is contained within [c@2,e), the
	// full span of the prefix is not.
	//
	// Similarly, a SeekPrefixGE('a@3') may only defragment a range key to the
	// bounds [a,c@8). A subsequent SeekPrefixGE('c@10') must re-seek the
	// keyspan iterator, because although 'c@10' is contained within [a,c@8),
	// the full span of the prefix is not.
	seekKeyspanIter := true
	if i.span != nil && i.cmp(prefix, i.span.Start) >= 0 {
		if ei := i.comparer.Split(i.span.End); i.cmp(prefix, i.span.End[:ei]) < 0 {
			// We're seeking within the existing span's bounds. We still might
			// need to truncate the span to the iterator's bounds.
			i.saveSpanForward(i.span)
			i.savedKeyspan()
			seekKeyspanIter = false
		}
	}
	if seekKeyspanIter {
		i.keyspanSeekGE(key, prefix)
	}

	i.dir = +1
	i.computeSmallestPos()
	return i.yieldPosition(key, i.nextPos)
}

// SeekLT implements (base.InternalIterator).SeekLT.
func (i *InterleavingIter) SeekLT(
	key []byte, flags base.SeekLTFlags,
) (*base.InternalKey, base.LazyValue) {
	i.err = nil
	i.clearMask()
	i.disablePrefixMode()
	i.savePoint(i.pointIter.SeekLT(key, flags))

	// We need to seek the keyspan iterator too. If the keyspan iterator was
	// already positioned at a span, we might be able to avoid the seek if the
	// seek key falls within the existing span's bounds.
	if i.span != nil && i.cmp(key, i.span.Start) > 0 && i.cmp(key, i.span.End) < 0 {
		// We're seeking within the existing span's bounds. We still might need
		// to truncate the span to the iterator's bounds.
		i.saveSpanBackward(i.span)
		// The span's start key is still not guaranteed to be less than key,
		// because of the bounds enforcement. Consider the following example:
		//
		// Bounds are set to [d,e). The user performs a SeekLT(d). The
		// FragmentIterator.SeekLT lands on a span [b,f). This span has a start
		// key less than d, as expected. Above, saveSpanBackward truncates the
		// span to match the iterator's current bounds, modifying the span to
		// [d,e), which does not overlap the search space of [-∞, d).
		//
		// This problem is a consequence of the SeekLT's exclusive search key
		// and the fact that we don't perform bounds truncation at every leaf
		// iterator.
		if i.span != nil && i.truncated && i.cmp(i.truncatedSpan.Start, key) >= 0 {
			i.span = nil
		}
		i.savedKeyspan()
	} else {
		i.keyspanSeekLT(key)
	}

	i.dir = -1
	i.computeLargestPos()
	return i.yieldPosition(i.lower, i.prevPos)
}

// First implements (base.InternalIterator).First.
func (i *InterleavingIter) First() (*base.InternalKey, base.LazyValue) {
	i.err = nil
	i.clearMask()
	i.disablePrefixMode()
	i.savePoint(i.pointIter.First())
	i.saveSpanForward(i.keyspanIter.First())
	i.savedKeyspan()
	i.dir = +1
	i.computeSmallestPos()
	return i.yieldPosition(i.lower, i.nextPos)
}

// Last implements (base.InternalIterator).Last.
func (i *InterleavingIter) Last() (*base.InternalKey, base.LazyValue) {
	i.err = nil
	i.clearMask()
	i.disablePrefixMode()
	i.savePoint(i.pointIter.Last())
	i.saveSpanBackward(i.keyspanIter.Last())
	i.savedKeyspan()
	i.dir = -1
	i.computeLargestPos()
	return i.yieldPosition(i.lower, i.prevPos)
}

// Next implements (base.InternalIterator).Next.
func (i *InterleavingIter) Next() (*base.InternalKey, base.LazyValue) {
	if i.dir == -1 {
		// Switching directions.
		i.dir = +1

		if i.mask != nil {
			// Clear the mask while we reposition the point iterator. While
			// switching directions, we may move the point iterator outside of
			// i.span's bounds.
			i.clearMask()
		}

		// When switching directions, iterator state corresponding to the
		// current iterator position (as indicated by i.pos) is already correct.
		// However any state that has yet to be interleaved describes a position
		// behind the current iterator position and needs to be updated to
		// describe the position ahead of the current iterator position.
		switch i.pos {
		case posExhausted:
			// Nothing to do. The below nextPos call will move both the point
			// key and span to their next positions and return
			// MIN(point,s.Start).
		case posPointKey:
			// If we're currently on a point key, the below nextPos will
			// correctly Next the point key iterator to the next point key.
			// Do we need to move the span forwards? If the current span lies
			// entirely behind the current key (!i.withinSpan), then we
			// need to move it to the first span in the forward direction.
			if !i.withinSpan {
				i.saveSpanForward(i.keyspanIter.Next())
				i.savedKeyspan()
			}
		case posKeyspanStart:
			i.withinSpan = true
			// Since we're positioned on a Span, the pointIter is positioned
			// entirely behind the current iterator position. Reposition it
			// ahead of the current iterator position.
			i.savePoint(i.pointIter.Next())
		case posKeyspanEnd:
			// Since we're positioned on a Span, the pointIter is positioned
			// entirely behind the current iterator position. Reposition it
			// ahead of the current iterator position.
			i.savePoint(i.pointIter.Next())
		}
		// Fallthrough to calling i.nextPos.
	}
	i.nextPos()
	return i.yieldPosition(i.lower, i.nextPos)
}

// NextPrefix implements (base.InternalIterator).NextPrefix.
+func (i *InterleavingIter) NextPrefix(succKey []byte) (*base.InternalKey, base.LazyValue) { + if i.dir == -1 { + panic("pebble: cannot switch directions with NextPrefix") + } + + switch i.pos { + case posExhausted: + return nil, base.LazyValue{} + case posPointKey: + i.savePoint(i.pointIter.NextPrefix(succKey)) + if i.withinSpan { + if i.pointKey == nil || i.cmp(i.span.End, i.pointKey.UserKey) <= 0 { + i.pos = posKeyspanEnd + } else { + i.pos = posPointKey + } + } else { + i.computeSmallestPos() + } + case posKeyspanStart, posKeyspanEnd: + i.nextPos() + } + return i.yieldPosition(i.lower, i.nextPos) +} + +// Prev implements (base.InternalIterator).Prev. +func (i *InterleavingIter) Prev() (*base.InternalKey, base.LazyValue) { + if i.dir == +1 { + // Switching directions. + i.dir = -1 + + if i.mask != nil { + // Clear the mask while we reposition the point iterator. While + // switching directions, we may move the point iterator outside of + // i.span's bounds. + i.clearMask() + } + + // When switching directions, iterator state corresponding to the + // current iterator position (as indicated by i.pos) is already correct. + // However any state that has yet to be interleaved describes a position + // ahead of the current iterator position and needs to be updated to + // describe the position behind the current iterator position. + switch i.pos { + case posExhausted: + // Nothing to do. The below prevPos call will move both the point + // key and span to previous positions and return MAX(point, s.End). + case posPointKey: + // If we're currently on a point key, the point iterator is in the + // right place and the call to prevPos will correctly Prev the point + // key iterator to the previous point key. Do we need to move the + // span backwards? If the current span lies entirely ahead of the + // current key (!i.withinSpan), then we need to move it to the first + // span in the reverse direction. 
+ if !i.withinSpan { + i.saveSpanBackward(i.keyspanIter.Prev()) + i.savedKeyspan() + } + case posKeyspanStart: + // Since we're positioned on a Span, the pointIter is positioned + // entirely ahead of the current iterator position. Reposition it + // behind the current iterator position. + i.savePoint(i.pointIter.Prev()) + // Without considering truncation of spans to seek keys, the keyspan + // iterator is already in the right place. But consider span [a, z) + // and this sequence of iterator calls: + // + // SeekGE('c') = c.RANGEKEYSET#72057594037927935 + // Prev() = a.RANGEKEYSET#72057594037927935 + // + // If the current span's start key was last surfaced truncated due + // to a SeekGE or SeekPrefixGE call, then it's still relevant in the + // reverse direction with an untruncated start key. + if i.spanMarkerTruncated { + // When we fallthrough to calling prevPos, we want to move to + // MAX(point, span.Start). We cheat here by claiming we're + // currently on the end boundary, so that we'll move on to the + // untruncated start key if necessary. + i.pos = posKeyspanEnd + } + case posKeyspanEnd: + // Since we're positioned on a Span, the pointIter is positioned + // entirely ahead of the current iterator position. Reposition it + // behind the current iterator position. + i.savePoint(i.pointIter.Prev()) + } + + if i.spanMarkerTruncated { + // Save the keyspan again to clear truncation. + i.savedKeyspan() + } + // Fallthrough to calling i.prevPos. 
+ } + i.prevPos() + return i.yieldPosition(i.lower, i.prevPos) +} + +// computeSmallestPos sets i.{pos,withinSpan} to: +// +// MIN(i.pointKey, i.span.Start) +func (i *InterleavingIter) computeSmallestPos() { + if i.err == nil { + if i.span != nil && (i.pointKey == nil || i.cmp(i.startKey(), i.pointKey.UserKey) <= 0) { + i.withinSpan = true + i.pos = posKeyspanStart + return + } + i.withinSpan = false + if i.pointKey != nil { + i.pos = posPointKey + return + } + } + i.pos = posExhausted +} + +// computeLargestPos sets i.{pos,withinSpan} to: +// +// MAX(i.pointKey, i.span.End) +func (i *InterleavingIter) computeLargestPos() { + if i.err == nil { + if i.span != nil && (i.pointKey == nil || i.cmp(i.span.End, i.pointKey.UserKey) > 0) { + i.withinSpan = true + i.pos = posKeyspanEnd + return + } + i.withinSpan = false + if i.pointKey != nil { + i.pos = posPointKey + return + } + } + i.pos = posExhausted +} + +// nextPos advances the iterator one position in the forward direction. +func (i *InterleavingIter) nextPos() { + if invariants.Enabled { + defer func() { + if i.err != nil && i.pos != posExhausted { + panic(errors.AssertionFailedf("iterator has accumulated error but i.pos = %d", i.pos)) + } + }() + } + // NB: If i.err != nil or any of the positioning methods performed in this + // function result in i.err != nil, we must set i.pos = posExhausted. We + // perform this check explicitly here, but if any of the branches below + // advance either iterator, they must also check i.err and set posExhausted + // if necessary. 
+ if i.err != nil { + i.pos = posExhausted + return + } + + switch i.pos { + case posExhausted: + i.savePoint(i.pointIter.Next()) + i.saveSpanForward(i.keyspanIter.Next()) + i.savedKeyspan() + i.computeSmallestPos() + case posPointKey: + i.savePoint(i.pointIter.Next()) + if i.err != nil { + i.pos = posExhausted + return + } + // If we're not currently within the span, we want to chose the + // MIN(pointKey,span.Start), which is exactly the calculation performed + // by computeSmallestPos. + if !i.withinSpan { + i.computeSmallestPos() + return + } + // i.withinSpan=true + // Since we previously were within the span, we want to choose the + // MIN(pointKey,span.End). + switch { + case i.span == nil: + panic("i.withinSpan=true and i.span=nil") + case i.pointKey == nil: + // Since i.withinSpan=true, we step onto the end boundary of the + // keyspan. + i.pos = posKeyspanEnd + default: + // i.withinSpan && i.pointKey != nil && i.span != nil + if i.cmp(i.span.End, i.pointKey.UserKey) <= 0 { + i.pos = posKeyspanEnd + } else { + i.pos = posPointKey + } + } + case posKeyspanStart: + // Either a point key or the span's end key comes next. + if i.pointKey != nil && i.cmp(i.pointKey.UserKey, i.span.End) < 0 { + i.pos = posPointKey + } else { + i.pos = posKeyspanEnd + } + case posKeyspanEnd: + i.saveSpanForward(i.keyspanIter.Next()) + i.savedKeyspan() + i.computeSmallestPos() + default: + panic(fmt.Sprintf("unexpected pos=%d", i.pos)) + } +} + +// prevPos advances the iterator one position in the reverse direction. +func (i *InterleavingIter) prevPos() { + if invariants.Enabled { + defer func() { + if i.err != nil && i.pos != posExhausted { + panic(errors.AssertionFailedf("iterator has accumulated error but i.pos = %d", i.pos)) + } + }() + } + // NB: If i.err != nil or any of the positioning methods performed in this + // function result in i.err != nil, we must set i.pos = posExhausted. 
We + // perform this check explicitly here, but if any of the branches below + // advance either iterator, they must also check i.err and set posExhausted + // if necessary. + if i.err != nil { + i.pos = posExhausted + return + } + + switch i.pos { + case posExhausted: + i.savePoint(i.pointIter.Prev()) + i.saveSpanBackward(i.keyspanIter.Prev()) + i.savedKeyspan() + i.computeLargestPos() + case posPointKey: + i.savePoint(i.pointIter.Prev()) + if i.err != nil { + i.pos = posExhausted + return + } + // If we're not currently covered by the span, we want to chose the + // MAX(pointKey,span.End), which is exactly the calculation performed + // by computeLargestPos. + if !i.withinSpan { + i.computeLargestPos() + return + } + switch { + case i.span == nil: + panic("withinSpan=true, but i.span == nil") + case i.pointKey == nil: + i.pos = posKeyspanEnd + default: + // i.withinSpan && i.pointKey != nil && i.span != nil + if i.cmp(i.span.Start, i.pointKey.UserKey) > 0 { + i.pos = posKeyspanStart + } else { + i.pos = posPointKey + } + } + case posKeyspanStart: + i.saveSpanBackward(i.keyspanIter.Prev()) + i.savedKeyspan() + i.computeLargestPos() + case posKeyspanEnd: + // Either a point key or the span's start key is previous. + if i.pointKey != nil && i.cmp(i.pointKey.UserKey, i.span.Start) >= 0 { + i.pos = posPointKey + } else { + i.pos = posKeyspanStart + } + default: + panic(fmt.Sprintf("unexpected pos=%d", i.pos)) + } +} + +func (i *InterleavingIter) yieldPosition( + lowerBound []byte, advance func(), +) (*base.InternalKey, base.LazyValue) { + // This loop returns the first visible position in the current iteration + // direction. Some positions are not visible and skipped. For example, if + // masking is enabled and the iterator is positioned over a masked point + // key, this loop skips the position. If a span's start key should be + // interleaved next, but the span is empty, the loop continues to the next + // key. 
Currently, span end keys are also always skipped, and are used only + // for maintaining internal state. + for { + switch i.pos { + case posExhausted: + return i.yieldNil() + case posPointKey: + if i.pointKey == nil { + panic("i.pointKey is nil") + } + + if i.mask != nil { + i.maybeUpdateMask() + if i.withinSpan && i.mask.SkipPoint(i.pointKey.UserKey) { + // The span covers the point key. If a SkipPoint hook is + // configured, ask it if we should skip this point key. + if i.prefix != nil { + // During prefix-iteration node, once a point is masked, + // all subsequent keys with the same prefix must also be + // masked according to the key ordering. We can stop and + // return nil. + // + // NB: The above is not just an optimization. During + // prefix-iteration mode, the internal iterator contract + // prohibits us from Next-ing beyond the first key + // beyond the iteration prefix. If we didn't already + // stop early, we would need to check if this masked + // point is already beyond the prefix. + return i.yieldNil() + } + // TODO(jackson): If we thread a base.Comparer through to + // InterleavingIter so that we have access to + // ImmediateSuccessor, we could use NextPrefix. We'd need to + // tweak the SpanMask interface slightly. + + // Advance beyond the masked point key. + advance() + continue + } + } + return i.yieldPointKey() + case posKeyspanEnd: + // Don't interleave end keys; just advance. + advance() + continue + case posKeyspanStart: + // Don't interleave an empty span. + if i.span.Empty() { + advance() + continue + } + return i.yieldSyntheticSpanMarker(lowerBound) + default: + panic(fmt.Sprintf("unexpected interleavePos=%d", i.pos)) + } + } +} + +// keyspanSeekGE seeks the keyspan iterator to the first span covering a key ≥ k. +func (i *InterleavingIter) keyspanSeekGE(k []byte, prefix []byte) { + i.saveSpanForward(i.keyspanIter.SeekGE(k)) + i.savedKeyspan() +} + +// keyspanSeekLT seeks the keyspan iterator to the last span covering a key < k. 
func (i *InterleavingIter) keyspanSeekLT(k []byte) {
	i.saveSpanBackward(i.keyspanIter.SeekLT(k))
	// The current span's start key is not guaranteed to be less than key,
	// because of the bounds enforcement. Consider the following example:
	//
	// Bounds are set to [d,e). The user performs a SeekLT(d). The
	// FragmentIterator.SeekLT lands on a span [b,f). This span has a start key
	// less than d, as expected. Above, saveSpanBackward truncates the span to
	// match the iterator's current bounds, modifying the span to [d,e), which
	// does not overlap the search space of [-∞, d).
	//
	// This problem is a consequence of the SeekLT's exclusive search key and
	// the fact that we don't perform bounds truncation at every leaf iterator.
	if i.span != nil && i.truncated && i.cmp(i.truncatedSpan.Start, k) >= 0 {
		i.span = nil
	}
	i.savedKeyspan()
}

// saveSpanForward saves the provided span as the current span during forward
// iteration, truncating it to the iterator's bounds (and, during
// SeekPrefixGE, to the prefix's bounds). A span lying entirely at or above
// the upper bound is discarded. A nil span folds any keyspan-iterator error
// into i.err.
func (i *InterleavingIter) saveSpanForward(span *Span) {
	i.span = span
	i.truncated = false
	i.truncatedSpan = Span{}
	if i.span == nil {
		i.err = firstError(i.err, i.keyspanIter.Error())
		return
	}
	if invariants.Enabled {
		if err := i.keyspanIter.Error(); err != nil {
			panic(errors.WithSecondaryError(
				errors.AssertionFailedf("pebble: %T keyspan iterator returned non-nil span %s while iter has error", i.keyspanIter, i.span),
				err))
		}
	}
	// Check the upper bound if we have one.
	if i.upper != nil && i.cmp(i.span.Start, i.upper) >= 0 {
		i.span = nil
		return
	}

	// TODO(jackson): The key comparisons below truncate bounds whenever the
	// keyspan iterator is repositioned. We could perform this lazily, and do it
	// the first time the user actually asks for this span's bounds in
	// SpanBounds. This would reduce work in the case where there's no span
	// covering the point and the keyspan iterator is non-empty.

	// NB: These truncations don't require setting `spanMarkerTruncated`:
	// That flag only applies to truncated span marker keys.
	if i.lower != nil && i.cmp(i.span.Start, i.lower) < 0 {
		i.truncated = true
		i.truncatedSpan = *i.span
		i.truncatedSpan.Start = i.lower
	}
	if i.upper != nil && i.cmp(i.upper, i.span.End) < 0 {
		if !i.truncated {
			i.truncated = true
			i.truncatedSpan = *i.span
		}
		i.truncatedSpan.End = i.upper
	}
	// If this is a part of a SeekPrefixGE call, we may also need to truncate to
	// the prefix's bounds.
	if i.prefix != nil {
		if !i.truncated {
			i.truncated = true
			i.truncatedSpan = *i.span
		}
		if i.cmp(i.prefix, i.truncatedSpan.Start) > 0 {
			i.truncatedSpan.Start = i.prefix
		}
		i.nextPrefixBuf = i.comparer.ImmediateSuccessor(i.nextPrefixBuf[:0], i.prefix)
		if i.truncated && i.cmp(i.nextPrefixBuf, i.truncatedSpan.End) < 0 {
			i.truncatedSpan.End = i.nextPrefixBuf
		}
	}

	if i.truncated && i.comparer.Equal(i.truncatedSpan.Start, i.truncatedSpan.End) {
		i.span = nil
	}
}

// saveSpanBackward saves the provided span as the current span during reverse
// iteration, truncating it to the iterator's bounds. A span lying entirely at
// or below the lower bound is discarded. A nil span folds any
// keyspan-iterator error into i.err.
func (i *InterleavingIter) saveSpanBackward(span *Span) {
	i.span = span
	i.truncated = false
	i.truncatedSpan = Span{}
	if i.span == nil {
		i.err = firstError(i.err, i.keyspanIter.Error())
		return
	}
	if invariants.Enabled {
		if err := i.keyspanIter.Error(); err != nil {
			panic(errors.WithSecondaryError(
				errors.AssertionFailedf("pebble: %T keyspan iterator returned non-nil span %s while iter has error", i.keyspanIter, i.span),
				err))
		}
	}

	// Check the lower bound if we have one.
	if i.lower != nil && i.cmp(i.span.End, i.lower) <= 0 {
		i.span = nil
		return
	}

	// TODO(jackson): The key comparisons below truncate bounds whenever the
	// keyspan iterator is repositioned. We could perform this lazily, and do it
	// the first time the user actually asks for this span's bounds in
	// SpanBounds. This would reduce work in the case where there's no span
	// covering the point and the keyspan iterator is non-empty.

	// NB: These truncations don't require setting `spanMarkerTruncated`:
	// That flag only applies to truncated span marker keys.
	if i.lower != nil && i.cmp(i.span.Start, i.lower) < 0 {
		i.truncated = true
		i.truncatedSpan = *i.span
		i.truncatedSpan.Start = i.lower
	}
	if i.upper != nil && i.cmp(i.upper, i.span.End) < 0 {
		if !i.truncated {
			i.truncated = true
			i.truncatedSpan = *i.span
		}
		i.truncatedSpan.End = i.upper
	}
	if i.truncated && i.comparer.Equal(i.truncatedSpan.Start, i.truncatedSpan.End) {
		i.span = nil
	}
}

// yieldNil surfaces iterator exhaustion, clearing span-coverage and mask
// state first.
func (i *InterleavingIter) yieldNil() (*base.InternalKey, base.LazyValue) {
	i.withinSpan = false
	i.clearMask()
	return i.verify(nil, base.LazyValue{})
}

// yieldPointKey surfaces the point iterator's current key and value.
func (i *InterleavingIter) yieldPointKey() (*base.InternalKey, base.LazyValue) {
	return i.verify(i.pointKey, i.pointVal)
}

// yieldSyntheticSpanMarker surfaces a synthetic key marking the start of the
// current span, truncated (via a stable copy in i.keyBuf) to lowerBound if
// necessary.
func (i *InterleavingIter) yieldSyntheticSpanMarker(
	lowerBound []byte,
) (*base.InternalKey, base.LazyValue) {
	i.spanMarker.UserKey = i.startKey()
	i.spanMarker.Trailer = base.MakeTrailer(base.InternalKeySeqNumMax, i.span.Keys[0].Kind())

	// Truncate the key we return to our lower bound if we have one. Note that
	// we use the lowerBound function parameter, not i.lower. The lowerBound
	// argument is guaranteed to be ≥ i.lower. It may be equal to the SetBounds
	// lower bound, or it could come from a SeekGE or SeekPrefixGE search key.
	if lowerBound != nil && i.cmp(lowerBound, i.startKey()) > 0 {
		// Truncating to the lower bound may violate the upper bound if
		// lowerBound == i.upper. For example, a SeekGE(k) uses k as a lower
		// bound for truncating a span. The span a-z will be truncated to [k,
		// z). If i.upper == k, we'd mistakenly try to return a span [k, k), an
		// invariant violation.
		if i.comparer.Equal(lowerBound, i.upper) {
			return i.yieldNil()
		}

		// If the lowerBound argument came from a SeekGE or SeekPrefixGE
		// call, it may be backed by a user-provided byte slice that is not
		// guaranteed to be stable.
		//
		// If the lowerBound argument is the lower bound set by SetBounds,
		// Pebble owns the slice's memory. However, consider two successive
		// calls to SetBounds(). The second may overwrite the lower bound.
		// Although the external contract requires a seek after a SetBounds,
		// Pebble's tests don't always. For this reason and to simplify
		// reasoning around lifetimes, always copy the bound into keyBuf when
		// truncating.
		i.keyBuf = append(i.keyBuf[:0], lowerBound...)
		i.spanMarker.UserKey = i.keyBuf
		i.spanMarkerTruncated = true
	}
	i.maybeUpdateMask()
	return i.verify(&i.spanMarker, base.LazyValue{})
}

// disablePrefixMode exits prefix-iteration mode, discarding the current span
// because it may have been truncated to the prefix's bounds rather than the
// underlying span's true bounds.
func (i *InterleavingIter) disablePrefixMode() {
	if i.prefix != nil {
		i.prefix = nil
		// Clear the existing span. It may not hold the true end bound of the
		// underlying span.
		i.span = nil
	}
}

// verify checks iterator invariants on the key being returned, passing the
// key and value through unchanged.
func (i *InterleavingIter) verify(
	k *base.InternalKey, v base.LazyValue,
) (*base.InternalKey, base.LazyValue) {
	// Wrap the entire function body in the invariants build tag, so that
	// production builds elide this entire function.
	if invariants.Enabled {
		switch {
		case i.dir == -1 && i.spanMarkerTruncated:
			panic("pebble: invariant violation: truncated span key in reverse iteration")
		case k != nil && i.lower != nil && i.cmp(k.UserKey, i.lower) < 0:
			panic("pebble: invariant violation: key < lower bound")
		case k != nil && i.upper != nil && i.cmp(k.UserKey, i.upper) >= 0:
			panic("pebble: invariant violation: key ≥ upper bound")
		case i.err != nil && k != nil:
			panic("pebble: invariant violation: accumulated error swallowed")
		case i.err == nil && i.pointIter.Error() != nil:
			panic("pebble: invariant violation: pointIter swallowed")
		case i.err == nil && i.keyspanIter.Error() != nil:
			panic("pebble: invariant violation: keyspanIter error swallowed")
		}
	}
	return k, v
}

// savedKeyspan resets per-span bookkeeping after the saved span changes: the
// synthetic marker has not been truncated for this span, and the mask has not
// yet been notified of it.
func (i *InterleavingIter) savedKeyspan() {
	i.spanMarkerTruncated = false
	i.maskSpanChangedCalled = false
}

// maybeUpdateMask updates the current mask, if a mask is configured and the
// mask hasn't been updated with the current keyspan yet.
+func (i *InterleavingIter) maybeUpdateMask() { + switch { + case i.mask == nil, i.maskSpanChangedCalled: + return + case !i.withinSpan || i.span.Empty(): + i.clearMask() + case i.truncated: + i.mask.SpanChanged(&i.truncatedSpan) + i.maskSpanChangedCalled = true + default: + i.mask.SpanChanged(i.span) + i.maskSpanChangedCalled = true + } +} + +// clearMask clears the current mask, if a mask is configured and no mask should +// be active. +func (i *InterleavingIter) clearMask() { + if i.mask != nil { + i.maskSpanChangedCalled = false + i.mask.SpanChanged(nil) + } +} + +func (i *InterleavingIter) startKey() []byte { + if i.truncated { + return i.truncatedSpan.Start + } + return i.span.Start +} + +func (i *InterleavingIter) savePoint(key *base.InternalKey, value base.LazyValue) { + i.pointKey, i.pointVal = key, value + if key == nil { + i.err = firstError(i.err, i.pointIter.Error()) + } + if invariants.Enabled { + if err := i.pointIter.Error(); key != nil && err != nil { + panic(errors.WithSecondaryError( + errors.AssertionFailedf("pebble: %T point iterator returned non-nil key %q while iter has error", i.pointIter, key), + err)) + } + } +} + +// Span returns the span covering the last key returned, if any. A span key is +// considered to 'cover' a key if the key falls within the span's user key +// bounds. The returned span is owned by the InterleavingIter. The caller is +// responsible for copying if stability is required. +// +// Span will never return an invalid or empty span. +func (i *InterleavingIter) Span() *Span { + if !i.withinSpan || len(i.span.Keys) == 0 { + return nil + } else if i.truncated { + return &i.truncatedSpan + } + return i.span +} + +// SetBounds implements (base.InternalIterator).SetBounds. +func (i *InterleavingIter) SetBounds(lower, upper []byte) { + i.lower, i.upper = lower, upper + i.pointIter.SetBounds(lower, upper) + i.Invalidate() +} + +// SetContext implements (base.InternalIterator).SetContext. 
func (i *InterleavingIter) SetContext(ctx context.Context) {
	i.pointIter.SetContext(ctx)
}

// Invalidate invalidates the interleaving iterator's current position, clearing
// its state. This prevents optimizations such as reusing the current span on
// seek.
func (i *InterleavingIter) Invalidate() {
	i.span = nil
	i.pointKey = nil
	i.pointVal = base.LazyValue{}
}

// Error implements (base.InternalIterator).Error.
func (i *InterleavingIter) Error() error {
	return i.err
}

// Close implements (base.InternalIterator).Close. Both child iterators are
// always closed; the first error encountered is returned.
func (i *InterleavingIter) Close() error {
	perr := i.pointIter.Close()
	rerr := i.keyspanIter.Close()
	return firstError(perr, rerr)
}

// String implements (base.InternalIterator).String.
func (i *InterleavingIter) String() string {
	return fmt.Sprintf("keyspan-interleaving(%q)", i.pointIter.String())
}

// firstError returns err0 if it is non-nil; otherwise it returns err1 (which
// may itself be nil).
func firstError(err0, err1 error) error {
	if err0 != nil {
		return err0
	}
	return err1
}
diff --git a/pebble/internal/keyspan/interleaving_iter_test.go b/pebble/internal/keyspan/interleaving_iter_test.go
new file mode 100644
index 0000000..116f037
--- /dev/null
+++ b/pebble/internal/keyspan/interleaving_iter_test.go
@@ -0,0 +1,291 @@
// Copyright 2021 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.
package keyspan

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"sort"
	"strings"
	"testing"

	"github.com/cockroachdb/datadriven"
	"github.com/cockroachdb/pebble/internal/base"
	"github.com/cockroachdb/pebble/internal/testkeys"
	"github.com/stretchr/testify/require"
)

func TestInterleavingIter(t *testing.T) {
	runInterleavingIterTest(t, "testdata/interleaving_iter")
}

func TestInterleavingIter_Masking(t *testing.T) {
	runInterleavingIterTest(t, "testdata/interleaving_iter_masking")
}

// maskingHooks implements the iterator's masking hooks for tests: it records
// SpanChanged calls to log and masks point keys whose suffix is older than
// the smallest eligible suffix within the current span.
type maskingHooks struct {
	log        io.Writer
	cmp        base.Compare
	split      base.Split
	threshold  []byte
	maskSuffix []byte
}

func (m *maskingHooks) SpanChanged(s *Span) {
	if m.log != nil {
		if s == nil {
			fmt.Fprintln(m.log, "-- SpanChanged(nil)")
		} else {
			fmt.Fprintf(m.log, "-- SpanChanged(%s)\n", s)
		}
	}

	// Find the smallest suffix of a key contained within the Span, excluding
	// suffixes less than m.threshold.
	m.maskSuffix = nil
	if s == nil || m.threshold == nil || len(s.Keys) == 0 {
		return
	}
	for i := range s.Keys {
		if s.Keys[i].Suffix == nil {
			continue
		}
		if m.cmp(s.Keys[i].Suffix, m.threshold) < 0 {
			continue
		}
		if m.maskSuffix == nil || m.cmp(m.maskSuffix, s.Keys[i].Suffix) > 0 {
			m.maskSuffix = s.Keys[i].Suffix
		}
	}
}

func (m *maskingHooks) SkipPoint(userKey []byte) bool {
	pointSuffix := userKey[m.split(userKey):]
	return m.maskSuffix != nil && len(pointSuffix) > 0 && m.cmp(m.maskSuffix, pointSuffix) < 0
}

// runInterleavingIterTest drives an InterleavingIter over datadriven test
// input from filename, printing the interleaved point key and covering span
// after each iterator command.
func runInterleavingIterTest(t *testing.T, filename string) {
	cmp := testkeys.Comparer.Compare
	var keyspanIter MergingIter
	var pointIter pointIterator
	var iter InterleavingIter
	var buf bytes.Buffer
	hooks := maskingHooks{
		log:   &buf,
		cmp:   testkeys.Comparer.Compare,
		split: testkeys.Comparer.Split,
	}

	var prevKey *base.InternalKey
	formatKey := func(k *base.InternalKey, _ base.LazyValue) {
		if k == nil {
			fmt.Fprint(&buf, ".")
			return
		}
		prevKey = k
		s := iter.Span()
		fmt.Fprintf(&buf, "PointKey: %s\n", k.String())
		if s != nil {
			fmt.Fprintf(&buf, "Span: %s\n-", s)
		} else {
			fmt.Fprintf(&buf, "Span: %s\n-", Span{})
		}
	}

	datadriven.RunTest(t, filename, func(t *testing.T, td *datadriven.TestData) string {
		buf.Reset()
		switch td.Cmd {
		case "set-masking-threshold":
			hooks.threshold = []byte(strings.TrimSpace(td.Input))
			return "OK"
		case "define-rangekeys":
			var spans []Span
			lines := strings.Split(strings.TrimSpace(td.Input), "\n")
			for _, line := range lines {
				spans = append(spans, ParseSpan(line))
			}
			keyspanIter.Init(cmp, noopTransform, new(MergingBuffers), NewIter(cmp, spans))
			hooks.maskSuffix = nil
			iter.Init(testkeys.Comparer, &pointIter, &keyspanIter,
				InterleavingIterOpts{Mask: &hooks})
			return "OK"
		case "define-pointkeys":
			var points []base.InternalKey
			lines := strings.Split(strings.TrimSpace(td.Input), "\n")
			for _, line := range lines {
				points = append(points, base.ParseInternalKey(line))
			}
			pointIter = pointIterator{cmp: cmp, keys: points}
			hooks.maskSuffix = nil
			iter.Init(testkeys.Comparer, &pointIter, &keyspanIter,
				InterleavingIterOpts{Mask: &hooks})
			return "OK"
		case "iter":
			buf.Reset()
			// Clear any previous bounds.
			iter.SetBounds(nil, nil)
			prevKey = nil
			lines := strings.Split(strings.TrimSpace(td.Input), "\n")
			for _, line := range lines {
				bufLen := buf.Len()
				line = strings.TrimSpace(line)
				i := strings.IndexByte(line, ' ')
				iterCmd := line
				if i > 0 {
					iterCmd = string(line[:i])
				}
				switch iterCmd {
				case "first":
					formatKey(iter.First())
				case "last":
					formatKey(iter.Last())
				case "next":
					formatKey(iter.Next())
				case "next-prefix":
					// NB: next-prefix requires a preceding command to have
					// positioned the iterator on a valid key (prevKey).
					succKey := testkeys.Comparer.ImmediateSuccessor(nil, prevKey.UserKey[:testkeys.Comparer.Split(prevKey.UserKey)])
					formatKey(iter.NextPrefix(succKey))
				case "prev":
					formatKey(iter.Prev())
				case "seek-ge":
					formatKey(iter.SeekGE([]byte(strings.TrimSpace(line[i:])), base.SeekGEFlagsNone))
				case "seek-prefix-ge":
					key := []byte(strings.TrimSpace(line[i:]))
					prefix := key[:testkeys.Comparer.Split(key)]
					formatKey(iter.SeekPrefixGE(prefix, key, base.SeekGEFlagsNone))
				case "seek-lt":
					formatKey(iter.SeekLT([]byte(strings.TrimSpace(line[i:])), base.SeekLTFlagsNone))
				case "set-bounds":
					bounds := strings.Fields(line[i:])
					if len(bounds) != 2 {
						return fmt.Sprintf("set-bounds expects 2 bounds, got %d", len(bounds))
					}
					l, u := []byte(bounds[0]), []byte(bounds[1])
					if bounds[0] == "." {
						l = nil
					}
					if bounds[1] == "." {
						u = nil
					}
					iter.SetBounds(l, u)
				default:
					return fmt.Sprintf("unrecognized iter command %q", iterCmd)
				}
				require.NoError(t, iter.Error())
				if buf.Len() > bufLen {
					fmt.Fprintln(&buf)
				}
			}
			return strings.TrimSpace(buf.String())
		default:
			return fmt.Sprintf("unrecognized command %q", td.Cmd)
		}
	})
	require.NoError(t, iter.Close())
}

// pointIterator is a simple in-memory base.InternalIterator over a slice of
// keys, used as the point-key input in tests. It uses sort.Search, so keys
// are expected to be ordered by cmp — callers supply them pre-sorted.
type pointIterator struct {
	cmp   base.Compare
	keys  []base.InternalKey
	lower []byte
	upper []byte
	index int
}

var _ base.InternalIterator = &pointIterator{}

func (i *pointIterator) SeekGE(
	key []byte, flags base.SeekGEFlags,
) (*base.InternalKey, base.LazyValue) {
	i.index = sort.Search(len(i.keys), func(j int) bool {
		return i.cmp(i.keys[j].UserKey, key) >= 0
	})
	if i.index < 0 || i.index >= len(i.keys) {
		return nil, base.LazyValue{}
	}
	if i.upper != nil && i.cmp(i.keys[i.index].UserKey, i.upper) >= 0 {
		return nil, base.LazyValue{}
	}
	return &i.keys[i.index], base.LazyValue{}
}

func (i *pointIterator) SeekPrefixGE(
	prefix, key []byte, flags base.SeekGEFlags,
) (*base.InternalKey, base.LazyValue) {
	return i.SeekGE(key, flags)
}

func (i *pointIterator) SeekLT(
	key []byte, flags base.SeekLTFlags,
) (*base.InternalKey, base.LazyValue) {
	i.index = sort.Search(len(i.keys), func(j int) bool {
		return i.cmp(i.keys[j].UserKey, key) >= 0
	})
	i.index--
	if i.index < 0 || i.index >= len(i.keys) {
		return nil, base.LazyValue{}
	}
	if i.lower != nil && i.cmp(i.keys[i.index].UserKey, i.lower) < 0 {
		return nil, base.LazyValue{}
	}
	return &i.keys[i.index], base.LazyValue{}
}

func (i *pointIterator) First() (*base.InternalKey, base.LazyValue) {
	i.index = 0
	if i.index < 0 || i.index >= len(i.keys) {
		return nil, base.LazyValue{}
	}
	if i.upper != nil && i.cmp(i.keys[i.index].UserKey, i.upper) >= 0 {
		return nil, base.LazyValue{}
	}
	return &i.keys[i.index], base.LazyValue{}
}

func (i *pointIterator) Last() (*base.InternalKey, base.LazyValue) {
	i.index = len(i.keys) - 1
	if i.index < 0 || i.index >= len(i.keys) {
		return nil, base.LazyValue{}
	}
	if i.lower != nil && i.cmp(i.keys[i.index].UserKey, i.lower) < 0 {
		return nil, base.LazyValue{}
	}
	return &i.keys[i.index], base.LazyValue{}
}

func (i *pointIterator) Next() (*base.InternalKey, base.LazyValue) {
	i.index++
	if i.index < 0 || i.index >= len(i.keys) {
		return nil, base.LazyValue{}
	}
	if i.upper != nil && i.cmp(i.keys[i.index].UserKey, i.upper) >= 0 {
		return nil, base.LazyValue{}
	}
	return &i.keys[i.index], base.LazyValue{}
}

func (i *pointIterator) NextPrefix(succKey []byte) (*base.InternalKey, base.LazyValue) {
	return i.SeekGE(succKey, base.SeekGEFlagsNone)
}

func (i *pointIterator) Prev() (*base.InternalKey, base.LazyValue) {
	i.index--
	if i.index < 0 || i.index >= len(i.keys) {
		return nil, base.LazyValue{}
	}
	if i.lower != nil && i.cmp(i.keys[i.index].UserKey, i.lower) < 0 {
		return nil, base.LazyValue{}
	}
	return &i.keys[i.index], base.LazyValue{}
}

func (i *pointIterator) Close() error   { return nil }
func (i *pointIterator) Error() error   { return nil }
func (i *pointIterator) String() string { return "test-point-iterator" }
func (i *pointIterator) SetBounds(lower, upper []byte) {
	i.lower, i.upper = lower, upper
}
func (i *pointIterator) SetContext(_ context.Context) {}
diff --git a/pebble/internal/keyspan/internal_iter_shim.go b/pebble/internal/keyspan/internal_iter_shim.go
new file mode 100644
index 0000000..bb9e37b
--- /dev/null
+++ b/pebble/internal/keyspan/internal_iter_shim.go
@@ -0,0 +1,125 @@
// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.
+ +package keyspan + +import ( + "context" + + "github.com/cockroachdb/pebble/internal/base" +) + +// InternalIteratorShim is a temporary iterator type used as a shim between +// keyspan.MergingIter and base.InternalIterator. It's used temporarily for +// range deletions during compactions, allowing range deletions to be +// interleaved by a compaction input iterator. +// +// TODO(jackson): This type should be removed, and the usages converted to using +// an InterleavingIterator type that interleaves keyspan.Spans from a +// keyspan.FragmentIterator with point keys. +type InternalIteratorShim struct { + miter MergingIter + mbufs MergingBuffers + span *Span + iterKey base.InternalKey +} + +// Assert that InternalIteratorShim implements InternalIterator. +var _ base.InternalIterator = &InternalIteratorShim{} + +// Init initializes the internal iterator shim to merge the provided fragment +// iterators. +func (i *InternalIteratorShim) Init(cmp base.Compare, iters ...FragmentIterator) { + i.miter.Init(cmp, noopTransform, &i.mbufs, iters...) +} + +// Span returns the span containing the full set of keys over the key span at +// the current iterator position. +func (i *InternalIteratorShim) Span() *Span { + return i.span +} + +// SeekGE implements (base.InternalIterator).SeekGE. +func (i *InternalIteratorShim) SeekGE( + key []byte, flags base.SeekGEFlags, +) (*base.InternalKey, base.LazyValue) { + panic("unimplemented") +} + +// SeekPrefixGE implements (base.InternalIterator).SeekPrefixGE. +func (i *InternalIteratorShim) SeekPrefixGE( + prefix, key []byte, flags base.SeekGEFlags, +) (*base.InternalKey, base.LazyValue) { + panic("unimplemented") +} + +// SeekLT implements (base.InternalIterator).SeekLT. +func (i *InternalIteratorShim) SeekLT( + key []byte, flags base.SeekLTFlags, +) (*base.InternalKey, base.LazyValue) { + panic("unimplemented") +} + +// First implements (base.InternalIterator).First. 
+func (i *InternalIteratorShim) First() (*base.InternalKey, base.LazyValue) { + i.span = i.miter.First() + for i.span != nil && i.span.Empty() { + i.span = i.miter.Next() + } + if i.span == nil { + return nil, base.LazyValue{} + } + i.iterKey = base.InternalKey{UserKey: i.span.Start, Trailer: i.span.Keys[0].Trailer} + return &i.iterKey, base.MakeInPlaceValue(i.span.End) +} + +// Last implements (base.InternalIterator).Last. +func (i *InternalIteratorShim) Last() (*base.InternalKey, base.LazyValue) { + panic("unimplemented") +} + +// Next implements (base.InternalIterator).Next. +func (i *InternalIteratorShim) Next() (*base.InternalKey, base.LazyValue) { + i.span = i.miter.Next() + for i.span != nil && i.span.Empty() { + i.span = i.miter.Next() + } + if i.span == nil { + return nil, base.LazyValue{} + } + i.iterKey = base.InternalKey{UserKey: i.span.Start, Trailer: i.span.Keys[0].Trailer} + return &i.iterKey, base.MakeInPlaceValue(i.span.End) +} + +// NextPrefix implements (base.InternalIterator).NextPrefix. +func (i *InternalIteratorShim) NextPrefix([]byte) (*base.InternalKey, base.LazyValue) { + panic("unimplemented") +} + +// Prev implements (base.InternalIterator).Prev. +func (i *InternalIteratorShim) Prev() (*base.InternalKey, base.LazyValue) { + panic("unimplemented") +} + +// Error implements (base.InternalIterator).Error. +func (i *InternalIteratorShim) Error() error { + return i.miter.Error() +} + +// Close implements (base.InternalIterator).Close. +func (i *InternalIteratorShim) Close() error { + return i.miter.Close() +} + +// SetBounds implements (base.InternalIterator).SetBounds. +func (i *InternalIteratorShim) SetBounds(lower, upper []byte) { +} + +// SetContext implements (base.InternalIterator).SetContext. +func (i *InternalIteratorShim) SetContext(_ context.Context) {} + +// String implements fmt.Stringer. 
+func (i *InternalIteratorShim) String() string { + return i.miter.String() +} diff --git a/pebble/internal/keyspan/iter.go b/pebble/internal/keyspan/iter.go new file mode 100644 index 0000000..7f8ceb8 --- /dev/null +++ b/pebble/internal/keyspan/iter.go @@ -0,0 +1,220 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package keyspan + +import ( + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/manifest" +) + +// FragmentIterator defines an iterator interface over spans. The spans +// surfaced by a FragmentIterator must be non-overlapping. This is achieved by +// fragmenting spans at overlap points (see Fragmenter). +// +// A Span returned by a FragmentIterator is only valid until the next +// positioning method. Some implementations (eg, keyspan.Iter) may provide +// longer lifetimes but implementations need only guarantee stability until the +// next positioning method. +type FragmentIterator interface { + // SeekGE moves the iterator to the first span covering a key greater than + // or equal to the given key. This is equivalent to seeking to the first + // span with an end key greater than the given key. + SeekGE(key []byte) *Span + + // SeekLT moves the iterator to the last span covering a key less than the + // given key. This is equivalent to seeking to the last span with a start + // key less than the given key. + SeekLT(key []byte) *Span + + // First moves the iterator to the first span. + First() *Span + + // Last moves the iterator to the last span. + Last() *Span + + // Next moves the iterator to the next span. + // + // It is valid to call Next when the iterator is positioned before the first + // key/value pair due to either a prior call to SeekLT or Prev which + // returned an invalid span. 
It is not allowed to call Next when the + // previous call to SeekGE, SeekPrefixGE or Next returned an invalid span. + Next() *Span + + // Prev moves the iterator to the previous span. + // + // It is valid to call Prev when the iterator is positioned after the last + // key/value pair due to either a prior call to SeekGE or Next which + // returned an invalid span. It is not allowed to call Prev when the + // previous call to SeekLT or Prev returned an invalid span. + Prev() *Span + + // Error returns any accumulated error. + // + // TODO(jackson): Lift errors into return values on the positioning methods. + Error() error + + // Close closes the iterator and returns any accumulated error. Exhausting + // the iterator is not considered to be an error. It is valid to call Close + // multiple times. Other methods should not be called after the iterator has + // been closed. + Close() error +} + +// TableNewSpanIter creates a new iterator for range key spans for the given +// file. +type TableNewSpanIter func(file *manifest.FileMetadata, iterOptions SpanIterOptions) (FragmentIterator, error) + +// SpanIterOptions is a subset of IterOptions that are necessary to instantiate +// per-sstable span iterators. +type SpanIterOptions struct { + // RangeKeyFilters can be used to avoid scanning tables and blocks in tables + // when iterating over range keys. + RangeKeyFilters []base.BlockPropertyFilter +} + +// Iter is an iterator over a set of fragmented spans. +type Iter struct { + cmp base.Compare + spans []Span + index int +} + +// Iter implements the FragmentIterator interface. +var _ FragmentIterator = (*Iter)(nil) + +// NewIter returns a new iterator over a set of fragmented spans. +func NewIter(cmp base.Compare, spans []Span) *Iter { + i := &Iter{} + i.Init(cmp, spans) + return i +} + +// Count returns the number of spans contained by Iter. +func (i *Iter) Count() int { + return len(i.spans) +} + +// Init initializes an Iter with the provided spans. 
+func (i *Iter) Init(cmp base.Compare, spans []Span) { + *i = Iter{ + cmp: cmp, + spans: spans, + index: -1, + } +} + +// SeekGE implements FragmentIterator.SeekGE. +func (i *Iter) SeekGE(key []byte) *Span { + // NB: manually inlined sort.Search is ~5% faster. + // + // Define f(j) = false iff the span i.spans[j] is strictly before `key` + // (equivalently, i.spans[j].End ≤ key.) + // + // Define f(-1) == false and f(n) == true. + // Invariant: f(index-1) == false, f(upper) == true. + i.index = 0 + upper := len(i.spans) + for i.index < upper { + h := int(uint(i.index+upper) >> 1) // avoid overflow when computing h + // i.index ≤ h < upper + if i.cmp(key, i.spans[h].End) >= 0 { + i.index = h + 1 // preserves f(i-1) == false + } else { + upper = h // preserves f(j) == true + } + } + + // i.index == upper, f(i.index-1) == false, and f(upper) (= f(i.index)) == + // true => answer is i.index. + if i.index >= len(i.spans) { + return nil + } + return &i.spans[i.index] +} + +// SeekLT implements FragmentIterator.SeekLT. +func (i *Iter) SeekLT(key []byte) *Span { + // NB: manually inlined sort.Search is ~5% faster. + // + // Define f(-1) == false and f(n) == true. + // Invariant: f(index-1) == false, f(upper) == true. + i.index = 0 + upper := len(i.spans) + for i.index < upper { + h := int(uint(i.index+upper) >> 1) // avoid overflow when computing h + // i.index ≤ h < upper + if i.cmp(key, i.spans[h].Start) > 0 { + i.index = h + 1 // preserves f(i-1) == false + } else { + upper = h // preserves f(j) == true + } + } + // i.index == upper, f(i.index-1) == false, and f(upper) (= f(i.index)) == + // true => answer is i.index. + + // Since keys are strictly increasing, if i.index > 0 then i.index-1 will be + // the largest whose key is < the key sought. + i.index-- + if i.index < 0 { + return nil + } + return &i.spans[i.index] +} + +// First implements FragmentIterator.First. 
+func (i *Iter) First() *Span { + if len(i.spans) == 0 { + return nil + } + i.index = 0 + return &i.spans[i.index] +} + +// Last implements FragmentIterator.Last. +func (i *Iter) Last() *Span { + if len(i.spans) == 0 { + return nil + } + i.index = len(i.spans) - 1 + return &i.spans[i.index] +} + +// Next implements FragmentIterator.Next. +func (i *Iter) Next() *Span { + if i.index >= len(i.spans) { + return nil + } + i.index++ + if i.index >= len(i.spans) { + return nil + } + return &i.spans[i.index] +} + +// Prev implements FragmentIterator.Prev. +func (i *Iter) Prev() *Span { + if i.index < 0 { + return nil + } + i.index-- + if i.index < 0 { + return nil + } + return &i.spans[i.index] +} + +// Error implements FragmentIterator.Error. +func (i *Iter) Error() error { + return nil +} + +// Close implements FragmentIterator.Close. +func (i *Iter) Close() error { + return nil +} + +func (i *Iter) String() string { + return "fragmented-spans" +} diff --git a/pebble/internal/keyspan/iter_test.go b/pebble/internal/keyspan/iter_test.go new file mode 100644 index 0000000..c269f3b --- /dev/null +++ b/pebble/internal/keyspan/iter_test.go @@ -0,0 +1,147 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package keyspan + +import ( + "bytes" + "fmt" + "strings" + "testing" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/pebble/internal/base" +) + +func runFragmentIteratorCmd(iter FragmentIterator, input string, extraInfo func() string) string { + var b bytes.Buffer + for _, line := range strings.Split(input, "\n") { + parts := strings.Fields(line) + if len(parts) == 0 { + continue + } + var span *Span + switch parts[0] { + case "seek-ge": + if len(parts) != 2 { + return "seek-ge \n" + } + span = iter.SeekGE([]byte(strings.TrimSpace(parts[1]))) + case "seek-lt": + if len(parts) != 2 { + return "seek-lt \n" + } + span = iter.SeekLT([]byte(strings.TrimSpace(parts[1]))) + case "first": + span = iter.First() + case "last": + span = iter.Last() + case "next": + span = iter.Next() + case "prev": + span = iter.Prev() + default: + return fmt.Sprintf("unknown op: %s", parts[0]) + } + if span != nil { + fmt.Fprintf(&b, "%s", span) + if extraInfo != nil { + fmt.Fprintf(&b, " (%s)", extraInfo()) + } + b.WriteByte('\n') + } else if err := iter.Error(); err != nil { + fmt.Fprintf(&b, "err=%v\n", err) + } else { + fmt.Fprintf(&b, ".\n") + } + } + return b.String() +} + +func TestIter(t *testing.T) { + var spans []Span + datadriven.RunTest(t, "testdata/iter", func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "define": + spans = nil + for _, line := range strings.Split(d.Input, "\n") { + spans = append(spans, ParseSpan(line)) + } + return "" + + case "iter": + iter := NewIter(base.DefaultComparer.Compare, spans) + defer iter.Close() + return runFragmentIteratorCmd(iter, d.Input, nil) + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} + +// invalidatingIter wraps a FragmentIterator and implements FragmentIterator +// itself. Spans surfaced by the inner iterator are copied to buffers that are +// zeroed by sbubsequent iterator positioning calls. 
This is intended to help +// surface bugs in improper lifetime expectations of Spans. +type invalidatingIter struct { + iter FragmentIterator + bufs [][]byte + keys []Key + span Span +} + +// invalidatingIter implements FragmentIterator. +var _ FragmentIterator = (*invalidatingIter)(nil) + +func (i *invalidatingIter) invalidate(s *Span) *Span { + // Zero the entirety of the byte bufs and the keys slice. + for j := range i.bufs { + for k := range i.bufs[j] { + i.bufs[j][k] = 0x00 + } + i.bufs[j] = nil + } + for j := range i.keys { + i.keys[j] = Key{} + } + if s == nil { + return nil + } + + // Copy all of the span's slices into slices owned by the invalidating iter + // that we can invalidate on a subsequent positioning method. + i.bufs = i.bufs[:0] + i.keys = i.keys[:0] + i.span = Span{ + Start: i.saveBytes(s.Start), + End: i.saveBytes(s.End), + } + for j := range s.Keys { + i.keys = append(i.keys, Key{ + Trailer: s.Keys[j].Trailer, + Suffix: i.saveBytes(s.Keys[j].Suffix), + Value: i.saveBytes(s.Keys[j].Value), + }) + } + i.span.Keys = i.keys + return &i.span +} + +func (i *invalidatingIter) saveBytes(b []byte) []byte { + if b == nil { + return nil + } + saved := append([]byte(nil), b...) 
+ i.bufs = append(i.bufs, saved) + return saved +} + +func (i *invalidatingIter) SeekGE(key []byte) *Span { return i.invalidate(i.iter.SeekGE(key)) } +func (i *invalidatingIter) SeekLT(key []byte) *Span { return i.invalidate(i.iter.SeekLT(key)) } +func (i *invalidatingIter) First() *Span { return i.invalidate(i.iter.First()) } +func (i *invalidatingIter) Last() *Span { return i.invalidate(i.iter.Last()) } +func (i *invalidatingIter) Next() *Span { return i.invalidate(i.iter.Next()) } +func (i *invalidatingIter) Prev() *Span { return i.invalidate(i.iter.Prev()) } +func (i *invalidatingIter) Close() error { return i.iter.Close() } +func (i *invalidatingIter) Error() error { return i.iter.Error() } diff --git a/pebble/internal/keyspan/level_iter.go b/pebble/internal/keyspan/level_iter.go new file mode 100644 index 0000000..6dd7ac6 --- /dev/null +++ b/pebble/internal/keyspan/level_iter.go @@ -0,0 +1,521 @@ +// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package keyspan + +import ( + "fmt" + + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/invariants" + "github.com/cockroachdb/pebble/internal/manifest" +) + +// LevelIter provides a merged view of spans from sstables in a level. +// It takes advantage of level invariants to only have one sstable span block +// open at one time, opened using the newIter function passed in. +type LevelIter struct { + cmp base.Compare + // Denotes the kind of key the level iterator should read. If the key type + // is KeyTypePoint, the level iterator will read range tombstones (which + // only affect point keys). If the key type is KeyTypeRange, the level + // iterator will read range keys. It is invalid to configure an iterator + // with the KeyTypePointAndRange key type. 
+ // + // If key type is KeyTypePoint, no straddle spans are emitted between files, + // and point key bounds are used to find files instead of range key bounds. + // + // TODO(bilal): Straddle spans can safely be produced in rangedel mode once + // we can guarantee that we will never read sstables in a level that split + // user keys across them. This might be guaranteed in a future release, but + // as of CockroachDB 22.2 it is not guaranteed, so to be safe disable it when + // keyType == KeyTypePoint + keyType manifest.KeyType + // The LSM level this LevelIter is initialized for. Used in logging. + level manifest.Level + // The below fields are used to fill in gaps between adjacent files' range + // key spaces. This is an optimization to avoid unnecessarily loading files + // in cases where range keys are sparse and rare. dir is set by every + // positioning operation, straddleDir is set to dir whenever a straddling + // Span is synthesized and the last positioning operation returned a + // synthesized straddle span. + // + // Note that when a straddle span is initialized, iterFile is modified to + // point to the next file in the straddleDir direction. A change of direction + // on a straddle key therefore necessitates the value of iterFile to be + // reverted. + dir int + straddle Span + straddleDir int + // The iter for the current file (iterFile). It is nil under any of the + // following conditions: + // - files.Current() == nil + // - err != nil + // - straddleDir != 0, in which case iterFile is not nil and points to the + // next file (in the straddleDir direction). + // - some other constraint, like the bounds in opts, caused the file at index to not + // be relevant to the iteration. + iter FragmentIterator + // iterFile holds the current file. + // INVARIANT: iterFile = files.Current() + iterFile *manifest.FileMetadata + newIter TableNewSpanIter + files manifest.LevelIterator + err error + + // The options that were passed in. 
+ tableOpts SpanIterOptions + + // TODO(bilal): Add InternalIteratorStats. +} + +// LevelIter implements the keyspan.FragmentIterator interface. +var _ FragmentIterator = (*LevelIter)(nil) + +// NewLevelIter returns a LevelIter. +func NewLevelIter( + opts SpanIterOptions, + cmp base.Compare, + newIter TableNewSpanIter, + files manifest.LevelIterator, + level manifest.Level, + keyType manifest.KeyType, +) *LevelIter { + l := &LevelIter{} + l.Init(opts, cmp, newIter, files, level, keyType) + return l +} + +// Init initializes a LevelIter. +func (l *LevelIter) Init( + opts SpanIterOptions, + cmp base.Compare, + newIter TableNewSpanIter, + files manifest.LevelIterator, + level manifest.Level, + keyType manifest.KeyType, +) { + l.err = nil + l.level = level + l.tableOpts = opts + l.cmp = cmp + l.iterFile = nil + l.newIter = newIter + switch keyType { + case manifest.KeyTypePoint: + l.keyType = keyType + l.files = files.Filter(keyType) + case manifest.KeyTypeRange: + l.keyType = keyType + l.files = files.Filter(keyType) + default: + panic(fmt.Sprintf("unsupported key type: %v", keyType)) + } +} + +func (l *LevelIter) findFileGE(key []byte) *manifest.FileMetadata { + // Find the earliest file whose largest key is >= key. + // + // If the earliest file has its largest key == key and that largest key is a + // range deletion sentinel, we know that we manufactured this sentinel to convert + // the exclusive range deletion end key into an inclusive key (reminder: [start, end)#seqnum + // is the form of a range deletion sentinel which can contribute a largest key = end#sentinel). + // In this case we don't return this as the earliest file since there is nothing actually + // equal to key in it. 
+ + m := l.files.SeekGE(l.cmp, key) + for m != nil { + largestKey := m.LargestRangeKey + if l.keyType == manifest.KeyTypePoint { + largestKey = m.LargestPointKey + } + if !largestKey.IsExclusiveSentinel() || l.cmp(largestKey.UserKey, key) != 0 { + break + } + m = l.files.Next() + } + return m +} + +func (l *LevelIter) findFileLT(key []byte) *manifest.FileMetadata { + // Find the last file whose smallest key is < key. + return l.files.SeekLT(l.cmp, key) +} + +type loadFileReturnIndicator int8 + +const ( + noFileLoaded loadFileReturnIndicator = iota + fileAlreadyLoaded + newFileLoaded +) + +func (l *LevelIter) loadFile(file *manifest.FileMetadata, dir int) loadFileReturnIndicator { + indicator := noFileLoaded + if l.iterFile == file { + if l.err != nil { + return noFileLoaded + } + if l.iter != nil { + // We are already at the file, but we would need to check for bounds. + // Set indicator accordingly. + indicator = fileAlreadyLoaded + } + // We were already at file, but don't have an iterator, probably because the file was + // beyond the iteration bounds. It may still be, but it is also possible that the bounds + // have changed. We handle that below. + } + + // Note that LevelIter.Close() can be called multiple times. + if indicator != fileAlreadyLoaded { + if err := l.Close(); err != nil { + return noFileLoaded + } + } + + l.iterFile = file + if file == nil { + return noFileLoaded + } + if indicator != fileAlreadyLoaded { + l.iter, l.err = l.newIter(file, l.tableOpts) + indicator = newFileLoaded + } + if l.err != nil { + return noFileLoaded + } + return indicator +} + +// SeekGE implements keyspan.FragmentIterator. +func (l *LevelIter) SeekGE(key []byte) *Span { + l.dir = +1 + l.straddle = Span{} + l.straddleDir = 0 + l.err = nil // clear cached iteration error + + f := l.findFileGE(key) + if f != nil && l.keyType == manifest.KeyTypeRange && l.cmp(key, f.SmallestRangeKey.UserKey) < 0 { + // Peek at the previous file. 
+ prevFile := l.files.Prev() + l.files.Next() + if prevFile != nil { + // We could unconditionally return an empty span between the seek key and + // f.SmallestRangeKey, however if this span is to the left of all range + // keys on this level, it could lead to inconsistent behaviour in relative + // positioning operations. Consider this example, with a b-c range key: + // + // SeekGE(a) -> a-b:{} + // Next() -> b-c{(#5,RANGEKEYSET,@4,foo)} + // Prev() -> nil + // + // Iterators higher up in the iterator stack rely on this sort of relative + // positioning consistency. + // + // TODO(bilal): Investigate ways to be able to return straddle spans in + // cases similar to the above, while still retaining correctness. + // Return a straddling key instead of loading the file. + l.iterFile = f + if err := l.Close(); err != nil { + return l.verify(nil) + } + l.straddleDir = +1 + l.straddle = Span{ + Start: prevFile.LargestRangeKey.UserKey, + End: f.SmallestRangeKey.UserKey, + Keys: nil, + } + return l.verify(&l.straddle) + } + } + loadFileIndicator := l.loadFile(f, +1) + if loadFileIndicator == noFileLoaded { + return l.verify(nil) + } + if span := l.iter.SeekGE(key); span != nil { + return l.verify(span) + } + return l.skipEmptyFileForward() +} + +// SeekLT implements keyspan.FragmentIterator. +func (l *LevelIter) SeekLT(key []byte) *Span { + l.dir = -1 + l.straddle = Span{} + l.straddleDir = 0 + l.err = nil // clear cached iteration error + + f := l.findFileLT(key) + if f != nil && l.keyType == manifest.KeyTypeRange && l.cmp(f.LargestRangeKey.UserKey, key) < 0 { + // Peek at the next file. + nextFile := l.files.Next() + l.files.Prev() + if nextFile != nil { + // We could unconditionally return an empty span between f.LargestRangeKey + // and the seek key, however if this span is to the right of all range keys + // on this level, it could lead to inconsistent behaviour in relative + // positioning operations. 
Consider this example, with a b-c range key: + // + // SeekLT(d) -> c-d:{} + // Prev() -> b-c{(#5,RANGEKEYSET,@4,foo)} + // Next() -> nil + // + // Iterators higher up in the iterator stack rely on this sort of relative + // positioning consistency. + // + // TODO(bilal): Investigate ways to be able to return straddle spans in + // cases similar to the above, while still retaining correctness. + // Return a straddling key instead of loading the file. + l.iterFile = f + if err := l.Close(); err != nil { + return l.verify(nil) + } + l.straddleDir = -1 + l.straddle = Span{ + Start: f.LargestRangeKey.UserKey, + End: nextFile.SmallestRangeKey.UserKey, + Keys: nil, + } + return l.verify(&l.straddle) + } + } + if l.loadFile(f, -1) == noFileLoaded { + return l.verify(nil) + } + if span := l.iter.SeekLT(key); span != nil { + return l.verify(span) + } + return l.skipEmptyFileBackward() +} + +// First implements keyspan.FragmentIterator. +func (l *LevelIter) First() *Span { + l.dir = +1 + l.straddle = Span{} + l.straddleDir = 0 + l.err = nil // clear cached iteration error + + if l.loadFile(l.files.First(), +1) == noFileLoaded { + return l.verify(nil) + } + if span := l.iter.First(); span != nil { + return l.verify(span) + } + return l.skipEmptyFileForward() +} + +// Last implements keyspan.FragmentIterator. +func (l *LevelIter) Last() *Span { + l.dir = -1 + l.straddle = Span{} + l.straddleDir = 0 + l.err = nil // clear cached iteration error + + if l.loadFile(l.files.Last(), -1) == noFileLoaded { + return l.verify(nil) + } + if span := l.iter.Last(); span != nil { + return l.verify(span) + } + return l.skipEmptyFileBackward() +} + +// Next implements keyspan.FragmentIterator. 
+func (l *LevelIter) Next() *Span { + if l.err != nil || (l.iter == nil && l.iterFile == nil && l.dir > 0) { + return l.verify(nil) + } + if l.iter == nil && l.iterFile == nil { + // l.dir <= 0 + return l.First() + } + l.dir = +1 + + if l.iter != nil { + if span := l.iter.Next(); span != nil { + return l.verify(span) + } + } + return l.skipEmptyFileForward() +} + +// Prev implements keyspan.FragmentIterator. +func (l *LevelIter) Prev() *Span { + if l.err != nil || (l.iter == nil && l.iterFile == nil && l.dir < 0) { + return l.verify(nil) + } + if l.iter == nil && l.iterFile == nil { + // l.dir >= 0 + return l.Last() + } + l.dir = -1 + + if l.iter != nil { + if span := l.iter.Prev(); span != nil { + return l.verify(span) + } + } + return l.skipEmptyFileBackward() +} + +func (l *LevelIter) skipEmptyFileForward() *Span { + if l.straddleDir == 0 && l.keyType == manifest.KeyTypeRange && + l.iterFile != nil && l.iter != nil { + // We were at a file that had spans. Check if the next file that has + // spans is not directly adjacent to the current file i.e. there is a + // gap in the span keyspace between the two files. In that case, synthesize + // a "straddle span" in l.straddle and return that. + // + // Straddle spans are not created in rangedel mode. + if err := l.Close(); err != nil { + l.err = err + return l.verify(nil) + } + startKey := l.iterFile.LargestRangeKey.UserKey + // Resetting l.iterFile without loading the file into l.iter is okay and + // does not change the logic in loadFile() as long as l.iter is also nil; + // which it should be due to the Close() call above. + l.iterFile = l.files.Next() + if l.iterFile == nil { + return l.verify(nil) + } + endKey := l.iterFile.SmallestRangeKey.UserKey + if l.cmp(startKey, endKey) < 0 { + // There is a gap between the two files. Synthesize a straddling span + // to avoid unnecessarily loading the next file. 
+ l.straddle = Span{ + Start: startKey, + End: endKey, + } + l.straddleDir = +1 + return l.verify(&l.straddle) + } + } else if l.straddleDir < 0 { + // We were at a straddle key, but are now changing directions. l.iterFile + // was already moved backward by skipEmptyFileBackward, so advance it + // forward. + l.iterFile = l.files.Next() + } + l.straddle = Span{} + l.straddleDir = 0 + var span *Span + for span.Empty() { + fileToLoad := l.iterFile + if l.keyType == manifest.KeyTypePoint { + // We haven't iterated to the next file yet if we're in point key + // (rangedel) mode. + fileToLoad = l.files.Next() + } + if l.loadFile(fileToLoad, +1) == noFileLoaded { + return l.verify(nil) + } + span = l.iter.First() + // In rangedel mode, we can expect to get empty files that we'd need to + // skip over, but not in range key mode. + if l.keyType == manifest.KeyTypeRange { + break + } + } + return l.verify(span) +} + +func (l *LevelIter) skipEmptyFileBackward() *Span { + // We were at a file that had spans. Check if the previous file that has + // spans is not directly adjacent to the current file i.e. there is a + // gap in the span keyspace between the two files. In that case, synthesize + // a "straddle span" in l.straddle and return that. + // + // Straddle spans are not created in rangedel mode. + if l.straddleDir == 0 && l.keyType == manifest.KeyTypeRange && + l.iterFile != nil && l.iter != nil { + if err := l.Close(); err != nil { + l.err = err + return l.verify(nil) + } + endKey := l.iterFile.SmallestRangeKey.UserKey + // Resetting l.iterFile without loading the file into l.iter is okay and + // does not change the logic in loadFile() as long as l.iter is also nil; + // which it should be due to the Close() call above. + l.iterFile = l.files.Prev() + if l.iterFile == nil { + return l.verify(nil) + } + startKey := l.iterFile.LargestRangeKey.UserKey + if l.cmp(startKey, endKey) < 0 { + // There is a gap between the two files. 
Synthesize a straddling span + // to avoid unnecessarily loading the next file. + l.straddle = Span{ + Start: startKey, + End: endKey, + } + l.straddleDir = -1 + return l.verify(&l.straddle) + } + } else if l.straddleDir > 0 { + // We were at a straddle key, but are now changing directions. l.iterFile + // was already advanced forward by skipEmptyFileForward, so move it + // backward. + l.iterFile = l.files.Prev() + } + l.straddle = Span{} + l.straddleDir = 0 + var span *Span + for span.Empty() { + fileToLoad := l.iterFile + if l.keyType == manifest.KeyTypePoint { + fileToLoad = l.files.Prev() + } + if l.loadFile(fileToLoad, -1) == noFileLoaded { + return l.verify(nil) + } + span = l.iter.Last() + // In rangedel mode, we can expect to get empty files that we'd need to + // skip over, but not in range key mode as the filter on the FileMetadata + // should guarantee we always get a non-empty file. + if l.keyType == manifest.KeyTypeRange { + break + } + } + return l.verify(span) +} + +// verify is invoked whenever a span is returned from an iterator positioning +// method to a caller. During invariant builds, it asserts invariants to the +// caller. +func (l *LevelIter) verify(s *Span) *Span { + // NB: Do not add any logic outside the invariants.Enabled conditional to + // ensure that verify is always compiled away in production builds. + if invariants.Enabled { + if f := l.files.Current(); f != l.iterFile { + panic(fmt.Sprintf("LevelIter.files.Current (%s) and l.iterFile (%s) diverged", + f, l.iterFile)) + } + } + return s +} + +// Error implements keyspan.FragmentIterator. +func (l *LevelIter) Error() error { + if l.err != nil || l.iter == nil { + return l.err + } + return l.iter.Error() +} + +// Close implements keyspan.FragmentIterator. +func (l *LevelIter) Close() error { + if l.iter != nil { + l.err = l.iter.Close() + l.iter = nil + } + return l.err +} + +// String implements keyspan.FragmentIterator. 
+func (l *LevelIter) String() string { + if l.iterFile != nil { + return fmt.Sprintf("%s: fileNum=%s", l.level, l.iterFile.FileNum) + } + return fmt.Sprintf("%s: fileNum=", l.level) +} diff --git a/pebble/internal/keyspan/level_iter_test.go b/pebble/internal/keyspan/level_iter_test.go new file mode 100644 index 0000000..6e30396 --- /dev/null +++ b/pebble/internal/keyspan/level_iter_test.go @@ -0,0 +1,472 @@ +// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package keyspan + +import ( + "fmt" + "strings" + "testing" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/manifest" + "github.com/stretchr/testify/require" +) + +func TestLevelIterEquivalence(t *testing.T) { + type level [][]Span + testCases := []struct { + name string + levels []level + }{ + { + "single level, no gaps, no overlaps", + []level{ + { + { + Span{ + Start: []byte("a"), + End: []byte("b"), + Keys: []Key{{ + Trailer: base.MakeTrailer(2, base.InternalKeyKindRangeKeySet), + Suffix: nil, + Value: []byte("foo"), + }}, + }, + Span{ + Start: []byte("b"), + End: []byte("c"), + Keys: []Key{{ + Trailer: base.MakeTrailer(2, base.InternalKeyKindRangeKeySet), + Suffix: nil, + Value: []byte("foo"), + }}, + }, + Span{ + Start: []byte("c"), + End: []byte("d"), + Keys: []Key{{ + Trailer: base.MakeTrailer(2, base.InternalKeyKindRangeKeySet), + Suffix: nil, + Value: []byte("foo"), + }}, + }, + }, + { + Span{ + Start: []byte("d"), + End: []byte("e"), + Keys: []Key{{ + Trailer: base.MakeTrailer(2, base.InternalKeyKindRangeKeySet), + Suffix: nil, + Value: []byte("foo"), + }}, + }, + Span{ + Start: []byte("e"), + End: []byte("f"), + Keys: []Key{{ + Trailer: base.MakeTrailer(2, base.InternalKeyKindRangeKeySet), + Suffix: nil, + Value: []byte("foo"), + }}, + }, + Span{ + Start: []byte("f"), + End: []byte("g"), + 
Keys: []Key{{ + Trailer: base.MakeTrailer(2, base.InternalKeyKindRangeKeySet), + Suffix: nil, + Value: []byte("foo"), + }}, + }, + }, + }, + }, + }, + { + "single level, overlapping fragments", + []level{ + { + { + Span{ + Start: []byte("a"), + End: []byte("b"), + Keys: []Key{ + { + Trailer: base.MakeTrailer(4, base.InternalKeyKindRangeKeySet), + Suffix: nil, + Value: []byte("bar"), + }, + { + Trailer: base.MakeTrailer(2, base.InternalKeyKindRangeKeySet), + Suffix: nil, + Value: []byte("foo"), + }, + }, + }, + Span{ + Start: []byte("b"), + End: []byte("c"), + Keys: []Key{ + { + Trailer: base.MakeTrailer(4, base.InternalKeyKindRangeKeySet), + Suffix: nil, + Value: []byte("bar"), + }, + { + Trailer: base.MakeTrailer(2, base.InternalKeyKindRangeKeySet), + Suffix: nil, + Value: []byte("foo"), + }, + }, + }, + Span{ + Start: []byte("c"), + End: []byte("d"), + Keys: []Key{{ + Trailer: base.MakeTrailer(2, base.InternalKeyKindRangeKeySet), + Suffix: nil, + Value: []byte("foo"), + }}, + }, + }, + { + Span{ + Start: []byte("d"), + End: []byte("e"), + Keys: []Key{{ + Trailer: base.MakeTrailer(2, base.InternalKeyKindRangeKeySet), + Suffix: nil, + Value: []byte("foo"), + }}, + }, + Span{ + Start: []byte("e"), + End: []byte("f"), + Keys: []Key{{ + Trailer: base.MakeTrailer(2, base.InternalKeyKindRangeKeySet), + Suffix: nil, + Value: []byte("foo"), + }}, + }, + Span{ + Start: []byte("f"), + End: []byte("g"), + Keys: []Key{{ + Trailer: base.MakeTrailer(2, base.InternalKeyKindRangeKeySet), + Suffix: nil, + Value: []byte("foo"), + }}, + }, + }, + }, + }, + }, + { + "single level, gaps between files and range keys", + []level{ + { + { + Span{ + Start: []byte("a"), + End: []byte("b"), + Keys: []Key{{ + Trailer: base.MakeTrailer(2, base.InternalKeyKindRangeKeySet), + Suffix: nil, + Value: []byte("foo"), + }}, + }, + Span{ + Start: []byte("c"), + End: []byte("d"), + Keys: []Key{{ + Trailer: base.MakeTrailer(2, base.InternalKeyKindRangeKeySet), + Suffix: nil, + Value: []byte("foo"), + 
}}, + }, + Span{ + Start: []byte("e"), + End: []byte("f"), + Keys: []Key{{ + Trailer: base.MakeTrailer(2, base.InternalKeyKindRangeKeySet), + Suffix: nil, + Value: []byte("foo"), + }}, + }, + }, + { + Span{ + Start: []byte("g"), + End: []byte("h"), + Keys: []Key{{ + Trailer: base.MakeTrailer(2, base.InternalKeyKindRangeKeySet), + Suffix: nil, + Value: []byte("foo"), + }}, + }, + Span{ + Start: []byte("i"), + End: []byte("j"), + Keys: []Key{{ + Trailer: base.MakeTrailer(2, base.InternalKeyKindRangeKeySet), + Suffix: nil, + Value: []byte("foo"), + }}, + }, + Span{ + Start: []byte("k"), + End: []byte("l"), + Keys: []Key{{ + Trailer: base.MakeTrailer(2, base.InternalKeyKindRangeKeySet), + Suffix: nil, + Value: []byte("foo"), + }}, + }, + }, + }, + }, + }, + { + "two levels, one with overlapping unset", + []level{ + { + { + Span{ + Start: []byte("a"), + End: []byte("h"), + Keys: []Key{{ + Trailer: base.MakeTrailer(2, base.InternalKeyKindRangeKeySet), + Suffix: nil, + Value: []byte("foo"), + }}, + }, + }, + { + Span{ + Start: []byte("l"), + End: []byte("u"), + Keys: []Key{{ + Trailer: base.MakeTrailer(2, base.InternalKeyKindRangeKeyUnset), + Suffix: nil, + Value: nil, + }}, + }, + }, + }, + { + { + Span{ + Start: []byte("e"), + End: []byte("r"), + Keys: []Key{{ + Trailer: base.MakeTrailer(1, base.InternalKeyKindRangeKeySet), + Suffix: nil, + Value: []byte("foo"), + }}, + }, + }, + }, + }, + }, + } + + for _, tc := range testCases { + var fileIters []FragmentIterator + var levelIters []FragmentIterator + var iter1, iter2 MergingIter + for j, level := range tc.levels { + j := j // Copy for use in closures down below. 
+ var levelIter LevelIter + var metas []*manifest.FileMetadata + for k, file := range level { + fileIters = append(fileIters, NewIter(base.DefaultComparer.Compare, file)) + meta := &manifest.FileMetadata{ + FileNum: base.FileNum(k + 1), + Size: 1024, + SmallestSeqNum: 2, + LargestSeqNum: 2, + SmallestRangeKey: base.MakeInternalKey(file[0].Start, file[0].SmallestKey().SeqNum(), file[0].SmallestKey().Kind()), + LargestRangeKey: base.MakeExclusiveSentinelKey(file[len(file)-1].LargestKey().Kind(), file[len(file)-1].End), + HasPointKeys: false, + HasRangeKeys: true, + } + meta.InitPhysicalBacking() + meta.ExtendRangeKeyBounds(base.DefaultComparer.Compare, meta.SmallestRangeKey, meta.LargestRangeKey) + metas = append(metas, meta) + } + + tableNewIters := func(file *manifest.FileMetadata, iterOptions SpanIterOptions) (FragmentIterator, error) { + return NewIter(base.DefaultComparer.Compare, tc.levels[j][file.FileNum-1]), nil + } + // Add all the fileMetadatas to L6. + b := &manifest.BulkVersionEdit{} + amap := make(map[base.FileNum]*manifest.FileMetadata) + for i := range metas { + amap[metas[i].FileNum] = metas[i] + } + b.Added[6] = amap + v, err := b.Apply(nil, base.DefaultComparer.Compare, base.DefaultFormatter, 0, 0, nil, manifest.ProhibitSplitUserKeys) + require.NoError(t, err) + levelIter.Init( + SpanIterOptions{}, base.DefaultComparer.Compare, tableNewIters, + v.Levels[6].Iter(), 0, manifest.KeyTypeRange, + ) + levelIters = append(levelIters, &levelIter) + } + + iter1.Init(base.DefaultComparer.Compare, VisibleTransform(base.InternalKeySeqNumMax), new(MergingBuffers), fileIters...) + iter2.Init(base.DefaultComparer.Compare, VisibleTransform(base.InternalKeySeqNumMax), new(MergingBuffers), levelIters...) + // Check iter1 and iter2 for equivalence. 
+ + require.Equal(t, iter1.First(), iter2.First(), "failed on test case %q", tc.name) + valid := true + for valid { + f1 := iter1.Next() + var f2 *Span + for { + f2 = iter2.Next() + // The level iter could produce empty spans that straddle between + // files. Ignore those. + if f2 == nil || !f2.Empty() { + break + } + } + + require.Equal(t, f1, f2, "failed on test case %q", tc.name) + valid = f1 != nil && f2 != nil + } + } +} + +func TestLevelIter(t *testing.T) { + var level [][]Span + var rangedels [][]Span + var metas []*manifest.FileMetadata + var iter FragmentIterator + var extraInfo func() string + + datadriven.RunTest(t, "testdata/level_iter", func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "define": + level = level[:0] + metas = metas[:0] + rangedels = rangedels[:0] + if iter != nil { + iter.Close() + iter = nil + } + var pointKeys []base.InternalKey + var currentRangeDels []Span + var currentFile []Span + for _, key := range strings.Split(d.Input, "\n") { + if strings.HasPrefix(key, "file") { + // Skip the very first file creation. 
+ if len(level) != 0 || len(currentFile) != 0 { + meta := &manifest.FileMetadata{ + FileNum: base.FileNum(len(level) + 1), + } + if len(currentFile) > 0 { + smallest := base.MakeInternalKey(currentFile[0].Start, currentFile[0].SmallestKey().SeqNum(), currentFile[0].SmallestKey().Kind()) + largest := base.MakeExclusiveSentinelKey(currentFile[len(currentFile)-1].LargestKey().Kind(), currentFile[len(currentFile)-1].End) + meta.ExtendRangeKeyBounds(base.DefaultComparer.Compare, smallest, largest) + } + if len(pointKeys) != 0 { + meta.ExtendPointKeyBounds(base.DefaultComparer.Compare, pointKeys[0], pointKeys[len(pointKeys)-1]) + } + meta.InitPhysicalBacking() + level = append(level, currentFile) + metas = append(metas, meta) + rangedels = append(rangedels, currentRangeDels) + currentRangeDels = nil + currentFile = nil + pointKeys = nil + } + continue + } + key = strings.TrimSpace(key) + if strings.HasPrefix(key, "point:") { + key = strings.TrimPrefix(key, "point:") + j := strings.Index(key, ":") + ikey := base.ParseInternalKey(key[:j]) + pointKeys = append(pointKeys, ikey) + if ikey.Kind() == base.InternalKeyKindRangeDelete { + currentRangeDels = append(currentRangeDels, Span{ + Start: ikey.UserKey, End: []byte(key[j+1:]), Keys: []Key{{Trailer: ikey.Trailer}}}) + } + continue + } + span := ParseSpan(key) + currentFile = append(currentFile, span) + } + meta := &manifest.FileMetadata{ + FileNum: base.FileNum(len(level) + 1), + } + meta.InitPhysicalBacking() + level = append(level, currentFile) + rangedels = append(rangedels, currentRangeDels) + if len(currentFile) > 0 { + smallest := base.MakeInternalKey(currentFile[0].Start, currentFile[0].SmallestKey().SeqNum(), currentFile[0].SmallestKey().Kind()) + largest := base.MakeExclusiveSentinelKey(currentFile[len(currentFile)-1].LargestKey().Kind(), currentFile[len(currentFile)-1].End) + meta.ExtendRangeKeyBounds(base.DefaultComparer.Compare, smallest, largest) + } + if len(pointKeys) != 0 { + 
meta.ExtendPointKeyBounds(base.DefaultComparer.Compare, pointKeys[0], pointKeys[len(pointKeys)-1]) + } + metas = append(metas, meta) + return "" + case "num-files": + return fmt.Sprintf("%d", len(level)) + case "close-iter": + _ = iter.Close() + iter = nil + return "ok" + case "iter": + keyType := manifest.KeyTypeRange + for _, arg := range d.CmdArgs { + if strings.Contains(arg.Key, "rangedel") { + keyType = manifest.KeyTypePoint + } + } + if iter == nil { + var lastFileNum base.FileNum + tableNewIters := func(file *manifest.FileMetadata, _ SpanIterOptions) (FragmentIterator, error) { + keyType := keyType + spans := level[file.FileNum-1] + if keyType == manifest.KeyTypePoint { + spans = rangedels[file.FileNum-1] + } + lastFileNum = file.FileNum + return NewIter(base.DefaultComparer.Compare, spans), nil + } + b := &manifest.BulkVersionEdit{} + amap := make(map[base.FileNum]*manifest.FileMetadata) + for i := range metas { + amap[metas[i].FileNum] = metas[i] + } + b.Added[6] = amap + v, err := b.Apply(nil, base.DefaultComparer.Compare, base.DefaultFormatter, 0, 0, nil, manifest.ProhibitSplitUserKeys) + require.NoError(t, err) + iter = NewLevelIter( + SpanIterOptions{}, base.DefaultComparer.Compare, + tableNewIters, v.Levels[6].Iter(), 6, keyType, + ) + extraInfo = func() string { + return fmt.Sprintf("file = %s.sst", lastFileNum) + } + } + + return runFragmentIteratorCmd(iter, d.Input, extraInfo) + + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) + + if iter != nil { + iter.Close() + } +} diff --git a/pebble/internal/keyspan/merging_iter.go b/pebble/internal/keyspan/merging_iter.go new file mode 100644 index 0000000..c73ba59 --- /dev/null +++ b/pebble/internal/keyspan/merging_iter.go @@ -0,0 +1,1209 @@ +// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package keyspan + +import ( + "bytes" + "fmt" + "sort" + + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/invariants" + "github.com/cockroachdb/pebble/internal/manifest" +) + +// TODO(jackson): Consider implementing an optimization to seek lower levels +// past higher levels' RANGEKEYDELs. This would be analaogous to the +// optimization pebble.mergingIter performs for RANGEDELs during point key +// seeks. It may not be worth it, because range keys are rare and cascading +// seeks would require introducing key comparisons to switchTo{Min,Max}Heap +// where there currently are none. + +// TODO(jackson): There are several opportunities to use base.Equal in the +// MergingIter implementation, but will require a bit of plumbing to thread the +// Equal function. + +// MergingIter merges spans across levels of the LSM, exposing an iterator over +// spans that yields sets of spans fragmented at unique user key boundaries. +// +// A MergingIter is initialized with an arbitrary number of child iterators over +// fragmented spans. Each child iterator exposes fragmented key spans, such that +// overlapping keys are surfaced in a single Span. Key spans from one child +// iterator may overlap key spans from another child iterator arbitrarily. +// +// The spans combined by MergingIter will return spans with keys sorted by +// trailer descending. If the MergingIter is configured with a Transformer, it's +// permitted to modify the ordering of the spans' keys returned by MergingIter. +// +// # Algorithm +// +// The merging iterator wraps child iterators, merging and fragmenting spans +// across levels. The high-level algorithm is: +// +// 1. Initialize the heap with bound keys from child iterators' spans. +// 2. Find the next [or previous] two unique user keys' from bounds. +// 3. Consider the span formed between the two unique user keys a candidate +// span. +// 4. 
Determine if any of the child iterators' spans overlap the candidate +// span. +// 4a. If any of the child iterator's current bounds are end keys +// (during forward iteration) or start keys (during reverse +// iteration), then all the spans with that bound overlap the +// candidate span. +// 4b. Apply the configured transform, which may remove keys. +// 4c. If no spans overlap, forget the smallest (forward iteration) +// or largest (reverse iteration) unique user key and advance +// the iterators to the next unique user key. Start again from 3. +// +// # Detailed algorithm +// +// Each level (i0, i1, ...) has a user-provided input FragmentIterator. The +// merging iterator steps through individual boundaries of the underlying +// spans separately. If the underlying FragmentIterator has fragments +// [a,b){#2,#1} [b,c){#1} the mergingIterLevel.{next,prev} step through: +// +// (a, start), (b, end), (b, start), (c, end) +// +// Note that (a, start) and (b, end) are observed ONCE each, despite two keys +// sharing those bounds. Also note that (b, end) and (b, start) are two distinct +// iterator positions of a mergingIterLevel. +// +// The merging iterator maintains a heap (min during forward iteration, max +// during reverse iteration) containing the boundKeys. Each boundKey is a +// 3-tuple holding the bound user key, whether the bound is a start or end key +// and the set of keys from that level that have that bound. The heap orders +// based on the boundKey's user key only. +// +// The merging iterator is responsible for merging spans across levels to +// determine which span is next, but it's also responsible for fragmenting +// overlapping spans. Consider the example: +// +// i0: b---d e-----h +// i1: a---c h-----k +// i2: a------------------------------p +// +// fragments: a-b-c-d-e-----h-----k----------p +// +// None of the individual child iterators contain a span with the exact bounds +// [c,d), but the merging iterator must produce a span [c,d). 
To accomplish +// this, the merging iterator visits every span between unique boundary user +// keys. In the above example, this is: +// +// [a,b), [b,c), [c,d), [d,e), [e, h), [h, k), [k, p) +// +// The merging iterator first initializes the heap to prepare for iteration. +// The description below discusses the mechanics of forward iteration after a +// call to First, but the mechanics are similar for reverse iteration and +// other positioning methods. +// +// During a call to First, the heap is initialized by seeking every +// mergingIterLevel to the first bound of the first fragment. In the above +// example, this seeks the child iterators to: +// +// i0: (b, boundKindFragmentStart, [ [b,d) ]) +// i1: (a, boundKindFragmentStart, [ [a,c) ]) +// i2: (a, boundKindFragmentStart, [ [a,p) ]) +// +// After fixing up the heap, the root of the heap is a boundKey with the +// smallest user key ('a' in the example). Once the heap is setup for iteration +// in the appropriate direction and location, the merging iterator uses +// find{Next,Prev}FragmentSet to find the next/previous span bounds. +// +// During forward iteration, the root of the heap's user key is the start key +// key of next merged span. findNextFragmentSet sets m.start to this user +// key. The heap may contain other boundKeys with the same user key if another +// level has a fragment starting or ending at the same key, so the +// findNextFragmentSet method pulls from the heap until it finds the first key +// greater than m.start. This key is used as the end key. +// +// In the above example, this results in m.start = 'a', m.end = 'b' and child +// iterators in the following positions: +// +// i0: (b, boundKindFragmentStart, [ [b,d) ]) +// i1: (c, boundKindFragmentEnd, [ [a,c) ]) +// i2: (p, boundKindFragmentEnd, [ [a,p) ]) +// +// With the user key bounds of the next merged span established, +// findNextFragmentSet must determine which, if any, fragments overlap the span. 
+// During forward iteration any child iterator that is now positioned at an end +// boundary has an overlapping span. (Justification: The child iterator's end +// boundary is ≥ m.end. The corresponding start boundary must be ≤ m.start since +// there were no other user keys between m.start and m.end. So the fragments +// associated with the iterator's current end boundary have start and end bounds +// such that start ≤ m.start < m.end ≤ end). +// +// findNextFragmentSet iterates over the levels, collecting keys from any child +// iterators positioned at end boundaries. In the above example, i1 and i2 are +// positioned at end boundaries, so findNextFragmentSet collects the keys of +// [a,c) and [a,p). These spans contain the merging iterator's [m.start, m.end) +// span, but they may also extend beyond the m.start and m.end. The merging +// iterator returns the keys with the merging iter's m.start and m.end bounds, +// preserving the underlying keys' sequence numbers, key kinds and values. +// +// A MergingIter is configured with a Transform that's applied to the span +// before surfacing it to the iterator user. A Transform may remove keys +// arbitrarily, but it may not modify the values themselves. +// +// It may be the case that findNextFragmentSet finds no levels positioned at end +// boundaries, or that there are no spans remaining after applying a transform, +// in which case the span [m.start, m.end) overlaps with nothing. In this case +// findNextFragmentSet loops, repeating the above process again until it finds a +// span that does contain keys. +// +// # Memory safety +// +// The FragmentIterator interface only guarantees stability of a Span and its +// associated slices until the next positioning method is called. Adjacent Spans +// may be contained in different sstables, requring the FragmentIterator +// implementation to close one sstable, releasing its memory, before opening the +// next. 
Most of the state used by the MergingIter is derived from spans at +// current child iterator positions only, ensuring state is stable. The one +// exception is the start bound during forward iteration and the end bound +// during reverse iteration. +// +// If the heap root originates from an end boundary when findNextFragmentSet +// begins, a Next on the heap root level may invalidate the end boundary. To +// accommodate this, find{Next,Prev}FragmentSet copy the initial boundary if the +// subsequent Next/Prev would move to the next span. +type MergingIter struct { + *MergingBuffers + // start and end hold the bounds for the span currently under the + // iterator position. + // + // Invariant: None of the levels' iterators contain spans with a bound + // between start and end. For all bounds b, b ≤ start || b ≥ end. + start, end []byte + + // transformer defines a transformation to be applied to a span before it's + // yielded to the user. Transforming may filter individual keys contained + // within the span. + transformer Transformer + // span holds the iterator's current span. This span is used as the + // destination for transforms. Every tranformed span overwrites the + // previous. + span Span + err error + dir int8 + + // alloc preallocates mergingIterLevel and mergingIterItems for use by the + // merging iterator. As long as the merging iterator is used with + // manifest.NumLevels+3 and fewer fragment iterators, the merging iterator + // will not need to allocate upon initialization. The value NumLevels+3 + // mirrors the preallocated levels in iterAlloc used for point iterators. + // Invariant: cap(levels) == cap(items) + alloc struct { + levels [manifest.NumLevels + 3]mergingIterLevel + items [manifest.NumLevels + 3]mergingIterItem + } +} + +// MergingBuffers holds buffers used while merging keyspans. +type MergingBuffers struct { + // keys holds all of the keys across all levels that overlap the key span + // [start, end), sorted by Trailer descending. 
This slice is reconstituted + // in synthesizeKeys from each mergingIterLevel's keys every time the + // [start, end) bounds change. + // + // Each element points into a child iterator's memory, so the keys may not + // be directly modified. + keys keysBySeqNumKind + // levels holds levels allocated by MergingIter.init. The MergingIter will + // prefer use of its `manifest.NumLevels+3` array, so this slice will be + // longer if set. + levels []mergingIterLevel + // heap holds a slice for the merging iterator heap allocated by + // MergingIter.init. The MergingIter will prefer use of its + // `manifest.NumLevels+3` items array, so this slice will be longer if set. + heap mergingIterHeap + // buf is a buffer used to save [start, end) boundary keys. + buf []byte +} + +// PrepareForReuse discards any excessively large buffers. +func (bufs *MergingBuffers) PrepareForReuse() { + if cap(bufs.buf) > bufferReuseMaxCapacity { + bufs.buf = nil + } +} + +// MergingIter implements the FragmentIterator interface. +var _ FragmentIterator = (*MergingIter)(nil) + +type mergingIterLevel struct { + iter FragmentIterator + + // heapKey holds the current key at this level for use within the heap. 
+ heapKey boundKey +} + +func (l *mergingIterLevel) next() { + if l.heapKey.kind == boundKindFragmentStart { + l.heapKey = boundKey{ + kind: boundKindFragmentEnd, + key: l.heapKey.span.End, + span: l.heapKey.span, + } + return + } + if s := l.iter.Next(); s == nil { + l.heapKey = boundKey{kind: boundKindInvalid} + } else { + l.heapKey = boundKey{ + kind: boundKindFragmentStart, + key: s.Start, + span: s, + } + } +} + +func (l *mergingIterLevel) prev() { + if l.heapKey.kind == boundKindFragmentEnd { + l.heapKey = boundKey{ + kind: boundKindFragmentStart, + key: l.heapKey.span.Start, + span: l.heapKey.span, + } + return + } + if s := l.iter.Prev(); s == nil { + l.heapKey = boundKey{kind: boundKindInvalid} + } else { + l.heapKey = boundKey{ + kind: boundKindFragmentEnd, + key: s.End, + span: s, + } + } +} + +// Init initializes the merging iterator with the provided fragment iterators. +func (m *MergingIter) Init( + cmp base.Compare, transformer Transformer, bufs *MergingBuffers, iters ...FragmentIterator, +) { + *m = MergingIter{ + MergingBuffers: bufs, + transformer: transformer, + } + m.heap.cmp = cmp + levels, items := m.levels, m.heap.items + + // Invariant: cap(levels) >= cap(items) + // Invariant: cap(alloc.levels) == cap(alloc.items) + if len(iters) <= len(m.alloc.levels) { + // The slices allocated on the MergingIter struct are large enough. + m.levels = m.alloc.levels[:len(iters)] + m.heap.items = m.alloc.items[:0] + } else if len(iters) <= cap(levels) { + // The existing heap-allocated slices are large enough, so reuse them. + m.levels = levels[:len(iters)] + m.heap.items = items[:0] + } else { + // Heap allocate new slices. + m.levels = make([]mergingIterLevel, len(iters)) + m.heap.items = make([]mergingIterItem, 0, len(iters)) + } + for i := range m.levels { + m.levels[i] = mergingIterLevel{iter: iters[i]} + } +} + +// AddLevel adds a new level to the bottom of the merging iterator. AddLevel +// must be called after Init and before any other method. 
+func (m *MergingIter) AddLevel(iter FragmentIterator) { + m.levels = append(m.levels, mergingIterLevel{iter: iter}) +} + +// SeekGE moves the iterator to the first span covering a key greater than +// or equal to the given key. This is equivalent to seeking to the first +// span with an end key greater than the given key. +func (m *MergingIter) SeekGE(key []byte) *Span { + m.invalidate() // clear state about current position + + // SeekGE(k) seeks to the first span with an end key greater than the given + // key. The merged span M that we're searching for might straddle the seek + // `key`. In this case, the M.Start may be a key ≤ the seek key. + // + // Consider a SeekGE(dog) in the following example. + // + // i0: b---d e-----h + // i1: a---c h-----k + // i2: a------------------------------p + // merged: a-b-c-d-e-----h-----k----------p + // + // The merged span M containing 'dog' is [d,e). The 'd' of the merged span + // comes from i0's [b,d)'s end boundary. The [b,d) span does not cover any + // key >= dog, so we cannot find the span by positioning the child iterators + // using a SeekGE(dog). + // + // Instead, if we take all the child iterators' spans bounds: + // a b c d e h k p + // We want to partition them into keys ≤ `key` and keys > `key`. + // dog + // │ + // a b c d│e h k p + // │ + // The largest key on the left of the partition forms the merged span's + // start key, and the smallest key on the right of the partition forms the + // merged span's end key. Recharacterized: + // + // M.Start: the largest boundary ≤ k of any child span + // M.End: the smallest boundary > k of any child span + // + // The FragmentIterator interface doesn't implement seeking by all bounds, + // it implements seeking by containment. A SeekGE(k) will ensure we observe + // all start boundaries ≥ k and all end boundaries > k but does not ensure + // we observe end boundaries = k or any boundaries < k. 
A SeekLT(k) will + // ensure we observe all start boundaries < k and all end boundaries ≤ k but + // does not ensure we observe any start boundaries = k or any boundaries > + // k. This forces us to seek in one direction and step in the other. + // + // In a SeekGE, we want to end up oriented in the forward direction when + // complete, so we begin with searching for M.Start by SeekLT-ing every + // child iterator to `k`. For every child span found, we determine the + // largest bound ≤ `k` and use it to initialize our max heap. The resulting + // root of the max heap is a preliminary value for `M.Start`. + for i := range m.levels { + l := &m.levels[i] + s := l.iter.SeekLT(key) + if s == nil { + l.heapKey = boundKey{kind: boundKindInvalid} + } else if m.cmp(s.End, key) <= 0 { + l.heapKey = boundKey{ + kind: boundKindFragmentEnd, + key: s.End, + span: s, + } + } else { + // s.End > key && s.Start < key + // We need to use this span's start bound, since that's the largest + // bound ≤ key. + l.heapKey = boundKey{ + kind: boundKindFragmentStart, + key: s.Start, + span: s, + } + } + } + m.initMaxHeap() + if m.err != nil { + return nil + } else if len(m.heap.items) == 0 { + // There are no spans covering any key < `key`. There is no span that + // straddles the seek key. Reorient the heap into a min heap and return + // the first span we find in the forward direction. + m.switchToMinHeap() + return m.findNextFragmentSet() + } + + // The heap root is now the largest boundary key b such that: + // 1. b < k + // 2. b = k, and b is an end boundary + // There's a third case that we will need to consider later, after we've + // switched to a min heap: + // 3. there exists a start boundary key b such that b = k. + // A start boundary key equal to k would not be surfaced when we seeked all + // the levels using SeekLT(k), since no key `key`, which will serve as our candidate end + // bound. 
+ m.switchToMinHeap() + if m.err != nil { + return nil + } else if len(m.heap.items) == 0 { + return nil + } + + // Check for the case 3 described above. It's possible that when we switch + // heap directions, we discover a start boundary of some child span that is + // equal to the seek key `key`. In this case, we want this key to be our + // start boundary. + if m.heap.items[0].boundKey.kind == boundKindFragmentStart && + m.cmp(m.heap.items[0].boundKey.key, key) == 0 { + // Call findNextFragmentSet, which will set m.start to the heap root and + // proceed forward. + return m.findNextFragmentSet() + } + + m.end = m.heap.items[0].boundKey.key + if found, s := m.synthesizeKeys(+1); found && s != nil { + return s + } + return m.findNextFragmentSet() + +} + +// SeekLT moves the iterator to the last span covering a key less than the +// given key. This is equivalent to seeking to the last span with a start +// key less than the given key. +func (m *MergingIter) SeekLT(key []byte) *Span { + m.invalidate() // clear state about current position + + // SeekLT(k) seeks to the last span with a start key less than the given + // key. The merged span M that we're searching for might straddle the seek + // `key`. In this case, the M.End may be a key ≥ the seek key. + // + // Consider a SeekLT(dog) in the following example. + // + // i0: b---d e-----h + // i1: a---c h-----k + // i2: a------------------------------p + // merged: a-b-c-d-e-----h-----k----------p + // + // The merged span M containing the largest key <'dog' is [d,e). The 'e' of + // the merged span comes from i0's [e,h)'s start boundary. The [e,h) span + // does not cover any key < dog, so we cannot find the span by positioning + // the child iterators using a SeekLT(dog). + // + // Instead, if we take all the child iterators' spans bounds: + // a b c d e h k p + // We want to partition them into keys < `key` and keys ≥ `key`. 
+ // dog + // │ + // a b c d│e h k p + // │ + // The largest key on the left of the partition forms the merged span's + // start key, and the smallest key on the right of the partition forms the + // merged span's end key. Recharacterized: + // + // M.Start: the largest boundary < k of any child span + // M.End: the smallest boundary ≥ k of any child span + // + // The FragmentIterator interface doesn't implement seeking by all bounds, + // it implements seeking by containment. A SeekGE(k) will ensure we observe + // all start boundaries ≥ k and all end boundaries > k but does not ensure + // we observe end boundaries = k or any boundaries < k. A SeekLT(k) will + // ensure we observe all start boundaries < k and all end boundaries ≤ k but + // does not ensure we observe any start boundaries = k or any boundaries > + // k. This forces us to seek in one direction and step in the other. + // + // In a SeekLT, we want to end up oriented in the backward direction when + // complete, so we begin with searching for M.End by SeekGE-ing every + // child iterator to `k`. For every child span found, we determine the + // smallest bound ≥ `k` and use it to initialize our min heap. The resulting + // root of the min heap is a preliminary value for `M.End`. + for i := range m.levels { + l := &m.levels[i] + s := l.iter.SeekGE(key) + if s == nil { + l.heapKey = boundKey{kind: boundKindInvalid} + } else if m.cmp(s.Start, key) >= 0 { + l.heapKey = boundKey{ + kind: boundKindFragmentStart, + key: s.Start, + span: s, + } + } else { + // s.Start < key + // We need to use this span's end bound, since that's the smallest + // bound > key. + l.heapKey = boundKey{ + kind: boundKindFragmentEnd, + key: s.End, + span: s, + } + } + } + m.initMinHeap() + if m.err != nil { + return nil + } else if len(m.heap.items) == 0 { + // There are no spans covering any key ≥ `key`. There is no span that + // straddles the seek key. 
Reorient the heap into a max heap and return + // the first span we find in the reverse direction. + m.switchToMaxHeap() + return m.findPrevFragmentSet() + } + + // The heap root is now the smallest boundary key b such that: + // 1. b > k + // 2. b = k, and b is a start boundary + // There's a third case that we will need to consider later, after we've + // switched to a max heap: + // 3. there exists an end boundary key b such that b = k. + // An end boundary key equal to k would not be surfaced when we seeked all + // the levels using SeekGE(k), since k would not be contained within the + // exclusive end boundary. + // + // Assume that the tightest boundary ≥ k is the current heap root (cases 1 & + // 2). After we switch to a max heap, we'll check for the third case and + // adjust the end boundary if necessary. + m.end = m.heap.items[0].boundKey.key + + // Before switching the direction of the heap, save a copy of the end + // boundary if it's the start boundary of some child span. Prev-ing the + // child iterator might switch files and invalidate the memory of the bound. + if m.heap.items[0].boundKey.kind == boundKindFragmentStart { + m.buf = append(m.buf[:0], m.end...) + m.end = m.buf + } + + // Switch to a max heap. This will move each level to the previous bound in + // every level, and then establish a max heap. This allows us to obtain the + // largest boundary key < `key`, which will serve as our candidate start + // bound. + m.switchToMaxHeap() + if m.err != nil { + return nil + } else if len(m.heap.items) == 0 { + return nil + } + // Check for the case 3 described above. It's possible that when we switch + // heap directions, we discover an end boundary of some child span that is + // equal to the seek key `key`. In this case, we want this key to be our end + // boundary. 
+ if m.heap.items[0].boundKey.kind == boundKindFragmentEnd && + m.cmp(m.heap.items[0].boundKey.key, key) == 0 { + // Call findPrevFragmentSet, which will set m.end to the heap root and + // proceed backwards. + return m.findPrevFragmentSet() + } + + m.start = m.heap.items[0].boundKey.key + if found, s := m.synthesizeKeys(-1); found && s != nil { + return s + } + return m.findPrevFragmentSet() +} + +// First seeks the iterator to the first span. +func (m *MergingIter) First() *Span { + m.invalidate() // clear state about current position + for i := range m.levels { + if s := m.levels[i].iter.First(); s == nil { + m.levels[i].heapKey = boundKey{kind: boundKindInvalid} + } else { + m.levels[i].heapKey = boundKey{ + kind: boundKindFragmentStart, + key: s.Start, + span: s, + } + } + } + m.initMinHeap() + return m.findNextFragmentSet() +} + +// Last seeks the iterator to the last span. +func (m *MergingIter) Last() *Span { + m.invalidate() // clear state about current position + for i := range m.levels { + if s := m.levels[i].iter.Last(); s == nil { + m.levels[i].heapKey = boundKey{kind: boundKindInvalid} + } else { + m.levels[i].heapKey = boundKey{ + kind: boundKindFragmentEnd, + key: s.End, + span: s, + } + } + } + m.initMaxHeap() + return m.findPrevFragmentSet() +} + +// Next advances the iterator to the next span. +func (m *MergingIter) Next() *Span { + if m.err != nil { + return nil + } + if m.dir == +1 && (m.end == nil || m.start == nil) { + return nil + } + if m.dir != +1 { + m.switchToMinHeap() + } + return m.findNextFragmentSet() +} + +// Prev advances the iterator to the previous span. +func (m *MergingIter) Prev() *Span { + if m.err != nil { + return nil + } + if m.dir == -1 && (m.end == nil || m.start == nil) { + return nil + } + if m.dir != -1 { + m.switchToMaxHeap() + } + return m.findPrevFragmentSet() +} + +// Error returns any accumulated error. 
+func (m *MergingIter) Error() error { + if m.heap.len() == 0 || m.err != nil { + return m.err + } + return m.levels[m.heap.items[0].index].iter.Error() +} + +// Close closes the iterator, releasing all acquired resources. +func (m *MergingIter) Close() error { + for i := range m.levels { + if err := m.levels[i].iter.Close(); err != nil && m.err == nil { + m.err = err + } + } + m.levels = nil + m.heap.items = m.heap.items[:0] + return m.err +} + +// String implements fmt.Stringer. +func (m *MergingIter) String() string { + return "merging-keyspan" +} + +func (m *MergingIter) initMinHeap() { + m.dir = +1 + m.heap.reverse = false + m.initHeap() +} + +func (m *MergingIter) initMaxHeap() { + m.dir = -1 + m.heap.reverse = true + m.initHeap() +} + +func (m *MergingIter) initHeap() { + m.heap.items = m.heap.items[:0] + for i := range m.levels { + if l := &m.levels[i]; l.heapKey.kind != boundKindInvalid { + m.heap.items = append(m.heap.items, mergingIterItem{ + index: i, + boundKey: &l.heapKey, + }) + } else { + m.err = firstError(m.err, l.iter.Error()) + if m.err != nil { + return + } + } + } + m.heap.init() +} + +func (m *MergingIter) switchToMinHeap() { + // switchToMinHeap reorients the heap for forward iteration, without moving + // the current MergingIter position. + + // The iterator is currently positioned at the span [m.start, m.end), + // oriented in the reverse direction, so each level's iterator is positioned + // to the largest key ≤ m.start. To reorient in the forward direction, we + // must advance each level's iterator to the smallest key ≥ m.end. Consider + // this three-level example. 
+ // + // i0: b---d e-----h + // i1: a---c h-----k + // i2: a------------------------------p + // + // merged: a-b-c-d-e-----h-----k----------p + // + // If currently positioned at the merged span [c,d), then the level + // iterators' heap keys are: + // + // i0: (b, [b, d)) i1: (c, [a,c)) i2: (a, [a,p)) + // + // Reversing the heap should not move the merging iterator and should not + // change the current [m.start, m.end) bounds. It should only prepare for + // forward iteration by updating the child iterators' heap keys to: + // + // i0: (d, [b, d)) i1: (h, [h,k)) i2: (p, [a,p)) + // + // In every level the first key ≥ m.end is the next in the iterator. + // Justification: Suppose not and a level iterator's next key was some key k + // such that k < m.end. The max-heap invariant dictates that the current + // iterator position is the largest entry with a user key ≥ m.start. This + // means k > m.start. We started with the assumption that k < m.end, so + // m.start < k < m.end. But then k is between our current span bounds, + // and reverse iteration would have constructed the current interval to be + // [k, m.end) not [m.start, m.end). + + if invariants.Enabled { + for i := range m.levels { + l := &m.levels[i] + if l.heapKey.kind != boundKindInvalid && m.cmp(l.heapKey.key, m.start) > 0 { + panic("pebble: invariant violation: max-heap key > m.start") + } + } + } + + for i := range m.levels { + m.levels[i].next() + } + m.initMinHeap() +} + +func (m *MergingIter) switchToMaxHeap() { + // switchToMaxHeap reorients the heap for reverse iteration, without moving + // the current MergingIter position. + + // The iterator is currently positioned at the span [m.start, m.end), + // oriented in the forward direction. Each level's iterator is positioned at + // the smallest bound ≥ m.end. To reorient in the reverse direction, we must + // move each level's iterator to the largest key ≤ m.start. Consider this + // three-level example. 
+ // + // i0: b---d e-----h + // i1: a---c h-----k + // i2: a------------------------------p + // + // merged: a-b-c-d-e-----h-----k----------p + // + // If currently positioned at the merged span [c,d), then the level + // iterators' heap keys are: + // + // i0: (d, [b, d)) i1: (h, [h,k)) i2: (p, [a,p)) + // + // Reversing the heap should not move the merging iterator and should not + // change the current [m.start, m.end) bounds. It should only prepare for + // reverse iteration by updating the child iterators' heap keys to: + // + // i0: (b, [b, d)) i1: (c, [a,c)) i2: (a, [a,p)) + // + // In every level the largest key ≤ m.start is the prev in the iterator. + // Justification: Suppose not and a level iterator's prev key was some key k + // such that k > m.start. The min-heap invariant dictates that the current + // iterator position is the smallest entry with a user key ≥ m.end. This + // means k < m.end, otherwise the iterator would be positioned at k. We + // started with the assumption that k > m.start, so m.start < k < m.end. But + // then k is between our current span bounds, and reverse iteration + // would have constructed the current interval to be [m.start, k) not + // [m.start, m.end). + + if invariants.Enabled { + for i := range m.levels { + l := &m.levels[i] + if l.heapKey.kind != boundKindInvalid && m.cmp(l.heapKey.key, m.end) < 0 { + panic("pebble: invariant violation: min-heap key < m.end") + } + } + } + + for i := range m.levels { + m.levels[i].prev() + } + m.initMaxHeap() +} + +func (m *MergingIter) cmp(a, b []byte) int { + return m.heap.cmp(a, b) +} + +func (m *MergingIter) findNextFragmentSet() *Span { + // Each iteration of this loop considers a new merged span between unique + // user keys. An iteration may find that there exists no overlap for a given + // span, (eg, if the spans [a,b), [d, e) exist within level iterators, the + // below loop will still consider [b,d) before continuing to [d, e)). 
It + // returns when it finds a span that is covered by at least one key. + + for m.heap.len() > 0 && m.err == nil { + // Initialize the next span's start bound. SeekGE and First prepare the + // heap without advancing. Next leaves the heap in a state such that the + // root is the smallest bound key equal to the returned span's end key, + // so the heap is already positioned at the next merged span's start key. + + // NB: m.heapRoot() might be either an end boundary OR a start boundary + // of a level's span. Both end and start boundaries may still be a start + // key of a span in the set of fragmented spans returned by MergingIter. + // Consider the scenario: + // a----------l #1 + // b-----------m #2 + // + // The merged, fully-fragmented spans that MergingIter exposes to the caller + // have bounds: + // a-b #1 + // b--------l #1 + // b--------l #2 + // l-m #2 + // + // When advancing to l-m#2, we must set m.start to 'l', which originated + // from [a,l)#1's end boundary. + m.start = m.heap.items[0].boundKey.key + + // Before calling nextEntry, consider whether it might invalidate our + // start boundary. If the start boundary key originated from an end + // boundary, then we need to copy the start key before advancing the + // underlying iterator to the next Span. + if m.heap.items[0].boundKey.kind == boundKindFragmentEnd { + m.buf = append(m.buf[:0], m.start...) + m.start = m.buf + } + + // There may be many entries all with the same user key. Spans in other + // levels may also start or end at this same user key. For eg: + // L1: [a, c) [c, d) + // L2: [c, e) + // If we're positioned at L1's end(c) end boundary, we want to advance + // to the first bound > c. + m.nextEntry() + for len(m.heap.items) > 0 && m.err == nil && m.cmp(m.heapRoot(), m.start) == 0 { + m.nextEntry() + } + if len(m.heap.items) == 0 || m.err != nil { + break + } + + // The current entry at the top of the heap is the first key > m.start. 
+ // It must become the end bound for the span we will return to the user. + // In the above example, the root of the heap is L1's end(d). + m.end = m.heap.items[0].boundKey.key + + // Each level within m.levels may have a span that overlaps the + // fragmented key span [m.start, m.end). Update m.keys to point to them + // and sort them by kind, sequence number. There may not be any keys + // defined over [m.start, m.end) if we're between the end of one span + // and the start of the next, OR if the configured transform filters any + // keys out. We allow empty spans that were emitted by child iterators, but + // we elide empty spans created by the mergingIter itself that don't overlap + // with any child iterator returned spans (i.e. empty spans that bridge two + // distinct child-iterator-defined spans). + if found, s := m.synthesizeKeys(+1); found && s != nil { + return s + } + } + // Exhausted. + m.clear() + return nil +} + +func (m *MergingIter) findPrevFragmentSet() *Span { + // Each iteration of this loop considers a new merged span between unique + // user keys. An iteration may find that there exists no overlap for a given + // span, (eg, if the spans [a,b), [d, e) exist within level iterators, the + // below loop will still consider [b,d) before continuing to [a, b)). It + // returns when it finds a span that is covered by at least one key. + + for m.heap.len() > 0 && m.err == nil { + // Initialize the next span's end bound. SeekLT and Last prepare the + // heap without advancing. Prev leaves the heap in a state such that the + // root is the largest bound key equal to the returned span's start key, + // so the heap is already positioned at the next merged span's end key. + + // NB: m.heapRoot() might be either an end boundary OR a start boundary + // of a level's span. Both end and start boundaries may still be a start + // key of a span returned by MergingIter. 
Consider the scenario: + // a----------l #2 + // b-----------m #1 + // + // The merged, fully-fragmented spans that MergingIter exposes to the caller + // have bounds: + // a-b #2 + // b--------l #2 + // b--------l #1 + // l-m #1 + // + // When Preving to a-b#2, we must set m.end to 'b', which originated + // from [b,m)#1's start boundary. + m.end = m.heap.items[0].boundKey.key + + // Before calling prevEntry, consider whether it might invalidate our + // end boundary. If the end boundary key originated from a start + // boundary, then we need to copy the end key before advancing the + // underlying iterator to the previous Span. + if m.heap.items[0].boundKey.kind == boundKindFragmentStart { + m.buf = append(m.buf[:0], m.end...) + m.end = m.buf + } + + // There may be many entries all with the same user key. Spans in other + // levels may also start or end at this same user key. For eg: + // L1: [a, c) [c, d) + // L2: [c, e) + // If we're positioned at L1's start(c) start boundary, we want to prev + // to move to the first bound < c. + m.prevEntry() + for len(m.heap.items) > 0 && m.err == nil && m.cmp(m.heapRoot(), m.end) == 0 { + m.prevEntry() + } + if len(m.heap.items) == 0 || m.err != nil { + break + } + + // The current entry at the top of the heap is the first key < m.end. + // It must become the start bound for the span we will return to the + // user. In the above example, the root of the heap is L1's start(a). + m.start = m.heap.items[0].boundKey.key + + // Each level within m.levels may have a set of keys that overlap the + // fragmented key span [m.start, m.end). Update m.keys to point to them + // and sort them by kind, sequence number. There may not be any keys + // spanning [m.start, m.end) if we're between the end of one span and + // the start of the next, OR if the configured transform filters any + // keys out. 
We allow empty spans that were emitted by child iterators, but + // we elide empty spans created by the mergingIter itself that don't overlap + // with any child iterator returned spans (i.e. empty spans that bridge two + // distinct child-iterator-defined spans). + if found, s := m.synthesizeKeys(-1); found && s != nil { + return s + } + } + // Exhausted. + m.clear() + return nil +} + +func (m *MergingIter) heapRoot() []byte { + return m.heap.items[0].boundKey.key +} + +// synthesizeKeys is called by find{Next,Prev}FragmentSet to populate and +// sort the set of keys overlapping [m.start, m.end). +// +// During forward iteration, if the current heap item is a fragment end, +// then the fragment's start must be ≤ m.start and the fragment overlaps the +// current iterator position of [m.start, m.end). +// +// During reverse iteration, if the current heap item is a fragment start, +// then the fragment's end must be ≥ m.end and the fragment overlaps the +// current iteration position of [m.start, m.end). +// +// The boolean return value, `found`, is true if the returned span overlaps +// with a span returned by a child iterator. +func (m *MergingIter) synthesizeKeys(dir int8) (bool, *Span) { + if invariants.Enabled { + if m.cmp(m.start, m.end) >= 0 { + panic(fmt.Sprintf("pebble: invariant violation: span start ≥ end: %s >= %s", m.start, m.end)) + } + } + + m.keys = m.keys[:0] + found := false + for i := range m.levels { + if dir == +1 && m.levels[i].heapKey.kind == boundKindFragmentEnd || + dir == -1 && m.levels[i].heapKey.kind == boundKindFragmentStart { + m.keys = append(m.keys, m.levels[i].heapKey.span.Keys...) + found = true + } + } + // TODO(jackson): We should be able to remove this sort and instead + // guarantee that we'll return keys in the order of the levels they're from. + // With careful iterator construction, this would guarantee that they're + // sorted by trailer descending for the range key iteration use case. 
+ sort.Sort(&m.keys) + + // Apply the configured transform. See VisibleTransform. + m.span = Span{ + Start: m.start, + End: m.end, + Keys: m.keys, + KeysOrder: ByTrailerDesc, + } + // NB: m.heap.cmp is a base.Compare, whereas m.cmp is a method on + // MergingIter. + if err := m.transformer.Transform(m.heap.cmp, m.span, &m.span); err != nil { + m.err = err + return false, nil + } + return found, &m.span +} + +func (m *MergingIter) invalidate() { + m.err = nil +} + +func (m *MergingIter) clear() { + for fi := range m.keys { + m.keys[fi] = Key{} + } + m.keys = m.keys[:0] +} + +// nextEntry steps to the next entry. +func (m *MergingIter) nextEntry() { + l := &m.levels[m.heap.items[0].index] + l.next() + if !l.heapKey.valid() { + // l.iter is exhausted. + m.err = l.iter.Error() + if m.err == nil { + m.heap.pop() + } + return + } + + if m.heap.len() > 1 { + m.heap.fix(0) + } +} + +// prevEntry steps to the previous entry. +func (m *MergingIter) prevEntry() { + l := &m.levels[m.heap.items[0].index] + l.prev() + if !l.heapKey.valid() { + // l.iter is exhausted. + m.err = l.iter.Error() + if m.err == nil { + m.heap.pop() + } + return + } + + if m.heap.len() > 1 { + m.heap.fix(0) + } +} + +// DebugString returns a string representing the current internal state of the +// merging iterator and its heap for debugging purposes. +func (m *MergingIter) DebugString() string { + var buf bytes.Buffer + fmt.Fprintf(&buf, "Current bounds: [%q, %q)\n", m.start, m.end) + for i := range m.levels { + fmt.Fprintf(&buf, "%d: heap key %s\n", i, m.levels[i].heapKey) + } + return buf.String() +} + +type mergingIterItem struct { + // boundKey points to the corresponding mergingIterLevel's `iterKey`. + *boundKey + // index is the index of this level within the MergingIter's levels field. + index int +} + +// mergingIterHeap is copied from mergingIterHeap defined in the root pebble +// package for use with point keys. 
+ +type mergingIterHeap struct { + cmp base.Compare + reverse bool + items []mergingIterItem +} + +func (h *mergingIterHeap) len() int { + return len(h.items) +} + +func (h *mergingIterHeap) less(i, j int) bool { + // This key comparison only uses the user key and not the boundKind. Bound + // kind doesn't matter because when stepping over a user key, + // findNextFragmentSet and findPrevFragmentSet skip past all heap items with + // that user key, and makes no assumptions on ordering. All other heap + // examinations only consider the user key. + ik, jk := h.items[i].key, h.items[j].key + c := h.cmp(ik, jk) + if h.reverse { + return c > 0 + } + return c < 0 +} + +func (h *mergingIterHeap) swap(i, j int) { + h.items[i], h.items[j] = h.items[j], h.items[i] +} + +// init, fix, up and down are copied from the go stdlib. +func (h *mergingIterHeap) init() { + // heapify + n := h.len() + for i := n/2 - 1; i >= 0; i-- { + h.down(i, n) + } +} + +func (h *mergingIterHeap) fix(i int) { + if !h.down(i, h.len()) { + h.up(i) + } +} + +func (h *mergingIterHeap) pop() *mergingIterItem { + n := h.len() - 1 + h.swap(0, n) + h.down(0, n) + item := &h.items[n] + h.items = h.items[:n] + return item +} + +func (h *mergingIterHeap) up(j int) { + for { + i := (j - 1) / 2 // parent + if i == j || !h.less(j, i) { + break + } + h.swap(i, j) + j = i + } +} + +func (h *mergingIterHeap) down(i0, n int) bool { + i := i0 + for { + j1 := 2*i + 1 + if j1 >= n || j1 < 0 { // j1 < 0 after int overflow + break + } + j := j1 // left child + if j2 := j1 + 1; j2 < n && h.less(j2, j1) { + j = j2 // = 2*i + 2 // right child + } + if !h.less(j, i) { + break + } + h.swap(i, j) + i = j + } + return i > i0 +} + +type boundKind int8 + +const ( + boundKindInvalid boundKind = iota + boundKindFragmentStart + boundKindFragmentEnd +) + +type boundKey struct { + kind boundKind + key []byte + // span holds the span the bound key comes from. + // + // If kind is boundKindFragmentStart, then key is span.Start. 
If kind is + // boundKindFragmentEnd, then key is span.End. + span *Span +} + +func (k boundKey) valid() bool { + return k.kind != boundKindInvalid +} + +func (k boundKey) String() string { + var buf bytes.Buffer + switch k.kind { + case boundKindInvalid: + fmt.Fprint(&buf, "invalid") + case boundKindFragmentStart: + fmt.Fprint(&buf, "fragment-start") + case boundKindFragmentEnd: + fmt.Fprint(&buf, "fragment-end ") + default: + fmt.Fprintf(&buf, "unknown-kind(%d)", k.kind) + } + fmt.Fprintf(&buf, " %s [", k.key) + fmt.Fprintf(&buf, "%s", k.span) + fmt.Fprint(&buf, "]") + return buf.String() +} diff --git a/pebble/internal/keyspan/merging_iter_test.go b/pebble/internal/keyspan/merging_iter_test.go new file mode 100644 index 0000000..18650bb --- /dev/null +++ b/pebble/internal/keyspan/merging_iter_test.go @@ -0,0 +1,252 @@ +// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package keyspan + +import ( + "bytes" + "fmt" + "math/rand" + "slices" + "strconv" + "strings" + "testing" + "time" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/testkeys" + "github.com/stretchr/testify/require" +) + +func TestMergingIter(t *testing.T) { + cmp := base.DefaultComparer.Compare + + var definedIters []FragmentIterator + var buf bytes.Buffer + datadriven.RunTest(t, "testdata/merging_iter", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "define": + definedIters = definedIters[:0] + lines := strings.Split(strings.TrimSpace(td.Input), "\n") + var spans []Span + for _, line := range lines { + if line == "--" { + definedIters = append(definedIters, &invalidatingIter{iter: NewIter(cmp, spans)}) + spans = nil + continue + } + spans = append(spans, ParseSpan(line)) + } + if len(spans) > 0 { + definedIters = append(definedIters, &invalidatingIter{iter: NewIter(cmp, spans)}) + } + return fmt.Sprintf("%d levels", len(definedIters)) + case "iter": + buf.Reset() + pctx := probeContext{log: &buf} + snapshot := base.InternalKeySeqNumMax + iters := slices.Clone(definedIters) + for _, cmdArg := range td.CmdArgs { + switch cmdArg.Key { + case "snapshot": + var err error + snapshot, err = strconv.ParseUint(cmdArg.Vals[0], 10, 64) + require.NoError(t, err) + case "probes": + // The first value indicates which of the merging iterator's + // child iterators is the target. + i, err := strconv.Atoi(cmdArg.Vals[0]) + if err != nil { + return err.Error() + } + // The remaining values define probes to attach. + iters[i] = attachProbes(iters[i], pctx, parseProbes(cmdArg.Vals[1:]...)...) + default: + return fmt.Sprintf("unrecognized arg %q", cmdArg.Key) + } + } + var iter MergingIter + iter.Init(cmp, VisibleTransform(snapshot), new(MergingBuffers), iters...) 
+ runIterCmd(t, td, &iter, &buf) + return buf.String() + default: + return fmt.Sprintf("unrecognized command %q", td.Cmd) + } + }) +} + +// TestMergingIter_FragmenterEquivalence tests for equivalence between the +// fragmentation performed on-the-fly by the MergingIter and the fragmentation +// performed by the Fragmenter. +// +// It does this by producing 1-10 levels of well-formed fragments. Generated +// fragments may overlap other levels arbitrarily, but within their level +// generated fragments may only overlap other fragments that share the same user +// key bounds. +// +// The test then feeds all the fragments, across all levels, into a Fragmenter +// and produces a Iter over those fragments. The test also constructs a +// MergingIter with a separate Iter for each level. It runs a random +// series of operations, applying each operation to both. It asserts that each +// operation has identical results on both iterators. +func TestMergingIter_FragmenterEquivalence(t *testing.T) { + seed := time.Now().UnixNano() + for i := int64(0); i < 10; i++ { + testFragmenterEquivalenceOnce(t, seed+i) + } +} + +func TestMergingIter_FragmenterEquivalence_Seed(t *testing.T) { + // This test uses a fixed seed. It's useful to manually edit its seed when + // debugging a test failure of the variable-seed test. + const seed = 1644517830186873000 + testFragmenterEquivalenceOnce(t, seed) +} + +func testFragmenterEquivalenceOnce(t *testing.T, seed int64) { + cmp := testkeys.Comparer.Compare + rng := rand.New(rand.NewSource(seed)) + t.Logf("seed = %d", seed) + + // Use a key space of alphanumeric strings, with a random max length between + // 1-3. Repeat keys are more common at the lower max lengths. + ks := testkeys.Alpha(rng.Intn(3) + 1) + + // Generate between 1 and 10 levels of fragment iterators. 
+ levels := make([][]Span, rng.Intn(10)+1) + iters := make([]FragmentIterator, len(levels)) + var allSpans []Span + var buf bytes.Buffer + for l := 0; l < len(levels); l++ { + fmt.Fprintf(&buf, "level %d: ", l) + for keyspaceStartIdx := int64(0); keyspaceStartIdx < ks.Count(); { + // Generate spans of lengths of up to a third of the keyspace. + spanStartIdx := keyspaceStartIdx + rng.Int63n(ks.Count()/3) + spanEndIdx := spanStartIdx + rng.Int63n(ks.Count()/3) + 1 + + if spanEndIdx < ks.Count() { + keyCount := uint64(rng.Intn(3) + 1) + s := Span{ + Start: testkeys.Key(ks, spanStartIdx), + End: testkeys.Key(ks, spanEndIdx), + Keys: make([]Key, 0, keyCount), + } + for k := keyCount; k > 0; k-- { + seqNum := uint64((len(levels)-l)*3) + k + s.Keys = append(s.Keys, Key{ + Trailer: base.MakeTrailer(seqNum, base.InternalKeyKindRangeKeySet), + }) + } + if len(levels[l]) > 0 { + fmt.Fprint(&buf, ", ") + } + fmt.Fprintf(&buf, "%s", s) + + levels[l] = append(levels[l], s) + allSpans = append(allSpans, s) + } + keyspaceStartIdx = spanEndIdx + } + iters[l] = &invalidatingIter{iter: NewIter(cmp, levels[l])} + fmt.Fprintln(&buf) + } + + // Fragment the spans across the levels. + var allFragmented []Span + f := Fragmenter{ + Cmp: cmp, + Format: testkeys.Comparer.FormatKey, + Emit: func(span Span) { + allFragmented = append(allFragmented, span) + }, + } + Sort(f.Cmp, allSpans) + for _, s := range allSpans { + f.Add(s) + } + f.Finish() + + // Log all the levels and their fragments, as well as the fully-fragmented + // spans produced by the Fragmenter. + fmt.Fprintln(&buf, "Fragmenter fragments:") + for i, s := range allFragmented { + if i > 0 { + fmt.Fprint(&buf, ", ") + } + fmt.Fprint(&buf, s) + } + t.Logf("%d levels:\n%s\n", len(levels), buf.String()) + + fragmenterIter := NewIter(f.Cmp, allFragmented) + mergingIter := &MergingIter{} + mergingIter.Init(f.Cmp, VisibleTransform(base.InternalKeySeqNumMax), new(MergingBuffers), iters...) 
+ + // Position both so that it's okay to perform relative positioning + // operations immediately. + mergingIter.First() + fragmenterIter.First() + + type opKind struct { + weight int + fn func() (str string, f *Span, m *Span) + } + ops := []opKind{ + {weight: 2, fn: func() (string, *Span, *Span) { + return "First()", fragmenterIter.First(), mergingIter.First() + }}, + {weight: 2, fn: func() (string, *Span, *Span) { + return "Last()", fragmenterIter.Last(), mergingIter.Last() + }}, + {weight: 5, fn: func() (string, *Span, *Span) { + k := testkeys.Key(ks, rng.Int63n(ks.Count())) + return fmt.Sprintf("SeekGE(%q)", k), + fragmenterIter.SeekGE(k), + mergingIter.SeekGE(k) + }}, + {weight: 5, fn: func() (string, *Span, *Span) { + k := testkeys.Key(ks, rng.Int63n(ks.Count())) + return fmt.Sprintf("SeekLT(%q)", k), + fragmenterIter.SeekLT(k), + mergingIter.SeekLT(k) + }}, + {weight: 50, fn: func() (string, *Span, *Span) { + return "Next()", fragmenterIter.Next(), mergingIter.Next() + }}, + {weight: 50, fn: func() (string, *Span, *Span) { + return "Prev()", fragmenterIter.Prev(), mergingIter.Prev() + }}, + } + var totalWeight int + for _, op := range ops { + totalWeight += op.weight + } + + var fragmenterBuf bytes.Buffer + var mergingBuf bytes.Buffer + opCount := rng.Intn(200) + 50 + for i := 0; i < opCount; i++ { + p := rng.Intn(totalWeight) + opIndex := 0 + for i, op := range ops { + if p < op.weight { + opIndex = i + break + } + p -= op.weight + } + + opString, fs, ms := ops[opIndex].fn() + + fragmenterBuf.Reset() + mergingBuf.Reset() + fmt.Fprint(&fragmenterBuf, fs) + fmt.Fprint(&mergingBuf, ms) + if fragmenterBuf.String() != mergingBuf.String() { + t.Fatalf("seed %d, op %d: %s = %s, fragmenter iterator returned %s", + seed, i, opString, mergingBuf.String(), fragmenterBuf.String()) + } + t.Logf("op %d: %s = %s", i, opString, fragmenterBuf.String()) + } +} diff --git a/pebble/internal/keyspan/seek.go b/pebble/internal/keyspan/seek.go new file mode 100644 index 
0000000..efcf682 --- /dev/null +++ b/pebble/internal/keyspan/seek.go @@ -0,0 +1,48 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package keyspan + +import "github.com/cockroachdb/pebble/internal/base" + +// SeekLE seeks to the span that contains or is before the target key. +func SeekLE(cmp base.Compare, iter FragmentIterator, key []byte) *Span { + // NB: We use SeekLT in order to land on the proper span for a search + // key that resides in the middle of a span. Consider the scenario: + // + // a---e + // e---i + // + // The spans are indexed by their start keys `a` and `e`. If the + // search key is `c` we want to land on the span [a,e). If we were to + // use SeekGE then the search key `c` would land on the span [e,i) and + // we'd have to backtrack. The one complexity here is what happens for the + // search key `e`. In that case SeekLT will land us on the span [a,e) + // and we'll have to move forward. + iterSpan := iter.SeekLT(key) + + if iterSpan == nil { + // Advance the iterator once to see if the next span has a start key + // equal to key. + iterSpan = iter.Next() + if iterSpan == nil || cmp(key, iterSpan.Start) < 0 { + // The iterator is exhausted or we've hit the next span. + return nil + } + } else { + // Invariant: key > iterSpan.Start + if cmp(key, iterSpan.End) >= 0 { + // The current span lies entirely before the search key. Check to see if + // the next span contains the search key. If it doesn't, we'll backup + // and return to our earlier candidate. + iterSpan = iter.Next() + if iterSpan == nil || cmp(key, iterSpan.Start) < 0 { + // The next span is past our search key or there is no next span. Go + // back. 
+ iterSpan = iter.Prev() + } + } + } + return iterSpan +} diff --git a/pebble/internal/keyspan/seek_test.go b/pebble/internal/keyspan/seek_test.go new file mode 100644 index 0000000..aa1d643 --- /dev/null +++ b/pebble/internal/keyspan/seek_test.go @@ -0,0 +1,63 @@ +// Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package keyspan + +import ( + "bytes" + "fmt" + "strconv" + "strings" + "testing" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/pebble/internal/base" +) + +func TestSeek(t *testing.T) { + cmp := base.DefaultComparer.Compare + fmtKey := base.DefaultComparer.FormatKey + var iter FragmentIterator + var buf bytes.Buffer + + datadriven.RunTest(t, "testdata/seek", func(t *testing.T, d *datadriven.TestData) string { + buf.Reset() + switch d.Cmd { + case "build": + spans := buildSpans(t, cmp, fmtKey, d.Input, base.InternalKeyKindRangeDelete) + for _, s := range spans { + fmt.Fprintln(&buf, s) + } + iter = NewIter(cmp, spans) + return buf.String() + case "seek-ge", "seek-le": + seek := SeekLE + if d.Cmd == "seek-ge" { + seek = func(_ base.Compare, iter FragmentIterator, key []byte) *Span { + return iter.SeekGE(key) + } + } + + for _, line := range strings.Split(d.Input, "\n") { + parts := strings.Fields(line) + if len(parts) != 2 { + return fmt.Sprintf("malformed input: %s", line) + } + seq, err := strconv.ParseUint(parts[1], 10, 64) + if err != nil { + return err.Error() + } + span := seek(cmp, iter, []byte(parts[0])) + if span != nil { + visible := span.Visible(seq) + span = &visible + } + fmt.Fprintln(&buf, span) + } + return buf.String() + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} diff --git a/pebble/internal/keyspan/span.go b/pebble/internal/keyspan/span.go new file mode 100644 index 0000000..257b373 --- /dev/null +++ b/pebble/internal/keyspan/span.go @@ -0,0 +1,467 @@ +// 
Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package keyspan // import "github.com/cockroachdb/pebble/internal/keyspan" + +import ( + "bytes" + "fmt" + "sort" + "strconv" + "strings" + "unicode" + + "github.com/cockroachdb/pebble/internal/base" +) + +// Span represents a set of keys over a span of user key space. All of the keys +// within a Span are applied across the span's key span indicated by Start and +// End. Each internal key applied over the user key span appears as a separate +// Key, with its own kind and sequence number. Optionally, each Key may also +// have a Suffix and/or Value. +// +// Note that the start user key is inclusive and the end user key is exclusive. +// +// Currently the only supported key kinds are: +// +// RANGEDEL, RANGEKEYSET, RANGEKEYUNSET, RANGEKEYDEL. +type Span struct { + // Start and End encode the user key range of all the contained items, with + // an inclusive start key and exclusive end key. Both Start and End must be + // non-nil, or both nil if representing an invalid Span. + Start, End []byte + // Keys holds the set of keys applied over the [Start, End) user key range. + // Keys is sorted by (SeqNum, Kind) descending, unless otherwise specified + // by the context. If SeqNum and Kind are equal, the order of Keys is + // undefined. Keys may be empty, even if Start and End are non-nil. + // + // Keys are a decoded representation of the internal keys stored in batches + // or sstable blocks. A single internal key in a range key block may produce + // several decoded Keys. + Keys []Key + KeysOrder KeysOrder +} + +// KeysOrder describes the ordering of Keys within a Span. +type KeysOrder int8 + +const ( + // ByTrailerDesc indicates a Span's keys are sorted by Trailer descending. + // This is the default ordering, and the ordering used during physical + // storage. 
+ ByTrailerDesc KeysOrder = iota + // BySuffixAsc indicates a Span's keys are sorted by Suffix ascending. This + // ordering is used during user iteration of range keys. + BySuffixAsc +) + +// Key represents a single key applied over a span of user keys. A Key is +// contained by a Span which specifies the span of user keys over which the Key +// is applied. +type Key struct { + // Trailer contains the key kind and sequence number. + Trailer uint64 + // Suffix holds an optional suffix associated with the key. This is only + // non-nil for RANGEKEYSET and RANGEKEYUNSET keys. + Suffix []byte + // Value holds a logical value associated with the Key. It is NOT the + // internal value stored in a range key or range deletion block. This is + // only non-nil for RANGEKEYSET keys. + Value []byte +} + +// SeqNum returns the sequence number component of the key. +func (k Key) SeqNum() uint64 { + return k.Trailer >> 8 +} + +// VisibleAt returns true if the provided key is visible at the provided +// snapshot sequence number. It interprets batch sequence numbers as always +// visible, because non-visible batch span keys are filtered when they're +// fragmented. +func (k Key) VisibleAt(snapshot uint64) bool { + seq := k.SeqNum() + return seq < snapshot || seq&base.InternalKeySeqNumBatch != 0 +} + +// Kind returns the kind component of the key. +func (k Key) Kind() base.InternalKeyKind { + return base.InternalKeyKind(k.Trailer & 0xff) +} + +// Equal returns true if this Key is equal to the given key. Two keys are said +// to be equal if the two Keys have equal trailers, suffix and value. Suffix +// comparison uses the provided base.Compare func. Value comparison is bytewise. +func (k Key) Equal(equal base.Equal, b Key) bool { + return k.Trailer == b.Trailer && + equal(k.Suffix, b.Suffix) && + bytes.Equal(k.Value, b.Value) +} + +// Valid returns true if the span is defined. 
+func (s *Span) Valid() bool { + return s.Start != nil && s.End != nil +} + +// Empty returns true if the span does not contain any keys. An empty span may +// still be Valid. A non-empty span must be Valid. +// +// An Empty span may be produced by Visible, or be produced by iterators in +// order to surface the gaps between keys. +func (s *Span) Empty() bool { + return s == nil || len(s.Keys) == 0 +} + +// SmallestKey returns the smallest internal key defined by the span's keys. +// It requires the Span's keys be in ByTrailerDesc order. It panics if the span +// contains no keys or its keys are sorted in a different order. +func (s *Span) SmallestKey() base.InternalKey { + if len(s.Keys) == 0 { + panic("pebble: Span contains no keys") + } else if s.KeysOrder != ByTrailerDesc { + panic("pebble: span's keys unexpectedly not in trailer order") + } + // The first key has the highest (sequence number,kind) tuple. + return base.InternalKey{ + UserKey: s.Start, + Trailer: s.Keys[0].Trailer, + } +} + +// LargestKey returns the largest internal key defined by the span's keys. The +// returned key will always be a "sentinel key" at the end boundary. The +// "sentinel key" models the exclusive end boundary by returning an InternalKey +// with the maximal sequence number, ensuring all InternalKeys with the same +// user key sort after the sentinel key. +// +// It requires the Span's keys be in ByTrailerDesc order. It panics if the span +// contains no keys or its keys are sorted in a different order. +func (s *Span) LargestKey() base.InternalKey { + if len(s.Keys) == 0 { + panic("pebble: Span contains no keys") + } else if s.KeysOrder != ByTrailerDesc { + panic("pebble: span's keys unexpectedly not in trailer order") + } + // The last key has the lowest (sequence number,kind) tuple. + kind := s.Keys[len(s.Keys)-1].Kind() + return base.MakeExclusiveSentinelKey(kind, s.End) +} + +// SmallestSeqNum returns the smallest sequence number of a key contained within +// the span. 
It requires the Span's keys be in ByTrailerDesc order. It panics if +// the span contains no keys or its keys are sorted in a different order. +func (s *Span) SmallestSeqNum() uint64 { + if len(s.Keys) == 0 { + panic("pebble: Span contains no keys") + } else if s.KeysOrder != ByTrailerDesc { + panic("pebble: span's keys unexpectedly not in trailer order") + } + + return s.Keys[len(s.Keys)-1].SeqNum() +} + +// LargestSeqNum returns the largest sequence number of a key contained within +// the span. It requires the Span's keys be in ByTrailerDesc order. It panics if +// the span contains no keys or its keys are sorted in a different order. +func (s *Span) LargestSeqNum() uint64 { + if len(s.Keys) == 0 { + panic("pebble: Span contains no keys") + } else if s.KeysOrder != ByTrailerDesc { + panic("pebble: span's keys unexpectedly not in trailer order") + } + return s.Keys[0].SeqNum() +} + +// TODO(jackson): Replace most of the calls to Visible with more targeted calls +// that avoid the need to construct a new Span. + +// Visible returns a span with the subset of keys visible at the provided +// sequence number. It requires the Span's keys be in ByTrailerDesc order. It +// panics if the span's keys are sorted in a different order. +// +// Visible may incur an allocation, so callers should prefer targeted, +// non-allocating methods when possible. +func (s Span) Visible(snapshot uint64) Span { + if s.KeysOrder != ByTrailerDesc { + panic("pebble: span's keys unexpectedly not in trailer order") + } + + ret := Span{Start: s.Start, End: s.End} + if len(s.Keys) == 0 { + return ret + } + + // Keys from indexed batches may force an allocation. The Keys slice is + // ordered by sequence number, so ordinarily we can return the trailing + // subslice containing keys with sequence numbers less than `seqNum`. + // + // However, batch keys are special. Only visible batch keys are included + // when an Iterator's batch spans are fragmented. They must always be + // visible. 
+ // + // Batch keys can create a sandwich of visible batch keys at the beginning + // of the slice and visible committed keys at the end of the slice, forcing + // us to allocate a new slice and copy the contents. + // + // Care is taking to only incur an allocation only when batch keys and + // visible keys actually sandwich non-visible keys. + + // lastBatchIdx and lastNonVisibleIdx are set to the last index of a batch + // key and a non-visible key respectively. + lastBatchIdx := -1 + lastNonVisibleIdx := -1 + for i := range s.Keys { + if seqNum := s.Keys[i].SeqNum(); seqNum&base.InternalKeySeqNumBatch != 0 { + // Batch key. Always visible. + lastBatchIdx = i + } else if seqNum >= snapshot { + // This key is not visible. + lastNonVisibleIdx = i + } + } + + // In the following comments: b = batch, h = hidden, v = visible (committed). + switch { + case lastNonVisibleIdx == -1: + // All keys are visible. + // + // [b b b], [v v v] and [b b b v v v] + ret.Keys = s.Keys + case lastBatchIdx == -1: + // There are no batch keys, so we can return the continuous subslice + // starting after the last non-visible Key. + // + // h h h [v v v] + ret.Keys = s.Keys[lastNonVisibleIdx+1:] + case lastNonVisibleIdx == len(s.Keys)-1: + // While we have a batch key and non-visible keys, there are no + // committed visible keys. The 'sandwich' is missing the bottom layer, + // so we can return the continuous sublice at the beginning. + // + // [b b b] h h h + ret.Keys = s.Keys[0 : lastBatchIdx+1] + default: + // This is the problematic sandwich case. Allocate a new slice, copying + // the batch keys and the visible keys into it. + // + // [b b b] h h h [v v v] + ret.Keys = make([]Key, (lastBatchIdx+1)+(len(s.Keys)-lastNonVisibleIdx-1)) + copy(ret.Keys, s.Keys[:lastBatchIdx+1]) + copy(ret.Keys[lastBatchIdx+1:], s.Keys[lastNonVisibleIdx+1:]) + } + return ret +} + +// VisibleAt returns true if the span contains a key visible at the provided +// snapshot. 
Keys with sequence numbers with the batch bit set are treated as +// always visible. +// +// VisibleAt requires the Span's keys be in ByTrailerDesc order. It panics if +// the span's keys are sorted in a different order. +func (s *Span) VisibleAt(snapshot uint64) bool { + if s.KeysOrder != ByTrailerDesc { + panic("pebble: span's keys unexpectedly not in trailer order") + } + if len(s.Keys) == 0 { + return false + } else if first := s.Keys[0].SeqNum(); first&base.InternalKeySeqNumBatch != 0 { + // Only visible batch keys are included when an Iterator's batch spans + // are fragmented. They must always be visible. + return true + } else { + // Otherwise we check the last key. Since keys are ordered decreasing in + // sequence number, the last key has the lowest sequence number of any + // of the span's keys. If any of the keys are visible, the last key must + // be visible. Or put differently: if the last key is not visible, then + // no key is visible. + return s.Keys[len(s.Keys)-1].SeqNum() < snapshot + } +} + +// ShallowClone returns the span with a Keys slice owned by the span itself. +// None of the key byte slices are cloned (see Span.DeepClone). +func (s *Span) ShallowClone() Span { + c := Span{ + Start: s.Start, + End: s.End, + Keys: make([]Key, len(s.Keys)), + KeysOrder: s.KeysOrder, + } + copy(c.Keys, s.Keys) + return c +} + +// DeepClone clones the span, creating copies of all contained slices. DeepClone +// is intended for non-production code paths like tests, the level checker, etc +// because it is allocation heavy. 
+func (s *Span) DeepClone() Span { + c := Span{ + Start: make([]byte, len(s.Start)), + End: make([]byte, len(s.End)), + Keys: make([]Key, len(s.Keys)), + KeysOrder: s.KeysOrder, + } + copy(c.Start, s.Start) + copy(c.End, s.End) + for i := range s.Keys { + c.Keys[i].Trailer = s.Keys[i].Trailer + if len(s.Keys[i].Suffix) > 0 { + c.Keys[i].Suffix = make([]byte, len(s.Keys[i].Suffix)) + copy(c.Keys[i].Suffix, s.Keys[i].Suffix) + } + if len(s.Keys[i].Value) > 0 { + c.Keys[i].Value = make([]byte, len(s.Keys[i].Value)) + copy(c.Keys[i].Value, s.Keys[i].Value) + } + } + return c +} + +// Contains returns true if the specified key resides within the span's bounds. +func (s *Span) Contains(cmp base.Compare, key []byte) bool { + return cmp(s.Start, key) <= 0 && cmp(key, s.End) < 0 +} + +// Covers returns true if the span covers keys at seqNum. +// +// Covers requires the Span's keys be in ByTrailerDesc order. It panics if the +// span's keys are sorted in a different order. +func (s Span) Covers(seqNum uint64) bool { + if s.KeysOrder != ByTrailerDesc { + panic("pebble: span's keys unexpectedly not in trailer order") + } + return !s.Empty() && s.Keys[0].SeqNum() > seqNum +} + +// CoversAt returns true if the span contains a key that is visible at the +// provided snapshot sequence number, and that key's sequence number is higher +// than seqNum. +// +// Keys with sequence numbers with the batch bit set are treated as always +// visible. +// +// CoversAt requires the Span's keys be in ByTrailerDesc order. It panics if the +// span's keys are sorted in a different order. +func (s *Span) CoversAt(snapshot, seqNum uint64) bool { + if s.KeysOrder != ByTrailerDesc { + panic("pebble: span's keys unexpectedly not in trailer order") + } + // NB: A key is visible at `snapshot` if its sequence number is strictly + // less than `snapshot`. See base.Visible. 
+ for i := range s.Keys { + if kseq := s.Keys[i].SeqNum(); kseq&base.InternalKeySeqNumBatch != 0 { + // Only visible batch keys are included when an Iterator's batch spans + // are fragmented. They must always be visible. + return kseq > seqNum + } else if kseq < snapshot { + return kseq > seqNum + } + } + return false +} + +// String returns a string representation of the span. +func (s Span) String() string { + return fmt.Sprint(prettySpan{Span: s, formatKey: base.DefaultFormatter}) +} + +// Pretty returns a formatter for the span. +func (s Span) Pretty(f base.FormatKey) fmt.Formatter { + // TODO(jackson): Take a base.FormatValue to format Key.Value too. + return prettySpan{s, f} +} + +type prettySpan struct { + Span + formatKey base.FormatKey +} + +func (s prettySpan) Format(fs fmt.State, c rune) { + if !s.Valid() { + fmt.Fprintf(fs, "") + return + } + fmt.Fprintf(fs, "%s-%s:{", s.formatKey(s.Start), s.formatKey(s.End)) + for i, k := range s.Keys { + if i > 0 { + fmt.Fprint(fs, " ") + } + fmt.Fprintf(fs, "(#%d,%s", k.SeqNum(), k.Kind()) + if len(k.Suffix) > 0 || len(k.Value) > 0 { + fmt.Fprintf(fs, ",%s", k.Suffix) + } + if len(k.Value) > 0 { + fmt.Fprintf(fs, ",%s", k.Value) + } + fmt.Fprint(fs, ")") + } + fmt.Fprintf(fs, "}") +} + +// SortKeysByTrailer sorts a keys slice by trailer. +func SortKeysByTrailer(keys *[]Key) { + // NB: keys is a pointer to a slice instead of a slice to avoid `sorted` + // escaping to the heap. + sorted := (*keysBySeqNumKind)(keys) + sort.Sort(sorted) +} + +// KeysBySuffix implements sort.Interface, sorting its member Keys slice to by +// Suffix in the order dictated by Cmp. 
+type KeysBySuffix struct { + Cmp base.Compare + Keys []Key +} + +func (s *KeysBySuffix) Len() int { return len(s.Keys) } +func (s *KeysBySuffix) Less(i, j int) bool { return s.Cmp(s.Keys[i].Suffix, s.Keys[j].Suffix) < 0 } +func (s *KeysBySuffix) Swap(i, j int) { s.Keys[i], s.Keys[j] = s.Keys[j], s.Keys[i] } + +// ParseSpan parses the string representation of a Span. It's intended for +// tests. ParseSpan panics if passed a malformed span representation. +func ParseSpan(input string) Span { + var s Span + parts := strings.FieldsFunc(input, func(r rune) bool { + switch r { + case '-', ':', '{', '}': + return true + default: + return unicode.IsSpace(r) + } + }) + s.Start, s.End = []byte(parts[0]), []byte(parts[1]) + + // Each of the remaining parts represents a single Key. + s.Keys = make([]Key, 0, len(parts)-2) + for _, p := range parts[2:] { + keyFields := strings.FieldsFunc(p, func(r rune) bool { + switch r { + case '#', ',', '(', ')': + return true + default: + return unicode.IsSpace(r) + } + }) + + var k Key + // Parse the sequence number. + seqNum, err := strconv.ParseUint(keyFields[0], 10, 64) + if err != nil { + panic(fmt.Sprintf("invalid sequence number: %q: %s", keyFields[0], err)) + } + // Parse the key kind. + kind := base.ParseKind(keyFields[1]) + k.Trailer = base.MakeTrailer(seqNum, kind) + // Parse the optional suffix. + if len(keyFields) >= 3 { + k.Suffix = []byte(keyFields[2]) + } + // Parse the optional value. + if len(keyFields) >= 4 { + k.Value = []byte(keyFields[3]) + } + s.Keys = append(s.Keys, k) + } + return s +} diff --git a/pebble/internal/keyspan/span_test.go b/pebble/internal/keyspan/span_test.go new file mode 100644 index 0000000..29651fb --- /dev/null +++ b/pebble/internal/keyspan/span_test.go @@ -0,0 +1,98 @@ +// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package keyspan + +import ( + "bytes" + "fmt" + "strconv" + "strings" + "testing" + + "github.com/cockroachdb/datadriven" + "github.com/stretchr/testify/require" +) + +// TODO(jackson): Add unit tests for all of the various Span methods. + +func TestSpan_ParseRoundtrip(t *testing.T) { + spans := []string{ + "a-c:{(#5,RANGEDEL)}", + "a-c:{(#5,RANGEDEL) (#2,RANGEDEL)}", + "h-z:{(#20,RANGEKEYSET,@5,foo) (#15,RANGEKEYUNSET,@9) (#2,RANGEKEYDEL)}", + } + for _, input := range spans { + got := ParseSpan(input).String() + if got != input { + t.Errorf("ParseSpan(%q).String() = %q", input, got) + } + } +} + +func TestSpan_Visible(t *testing.T) { + var s Span + datadriven.RunTest(t, "testdata/visible", func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "define": + s = ParseSpan(d.Input) + return fmt.Sprint(s) + case "visible": + var buf bytes.Buffer + for _, line := range strings.Split(d.Input, "\n") { + snapshot, err := strconv.ParseUint(line, 10, 64) + require.NoError(t, err) + fmt.Fprintf(&buf, "%-2d: %s\n", snapshot, s.Visible(snapshot)) + } + return buf.String() + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} + +func TestSpan_VisibleAt(t *testing.T) { + var s Span + datadriven.RunTest(t, "testdata/visible_at", func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "define": + s = ParseSpan(d.Input) + return fmt.Sprint(s) + case "visible-at": + var buf bytes.Buffer + for _, line := range strings.Split(d.Input, "\n") { + snapshot, err := strconv.ParseUint(line, 10, 64) + require.NoError(t, err) + fmt.Fprintf(&buf, "%-2d: %t\n", snapshot, s.VisibleAt(snapshot)) + } + return buf.String() + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} + +func TestSpan_CoversAt(t *testing.T) { + var s Span + datadriven.RunTest(t, "testdata/covers_at", func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "define": + s = ParseSpan(d.Input) + return fmt.Sprint(s) + case 
"covers-at": + var buf bytes.Buffer + for _, line := range strings.Split(d.Input, "\n") { + fields := strings.Fields(line) + snapshot, err := strconv.ParseUint(fields[0], 10, 64) + require.NoError(t, err) + seqNum, err := strconv.ParseUint(fields[1], 10, 64) + require.NoError(t, err) + fmt.Fprintf(&buf, "%d %d : %t\n", snapshot, seqNum, s.CoversAt(snapshot, seqNum)) + } + return buf.String() + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} diff --git a/pebble/internal/keyspan/testdata/bounded_iter b/pebble/internal/keyspan/testdata/bounded_iter new file mode 100644 index 0000000..8532f62 --- /dev/null +++ b/pebble/internal/keyspan/testdata/bounded_iter @@ -0,0 +1,251 @@ +define +a-b:{(#10,RANGEKEYSET,@5,apples)} +d-e:{(#4,RANGEKEYSET,@3,coconut)} +g-h:{(#20,RANGEKEYSET,@5,pineapple) (#20,RANGEKEYSET,@3,guava)} +---- + +# Nothing out of bounds. + +iter lower=a upper=z +first +next +next +next +last +prev +prev +prev +---- +a-b:{(#10,RANGEKEYSET,@5,apples)} +d-e:{(#4,RANGEKEYSET,@3,coconut)} +g-h:{(#20,RANGEKEYSET,@5,pineapple) (#20,RANGEKEYSET,@3,guava)} + +g-h:{(#20,RANGEKEYSET,@5,pineapple) (#20,RANGEKEYSET,@3,guava)} +d-e:{(#4,RANGEKEYSET,@3,coconut)} +a-b:{(#10,RANGEKEYSET,@5,apples)} + + +# Test out of upper bound, but undiscovered until we Next. + +iter lower=a upper=f +first +next +next +prev +---- +a-b:{(#10,RANGEKEYSET,@5,apples)} +d-e:{(#4,RANGEKEYSET,@3,coconut)} + +d-e:{(#4,RANGEKEYSET,@3,coconut)} + +# Test out of upper bound, but discovered before we Next. + +iter lower=a upper=dog +first +next +next +prev +---- +a-b:{(#10,RANGEKEYSET,@5,apples)} +d-e:{(#4,RANGEKEYSET,@3,coconut)} + +d-e:{(#4,RANGEKEYSET,@3,coconut)} + +# Test out of lower bound, but undiscovered until we Prev. + +iter lower=c upper=z +last +prev +prev +next +---- +g-h:{(#20,RANGEKEYSET,@5,pineapple) (#20,RANGEKEYSET,@3,guava)} +d-e:{(#4,RANGEKEYSET,@3,coconut)} + +d-e:{(#4,RANGEKEYSET,@3,coconut)} + +# Test out of lower bound, but discovered before we Prev. 
+ +iter lower=d upper=z +last +prev +prev +next +---- +g-h:{(#20,RANGEKEYSET,@5,pineapple) (#20,RANGEKEYSET,@3,guava)} +d-e:{(#4,RANGEKEYSET,@3,coconut)} + +d-e:{(#4,RANGEKEYSET,@3,coconut)} + +# Test a single span ([b-g)) within the bounds, overlapping on both ends. + +define +a-b:{(#10,RANGEKEYSET,@5)} +b-g:{(#4,RANGEKEYSET,@3)} +g-h:{(#20,RANGEKEYSET,@5)} +---- + +iter lower=c upper=f +seek-ge b +next +next +seek-ge b +prev +prev +seek-lt f +prev +prev +seek-lt f +next +next +prev +prev +---- +b-g:{(#4,RANGEKEYSET,@3)} + + +b-g:{(#4,RANGEKEYSET,@3)} + + +b-g:{(#4,RANGEKEYSET,@3)} + + +b-g:{(#4,RANGEKEYSET,@3)} + + +b-g:{(#4,RANGEKEYSET,@3)} + + +set-prefix bar +---- +set prefix to "bar" + +# Test seeking to a portion of the keyspace that contains no range keys with +# start bounds ≥ the seek key such that the range key also overlaps the current +# prefix. + +iter lower=a upper=z +seek-ge bar +prev +prev +---- +b-g:{(#4,RANGEKEYSET,@3)} + + + +# Test seeking to a portion of the keyspace that contains a range key with a +# start bound < the seek key, and the range key also overlaps the current +# prefix. + +iter lower=a upper=z +seek-lt bar +next +prev +prev +---- +b-g:{(#4,RANGEKEYSET,@3)} + +b-g:{(#4,RANGEKEYSET,@3)} + + +# Test seeking with bounds narrower than the range of the seek prefix. This is +# possible in practice because the bounded iterator iterates over fragments, not +# pre-defragmented range keys. + +iter lower=bar@9 upper=bar@3 +seek-lt bar +next +prev +prev +---- +b-g:{(#4,RANGEKEYSET,@3)} + +b-g:{(#4,RANGEKEYSET,@3)} + + +# Test a similar scenario but on the start prefix of a key. + +iter lower=b@9 upper=b@3 +seek-lt b +next +next +prev +prev +---- + +b-g:{(#4,RANGEKEYSET,@3)} + +b-g:{(#4,RANGEKEYSET,@3)} + + +# Test a scenario where the prefix overlaps a span, but the bounds exclude it. + +iter lower=z@9 upper=z@3 +seek-lt z@3 +next +---- + + + +# Test many spans matching the prefix, due to fragmentation within a prefix. 
+ +define +b-boo:{(#1,RANGEKEYSET,@1)} +c@9-c@8:{(#1,RANGEKEYSET,@1)} +c@8-c@7:{(#1,RANGEKEYSET,@1)} +c@7-c@6:{(#1,RANGEKEYSET,@1)} +c@6-c@5:{(#1,RANGEKEYSET,@1)} +c@5-c@4:{(#1,RANGEKEYSET,@1)} +---- + +set-prefix c +---- +set prefix to "c" + +iter +seek-lt c +next +next +next +next +next +next +---- + +c@9-c@8:{(#1,RANGEKEYSET,@1)} +c@8-c@7:{(#1,RANGEKEYSET,@1)} +c@7-c@6:{(#1,RANGEKEYSET,@1)} +c@6-c@5:{(#1,RANGEKEYSET,@1)} +c@5-c@4:{(#1,RANGEKEYSET,@1)} + + +# Test the same scenario with bounds limiting iteration to a subset of the +# keys. + +iter lower=c@7 upper=c@5 +seek-lt c@7 +next +next +next +---- + +c@7-c@6:{(#1,RANGEKEYSET,@1)} +c@6-c@5:{(#1,RANGEKEYSET,@1)} + + +define +a@7-a@5:{(#1,RANGEKEYSET,@1)} +b-boo:{(#1,RANGEKEYSET,@1)} +c@9-c@8:{(#1,RANGEKEYSET,@1)} +---- + +set-prefix b +---- +set prefix to "b" + +iter +seek-lt c@8 +seek-ge a@9 +---- + + diff --git a/pebble/internal/keyspan/testdata/covers_at b/pebble/internal/keyspan/testdata/covers_at new file mode 100644 index 0000000..c32f7ed --- /dev/null +++ b/pebble/internal/keyspan/testdata/covers_at @@ -0,0 +1,91 @@ +define +a-b:{(#5,RANGEDEL) (#3,RANGEDEL)} +---- +a-b:{(#5,RANGEDEL) (#3,RANGEDEL)} + +covers-at +6 6 +6 5 +6 4 +6 2 +6 3 +5 5 +5 4 +5 3 +5 2 +4 5 +4 1 +3 9 +3 2 +3 1 +3 0 +2 0 +1 0 +---- +6 6 : false +6 5 : false +6 4 : true +6 2 : true +6 3 : true +5 5 : false +5 4 : false +5 3 : false +5 2 : true +4 5 : false +4 1 : true +3 9 : false +3 2 : false +3 1 : false +3 0 : false +2 0 : false +1 0 : false + +# The below sequence number is the minimal batch sequence number (eg, a RANGEDEL +# written right at the beginning of the batch.) In the tests below, all other +# batch sequence numbers are not covered by it. 
+ +define +a-c:{(#36028797018963968,RANGEDEL)} +---- +a-c:{(#36028797018963968,RANGEDEL)} + +covers-at +100 90000 +100 90 +0 0 +33 36028797018964068 +33 36028797018963968 +---- +100 90000 : true +100 90 : true +0 0 : true +33 36028797018964068 : false +33 36028797018963968 : false + +# The below sequence number is a batch sequence number for offset 100. + +define +a-c:{(#36028797018964068,RANGEDEL)} +---- +a-c:{(#36028797018964068,RANGEDEL)} + +covers-at +10 10 +---- +10 10 : true + +# The below sequence number is a batch sequence number for offset 200. It should +# not be covered. + +covers-at +100 36028797018964168 +---- +100 36028797018964168 : false + +# The below sequence number is a batch sequence number for offset 0. It should +# be covered. + +covers-at +100 36028797018963968 +---- +100 36028797018963968 : true diff --git a/pebble/internal/keyspan/testdata/defragmenting_iter b/pebble/internal/keyspan/testdata/defragmenting_iter new file mode 100644 index 0000000..f81ca9b --- /dev/null +++ b/pebble/internal/keyspan/testdata/defragmenting_iter @@ -0,0 +1,395 @@ +# Test a scenario that should NOT result in defragmentation. 
+ +define +a-c:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +c-d:{(#4,RANGEKEYSET,@3,bananas)} +d-e:{(#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@1,pineapple)} +---- + +iter +first +next +next +last +prev +prev +---- +a-c:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +c-d:{(#4,RANGEKEYSET,@3,bananas)} +d-e:{(#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@1,pineapple)} +d-e:{(#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@1,pineapple)} +c-d:{(#4,RANGEKEYSET,@3,bananas)} +a-c:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} + +iter +first +next +next +next +last +prev +prev +prev +---- +a-c:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +c-d:{(#4,RANGEKEYSET,@3,bananas)} +d-e:{(#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@1,pineapple)} + +d-e:{(#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@1,pineapple)} +c-d:{(#4,RANGEKEYSET,@3,bananas)} +a-c:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} + + +# Test a scenario that SHOULD result in internal defragmentation ([a,c) and +# [c,d) should be merged. + +define +a-c:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +c-d:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +d-e:{(#1,RANGEKEYSET,@3,bananas)} +---- + +iter +first +next +next +---- +a-d:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +d-e:{(#1,RANGEKEYSET,@3,bananas)} + + +# Test defragmenting in both directions at seek keys. 
+ +define +a-f:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +f-h:{(#3,RANGEKEYSET,@3,bananas)} +h-p:{(#3,RANGEKEYSET,@3,bananas)} +p-t:{(#3,RANGEKEYSET,@3,bananas)} +---- + +iter +seekge b +prev +seekge b +next +seeklt d +next +seeklt d +prev +---- +a-f:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} + +a-f:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +f-t:{(#3,RANGEKEYSET,@3,bananas)} +a-f:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +f-t:{(#3,RANGEKEYSET,@3,bananas)} +a-f:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} + + +iter +seeklt d +next +prev +---- +a-f:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +f-t:{(#3,RANGEKEYSET,@3,bananas)} +a-f:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} + +# Test next-ing and prev-ing around seek keys. + +define +a-f:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +f-h:{(#3,RANGEKEYSET,@3,bananas)} +h-p:{(#3,RANGEKEYSET,@3,bananas)} +p-t:{(#3,RANGEKEYSET,@3,bananas)} +t-z:{(#4,RANGEKEYSET,@2,oranges)} +---- + +iter +seekge r +prev +next +next +---- +f-t:{(#3,RANGEKEYSET,@3,bananas)} +a-f:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +f-t:{(#3,RANGEKEYSET,@3,bananas)} +t-z:{(#4,RANGEKEYSET,@2,oranges)} + +iter +seekge f +seekge h +seekge p +seekge t +seekge u +seekge v +seekge z +---- +f-t:{(#3,RANGEKEYSET,@3,bananas)} +f-t:{(#3,RANGEKEYSET,@3,bananas)} +f-t:{(#3,RANGEKEYSET,@3,bananas)} +t-z:{(#4,RANGEKEYSET,@2,oranges)} +t-z:{(#4,RANGEKEYSET,@2,oranges)} +t-z:{(#4,RANGEKEYSET,@2,oranges)} + + +iter +seeklt f +seeklt h +seeklt p +seeklt t +seeklt u +seeklt z +---- +a-f:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +f-t:{(#3,RANGEKEYSET,@3,bananas)} +f-t:{(#3,RANGEKEYSET,@3,bananas)} 
+f-t:{(#3,RANGEKEYSET,@3,bananas)} +t-z:{(#4,RANGEKEYSET,@2,oranges)} +t-z:{(#4,RANGEKEYSET,@2,oranges)} + +# Test iteration with a reducer that collects keys across all spans that +# constitute a defragmented span. Abutting spans are always combined. + +define +a-b:{(#3,RANGEDEL) (#2,RANGEDEL)} +b-c:{(#4,RANGEDEL) (#1,RANGEDEL)} +c-d:{(#5,RANGEDEL)} +e-f:{(#1,RANGEDEL)} +f-g:{(#2,RANGEDEL)} +---- + +iter equal=always reducer=collect +first +next +next +last +prev +prev +---- +a-d:{(#5,RANGEDEL) (#4,RANGEDEL) (#3,RANGEDEL) (#2,RANGEDEL) (#1,RANGEDEL)} +e-g:{(#2,RANGEDEL) (#1,RANGEDEL)} + +e-g:{(#2,RANGEDEL) (#1,RANGEDEL)} +a-d:{(#5,RANGEDEL) (#4,RANGEDEL) (#3,RANGEDEL) (#2,RANGEDEL) (#1,RANGEDEL)} + + +# Test defragmentation of non-empty (i.e. more than one value) fragments, while +# empty fragments are left untouched. + +define +a-c:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +c-d:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +d-e:{} +e-f:{} +g-h:{(#1,RANGEKEYSET,@3,bananas)} +---- + +iter +first +next +next +next +next +---- +a-d:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +d-e:{} +e-f:{} +g-h:{(#1,RANGEKEYSET,@3,bananas)} + + +iter +last +prev +prev +prev +prev +---- +g-h:{(#1,RANGEKEYSET,@3,bananas)} +e-f:{} +d-e:{} +a-d:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} + + +iter +seekge d +next +prev +seekge e +next +prev +prev +prev +---- +d-e:{} +e-f:{} +d-e:{} +e-f:{} +g-h:{(#1,RANGEKEYSET,@3,bananas)} +e-f:{} +d-e:{} +a-d:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} + +iter +seeklt e +next +prev +seeklt f +next +prev +prev +prev +---- +d-e:{} +e-f:{} +d-e:{} +e-f:{} +g-h:{(#1,RANGEKEYSET,@3,bananas)} +e-f:{} +d-e:{} +a-d:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} + +# Test that the defragmenting iterator does yield errors in cases that do not +# need to 
defragment. + +define +a-c:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +c-d:{(#4,RANGEKEYSET,@3,bananas)} +d-e:{(#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@1,pineapple)} +---- + +iter probes=(ErrInjected) +seek-ge b +seek-lt d +first +last +---- + err= + err= + err= + err= + +# Next and Prev may only be called on positioned iterators, so to test +# propagation of errors on Next or Prev, we must use a probe that injects errors +# on Next or Prev but leaves seeks untouched. +# +# The situation is complicated by the fact that a seek on the defragmenting +# iterator will result in Next/Prevs on the embedded iterator (in order to peek +# ahead to see if anything needs to be defragmented). +# +# First we test the seeks too result in injected errors when they Next/Prev +# ahead to determine if there's anything to defragment. + +iter probes=((If (Or OpNext OpPrev) ErrInjected noop), (Log "# inner.")) +seek-ge b +next +seek-lt cat +prev +---- +# inner.SeekGE("b") = a-c:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +# inner.Prev() = nil + err= +# inner.Next() = nil + err= +# inner.SeekLT("cat") = c-d:{(#4,RANGEKEYSET,@3,bananas)} +# inner.Next() = nil + err= +# inner.Prev() = nil + err= + +# Use a probe that injects errors whenever we otherwise would've returned the +# c-d span. First and Last calls should both return errors because during +# defragmenting they'll step the internal iterator on to the error position. + +iter probes=((If (Equal StartKey (Bytes "c")) ErrInjected noop), (Log "# inner.")) +first +last +---- +# inner.First() = a-c:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +# inner.Next() = nil + err= +# inner.Last() = d-e:{(#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@1,pineapple)} +# inner.Prev() = nil + err= + +# In order to test that errors are injected when Next-ing the top-level +# iterator, define test data that includes 5 spans. 
+ +define +a-b:{(#3,RANGEKEYUNSET,@5)} +b-c:{(#4,RANGEKEYSET,@5,apples)} +c-d:{(#5,RANGEKEYSET,@3,bananas)} +d-e:{(#6,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@1,pineapple)} +e-f:{(#4,RANGEKEYSET,@1,pineapple)} +---- + +# Use a probe that injects errors whenever we would've otherwise returned the +# c-d span. Our initial First/Last seeks should not step on to the error +# position and should not error. The subsequent Next/Prev however should. + +iter probes=((If (Equal StartKey (Bytes "c")) ErrInjected noop), (Log "# inner.")) +first +next +last +prev +---- +# inner.First() = a-b:{(#3,RANGEKEYUNSET,@5)} +# inner.Next() = b-c:{(#4,RANGEKEYSET,@5,apples)} +a-b:{(#3,RANGEKEYUNSET,@5)} +# inner.Next() = nil + err= +# inner.Last() = e-f:{(#4,RANGEKEYSET,@1,pineapple)} +# inner.Prev() = d-e:{(#6,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@1,pineapple)} +e-f:{(#4,RANGEKEYSET,@1,pineapple)} +# inner.Prev() = nil + err= + +# When seeking, the defragmenting iterator needs to defragment in both +# directions. A forward seek first defragments in the reverse direction, and +# then in the forward direction. A backward seek does the inverse. If an error +# is encountered while performing the first defragment scan, it must be +# surfaced. +# +# To test this scenario we again inject errors instead of the c-d span. +# - The SeekGE('d') should land on d-e, try to defragment backward first and +# encounter the error. +# - The SeekLT('c') should land on b-c, try to defragment forward first and +# encounter the error. 
+iter probes=((If (Equal StartKey (Bytes "c")) ErrInjected noop), (Log "# inner.")) +seek-ge d +seek-lt c +---- +# inner.SeekGE("d") = d-e:{(#6,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@1,pineapple)} +# inner.Prev() = nil + err= +# inner.SeekLT("c") = b-c:{(#4,RANGEKEYSET,@5,apples)} +# inner.Next() = nil + err= + +# When changing directions in some circumstances we step an iterator and then +# defragment twice; once to skip over the current span and once to construct the +# next defragmented span in the new iteration direction. If the first step of +# the iterator surfaces an error, ensure that it's still propagated. +iter probes=((If (And OpPrev (Equal StartKey (Bytes "c"))) ErrInjected noop), (Log "# inner.")) +seek-ge c +prev +---- +# inner.SeekGE("c") = c-d:{(#5,RANGEKEYSET,@3,bananas)} +# inner.Prev() = b-c:{(#4,RANGEKEYSET,@5,apples)} +# inner.Next() = c-d:{(#5,RANGEKEYSET,@3,bananas)} +# inner.Next() = d-e:{(#6,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@1,pineapple)} +c-d:{(#5,RANGEKEYSET,@3,bananas)} +# inner.Prev() = nil + err= + +iter probes=((If (And OpNext (Equal StartKey (Bytes "c"))) ErrInjected noop), (Log "# inner.")) +seek-lt d +next +---- +# inner.SeekLT("d") = c-d:{(#5,RANGEKEYSET,@3,bananas)} +# inner.Next() = d-e:{(#6,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@1,pineapple)} +# inner.Prev() = c-d:{(#5,RANGEKEYSET,@3,bananas)} +# inner.Prev() = b-c:{(#4,RANGEKEYSET,@5,apples)} +c-d:{(#5,RANGEKEYSET,@3,bananas)} +# inner.Next() = nil + err= diff --git a/pebble/internal/keyspan/testdata/filtering_iter b/pebble/internal/keyspan/testdata/filtering_iter new file mode 100644 index 0000000..3299254 --- /dev/null +++ b/pebble/internal/keyspan/testdata/filtering_iter @@ -0,0 +1,84 @@ +# The following filters are available: +# - no-op: passes through all spans. +# - key-kind-{set,unset,del}: filters keys in spans with the given key kind. 
+ +define +a-c:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +c-d:{(#4,RANGEKEYSET,@3,bananas) (#3,RANGEKEYDEL)} +d-e:{(#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@1,pineapple)} +---- + +iter filter=no-op +first +next +next +next +---- +a-c:{(#3,RANGEKEYUNSET,@5) (#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +c-d:{(#4,RANGEKEYSET,@3,bananas) (#3,RANGEKEYDEL)} +d-e:{(#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@1,pineapple)} +. + +iter filter=key-kind-set +first +next +next +next +---- +a-c:{(#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +c-d:{(#4,RANGEKEYSET,@3,bananas)} +d-e:{(#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@1,pineapple)} +. + +iter filter=key-kind-set +last +prev +prev +prev +---- +d-e:{(#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@1,pineapple)} +c-d:{(#4,RANGEKEYSET,@3,bananas)} +a-c:{(#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +. + +iter filter=key-kind-set +seek-ge a +seek-ge c +next +seek-lt b +prev +next +seek-lt z +next +---- +a-c:{(#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +c-d:{(#4,RANGEKEYSET,@3,bananas)} +d-e:{(#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@1,pineapple)} +a-c:{(#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +. +a-c:{(#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +d-e:{(#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@1,pineapple)} +. + +iter filter=key-kind-set +first +next +next +---- +a-c:{(#2,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@3,bananas)} +c-d:{(#4,RANGEKEYSET,@3,bananas)} +d-e:{(#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@1,pineapple)} + +iter filter=key-kind-unset +first +next +---- +a-c:{(#3,RANGEKEYUNSET,@5)} +. + +iter filter=key-kind-del +first +next +---- +c-d:{(#3,RANGEKEYDEL)} +. 
diff --git a/pebble/internal/keyspan/testdata/fragmenter b/pebble/internal/keyspan/testdata/fragmenter new file mode 100644 index 0000000..a064b00 --- /dev/null +++ b/pebble/internal/keyspan/testdata/fragmenter @@ -0,0 +1,951 @@ +build +3: a-----------m +2: f------------s +1: j---------------z +---- +3: a----f +3: f---j +2: f---j +3: j--m +2: j--m +1: j--m +2: m-----s +1: m-----s +1: s------z + +# Building is idempotent. +build +3: a----f +3: f---j +2: f---j +3: j--m +2: j--m +1: j--m +2: m-----s +1: m-----s +1: s------z +---- +3: a----f +3: f---j +2: f---j +3: j--m +2: j--m +1: j--m +2: m-----s +1: m-----s +1: s------z + +# An empty tombstone will not get emitted. +build +1: a-a +---- + +build +2: c-e +1: a-c +---- +pebble: keys must be added in order: c > a + +build +3: a-a +3: a-b +2: a-b +1: a-a +---- +3: ab +2: ab + +build +1: a---e +3: b-d +---- +1: ab +3: b-d +1: b-d +1: de + +get t=4 +a#3 a#2 a#1 a#0 +---- +alive alive alive deleted + +get t=3 +a#2 a#1 a#0 +---- +alive alive deleted + +get t=2 +a#1 a#0 +---- +alive deleted + +get t=4 +b#3 b#2 b#1 b#0 +---- +alive deleted deleted deleted + +get t=3 +b#2 b#1 b#0 +---- +alive alive deleted + +get t=2 +b#1 b#0 +---- +alive deleted + +get t=4 +c#3 c#2 c#1 c#0 +---- +alive deleted deleted deleted + +get t=3 +c#2 c#1 c#0 +---- +alive alive deleted + +get t=2 +c#1 c#0 +---- +alive deleted + +get t=4 +d#3 d#2 d#1 d#0 +---- +alive alive alive deleted + +get t=3 +d#2 d#1 d#0 +---- +alive alive deleted + +get t=2 +d#1 d#0 +---- +alive deleted + + +build +3: a---e +1: b-d +---- +3: ab +3: b-d +1: b-d +3: de + +get t=4 +a#3 a#2 a#1 a#0 +---- +alive deleted deleted deleted + +get t=3 +a#2 a#1 a#0 +---- +alive alive alive + +get t=2 +a#1 a#0 +---- +alive alive + +get t=4 +b#3 b#2 b#1 b#0 +---- +alive deleted deleted deleted + +get t=3 +b#2 b#1 b#0 +---- +alive alive deleted + +get t=2 +b#1 b#0 +---- +alive deleted + +get t=4 +c#3 c#2 c#1 c#0 +---- +alive deleted deleted deleted + +get t=3 +c#2 c#1 c#0 +---- +alive alive 
deleted + +get t=2 +c#1 c#0 +---- +alive deleted + +get t=4 +d#3 d#2 d#1 d#0 +---- +alive deleted deleted deleted + +get t=3 +d#2 d#1 d#0 +---- +alive alive alive + +get t=2 +d#1 d#0 +---- +alive alive + + +build +3: a--d +1: b--e +---- +3: ab +3: b-d +1: b-d +1: de + +get t=4 +a#3 a#2 a#1 a#0 +---- +alive deleted deleted deleted + +get t=3 +a#2 a#1 a#0 +---- +alive alive alive + +get t=2 +a#1 a#0 +---- +alive alive + +get t=4 +b#3 b#2 b#1 b#0 +---- +alive deleted deleted deleted + +get t=3 +b#2 b#1 b#0 +---- +alive alive deleted + +get t=2 +b#1 b#0 +---- +alive deleted + +get t=4 +c#3 c#2 c#1 c#0 +---- +alive deleted deleted deleted + +get t=3 +c#2 c#1 c#0 +---- +alive alive deleted + +get t=2 +c#1 c#0 +---- +alive deleted + +get t=4 +d#3 d#2 d#1 d#0 +---- +alive alive alive deleted + +get t=3 +d#2 d#1 d#0 +---- +alive alive deleted + +get t=2 +d#1 d#0 +---- +alive deleted + + +build +1: a--d +3: b--e +---- +1: ab +3: b-d +1: b-d +3: de + +get t=4 +a#3 a#2 a#1 a#0 +---- +alive alive alive deleted + +get t=3 +a#2 a#1 a#0 +---- +alive alive deleted + +get t=2 +a#1 a#0 +---- +alive deleted + +get t=4 +b#3 b#2 b#1 b#0 +---- +alive deleted deleted deleted + +get t=3 +b#2 b#1 b#0 +---- +alive alive deleted + +get t=2 +b#1 b#0 +---- +alive deleted + +get t=4 +c#3 c#2 c#1 c#0 +---- +alive deleted deleted deleted + +get t=3 +c#2 c#1 c#0 +---- +alive alive deleted + +get t=2 +c#1 c#0 +---- +alive deleted + +get t=4 +d#3 d#2 d#1 d#0 +---- +alive deleted deleted deleted + +get t=3 +d#2 d#1 d#0 +---- +alive alive alive + +get t=2 +d#1 d#0 +---- +alive alive + + +build +3: a--d +1: a---e +---- +3: a--d +1: a--d +1: de + +get t=4 +a#3 a#2 a#1 a#0 +---- +alive deleted deleted deleted + +get t=3 +a#2 a#1 a#0 +---- +alive alive deleted + +get t=2 +a#1 a#0 +---- +alive deleted + +get t=4 +b#3 b#2 b#1 b#0 +---- +alive deleted deleted deleted + +get t=3 +b#2 b#1 b#0 +---- +alive alive deleted + +get t=2 +b#1 b#0 +---- +alive deleted + +get t=4 +c#3 c#2 c#1 c#0 +---- +alive deleted 
deleted deleted + +get t=3 +c#2 c#1 c#0 +---- +alive alive deleted + +get t=2 +c#1 c#0 +---- +alive deleted + +get t=4 +d#3 d#2 d#1 d#0 +---- +alive alive alive deleted + +get t=3 +d#2 d#1 d#0 +---- +alive alive deleted + +get t=2 +d#1 d#0 +---- +alive deleted + + +build +3: a---e +1: a--d +---- +3: a--d +1: a--d +3: de + +get t=4 +a#3 a#2 a#1 a#0 +---- +alive deleted deleted deleted + +get t=3 +a#2 a#1 a#0 +---- +alive alive deleted + +get t=2 +a#1 a#0 +---- +alive deleted + +get t=4 +b#3 b#2 b#1 b#0 +---- +alive deleted deleted deleted + +get t=3 +b#2 b#1 b#0 +---- +alive alive deleted + +get t=2 +b#1 b#0 +---- +alive deleted + +get t=4 +c#3 c#2 c#1 c#0 +---- +alive deleted deleted deleted + +get t=3 +c#2 c#1 c#0 +---- +alive alive deleted + +get t=2 +c#1 c#0 +---- +alive deleted + +get t=4 +d#3 d#2 d#1 d#0 +---- +alive deleted deleted deleted + +get t=3 +d#2 d#1 d#0 +---- +alive alive alive + +get t=2 +d#1 d#0 +---- +alive alive + + +build +1: a---e +3: b--e +---- +1: ab +3: b--e +1: b--e + +get t=4 +a#3 a#2 a#1 a#0 +---- +alive alive alive deleted + +get t=3 +a#2 a#1 a#0 +---- +alive alive deleted + +get t=2 +a#1 a#0 +---- +alive deleted + +get t=4 +b#3 b#2 b#1 b#0 +---- +alive deleted deleted deleted + +get t=3 +b#2 b#1 b#0 +---- +alive alive deleted + +get t=2 +b#1 b#0 +---- +alive deleted + +get t=4 +c#3 c#2 c#1 c#0 +---- +alive deleted deleted deleted + +get t=3 +c#2 c#1 c#0 +---- +alive alive deleted + +get t=2 +c#1 c#0 +---- +alive deleted + +get t=4 +d#3 d#2 d#1 d#0 +---- +alive deleted deleted deleted + +get t=3 +d#2 d#1 d#0 +---- +alive alive deleted + +get t=2 +d#1 d#0 +---- +alive deleted + + +build +3: a---e +1: b--e +---- +3: ab +3: b--e +1: b--e + +get t=4 +a#3 a#2 a#1 a#0 +---- +alive deleted deleted deleted + +get t=3 +a#2 a#1 a#0 +---- +alive alive alive + +get t=2 +a#1 a#0 +---- +alive alive + +get t=4 +b#3 b#2 b#1 b#0 +---- +alive deleted deleted deleted + +get t=3 +b#2 b#1 b#0 +---- +alive alive deleted + +get t=2 +b#1 b#0 +---- +alive 
deleted + +get t=4 +c#3 c#2 c#1 c#0 +---- +alive deleted deleted deleted + +get t=3 +c#2 c#1 c#0 +---- +alive alive deleted + +get t=2 +c#1 c#0 +---- +alive deleted + +get t=4 +d#3 d#2 d#1 d#0 +---- +alive deleted deleted deleted + +get t=3 +d#2 d#1 d#0 +---- +alive alive deleted + +get t=2 +d#1 d#0 +---- +alive deleted + + +build +3: a---e +1: a---e +---- +3: a---e +1: a---e + +get t=4 +a#3 a#2 a#1 a#0 +---- +alive deleted deleted deleted + +get t=3 +a#2 a#1 a#0 +---- +alive alive deleted + +get t=2 +a#1 a#0 +---- +alive deleted + +get t=4 +b#3 b#2 b#1 b#0 +---- +alive deleted deleted deleted + +get t=3 +b#2 b#1 b#0 +---- +alive alive deleted + +get t=2 +b#1 b#0 +---- +alive deleted + +get t=4 +c#3 c#2 c#1 c#0 +---- +alive deleted deleted deleted + +get t=3 +c#2 c#1 c#0 +---- +alive alive deleted + +get t=2 +c#1 c#0 +---- +alive deleted + +get t=4 +d#3 d#2 d#1 d#0 +---- +alive deleted deleted deleted + +get t=3 +d#2 d#1 d#0 +---- +alive alive deleted + +get t=2 +d#1 d#0 +---- +alive deleted + + +build +1: a-c +3: c-e +---- +1: a-c +3: c-e + +get t=4 +a#3 a#2 a#1 a#0 +---- +alive alive alive deleted + +get t=3 +a#2 a#1 a#0 +---- +alive alive deleted + +get t=2 +a#1 a#0 +---- +alive deleted + +get t=4 +b#3 b#2 b#1 b#0 +---- +alive alive alive deleted + +get t=3 +b#2 b#1 b#0 +---- +alive alive deleted + +get t=2 +b#1 b#0 +---- +alive deleted + +get t=4 +c#3 c#2 c#1 c#0 +---- +alive deleted deleted deleted + +get t=3 +c#2 c#1 c#0 +---- +alive alive alive + +get t=2 +c#1 c#0 +---- +alive alive + +get t=4 +d#3 d#2 d#1 d#0 +---- +alive deleted deleted deleted + +get t=3 +d#2 d#1 d#0 +---- +alive alive alive + +get t=2 +d#1 d#0 +---- +alive alive + + +build +3: a-c +1: c-e +---- +3: a-c +1: c-e + +get t=4 +a#3 a#2 a#1 a#0 +---- +alive deleted deleted deleted + +get t=3 +a#2 a#1 a#0 +---- +alive alive alive + +get t=2 +a#1 a#0 +---- +alive alive + +get t=4 +b#3 b#2 b#1 b#0 +---- +alive deleted deleted deleted + +get t=3 +b#2 b#1 b#0 +---- +alive alive alive + +get t=2 +b#1 
b#0 +---- +alive alive + +get t=4 +c#3 c#2 c#1 c#0 +---- +alive alive alive deleted + +get t=3 +c#2 c#1 c#0 +---- +alive alive deleted + +get t=2 +c#1 c#0 +---- +alive deleted + +get t=4 +d#3 d#2 d#1 d#0 +---- +alive alive alive deleted + +get t=3 +d#2 d#1 d#0 +---- +alive alive deleted + +get t=2 +d#1 d#0 +---- +alive deleted + + +build +1: a-c +3: de +---- +1: a-c +3: de + +get t=4 +a#3 a#2 a#1 a#0 +---- +alive alive alive deleted + +get t=3 +a#2 a#1 a#0 +---- +alive alive deleted + +get t=2 +a#1 a#0 +---- +alive deleted + +get t=4 +b#3 b#2 b#1 b#0 +---- +alive alive alive deleted + +get t=3 +b#2 b#1 b#0 +---- +alive alive deleted + +get t=2 +b#1 b#0 +---- +alive deleted + +get t=4 +c#3 c#2 c#1 c#0 +---- +alive alive alive alive + +get t=3 +c#2 c#1 c#0 +---- +alive alive alive + +get t=2 +c#1 c#0 +---- +alive alive + +get t=4 +d#3 d#2 d#1 d#0 +---- +alive deleted deleted deleted + +get t=3 +d#2 d#1 d#0 +---- +alive alive alive + +get t=2 +d#1 d#0 +---- +alive alive + + +build +3: a-c +1: de +---- +3: a-c +1: de + +get t=4 +a#3 a#2 a#1 a#0 +---- +alive deleted deleted deleted + +get t=3 +a#2 a#1 a#0 +---- +alive alive alive + +get t=2 +a#1 a#0 +---- +alive alive + +get t=4 +b#3 b#2 b#1 b#0 +---- +alive deleted deleted deleted + +get t=3 +b#2 b#1 b#0 +---- +alive alive alive + +get t=2 +b#1 b#0 +---- +alive alive + +get t=4 +c#3 c#2 c#1 c#0 +---- +alive alive alive alive + +get t=3 +c#2 c#1 c#0 +---- +alive alive alive + +get t=2 +c#1 c#0 +---- +alive alive + +get t=4 +d#3 d#2 d#1 d#0 +---- +alive alive alive deleted + +get t=3 +d#2 d#1 d#0 +---- +alive alive deleted + +get t=2 +d#1 d#0 +---- +alive deleted diff --git a/pebble/internal/keyspan/testdata/fragmenter_covers b/pebble/internal/keyspan/testdata/fragmenter_covers new file mode 100644 index 0000000..abd505d --- /dev/null +++ b/pebble/internal/keyspan/testdata/fragmenter_covers @@ -0,0 +1,58 @@ +# This datadriven test uses a single command 'build' that illustrates a sequence +# of calls to a fragmenter. 
+# +# 'add' lines add a new span with the provided sequence number and the provided +# bounds. 'add' outputs nothing. +# +# 'deleted' lines test whether the provided key is deleted by a RANGEDEL in the +# fragmenter when read at the trailing snapshot sequence number. + +build +deleted a.SET.0 5 +add 3: a-----------m +deleted a.SET.0 5 +deleted a.SET.1 5 +deleted a.SET.1 2 +deleted a.SET.2 5 +deleted a.SET.3 5 +deleted l.SET.3 5 +add 2: f------------s +deleted e.SET.3 5 +deleted f.SET.2 5 +deleted l.SET.2 5 +deleted m.SET.2 5 +add 1: j---------------z +deleted j.SET.1 5 +deleted j.SET.1 1 +deleted j.SET.2 5 +deleted j.SET.3 5 +deleted l.SET.2 5 +deleted m.SET.2 5 +deleted r.SET.1 5 +deleted r.SET.1 1 +deleted s.SET.1 5 +deleted y.SET.0 5 +deleted z.SET.0 5 +---- +a#0,1: none +a#0,1: visibly +a#1,1: visibly +a#1,1: invisibly +a#2,1: visibly +a#3,1: none +l#3,1: none +e#3,1: pebble: keys must be in order: f > e#3,SET +f#2,1: visibly +l#2,1: visibly +m#2,1: none +j#1,1: visibly +j#1,1: invisibly +j#2,1: visibly +j#3,1: none +l#2,1: visibly +m#2,1: none +r#1,1: visibly +r#1,1: invisibly +s#1,1: none +y#0,1: visibly +z#0,1: none diff --git a/pebble/internal/keyspan/testdata/fragmenter_emit_order b/pebble/internal/keyspan/testdata/fragmenter_emit_order new file mode 100644 index 0000000..9af2c42 --- /dev/null +++ b/pebble/internal/keyspan/testdata/fragmenter_emit_order @@ -0,0 +1,21 @@ +build +a.RANGEKEYSET.5 b +a.RANGEKEYSET.4 b +a.RANGEKEYUNSET.6 b +---- +a b: #6,RANGEKEYUNSET, #5,RANGEKEYSET, #4,RANGEKEYSET +- + +# Test that keys emitted together that share the same sequence number are +# ordered by key kind, descending. 
+# NB: RANGEKEYSET > RANGEKEYUNSET > RANGEKEYDEL + +build +b.RANGEKEYSET.5 c +b.RANGEKEYUNSET.5 d +b.RANGEKEYDEL.5 c +---- +b c: #5,RANGEKEYSET, #5,RANGEKEYUNSET, #5,RANGEKEYDEL +- +c d: #5,RANGEKEYUNSET +- diff --git a/pebble/internal/keyspan/testdata/fragmenter_truncate_and_flush_to b/pebble/internal/keyspan/testdata/fragmenter_truncate_and_flush_to new file mode 100644 index 0000000..9b7ecce --- /dev/null +++ b/pebble/internal/keyspan/testdata/fragmenter_truncate_and_flush_to @@ -0,0 +1,113 @@ +build +2: a--c +1: b--d +truncate-and-flush-to c +---- +2: ab +2: bc +1: bc +1: cd + +build +truncate-and-flush-to c +1: b--d +---- +pebble: start key (b) < flushed key (c) + +build +truncate-and-flush-to c +truncate-and-flush-to b +---- +pebble: start key (b) < flushed key (c) + +# Call out of order + +build +3: a--d +2: d--g +truncate-and-flush-to c +---- +pebble: start key (c) < flushed key (d) + +build +3: a--d +truncate-and-flush-to a +---- +3: a--d + +build +3: a--d +2: d--g +truncate-and-flush-to d +---- +3: a--d +2: d--g + +build +2: a----f +truncate-and-flush-to c +---- +2: a-c +2: c--f + +build +2: a----f +truncate-and-flush-to f +---- +2: a----f + +build +2: a----f +truncate-and-flush-to g +---- +2: a----f + +build +3: a-c +1: a-----g +truncate-and-flush-to d +---- +3: a-c +1: a-c +1: cd +1: d--g + +build +2: a---e +1: a------h +truncate-and-flush-to c +---- +2: a-c +1: a-c +2: c-e +1: c-e +1: e--h + +build +3: a-c +2: a---e +1: a-----g +truncate-and-flush-to d +3: d----i +---- +3: a-c +2: a-c +1: a-c +2: cd +1: cd +3: de +2: de +1: de +3: e-g +1: e-g +3: g-i + +build +3: a-c +2: a-----g +truncate-and-flush-to e +---- +3: a-c +2: a-c +2: c-e +2: e-g diff --git a/pebble/internal/keyspan/testdata/fragmenter_values b/pebble/internal/keyspan/testdata/fragmenter_values new file mode 100644 index 0000000..7462ae8 --- /dev/null +++ b/pebble/internal/keyspan/testdata/fragmenter_values @@ -0,0 +1,65 @@ +build +3: a-----------m apples +2: f------------s bananas +1: 
j---------------z coconuts +---- +3: a----f apples +3: f---j apples +2: f---j bananas +3: j--m apples +2: j--m bananas +1: j--m coconuts +2: m-----s bananas +1: m-----s coconuts +1: s------z coconuts + +# Building is idempotent. +build +3: a----f a +3: f---j b +2: f---j c +3: j--m d +2: j--m e +1: j--m f +2: m-----s g +1: m-----s h +1: s------z i +---- +3: a----f a +3: f---j b +2: f---j c +3: j--m d +2: j--m e +1: j--m f +2: m-----s g +1: m-----s h +1: s------z i + +build +2: a--c apple +1: b--d banana +truncate-and-flush-to c +---- +2: ab apple +2: bc apple +1: bc banana +1: cd banana + +build +3: a-c apple +2: a---e banana +1: a-----g coconut +truncate-and-flush-to d +3: d----i orange +---- +3: a-c apple +2: a-c banana +1: a-c coconut +2: cd banana +1: cd coconut +3: de orange +2: de banana +1: de coconut +3: e-g orange +1: e-g coconut +3: g-i orange diff --git a/pebble/internal/keyspan/testdata/interleaving_iter b/pebble/internal/keyspan/testdata/interleaving_iter new file mode 100644 index 0000000..b49db93 --- /dev/null +++ b/pebble/internal/keyspan/testdata/interleaving_iter @@ -0,0 +1,998 @@ +define-rangekeys +a-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@2,oranges)} +c-d:{(#4,RANGEKEYSET,@3,coconut)} +e-f:{(#20,RANGEKEYSET,@5,pineapple) (#20,RANGEKEYSET,@3,guava)} +h-j:{(#22,RANGEKEYDEL) (#21,RANGEKEYSET,@5,peaches) (#21,RANGEKEYSET,@3,starfruit)} +l-m:{(#2,RANGEKEYUNSET,@9) (#2,RANGEKEYUNSET,@5)} +q-z:{(#14,RANGEKEYSET,@9,mangos)} +---- +OK + +define-pointkeys +artichoke.SET.10 +artichoke.SET.8 +carrot.SET.13 +cauliflower.DEL.9 +parsnip.SET.3 +tomato.SET.2 +zucchini.MERGE.12 +---- +OK + +iter +first +next +next +next +next +next +next +next +next +next +next +next +---- +-- SpanChanged(nil) +-- SpanChanged(a-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@2,oranges)}) +PointKey: a#72057594037927935,21 +Span: 
a-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@2,oranges)} +- +PointKey: artichoke#10,1 +Span: a-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@2,oranges)} +- +PointKey: artichoke#8,1 +Span: a-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@2,oranges)} +- +-- SpanChanged(c-d:{(#4,RANGEKEYSET,@3,coconut)}) +PointKey: c#72057594037927935,21 +Span: c-d:{(#4,RANGEKEYSET,@3,coconut)} +- +PointKey: carrot#13,1 +Span: c-d:{(#4,RANGEKEYSET,@3,coconut)} +- +PointKey: cauliflower#9,0 +Span: c-d:{(#4,RANGEKEYSET,@3,coconut)} +- +-- SpanChanged(e-f:{(#20,RANGEKEYSET,@5,pineapple) (#20,RANGEKEYSET,@3,guava)}) +PointKey: e#72057594037927935,21 +Span: e-f:{(#20,RANGEKEYSET,@5,pineapple) (#20,RANGEKEYSET,@3,guava)} +- +-- SpanChanged(h-j:{(#22,RANGEKEYDEL) (#21,RANGEKEYSET,@5,peaches) (#21,RANGEKEYSET,@3,starfruit)}) +PointKey: h#72057594037927935,19 +Span: h-j:{(#22,RANGEKEYDEL) (#21,RANGEKEYSET,@5,peaches) (#21,RANGEKEYSET,@3,starfruit)} +- +-- SpanChanged(l-m:{(#2,RANGEKEYUNSET,@9) (#2,RANGEKEYUNSET,@5)}) +PointKey: l#72057594037927935,20 +Span: l-m:{(#2,RANGEKEYUNSET,@9) (#2,RANGEKEYUNSET,@5)} +- +-- SpanChanged(nil) +PointKey: parsnip#3,1 +Span: +- +-- SpanChanged(q-z:{(#14,RANGEKEYSET,@9,mangos)}) +PointKey: q#72057594037927935,21 +Span: q-z:{(#14,RANGEKEYSET,@9,mangos)} +- +PointKey: tomato#2,1 +Span: q-z:{(#14,RANGEKEYSET,@9,mangos)} +- + +# Test set-bounds passes through to the underlying point iterator and truncates +# a range key's end. 
+ +iter +set-bounds b carrot +seek-ge b +next +next +---- +-- SpanChanged(nil) +-- SpanChanged(b-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@2,oranges)}) +PointKey: b#72057594037927935,21 +Span: b-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@2,oranges)} +- +-- SpanChanged(c-carrot:{(#4,RANGEKEYSET,@3,coconut)}) +PointKey: c#72057594037927935,21 +Span: c-carrot:{(#4,RANGEKEYSET,@3,coconut)} +- +-- SpanChanged(nil) +. + + +# Test set-bounds passes through to the underlying point iterator and truncates +# a range key's start. + +iter +set-bounds b carrot +seek-lt carrot +prev +prev +---- +-- SpanChanged(nil) +-- SpanChanged(c-carrot:{(#4,RANGEKEYSET,@3,coconut)}) +PointKey: c#72057594037927935,21 +Span: c-carrot:{(#4,RANGEKEYSET,@3,coconut)} +- +-- SpanChanged(b-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@2,oranges)}) +PointKey: b#72057594037927935,21 +Span: b-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@2,oranges)} +- +-- SpanChanged(nil) +. + +# Test seek-ge. +# NB: The `seek-ge yyy` case demonstrates truncation to the search key. 
+ +iter +first +seek-ge a +seek-ge p +seek-ge yyy +seek-ge z +---- +-- SpanChanged(nil) +-- SpanChanged(a-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@2,oranges)}) +PointKey: a#72057594037927935,21 +Span: a-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@2,oranges)} +- +-- SpanChanged(nil) +-- SpanChanged(a-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@2,oranges)}) +PointKey: a#72057594037927935,21 +Span: a-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@2,oranges)} +- +-- SpanChanged(nil) +-- SpanChanged(nil) +PointKey: parsnip#3,1 +Span: +- +-- SpanChanged(nil) +-- SpanChanged(q-z:{(#14,RANGEKEYSET,@9,mangos)}) +PointKey: yyy#72057594037927935,21 +Span: q-z:{(#14,RANGEKEYSET,@9,mangos)} +- +-- SpanChanged(nil) +-- SpanChanged(nil) +PointKey: zucchini#12,2 +Span: +- + +iter +last +prev +prev +prev +prev +next +next +next +next +---- +-- SpanChanged(nil) +-- SpanChanged(nil) +PointKey: zucchini#12,2 +Span: +- +-- SpanChanged(q-z:{(#14,RANGEKEYSET,@9,mangos)}) +PointKey: tomato#2,1 +Span: q-z:{(#14,RANGEKEYSET,@9,mangos)} +- +PointKey: q#72057594037927935,21 +Span: q-z:{(#14,RANGEKEYSET,@9,mangos)} +- +-- SpanChanged(nil) +PointKey: parsnip#3,1 +Span: +- +-- SpanChanged(l-m:{(#2,RANGEKEYUNSET,@9) (#2,RANGEKEYUNSET,@5)}) +PointKey: l#72057594037927935,20 +Span: l-m:{(#2,RANGEKEYUNSET,@9) (#2,RANGEKEYUNSET,@5)} +- +-- SpanChanged(nil) +-- SpanChanged(nil) +PointKey: parsnip#3,1 +Span: +- +-- SpanChanged(q-z:{(#14,RANGEKEYSET,@9,mangos)}) +PointKey: q#72057594037927935,21 +Span: q-z:{(#14,RANGEKEYSET,@9,mangos)} +- +PointKey: tomato#2,1 +Span: q-z:{(#14,RANGEKEYSET,@9,mangos)} +- +-- SpanChanged(nil) +PointKey: zucchini#12,2 +Span: +- + +iter +seek-ge tomato +next +seek-ge q +seek-ge parsnip +next 
+---- +-- SpanChanged(nil) +-- SpanChanged(q-z:{(#14,RANGEKEYSET,@9,mangos)}) +PointKey: tomato#72057594037927935,21 +Span: q-z:{(#14,RANGEKEYSET,@9,mangos)} +- +PointKey: tomato#2,1 +Span: q-z:{(#14,RANGEKEYSET,@9,mangos)} +- +-- SpanChanged(nil) +-- SpanChanged(q-z:{(#14,RANGEKEYSET,@9,mangos)}) +PointKey: q#72057594037927935,21 +Span: q-z:{(#14,RANGEKEYSET,@9,mangos)} +- +-- SpanChanged(nil) +-- SpanChanged(nil) +PointKey: parsnip#3,1 +Span: +- +-- SpanChanged(q-z:{(#14,RANGEKEYSET,@9,mangos)}) +PointKey: q#72057594037927935,21 +Span: q-z:{(#14,RANGEKEYSET,@9,mangos)} +- + +iter +seek-lt tomato +prev +seek-lt a +seek-lt tomato +seek-lt tomago +---- +-- SpanChanged(nil) +-- SpanChanged(q-z:{(#14,RANGEKEYSET,@9,mangos)}) +PointKey: q#72057594037927935,21 +Span: q-z:{(#14,RANGEKEYSET,@9,mangos)} +- +-- SpanChanged(nil) +PointKey: parsnip#3,1 +Span: +- +-- SpanChanged(nil) +-- SpanChanged(nil) +. +-- SpanChanged(nil) +-- SpanChanged(q-z:{(#14,RANGEKEYSET,@9,mangos)}) +PointKey: q#72057594037927935,21 +Span: q-z:{(#14,RANGEKEYSET,@9,mangos)} +- +-- SpanChanged(nil) +-- SpanChanged(q-z:{(#14,RANGEKEYSET,@9,mangos)}) +PointKey: q#72057594037927935,21 +Span: q-z:{(#14,RANGEKEYSET,@9,mangos)} +- + +define-rangekeys +a-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@2,oranges)} +c-d:{(#4,RANGEKEYSET,@3,coconut)} +e-f:{(#20,RANGEKEYSET,@5,pineapple) (#20,RANGEKEYSET,@3,guava)} +h-j:{(#22,RANGEKEYDEL) (#21,RANGEKEYSET,@5,peaches) (#21,RANGEKEYSET,@3,starfruit)} +l-m:{(#2,RANGEKEYUNSET,@9) (#2,RANGEKEYUNSET,@5)} +q-z:{(#14,RANGEKEYSET,@9,mangos)} +---- +OK + +define-pointkeys +a.SET.10 +a.SET.8 +b.SET.13 +c.DEL.9 +d.SET.3 +e.SET.2 +---- +OK + +iter +seek-ge a +next +next +next +---- +-- SpanChanged(nil) +-- SpanChanged(a-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@2,oranges)}) +PointKey: a#72057594037927935,21 +Span: 
a-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@2,oranges)} +- +PointKey: a#10,1 +Span: a-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@2,oranges)} +- +PointKey: a#8,1 +Span: a-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@2,oranges)} +- +PointKey: b#13,1 +Span: a-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@2,oranges)} +- + +iter +seek-lt a +---- +-- SpanChanged(nil) +-- SpanChanged(nil) +. + +iter +seek-ge ab +next +next +next +next +next +next +next +---- +-- SpanChanged(nil) +-- SpanChanged(a-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@2,oranges)}) +PointKey: ab#72057594037927935,21 +Span: a-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@2,oranges)} +- +PointKey: b#13,1 +Span: a-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas) (#4,RANGEKEYSET,@2,oranges)} +- +-- SpanChanged(c-d:{(#4,RANGEKEYSET,@3,coconut)}) +PointKey: c#72057594037927935,21 +Span: c-d:{(#4,RANGEKEYSET,@3,coconut)} +- +PointKey: c#9,0 +Span: c-d:{(#4,RANGEKEYSET,@3,coconut)} +- +-- SpanChanged(nil) +PointKey: d#3,1 +Span: +- +-- SpanChanged(e-f:{(#20,RANGEKEYSET,@5,pineapple) (#20,RANGEKEYSET,@3,guava)}) +PointKey: e#72057594037927935,21 +Span: e-f:{(#20,RANGEKEYSET,@5,pineapple) (#20,RANGEKEYSET,@3,guava)} +- +PointKey: e#2,1 +Span: e-f:{(#20,RANGEKEYSET,@5,pineapple) (#20,RANGEKEYSET,@3,guava)} +- +-- SpanChanged(h-j:{(#22,RANGEKEYDEL) (#21,RANGEKEYSET,@5,peaches) (#21,RANGEKEYSET,@3,starfruit)}) +PointKey: h#72057594037927935,19 +Span: h-j:{(#22,RANGEKEYDEL) (#21,RANGEKEYSET,@5,peaches) (#21,RANGEKEYSET,@3,starfruit)} +- + 
+define-rangekeys +a-z:{(#5,RANGEKEYSET,@5,apples)} +---- +OK + +define-pointkeys +a.SET.10 +a.SET.8 +b.SET.13 +c.DEL.9 +d.SET.3 +e.SET.2 +---- +OK + +iter +first +next +next +next +next +next +---- +-- SpanChanged(nil) +-- SpanChanged(a-z:{(#5,RANGEKEYSET,@5,apples)}) +PointKey: a#72057594037927935,21 +Span: a-z:{(#5,RANGEKEYSET,@5,apples)} +- +PointKey: a#10,1 +Span: a-z:{(#5,RANGEKEYSET,@5,apples)} +- +PointKey: a#8,1 +Span: a-z:{(#5,RANGEKEYSET,@5,apples)} +- +PointKey: b#13,1 +Span: a-z:{(#5,RANGEKEYSET,@5,apples)} +- +PointKey: c#9,0 +Span: a-z:{(#5,RANGEKEYSET,@5,apples)} +- +PointKey: d#3,1 +Span: a-z:{(#5,RANGEKEYSET,@5,apples)} +- + +# Switch to reverse within a range key. +# NB: The seek-ge b should truncate the range key a-z to b. + +iter +seek-ge b +prev +---- +-- SpanChanged(nil) +-- SpanChanged(a-z:{(#5,RANGEKEYSET,@5,apples)}) +PointKey: b#72057594037927935,21 +Span: a-z:{(#5,RANGEKEYSET,@5,apples)} +- +-- SpanChanged(nil) +-- SpanChanged(a-z:{(#5,RANGEKEYSET,@5,apples)}) +PointKey: a#8,1 +Span: a-z:{(#5,RANGEKEYSET,@5,apples)} +- + +# Switch to reverse after a seek-ge. Reverse iteration should not revisit the +# interleaved range-key start at the seek-ge bound: The range-key start should +# be interleaved at its true start key. + +iter +seek-ge b +next +prev +prev +prev +---- +-- SpanChanged(nil) +-- SpanChanged(a-z:{(#5,RANGEKEYSET,@5,apples)}) +PointKey: b#72057594037927935,21 +Span: a-z:{(#5,RANGEKEYSET,@5,apples)} +- +PointKey: b#13,1 +Span: a-z:{(#5,RANGEKEYSET,@5,apples)} +- +-- SpanChanged(nil) +-- SpanChanged(a-z:{(#5,RANGEKEYSET,@5,apples)}) +PointKey: a#8,1 +Span: a-z:{(#5,RANGEKEYSET,@5,apples)} +- +PointKey: a#10,1 +Span: a-z:{(#5,RANGEKEYSET,@5,apples)} +- +PointKey: a#72057594037927935,21 +Span: a-z:{(#5,RANGEKEYSET,@5,apples)} +- + +# Switch to forward iteration after a seek-lt. 
+ +iter +seek-lt c +next +---- +-- SpanChanged(nil) +-- SpanChanged(a-z:{(#5,RANGEKEYSET,@5,apples)}) +PointKey: b#13,1 +Span: a-z:{(#5,RANGEKEYSET,@5,apples)} +- +-- SpanChanged(nil) +-- SpanChanged(a-z:{(#5,RANGEKEYSET,@5,apples)}) +PointKey: c#9,0 +Span: a-z:{(#5,RANGEKEYSET,@5,apples)} +- + +iter +seek-lt c +prev +next +---- +-- SpanChanged(nil) +-- SpanChanged(a-z:{(#5,RANGEKEYSET,@5,apples)}) +PointKey: b#13,1 +Span: a-z:{(#5,RANGEKEYSET,@5,apples)} +- +PointKey: a#8,1 +Span: a-z:{(#5,RANGEKEYSET,@5,apples)} +- +-- SpanChanged(nil) +-- SpanChanged(a-z:{(#5,RANGEKEYSET,@5,apples)}) +PointKey: b#13,1 +Span: a-z:{(#5,RANGEKEYSET,@5,apples)} +- + +# Test sparse range keys. + +define-rangekeys +ace-bat:{(#5,RANGEKEYSET,@5,v5)} +x-z:{(#6,RANGEKEYSET,@6,v5)} +---- +OK + +define-pointkeys +a.SET.9 +b.SET.13 +c.DEL.9 +d.SET.18 +m.SET.4 +o.MERGE.3 +r.SET.22 +y.SET.3 +z.SET.3 +---- +OK + +iter +first +next +next +prev +next +next +---- +-- SpanChanged(nil) +-- SpanChanged(nil) +PointKey: a#9,1 +Span: +- +-- SpanChanged(ace-bat:{(#5,RANGEKEYSET,@5,v5)}) +PointKey: ace#72057594037927935,21 +Span: ace-bat:{(#5,RANGEKEYSET,@5,v5)} +- +PointKey: b#13,1 +Span: ace-bat:{(#5,RANGEKEYSET,@5,v5)} +- +-- SpanChanged(nil) +-- SpanChanged(ace-bat:{(#5,RANGEKEYSET,@5,v5)}) +PointKey: ace#72057594037927935,21 +Span: ace-bat:{(#5,RANGEKEYSET,@5,v5)} +- +-- SpanChanged(nil) +-- SpanChanged(ace-bat:{(#5,RANGEKEYSET,@5,v5)}) +PointKey: b#13,1 +Span: ace-bat:{(#5,RANGEKEYSET,@5,v5)} +- +-- SpanChanged(nil) +PointKey: c#9,0 +Span: +- + +iter +seek-lt ace +seek-lt zoo +---- +-- SpanChanged(nil) +-- SpanChanged(nil) +PointKey: a#9,1 +Span: +- +-- SpanChanged(nil) +-- SpanChanged(nil) +PointKey: z#3,1 +Span: +- + +iter +last +prev +next +next +---- +-- SpanChanged(nil) +-- SpanChanged(nil) +PointKey: z#3,1 +Span: +- +-- SpanChanged(x-z:{(#6,RANGEKEYSET,@6,v5)}) +PointKey: y#3,1 +Span: x-z:{(#6,RANGEKEYSET,@6,v5)} +- +-- SpanChanged(nil) +-- SpanChanged(nil) +PointKey: z#3,1 +Span: +- +-- 
SpanChanged(nil) +. + +iter +seek-lt m +next +seek-ge m +prev +---- +-- SpanChanged(nil) +-- SpanChanged(nil) +PointKey: d#18,1 +Span: +- +-- SpanChanged(nil) +-- SpanChanged(nil) +PointKey: m#4,1 +Span: +- +-- SpanChanged(nil) +-- SpanChanged(nil) +PointKey: m#4,1 +Span: +- +-- SpanChanged(nil) +-- SpanChanged(nil) +PointKey: d#18,1 +Span: +- + +# First, Last, SeekLT and SeekGE elide spans without Sets. + +define-rangekeys +b-d:{(#5,RANGEKEYDEL)} +f-g:{(#6,RANGEKEYDEL)} +---- +OK + +define-pointkeys +c.SET.8 +---- +OK + +iter +first +last +seek-ge a +seek-lt d +---- +-- SpanChanged(nil) +-- SpanChanged(b-d:{(#5,RANGEKEYDEL)}) +PointKey: b#72057594037927935,19 +Span: b-d:{(#5,RANGEKEYDEL)} +- +-- SpanChanged(nil) +-- SpanChanged(f-g:{(#6,RANGEKEYDEL)}) +PointKey: f#72057594037927935,19 +Span: f-g:{(#6,RANGEKEYDEL)} +- +-- SpanChanged(nil) +-- SpanChanged(b-d:{(#5,RANGEKEYDEL)}) +PointKey: b#72057594037927935,19 +Span: b-d:{(#5,RANGEKEYDEL)} +- +-- SpanChanged(nil) +-- SpanChanged(b-d:{(#5,RANGEKEYDEL)}) +PointKey: c#8,1 +Span: b-d:{(#5,RANGEKEYDEL)} +- + +# Test a scenario where Next is out of point keys, the current range key has +# already been interleaved, and there are no more range keys. + +define-rangekeys +w-y:{(#5,RANGEKEYSET,@1,v1)} +y-z:{(#5,RANGEKEYDEL)} +---- +OK + +define-pointkeys +x.SET.8 +---- +OK + +iter +first +next +next +---- +-- SpanChanged(nil) +-- SpanChanged(w-y:{(#5,RANGEKEYSET,@1,v1)}) +PointKey: w#72057594037927935,21 +Span: w-y:{(#5,RANGEKEYSET,@1,v1)} +- +PointKey: x#8,1 +Span: w-y:{(#5,RANGEKEYSET,@1,v1)} +- +-- SpanChanged(y-z:{(#5,RANGEKEYDEL)}) +PointKey: y#72057594037927935,19 +Span: y-z:{(#5,RANGEKEYDEL)} +- + +# Test a scenario where we change direction on a synthetic range key boundary +# key. +iter +first +prev +---- +-- SpanChanged(nil) +-- SpanChanged(w-y:{(#5,RANGEKEYSET,@1,v1)}) +PointKey: w#72057594037927935,21 +Span: w-y:{(#5,RANGEKEYSET,@1,v1)} +- +-- SpanChanged(nil) +-- SpanChanged(nil) +. 
+ +define-rangekeys +a-z:{(#5,RANGEKEYSET,@1,v1)} +---- +OK + +define-pointkeys +z.SET.8 +---- +OK + +iter +seek-ge c +prev +next +---- +-- SpanChanged(nil) +-- SpanChanged(a-z:{(#5,RANGEKEYSET,@1,v1)}) +PointKey: c#72057594037927935,21 +Span: a-z:{(#5,RANGEKEYSET,@1,v1)} +- +-- SpanChanged(nil) +-- SpanChanged(a-z:{(#5,RANGEKEYSET,@1,v1)}) +PointKey: a#72057594037927935,21 +Span: a-z:{(#5,RANGEKEYSET,@1,v1)} +- +-- SpanChanged(nil) +-- SpanChanged(nil) +PointKey: z#8,1 +Span: +- + +iter +set-bounds . c +first +set-bounds c . +last +prev +prev +---- +-- SpanChanged(nil) +-- SpanChanged(a-c:{(#5,RANGEKEYSET,@1,v1)}) +PointKey: a#72057594037927935,21 +Span: a-c:{(#5,RANGEKEYSET,@1,v1)} +- +-- SpanChanged(nil) +-- SpanChanged(nil) +PointKey: z#8,1 +Span: +- +-- SpanChanged(c-z:{(#5,RANGEKEYSET,@1,v1)}) +PointKey: c#72057594037927935,21 +Span: c-z:{(#5,RANGEKEYSET,@1,v1)} +- +-- SpanChanged(nil) +. + +# Test switching directions after exhausting a range key iterator. +# Switching reverse to forward iteration. + +define-rangekeys +j-l:{(#3,RANGEKEYSET,@1,v0)} +---- +OK + +define-pointkeys +g.SET.1 +s.SET.1 +v.SET.2 +v.SET.1 +z.SET.1 +---- +OK + +iter +last +prev +prev +prev +prev +prev +next +---- +-- SpanChanged(nil) +-- SpanChanged(nil) +PointKey: z#1,1 +Span: +- +-- SpanChanged(nil) +PointKey: v#1,1 +Span: +- +-- SpanChanged(nil) +PointKey: v#2,1 +Span: +- +-- SpanChanged(nil) +PointKey: s#1,1 +Span: +- +-- SpanChanged(j-l:{(#3,RANGEKEYSET,@1,v0)}) +PointKey: j#72057594037927935,21 +Span: j-l:{(#3,RANGEKEYSET,@1,v0)} +- +-- SpanChanged(nil) +PointKey: g#1,1 +Span: +- +-- SpanChanged(nil) +-- SpanChanged(j-l:{(#3,RANGEKEYSET,@1,v0)}) +PointKey: j#72057594037927935,21 +Span: j-l:{(#3,RANGEKEYSET,@1,v0)} +- + +# Test switching directions after exhausting a range key iterator. +# Switching forward to reverse iteration. 
+ +define-rangekeys +j-l:{(#3,RANGEKEYSET,@1,v0)} +---- +OK + +define-pointkeys +a.SET.1 +k.SET.1 +m.SET.1 +---- +OK + +iter +first +next +next +next +prev +---- +-- SpanChanged(nil) +-- SpanChanged(nil) +PointKey: a#1,1 +Span: +- +-- SpanChanged(j-l:{(#3,RANGEKEYSET,@1,v0)}) +PointKey: j#72057594037927935,21 +Span: j-l:{(#3,RANGEKEYSET,@1,v0)} +- +PointKey: k#1,1 +Span: j-l:{(#3,RANGEKEYSET,@1,v0)} +- +-- SpanChanged(nil) +PointKey: m#1,1 +Span: +- +-- SpanChanged(nil) +-- SpanChanged(j-l:{(#3,RANGEKEYSET,@1,v0)}) +PointKey: k#1,1 +Span: j-l:{(#3,RANGEKEYSET,@1,v0)} +- + +# Test a seek that moves the lower bound beyond the upper bound. + +define-rangekeys +a-d:{(#10,RANGEKEYSET,@5,apples)} +---- +OK + +define-pointkeys +b.SET.8 +---- +OK + + +iter +set-bounds a c +seek-ge c +---- +-- SpanChanged(nil) +-- SpanChanged(nil) +. + +iter +set-bounds a c +seek-lt a +---- +-- SpanChanged(nil) +-- SpanChanged(nil) +. + +# Test a SeekLT that searches a keyspace exclusive with the iterator's bounds. +# Previously, there was a bug that would incorrectly surface the span with the +# iterator's bounds, despite the fact the SeekLT search key is exclusive. See +# the comment in keyspanSeekLT. + +define-rangekeys +b-f:{(#1,RANGEKEYSET,@1,foo)} +---- +OK + +define-pointkeys +f.SET.3 +---- +OK + +iter +set-bounds d e +seek-lt d +---- +-- SpanChanged(nil) +-- SpanChanged(nil) +. + +# Test seek-prefix-ge and its truncation of bounds to the prefix's bounds. 
+ +define-rangekeys +b-d:{(#5,RANGEKEYSET,@1,foo)} +f-g:{(#6,RANGEKEYSET,@1,foo)} +---- +OK + +define-pointkeys +c.SET.8 +---- +OK + +iter +seek-prefix-ge b +next +seek-prefix-ge c +next +seek-ge c +---- +-- SpanChanged(nil) +-- SpanChanged(b-b\x00:{(#5,RANGEKEYSET,@1,foo)}) +PointKey: b#72057594037927935,21 +Span: b-b\x00:{(#5,RANGEKEYSET,@1,foo)} +- +PointKey: c#8,1 +Span: b-b\x00:{(#5,RANGEKEYSET,@1,foo)} +- +-- SpanChanged(nil) +-- SpanChanged(c-c\x00:{(#5,RANGEKEYSET,@1,foo)}) +PointKey: c#72057594037927935,21 +Span: c-c\x00:{(#5,RANGEKEYSET,@1,foo)} +- +PointKey: c#8,1 +Span: c-c\x00:{(#5,RANGEKEYSET,@1,foo)} +- +-- SpanChanged(nil) +-- SpanChanged(b-d:{(#5,RANGEKEYSET,@1,foo)}) +PointKey: c#72057594037927935,21 +Span: b-d:{(#5,RANGEKEYSET,@1,foo)} +- + +# Test NextPrefix + +define-rangekeys +b-e:{(#5,RANGEKEYSET,@9,foo)} +f-g:{(#6,RANGEKEYSET,@9,foo)} +---- +OK + +define-pointkeys +a@4.SET.8 +c@11.SET.8 +c@3.SET.8 +c@1.SET.4 +d@5.SET.3 +e@9.SET.2 +---- +OK + +iter +first +next-prefix +next-prefix +next-prefix +next-prefix +next-prefix +next-prefix +next-prefix +---- +-- SpanChanged(nil) +-- SpanChanged(nil) +PointKey: a@4#8,1 +Span: +- +-- SpanChanged(b-e:{(#5,RANGEKEYSET,@9,foo)}) +PointKey: b#72057594037927935,21 +Span: b-e:{(#5,RANGEKEYSET,@9,foo)} +- +PointKey: c@11#8,1 +Span: b-e:{(#5,RANGEKEYSET,@9,foo)} +- +PointKey: d@5#3,1 +Span: b-e:{(#5,RANGEKEYSET,@9,foo)} +- +-- SpanChanged(nil) +PointKey: e@9#2,1 +Span: +- +-- SpanChanged(f-g:{(#6,RANGEKEYSET,@9,foo)}) +PointKey: f#72057594037927935,21 +Span: f-g:{(#6,RANGEKEYSET,@9,foo)} +- +-- SpanChanged(nil) +. +. diff --git a/pebble/internal/keyspan/testdata/interleaving_iter_masking b/pebble/internal/keyspan/testdata/interleaving_iter_masking new file mode 100644 index 0000000..8ad8fb3 --- /dev/null +++ b/pebble/internal/keyspan/testdata/interleaving_iter_masking @@ -0,0 +1,501 @@ +# Test the scenario illustrated in the below visualization. 
+# +# ^ +# @9 | •―――――――――――――――○ [e,m)@9 +# s 8 | • l@8 +# u 7 |------------------------------------ @7 masking +# f 6 | [h,q)@6 •―――――――――――――――――○ threshold +# f 5 | • h@5 +# f 4 | • n@4 +# i 3 | •―――――――――――○ [f,l)@3 +# x 2 | • b@2 +# 1 | +# 0 |___________________________________ +# a b c d e f g h i j k l m n o p q +# + +define-rangekeys +e-f:{(#1,RANGEKEYSET,@9,foo)} +f-h:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@3,bar)} +h-l:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax) (#1,RANGEKEYSET,@3,bar)} +l-m:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax)} +m-q:{(#1,RANGEKEYSET,@6,bax)} +---- +OK + +define-pointkeys +b@2.SET.1 +h@5.SET.1 +l@8.SET.1 +n@4.SET.1 +---- +OK + +set-masking-threshold +@7 +---- +OK + +iter +first +next +next +next +next +next +next +next +---- +-- SpanChanged(nil) +-- SpanChanged(nil) +PointKey: b@2#1,1 +Span: +- +-- SpanChanged(e-f:{(#1,RANGEKEYSET,@9,foo)}) +PointKey: e#72057594037927935,21 +Span: e-f:{(#1,RANGEKEYSET,@9,foo)} +- +-- SpanChanged(f-h:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@3,bar)}) +PointKey: f#72057594037927935,21 +Span: f-h:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@3,bar)} +- +-- SpanChanged(h-l:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax) (#1,RANGEKEYSET,@3,bar)}) +PointKey: h#72057594037927935,21 +Span: h-l:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax) (#1,RANGEKEYSET,@3,bar)} +- +-- SpanChanged(l-m:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax)}) +PointKey: l#72057594037927935,21 +Span: l-m:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax)} +- +PointKey: l@8#1,1 +Span: l-m:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax)} +- +-- SpanChanged(m-q:{(#1,RANGEKEYSET,@6,bax)}) +PointKey: m#72057594037927935,21 +Span: m-q:{(#1,RANGEKEYSET,@6,bax)} +- +-- SpanChanged(nil) +. 
+ +iter +last +prev +prev +prev +prev +prev +prev +prev +---- +-- SpanChanged(nil) +-- SpanChanged(m-q:{(#1,RANGEKEYSET,@6,bax)}) +PointKey: m#72057594037927935,21 +Span: m-q:{(#1,RANGEKEYSET,@6,bax)} +- +-- SpanChanged(l-m:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax)}) +PointKey: l@8#1,1 +Span: l-m:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax)} +- +PointKey: l#72057594037927935,21 +Span: l-m:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax)} +- +-- SpanChanged(h-l:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax) (#1,RANGEKEYSET,@3,bar)}) +PointKey: h#72057594037927935,21 +Span: h-l:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax) (#1,RANGEKEYSET,@3,bar)} +- +-- SpanChanged(f-h:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@3,bar)}) +PointKey: f#72057594037927935,21 +Span: f-h:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@3,bar)} +- +-- SpanChanged(e-f:{(#1,RANGEKEYSET,@9,foo)}) +PointKey: e#72057594037927935,21 +Span: e-f:{(#1,RANGEKEYSET,@9,foo)} +- +-- SpanChanged(nil) +PointKey: b@2#1,1 +Span: +- +-- SpanChanged(nil) +. 
+ +iter +seek-ge a +seek-ge c +seek-ge h +seek-ge i +seek-ge l +next +seek-ge m +seek-ge r +---- +-- SpanChanged(nil) +-- SpanChanged(nil) +PointKey: b@2#1,1 +Span: +- +-- SpanChanged(nil) +-- SpanChanged(e-f:{(#1,RANGEKEYSET,@9,foo)}) +PointKey: e#72057594037927935,21 +Span: e-f:{(#1,RANGEKEYSET,@9,foo)} +- +-- SpanChanged(nil) +-- SpanChanged(h-l:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax) (#1,RANGEKEYSET,@3,bar)}) +PointKey: h#72057594037927935,21 +Span: h-l:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax) (#1,RANGEKEYSET,@3,bar)} +- +-- SpanChanged(nil) +-- SpanChanged(h-l:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax) (#1,RANGEKEYSET,@3,bar)}) +PointKey: i#72057594037927935,21 +Span: h-l:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax) (#1,RANGEKEYSET,@3,bar)} +- +-- SpanChanged(nil) +-- SpanChanged(l-m:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax)}) +PointKey: l#72057594037927935,21 +Span: l-m:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax)} +- +PointKey: l@8#1,1 +Span: l-m:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax)} +- +-- SpanChanged(nil) +-- SpanChanged(m-q:{(#1,RANGEKEYSET,@6,bax)}) +PointKey: m#72057594037927935,21 +Span: m-q:{(#1,RANGEKEYSET,@6,bax)} +- +-- SpanChanged(nil) +-- SpanChanged(nil) +. + +# Setting the masking threshold to @9 should result in l@8 being masked by +# [e,m)@9. 
+ +set-masking-threshold +@9 +---- +OK + +iter +seek-ge l +next +seek-lt l +seek-lt ll +prev +---- +-- SpanChanged(nil) +-- SpanChanged(l-m:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax)}) +PointKey: l#72057594037927935,21 +Span: l-m:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax)} +- +-- SpanChanged(m-q:{(#1,RANGEKEYSET,@6,bax)}) +PointKey: m#72057594037927935,21 +Span: m-q:{(#1,RANGEKEYSET,@6,bax)} +- +-- SpanChanged(nil) +-- SpanChanged(h-l:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax) (#1,RANGEKEYSET,@3,bar)}) +PointKey: h#72057594037927935,21 +Span: h-l:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax) (#1,RANGEKEYSET,@3,bar)} +- +-- SpanChanged(nil) +-- SpanChanged(l-m:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax)}) +PointKey: l#72057594037927935,21 +Span: l-m:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax)} +- +-- SpanChanged(h-l:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax) (#1,RANGEKEYSET,@3,bar)}) +PointKey: h#72057594037927935,21 +Span: h-l:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax) (#1,RANGEKEYSET,@3,bar)} +- + +iter +seek-ge l +next +---- +-- SpanChanged(nil) +-- SpanChanged(l-m:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax)}) +PointKey: l#72057594037927935,21 +Span: l-m:{(#1,RANGEKEYSET,@9,foo) (#1,RANGEKEYSET,@6,bax)} +- +-- SpanChanged(m-q:{(#1,RANGEKEYSET,@6,bax)}) +PointKey: m#72057594037927935,21 +Span: m-q:{(#1,RANGEKEYSET,@6,bax)} +- + +define-rangekeys +a-c:{(#1,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@2,bananas)} +---- +OK + +define-pointkeys +a.SET.1 +a@3.SET.1 +a@12.SET.1 +b@2.SET.1 +---- +OK + +set-masking-threshold +@10 +---- +OK + +# Test that both a@3 and b@2 are masked by the rangekey. +# The unsuffixed point key 'a' and the point key at a higher timestamp 'a@12' +# are not masked. 
+ +iter +first +next +next +next +---- +-- SpanChanged(nil) +-- SpanChanged(a-c:{(#1,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@2,bananas)}) +PointKey: a#72057594037927935,21 +Span: a-c:{(#1,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@2,bananas)} +- +PointKey: a#1,1 +Span: a-c:{(#1,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@2,bananas)} +- +PointKey: a@12#1,1 +Span: a-c:{(#1,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@2,bananas)} +- +-- SpanChanged(nil) +. + +iter +last +prev +prev +prev +---- +-- SpanChanged(nil) +-- SpanChanged(a-c:{(#1,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@2,bananas)}) +PointKey: a@12#1,1 +Span: a-c:{(#1,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@2,bananas)} +- +PointKey: a#1,1 +Span: a-c:{(#1,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@2,bananas)} +- +PointKey: a#72057594037927935,21 +Span: a-c:{(#1,RANGEKEYSET,@5,apples) (#1,RANGEKEYSET,@2,bananas)} +- +-- SpanChanged(nil) +. + +# Try the same test, but with a range key that sorts before the masking +# threshold (eg, higher MVCC timestamp). Nothing should be masked. + +define-rangekeys +a-c:{(#2,RANGEKEYSET,@20,apples)} +---- +OK + +iter +first +next +next +next +next +next +---- +-- SpanChanged(nil) +-- SpanChanged(a-c:{(#2,RANGEKEYSET,@20,apples)}) +PointKey: a#72057594037927935,21 +Span: a-c:{(#2,RANGEKEYSET,@20,apples)} +- +PointKey: a#1,1 +Span: a-c:{(#2,RANGEKEYSET,@20,apples)} +- +PointKey: a@3#1,1 +Span: a-c:{(#2,RANGEKEYSET,@20,apples)} +- +PointKey: a@12#1,1 +Span: a-c:{(#2,RANGEKEYSET,@20,apples)} +- +PointKey: b@2#1,1 +Span: a-c:{(#2,RANGEKEYSET,@20,apples)} +- +-- SpanChanged(nil) +. 
+ +iter +last +prev +prev +prev +prev +prev +---- +-- SpanChanged(nil) +-- SpanChanged(a-c:{(#2,RANGEKEYSET,@20,apples)}) +PointKey: b@2#1,1 +Span: a-c:{(#2,RANGEKEYSET,@20,apples)} +- +PointKey: a@12#1,1 +Span: a-c:{(#2,RANGEKEYSET,@20,apples)} +- +PointKey: a@3#1,1 +Span: a-c:{(#2,RANGEKEYSET,@20,apples)} +- +PointKey: a#1,1 +Span: a-c:{(#2,RANGEKEYSET,@20,apples)} +- +PointKey: a#72057594037927935,21 +Span: a-c:{(#2,RANGEKEYSET,@20,apples)} +- +-- SpanChanged(nil) +. + +# Try the original test, but with an internal range key containing just an +# Unset, and no Set. Nothing should be masked. No range keys should be surfaced, +# because there are none. + +define-rangekeys +a-c:{(#1,RANGEKEYUNSET,@5) (#1,RANGEKEYUNSET,@2)} +---- +OK + +iter +first +next +next +next +next +---- +-- SpanChanged(nil) +-- SpanChanged(a-c:{(#1,RANGEKEYUNSET,@5) (#1,RANGEKEYUNSET,@2)}) +PointKey: a#72057594037927935,20 +Span: a-c:{(#1,RANGEKEYUNSET,@5) (#1,RANGEKEYUNSET,@2)} +- +PointKey: a#1,1 +Span: a-c:{(#1,RANGEKEYUNSET,@5) (#1,RANGEKEYUNSET,@2)} +- +PointKey: a@12#1,1 +Span: a-c:{(#1,RANGEKEYUNSET,@5) (#1,RANGEKEYUNSET,@2)} +- +-- SpanChanged(nil) +. +-- SpanChanged(nil) +. + +iter +last +prev +prev +prev +prev +---- +-- SpanChanged(nil) +-- SpanChanged(a-c:{(#1,RANGEKEYUNSET,@5) (#1,RANGEKEYUNSET,@2)}) +PointKey: a@12#1,1 +Span: a-c:{(#1,RANGEKEYUNSET,@5) (#1,RANGEKEYUNSET,@2)} +- +PointKey: a#1,1 +Span: a-c:{(#1,RANGEKEYUNSET,@5) (#1,RANGEKEYUNSET,@2)} +- +PointKey: a#72057594037927935,20 +Span: a-c:{(#1,RANGEKEYUNSET,@5) (#1,RANGEKEYUNSET,@2)} +- +-- SpanChanged(nil) +. +-- SpanChanged(nil) +. + +# Test a scenario where a point key is masked in the forward direction, which in +# turn requires nexting to the next range key as well. 
+ +define-rangekeys +a-c:{(#1,RANGEKEYSET,@5,apples)} +c-z:{(#1,RANGEKEYSET,@10,bananas)} +---- +OK + +define-pointkeys +b@3.SET.2 +d@9.SET.4 +j@11.SET.3 +---- +OK + +set-masking-threshold +@20 +---- +OK + +iter +first +next +next +---- +-- SpanChanged(nil) +-- SpanChanged(a-c:{(#1,RANGEKEYSET,@5,apples)}) +PointKey: a#72057594037927935,21 +Span: a-c:{(#1,RANGEKEYSET,@5,apples)} +- +-- SpanChanged(c-z:{(#1,RANGEKEYSET,@10,bananas)}) +PointKey: c#72057594037927935,21 +Span: c-z:{(#1,RANGEKEYSET,@10,bananas)} +- +PointKey: j@11#3,1 +Span: c-z:{(#1,RANGEKEYSET,@10,bananas)} +- + +iter +last +prev +prev +---- +-- SpanChanged(nil) +-- SpanChanged(c-z:{(#1,RANGEKEYSET,@10,bananas)}) +PointKey: j@11#3,1 +Span: c-z:{(#1,RANGEKEYSET,@10,bananas)} +- +PointKey: c#72057594037927935,21 +Span: c-z:{(#1,RANGEKEYSET,@10,bananas)} +- +-- SpanChanged(a-c:{(#1,RANGEKEYSET,@5,apples)}) +PointKey: a#72057594037927935,21 +Span: a-c:{(#1,RANGEKEYSET,@5,apples)} +- + +# Test a scenario where there's an empty range key, requiring the interleaving +# iter to call SpanChanged(nil) which should clear the previous mask. + +define-rangekeys +a-c:{(#1,RANGEKEYSET,@10,apples)} +c-e:{} +e-f:{(#1,RANGEKEYSET,@5,bananas)} +---- +OK + +define-pointkeys +a@2.SET.4 +b@9.SET.2 +d@9.SET.3 +---- +OK + +set-masking-threshold +@20 +---- +OK + +iter +seek-ge a +next +next +next +---- +-- SpanChanged(nil) +-- SpanChanged(a-c:{(#1,RANGEKEYSET,@10,apples)}) +PointKey: a#72057594037927935,21 +Span: a-c:{(#1,RANGEKEYSET,@10,apples)} +- +-- SpanChanged(nil) +PointKey: d@9#3,1 +Span: +- +-- SpanChanged(e-f:{(#1,RANGEKEYSET,@5,bananas)}) +PointKey: e#72057594037927935,21 +Span: e-f:{(#1,RANGEKEYSET,@5,bananas)} +- +-- SpanChanged(nil) +.
diff --git a/pebble/internal/keyspan/testdata/iter b/pebble/internal/keyspan/testdata/iter new file mode 100644 index 0000000..5a1c451 --- /dev/null +++ b/pebble/internal/keyspan/testdata/iter @@ -0,0 +1,55 @@ +define +a-b:{(#2,SET) (#1,SET)} +b-c:{(#2,SET) (#1,SET)} +c-d:{(#2,SET) (#1,SET)} +---- + +iter +seek-ge a +seek-ge b +seek-ge c +seek-ge cat +seek-ge d +seek-lt a +seek-lt b +seek-lt c +seek-lt cat +seek-lt d +seek-lt e +---- +a-b:{(#2,SET) (#1,SET)} +b-c:{(#2,SET) (#1,SET)} +c-d:{(#2,SET) (#1,SET)} +c-d:{(#2,SET) (#1,SET)} +. +. +a-b:{(#2,SET) (#1,SET)} +b-c:{(#2,SET) (#1,SET)} +c-d:{(#2,SET) (#1,SET)} +c-d:{(#2,SET) (#1,SET)} +c-d:{(#2,SET) (#1,SET)} + +iter +first +next +prev +prev +next +next +next +prev +next +next +prev +---- +a-b:{(#2,SET) (#1,SET)} +b-c:{(#2,SET) (#1,SET)} +a-b:{(#2,SET) (#1,SET)} +. +a-b:{(#2,SET) (#1,SET)} +b-c:{(#2,SET) (#1,SET)} +c-d:{(#2,SET) (#1,SET)} +b-c:{(#2,SET) (#1,SET)} +c-d:{(#2,SET) (#1,SET)} +. +c-d:{(#2,SET) (#1,SET)} diff --git a/pebble/internal/keyspan/testdata/level_iter b/pebble/internal/keyspan/testdata/level_iter new file mode 100644 index 0000000..3919819 --- /dev/null +++ b/pebble/internal/keyspan/testdata/level_iter @@ -0,0 +1,475 @@ + +# Simple case. 
+ +define +file + a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} +file + b-c:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} + c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} +---- + +iter +seek-ge a +seek-ge apple +seek-ge b +seek-ge banana +seek-ge c +seek-ge cantalope +seek-ge d +seek-ge dragonfruit +---- +a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000001.sst) +a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000001.sst) +b-c:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +b-c:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +. +. + +iter +seek-lt a +seek-lt apple +seek-lt b +seek-lt banana +seek-lt c +seek-lt cantalope +seek-lt d +seek-lt dragonfruit +prev +---- +. +a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000001.sst) +a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000001.sst) +b-c:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +b-c:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +b-c:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) + +iter +seek-ge a +prev +seek-lt d +next +---- +a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000001.sst) +. +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +. + +iter +first +next +next +next +---- +a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000001.sst) +b-c:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +. 
+ +iter +last +prev +prev +prev +---- +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +b-c:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000001.sst) +. + +# Set some bounds + +iter +seek-ge a +seek-ge b +seek-ge c +seek-ge d +seek-lt a +seek-lt b +seek-lt c +seek-lt d +---- +a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000001.sst) +b-c:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +. +. +a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000001.sst) +b-c:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) + + +iter +seek-lt cc +prev +prev +prev +---- +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +b-c:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000001.sst) +. + +# Test skipping over empty/point-key-only files in both directions. 
+ +define +file + a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} +file + point:b.SET.1:foo +file + c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} + d-e:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} +---- + +num-files +---- +3 + +iter +first +next +next +next +---- +a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000001.sst) +b-c:{} (file = 000001.sst) +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000003.sst) +d-e:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000003.sst) + +iter +last +prev +prev +prev +---- +d-e:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000003.sst) +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000003.sst) +b-c:{} (file = 000003.sst) +a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000001.sst) + +# Test straddle keys between files. + +define +file + a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} +file + c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} +file + e-f:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} +file + g-h:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} +---- + +iter +first +next +next +next +next +next +next +next +---- +a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000001.sst) +b-c:{} (file = 000001.sst) +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +d-e:{} (file = 000002.sst) +e-f:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000003.sst) +f-g:{} (file = 000003.sst) +g-h:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000004.sst) +. 
+ +iter +last +prev +prev +prev +prev +prev +prev +prev +---- +g-h:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000004.sst) +f-g:{} (file = 000004.sst) +e-f:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000003.sst) +d-e:{} (file = 000003.sst) +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +b-c:{} (file = 000002.sst) +a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000001.sst) +. + +# The below case seeks into a file straddle, then iterates forward and back to +# it, and confirms that changing iterator directions on a straddle does the +# right thing. + +iter +seek-ge bb +next +prev +next +prev +prev +---- +b-c:{} (file = 000001.sst) +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +b-c:{} (file = 000002.sst) +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +b-c:{} (file = 000002.sst) +a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000001.sst) + +# The same case as above, but with inverted directions. + +iter +seek-lt dd +prev +next +prev +next +next +---- +d-e:{} (file = 000001.sst) +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +d-e:{} (file = 000002.sst) +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +d-e:{} (file = 000002.sst) +e-f:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000003.sst) + +iter +seek-lt dd +prev +next +prev +next +next +---- +d-e:{} (file = 000003.sst) +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +d-e:{} (file = 000002.sst) +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +d-e:{} (file = 000002.sst) +e-f:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000003.sst) + +# Seeks right at the bound should return nothing. 
+ +iter +seek-lt bb +---- +b-c:{} (file = 000003.sst) + +iter +seek-ge dd +---- +d-e:{} (file = 000003.sst) + +iter +seek-lt d +prev +next +prev +prev +prev +next +next +---- +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +b-c:{} (file = 000002.sst) +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +b-c:{} (file = 000002.sst) +a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000001.sst) +. +a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000001.sst) +b-c:{} (file = 000001.sst) + +# A bunch of files with point keys only should not fragment straddles. + +define +file + a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} +file + point:c.SET.1:foo +file + point:d.SET.1:foo +file + e-f:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} +file + point:g.SET.1:foo +file + h-i:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} +---- + +iter +first +next +next +next +next +next +---- +a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000001.sst) +b-e:{} (file = 000001.sst) +e-f:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000004.sst) +f-h:{} (file = 000004.sst) +h-i:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000006.sst) +. + +iter +last +prev +prev +prev +prev +prev +---- +h-i:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000006.sst) +f-h:{} (file = 000006.sst) +e-f:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000004.sst) +b-e:{} (file = 000004.sst) +a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000001.sst) +. 
+ +# Test files with range keys and rangedels + +define +file + a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} + point:a.SET.1:foo + point:b.SET.1:foo +file + c-e:{(#3,RANGEKEYSET,@3,baz) (#3,RANGEKEYSET,@1,bar)} + point:c.RANGEDEL.2:f + point:d.SET.1:foo +file + g-h:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} + i-j:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} + point:f.RANGEDEL.2:g +---- + +iter rangedel +first +next +next +next +---- +c-f:{(#2,RANGEDEL)} (file = 000002.sst) +f-g:{(#2,RANGEDEL)} (file = 000003.sst) +. +. + +iter rangedel +last +prev +prev +prev +---- +f-g:{(#2,RANGEDEL)} (file = 000003.sst) +c-f:{(#2,RANGEDEL)} (file = 000002.sst) +. +. + +iter rangedel +seek-ge c +next +next +---- +c-f:{(#2,RANGEDEL)} (file = 000002.sst) +f-g:{(#2,RANGEDEL)} (file = 000003.sst) +. + +iter rangedel +seek-lt ff +prev +next +prev +prev +---- +f-g:{(#2,RANGEDEL)} (file = 000003.sst) +c-f:{(#2,RANGEDEL)} (file = 000002.sst) +f-g:{(#2,RANGEDEL)} (file = 000003.sst) +c-f:{(#2,RANGEDEL)} (file = 000002.sst) +. + +close-iter +---- +ok + +# Test that a regular LevelIter ignores rangedels and emits straddle spans. + +iter +first +next +next +next +next +next +---- +a-b:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000001.sst) +b-c:{} (file = 000001.sst) +c-e:{(#3,RANGEKEYSET,@3,baz) (#3,RANGEKEYSET,@1,bar)} (file = 000002.sst) +e-g:{} (file = 000002.sst) +g-h:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000003.sst) +i-j:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000003.sst) + +iter +seek-ge c +next +next +next +next +---- +c-e:{(#3,RANGEKEYSET,@3,baz) (#3,RANGEKEYSET,@1,bar)} (file = 000002.sst) +e-g:{} (file = 000002.sst) +g-h:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000003.sst) +i-j:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000003.sst) +. + +# Test seeking outside of bounds with straddles. 
+ +define +file + c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} +file + e-f:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} +file + g-h:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} +---- + +iter +seek-lt j +next +prev +prev +---- +g-h:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000003.sst) +. +g-h:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000003.sst) +f-g:{} (file = 000003.sst) + +iter +seek-lt j +prev +prev +next +next +---- +g-h:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000003.sst) +f-g:{} (file = 000003.sst) +e-f:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000002.sst) +f-g:{} (file = 000002.sst) +g-h:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000003.sst) + +iter +seek-ge a +prev +next +next +---- +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000001.sst) +. +c-d:{(#2,RANGEKEYSET,@3,foo) (#1,RANGEKEYSET,@1,bar)} (file = 000001.sst) +d-e:{} (file = 000001.sst) diff --git a/pebble/internal/keyspan/testdata/merging_iter b/pebble/internal/keyspan/testdata/merging_iter new file mode 100644 index 0000000..aa309e2 --- /dev/null +++ b/pebble/internal/keyspan/testdata/merging_iter @@ -0,0 +1,758 @@ +# Test a single level. 
+ +define +a-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas)} +c-d:{(#4,RANGEKEYSET,@3,coconut)} +e-f:{(#20,RANGEKEYSET,@5,pineapple) (#20,RANGEKEYSET,@3,guava)} +h-j:{(#22,RANGEKEYDEL) (#21,RANGEKEYSET,@5,peaches) (#21,RANGEKEYSET,@3,starfruit)} +l-m:{(#2,RANGEKEYUNSET,@9) (#2,RANGEKEYUNSET,@5)} +q-z:{(#14,RANGEKEYSET,@9,mangos)} +---- +1 levels + +iter +first +next +next +next +next +next +next +---- +a-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas)} +c-d:{(#4,RANGEKEYSET,@3,coconut)} +e-f:{(#20,RANGEKEYSET,@5,pineapple) (#20,RANGEKEYSET,@3,guava)} +h-j:{(#22,RANGEKEYDEL) (#21,RANGEKEYSET,@5,peaches) (#21,RANGEKEYSET,@3,starfruit)} +l-m:{(#2,RANGEKEYUNSET,@9) (#2,RANGEKEYUNSET,@5)} +q-z:{(#14,RANGEKEYSET,@9,mangos)} + + +# Test snapshot filtering. + +iter snapshot=12 +first +next +next +next +next +next +next +---- +a-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas)} +c-d:{(#4,RANGEKEYSET,@3,coconut)} +e-f:{} +h-j:{} +l-m:{(#2,RANGEKEYUNSET,@9) (#2,RANGEKEYUNSET,@5)} +q-z:{} + + +# Test error handling on seeks. + +iter probes=(0,ErrInjected,(Log "# inner.")) +first +last +seek-ge boo +seek-lt lemon +---- +# inner.First() = nil + err= +# inner.Last() = nil + err= +# inner.SeekLT("boo") = nil + err= +# inner.SeekGE("lemon") = nil + err= + +# Test error handling on steps. 
+ +iter probes=(0,(If (Or OpNext OpPrev) ErrInjected noop),(Log "# inner.")) +first +next +last +prev +---- +# inner.First() = a-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas)} +a-c:{(#10,RANGEKEYSET,@5,apples) (#10,RANGEKEYDEL) (#8,RANGEKEYUNSET,@1) (#4,RANGEKEYSET,@3,bananas)} +# inner.Next() = nil + err= +# inner.Last() = q-z:{(#14,RANGEKEYSET,@9,mangos)} +q-z:{(#14,RANGEKEYSET,@9,mangos)} +# inner.Prev() = nil + err= + +define +b-d:{#10,RANGEKEYSET,@1,apples} +e-h:{#8,RANGEKEYDEL} +-- +a-c:{#3,RANGEKEYUNSET,@1} +h-k:{#5,RANGEKEYDEL} +---- +2 levels + +iter +first +next +next +next +next +next +---- +a-b:{(#3,RANGEKEYUNSET,@1)} +b-c:{(#10,RANGEKEYSET,@1,apples) (#3,RANGEKEYUNSET,@1)} +c-d:{(#10,RANGEKEYSET,@1,apples)} +e-h:{(#8,RANGEKEYDEL)} +h-k:{(#5,RANGEKEYDEL)} + + +iter +last +prev +prev +prev +prev +prev +---- +h-k:{(#5,RANGEKEYDEL)} +e-h:{(#8,RANGEKEYDEL)} +c-d:{(#10,RANGEKEYSET,@1,apples)} +b-c:{(#10,RANGEKEYSET,@1,apples) (#3,RANGEKEYUNSET,@1)} +a-b:{(#3,RANGEKEYUNSET,@1)} + + +# Test changing directions at each iterator position, reverse to forward. 
+iter +last +next +last +prev +next +---- +h-k:{(#5,RANGEKEYDEL)} + +h-k:{(#5,RANGEKEYDEL)} +e-h:{(#8,RANGEKEYDEL)} +h-k:{(#5,RANGEKEYDEL)} + +iter +last +prev +prev +next +---- +h-k:{(#5,RANGEKEYDEL)} +e-h:{(#8,RANGEKEYDEL)} +c-d:{(#10,RANGEKEYSET,@1,apples)} +e-h:{(#8,RANGEKEYDEL)} + +iter +last +prev +prev +prev +next +---- +h-k:{(#5,RANGEKEYDEL)} +e-h:{(#8,RANGEKEYDEL)} +c-d:{(#10,RANGEKEYSET,@1,apples)} +b-c:{(#10,RANGEKEYSET,@1,apples) (#3,RANGEKEYUNSET,@1)} +c-d:{(#10,RANGEKEYSET,@1,apples)} + +iter +last +prev +prev +prev +prev +next +---- +h-k:{(#5,RANGEKEYDEL)} +e-h:{(#8,RANGEKEYDEL)} +c-d:{(#10,RANGEKEYSET,@1,apples)} +b-c:{(#10,RANGEKEYSET,@1,apples) (#3,RANGEKEYUNSET,@1)} +a-b:{(#3,RANGEKEYUNSET,@1)} +b-c:{(#10,RANGEKEYSET,@1,apples) (#3,RANGEKEYUNSET,@1)} + +iter +last +prev +prev +prev +prev +prev +next +---- +h-k:{(#5,RANGEKEYDEL)} +e-h:{(#8,RANGEKEYDEL)} +c-d:{(#10,RANGEKEYSET,@1,apples)} +b-c:{(#10,RANGEKEYSET,@1,apples) (#3,RANGEKEYUNSET,@1)} +a-b:{(#3,RANGEKEYUNSET,@1)} + +a-b:{(#3,RANGEKEYUNSET,@1)} + +# Test changing directions at each iterator position, forward to reverse. 
+ +iter +first +prev +first +next +prev +---- +a-b:{(#3,RANGEKEYUNSET,@1)} + +a-b:{(#3,RANGEKEYUNSET,@1)} +b-c:{(#10,RANGEKEYSET,@1,apples) (#3,RANGEKEYUNSET,@1)} +a-b:{(#3,RANGEKEYUNSET,@1)} + +iter +first +next +next +prev +---- +a-b:{(#3,RANGEKEYUNSET,@1)} +b-c:{(#10,RANGEKEYSET,@1,apples) (#3,RANGEKEYUNSET,@1)} +c-d:{(#10,RANGEKEYSET,@1,apples)} +b-c:{(#10,RANGEKEYSET,@1,apples) (#3,RANGEKEYUNSET,@1)} + +iter +first +next +next +next +prev +---- +a-b:{(#3,RANGEKEYUNSET,@1)} +b-c:{(#10,RANGEKEYSET,@1,apples) (#3,RANGEKEYUNSET,@1)} +c-d:{(#10,RANGEKEYSET,@1,apples)} +e-h:{(#8,RANGEKEYDEL)} +c-d:{(#10,RANGEKEYSET,@1,apples)} + +iter +first +next +next +next +next +next +prev +---- +a-b:{(#3,RANGEKEYUNSET,@1)} +b-c:{(#10,RANGEKEYSET,@1,apples) (#3,RANGEKEYUNSET,@1)} +c-d:{(#10,RANGEKEYSET,@1,apples)} +e-h:{(#8,RANGEKEYDEL)} +h-k:{(#5,RANGEKEYDEL)} + +h-k:{(#5,RANGEKEYDEL)} + +iter +first +next +next +next +next +prev +---- +a-b:{(#3,RANGEKEYUNSET,@1)} +b-c:{(#10,RANGEKEYSET,@1,apples) (#3,RANGEKEYUNSET,@1)} +c-d:{(#10,RANGEKEYSET,@1,apples)} +e-h:{(#8,RANGEKEYDEL)} +h-k:{(#5,RANGEKEYDEL)} +e-h:{(#8,RANGEKEYDEL)} + +# Test SeekGE. Note that MergingIter's SeekGE implements the FragmentIterator's +# SeekGE semantics. It returns the first fragment that covers a key ≥ the search +# key. + +iter +seek-ge cc +---- +c-d:{(#10,RANGEKEYSET,@1,apples)} + +iter +seek-ge 1 +seek-ge a +seek-ge b +seek-ge bb +---- +a-b:{(#3,RANGEKEYUNSET,@1)} +a-b:{(#3,RANGEKEYUNSET,@1)} +b-c:{(#10,RANGEKEYSET,@1,apples) (#3,RANGEKEYUNSET,@1)} +b-c:{(#10,RANGEKEYSET,@1,apples) (#3,RANGEKEYUNSET,@1)} + +iter +seek-ge c +seek-ge cc +seek-ge e +seek-ge f +---- +c-d:{(#10,RANGEKEYSET,@1,apples)} +c-d:{(#10,RANGEKEYSET,@1,apples)} +e-h:{(#8,RANGEKEYDEL)} +e-h:{(#8,RANGEKEYDEL)} + +iter +seek-ge h +seek-ge i +seek-ge k +seek-ge l +---- +h-k:{(#5,RANGEKEYDEL)} +h-k:{(#5,RANGEKEYDEL)} + + + +# Test SeekLT. Note that MergingIter's SeekLT implements the FragmentIterator's +# SeekLT semantics. 
It returns the first fragment with a Start key < the search +# key, NOT the first fragment that covers a key < the search key. +# +# NB: seek-lt bb finds b-c#3.RANGEKEYUNSET (the last fragment with the bounds +# [b,c), unlike the above seek-ge b which finds the first). + +iter +seek-lt b +---- +a-b:{(#3,RANGEKEYUNSET,@1)} + +iter +seek-lt 1 +seek-lt a +seek-lt aa +seek-lt b +seek-lt bb +seek-lt c +---- + + +a-b:{(#3,RANGEKEYUNSET,@1)} +a-b:{(#3,RANGEKEYUNSET,@1)} +b-c:{(#10,RANGEKEYSET,@1,apples) (#3,RANGEKEYUNSET,@1)} +b-c:{(#10,RANGEKEYSET,@1,apples) (#3,RANGEKEYUNSET,@1)} + +iter +seek-lt cc +seek-lt d +seek-lt dd +seek-lt e +seek-lt ee +seek-lt h +seek-lt hh +seek-lt k +seek-lt z +---- +c-d:{(#10,RANGEKEYSET,@1,apples)} +c-d:{(#10,RANGEKEYSET,@1,apples)} +c-d:{(#10,RANGEKEYSET,@1,apples)} +c-d:{(#10,RANGEKEYSET,@1,apples)} +e-h:{(#8,RANGEKEYDEL)} +e-h:{(#8,RANGEKEYDEL)} +h-k:{(#5,RANGEKEYDEL)} +h-k:{(#5,RANGEKEYDEL)} +h-k:{(#5,RANGEKEYDEL)} + +# Test error handling with multiple levels. Inject errors in all operations on +# the first iterator, and none of the second iterator. 
+ +iter probes=(0,ErrInjected,(Log "# a.")) probes=(1,(Log "# b.")) +seek-ge a +seek-ge b +seek-ge c +seek-ge d +seek-ge e +seek-ge f +seek-ge g +seek-ge h +seek-ge i +seek-ge j +seek-ge k +seek-ge z +---- +# a.SeekLT("a") = nil +# b.SeekLT("a") = nil + err= +# a.SeekLT("b") = nil +# b.SeekLT("b") = a-c:{(#3,RANGEKEYUNSET,@1)} + err= +# a.SeekLT("c") = nil +# b.SeekLT("c") = a-c:{(#3,RANGEKEYUNSET,@1)} + err= +# a.SeekLT("d") = nil +# b.SeekLT("d") = a-c:{(#3,RANGEKEYUNSET,@1)} + err= +# a.SeekLT("e") = nil +# b.SeekLT("e") = a-c:{(#3,RANGEKEYUNSET,@1)} + err= +# a.SeekLT("f") = nil +# b.SeekLT("f") = a-c:{(#3,RANGEKEYUNSET,@1)} + err= +# a.SeekLT("g") = nil +# b.SeekLT("g") = a-c:{(#3,RANGEKEYUNSET,@1)} + err= +# a.SeekLT("h") = nil +# b.SeekLT("h") = a-c:{(#3,RANGEKEYUNSET,@1)} + err= +# a.SeekLT("i") = nil +# b.SeekLT("i") = h-k:{(#5,RANGEKEYDEL)} + err= +# a.SeekLT("j") = nil +# b.SeekLT("j") = h-k:{(#5,RANGEKEYDEL)} + err= +# a.SeekLT("k") = nil +# b.SeekLT("k") = h-k:{(#5,RANGEKEYDEL)} + err= +# a.SeekLT("z") = nil +# b.SeekLT("z") = h-k:{(#5,RANGEKEYDEL)} + err= + +# Test the same as above, but with errors injected on the second iterator. 
+ +iter probes=(0,(Log "# a.")) probes=(1,ErrInjected,(Log "# b.")) +seek-ge a +seek-ge b +seek-ge c +seek-ge d +seek-ge e +seek-ge f +seek-ge g +seek-ge h +seek-ge i +seek-ge j +seek-ge k +seek-ge z +---- +# a.SeekLT("a") = nil +# b.SeekLT("a") = nil + err= +# a.SeekLT("b") = nil +# b.SeekLT("b") = nil + err= +# a.SeekLT("c") = b-d:{(#10,RANGEKEYSET,@1,apples)} +# b.SeekLT("c") = nil + err= +# a.SeekLT("d") = b-d:{(#10,RANGEKEYSET,@1,apples)} +# b.SeekLT("d") = nil + err= +# a.SeekLT("e") = b-d:{(#10,RANGEKEYSET,@1,apples)} +# b.SeekLT("e") = nil + err= +# a.SeekLT("f") = e-h:{(#8,RANGEKEYDEL)} +# b.SeekLT("f") = nil + err= +# a.SeekLT("g") = e-h:{(#8,RANGEKEYDEL)} +# b.SeekLT("g") = nil + err= +# a.SeekLT("h") = e-h:{(#8,RANGEKEYDEL)} +# b.SeekLT("h") = nil + err= +# a.SeekLT("i") = e-h:{(#8,RANGEKEYDEL)} +# b.SeekLT("i") = nil + err= +# a.SeekLT("j") = e-h:{(#8,RANGEKEYDEL)} +# b.SeekLT("j") = nil + err= +# a.SeekLT("k") = e-h:{(#8,RANGEKEYDEL)} +# b.SeekLT("k") = nil + err= +# a.SeekLT("z") = e-h:{(#8,RANGEKEYDEL)} +# b.SeekLT("z") = nil + err= + +# Test SeekLTs with errors injected on the first iterator. 
+ +iter probes=(0,ErrInjected,(Log "# a.")) probes=(1,(Log "# b.")) +seek-lt a +seek-lt b +seek-lt c +seek-lt d +seek-lt e +seek-lt f +seek-lt g +seek-lt h +seek-lt i +seek-lt j +seek-lt k +seek-lt z +---- +# a.SeekGE("a") = nil +# b.SeekGE("a") = a-c:{(#3,RANGEKEYUNSET,@1)} + err= +# a.SeekGE("b") = nil +# b.SeekGE("b") = a-c:{(#3,RANGEKEYUNSET,@1)} + err= +# a.SeekGE("c") = nil +# b.SeekGE("c") = h-k:{(#5,RANGEKEYDEL)} + err= +# a.SeekGE("d") = nil +# b.SeekGE("d") = h-k:{(#5,RANGEKEYDEL)} + err= +# a.SeekGE("e") = nil +# b.SeekGE("e") = h-k:{(#5,RANGEKEYDEL)} + err= +# a.SeekGE("f") = nil +# b.SeekGE("f") = h-k:{(#5,RANGEKEYDEL)} + err= +# a.SeekGE("g") = nil +# b.SeekGE("g") = h-k:{(#5,RANGEKEYDEL)} + err= +# a.SeekGE("h") = nil +# b.SeekGE("h") = h-k:{(#5,RANGEKEYDEL)} + err= +# a.SeekGE("i") = nil +# b.SeekGE("i") = h-k:{(#5,RANGEKEYDEL)} + err= +# a.SeekGE("j") = nil +# b.SeekGE("j") = h-k:{(#5,RANGEKEYDEL)} + err= +# a.SeekGE("k") = nil +# b.SeekGE("k") = nil + err= +# a.SeekGE("z") = nil +# b.SeekGE("z") = nil + err= + +# Test SeekLTs with errors injected on the second iterator. 
+ +iter probes=(0,(Log "# a.")) probes=(1,ErrInjected,(Log "# b.")) +seek-lt a +seek-lt b +seek-lt c +seek-lt d +seek-lt e +seek-lt f +seek-lt g +seek-lt h +seek-lt i +seek-lt j +seek-lt k +seek-lt z +---- +# a.SeekGE("a") = b-d:{(#10,RANGEKEYSET,@1,apples)} +# b.SeekGE("a") = nil + err= +# a.SeekGE("b") = b-d:{(#10,RANGEKEYSET,@1,apples)} +# b.SeekGE("b") = nil + err= +# a.SeekGE("c") = b-d:{(#10,RANGEKEYSET,@1,apples)} +# b.SeekGE("c") = nil + err= +# a.SeekGE("d") = e-h:{(#8,RANGEKEYDEL)} +# b.SeekGE("d") = nil + err= +# a.SeekGE("e") = e-h:{(#8,RANGEKEYDEL)} +# b.SeekGE("e") = nil + err= +# a.SeekGE("f") = e-h:{(#8,RANGEKEYDEL)} +# b.SeekGE("f") = nil + err= +# a.SeekGE("g") = e-h:{(#8,RANGEKEYDEL)} +# b.SeekGE("g") = nil + err= +# a.SeekGE("h") = nil +# b.SeekGE("h") = nil + err= +# a.SeekGE("i") = nil +# b.SeekGE("i") = nil + err= +# a.SeekGE("j") = nil +# b.SeekGE("j") = nil + err= +# a.SeekGE("k") = nil +# b.SeekGE("k") = nil + err= +# a.SeekGE("z") = nil +# b.SeekGE("z") = nil + err= + +# Test error handling during Next. + +iter probes=(0,(If OpNext ErrInjected noop),(Log "# a.")) probes=(1,(Log "# b.")) +first +next +next +next +---- +# a.First() = b-d:{(#10,RANGEKEYSET,@1,apples)} +# b.First() = a-c:{(#3,RANGEKEYUNSET,@1)} +a-b:{(#3,RANGEKEYUNSET,@1)} +b-c:{(#10,RANGEKEYSET,@1,apples) (#3,RANGEKEYUNSET,@1)} +# b.Next() = h-k:{(#5,RANGEKEYDEL)} +c-d:{(#10,RANGEKEYSET,@1,apples)} +# a.Next() = nil + err= + +iter probes=(0,(Log "# a.")) probes=(1,(If OpNext ErrInjected noop),(Log "# b.")) +first +next +next +---- +# a.First() = b-d:{(#10,RANGEKEYSET,@1,apples)} +# b.First() = a-c:{(#3,RANGEKEYUNSET,@1)} +a-b:{(#3,RANGEKEYUNSET,@1)} +b-c:{(#10,RANGEKEYSET,@1,apples) (#3,RANGEKEYUNSET,@1)} +# b.Next() = nil + err= + +# Test error handling during Prev. 
+ +iter probes=(0,(If OpPrev ErrInjected noop),(Log "# a.")) probes=(1,(Log "# b.")) +last +prev +prev +---- +# a.Last() = e-h:{(#8,RANGEKEYDEL)} +# b.Last() = h-k:{(#5,RANGEKEYDEL)} +h-k:{(#5,RANGEKEYDEL)} +# b.Prev() = a-c:{(#3,RANGEKEYUNSET,@1)} +e-h:{(#8,RANGEKEYDEL)} +# a.Prev() = nil + err= + +iter probes=(0,(Log "# a.")) probes=(1,(If OpPrev ErrInjected noop),(Log "# b.")) +last +prev +---- +# a.Last() = e-h:{(#8,RANGEKEYDEL)} +# b.Last() = h-k:{(#5,RANGEKEYDEL)} +h-k:{(#5,RANGEKEYDEL)} +# b.Prev() = nil + err= + +define +a-f:{(#5,RANGEKEYDEL) (#4,RANGEKEYDEL)} +k-s:{(#5,RANGEKEYDEL) (#4,RANGEKEYDEL)} +---- +1 levels + +iter +first +prev +next +---- +a-f:{(#5,RANGEKEYDEL) (#4,RANGEKEYDEL)} + +a-f:{(#5,RANGEKEYDEL) (#4,RANGEKEYDEL)} + +iter +last +next +prev +---- +k-s:{(#5,RANGEKEYDEL) (#4,RANGEKEYDEL)} + +k-s:{(#5,RANGEKEYDEL) (#4,RANGEKEYDEL)} + +define +w-x:{(#5,RANGEKEYDEL) (#3,RANGEKEYDEL)} +x-z:{(#5,RANGEKEYDEL)} +-- +w-y:{(#4,RANGEKEYDEL) (#1,RANGEKEYDEL)} +---- +2 levels + +iter +last +next +prev +first +prev +next +---- +y-z:{(#5,RANGEKEYDEL)} + +y-z:{(#5,RANGEKEYDEL)} +w-x:{(#5,RANGEKEYDEL) (#4,RANGEKEYDEL) (#3,RANGEKEYDEL) (#1,RANGEKEYDEL)} + +w-x:{(#5,RANGEKEYDEL) (#4,RANGEKEYDEL) (#3,RANGEKEYDEL) (#1,RANGEKEYDEL)} + +iter +seek-ge x +prev +seek-ge xray +prev +---- +x-y:{(#5,RANGEKEYDEL) (#4,RANGEKEYDEL) (#1,RANGEKEYDEL)} +w-x:{(#5,RANGEKEYDEL) (#4,RANGEKEYDEL) (#3,RANGEKEYDEL) (#1,RANGEKEYDEL)} +x-y:{(#5,RANGEKEYDEL) (#4,RANGEKEYDEL) (#1,RANGEKEYDEL)} +w-x:{(#5,RANGEKEYDEL) (#4,RANGEKEYDEL) (#3,RANGEKEYDEL) (#1,RANGEKEYDEL)} + +define +il-qb:{(#10,RANGEKEYDEL)} +sn-wn:{(#10,RANGEKEYDEL)} +-- +qt-kh:{(#9,RANGEKEYDEL) (#8,RANGEKEYDEL) (#7,RANGEKEYDEL)} +ky-sv:{(#8,RANGEKEYDEL) (#7,RANGEKEYDEL)} +-- +as-fz:{(#5,RANGEKEYDEL) (#4,RANGEKEYDEL)} +hh-ir:{(#4,RANGEKEYDEL)} +rf-yx:{(#4,RANGEKEYDEL)} +---- +3 levels + +iter +seek-ge qp +next +next +next +next +next +seek-ge yz +prev +---- +qb-rf:{(#8,RANGEKEYDEL) (#7,RANGEKEYDEL)} +rf-sn:{(#8,RANGEKEYDEL) 
(#7,RANGEKEYDEL) (#4,RANGEKEYDEL)} +sn-sv:{(#10,RANGEKEYDEL) (#8,RANGEKEYDEL) (#7,RANGEKEYDEL) (#4,RANGEKEYDEL)} +sv-wn:{(#10,RANGEKEYDEL) (#4,RANGEKEYDEL)} +wn-yx:{(#4,RANGEKEYDEL)} + + +wn-yx:{(#4,RANGEKEYDEL)} + +# Test that empty spans from child iterators are preserved +define +b-d:{#10,RANGEKEYSET,@1,apples} +e-f:{} +g-h:{#8,RANGEKEYDEL} +-- +a-c:{#3,RANGEKEYUNSET,@1} +h-k:{#5,RANGEKEYDEL} +k-m:{} +---- +2 levels + +iter +first +next +next +next +next +next +next +next +---- +a-b:{(#3,RANGEKEYUNSET,@1)} +b-c:{(#10,RANGEKEYSET,@1,apples) (#3,RANGEKEYUNSET,@1)} +c-d:{(#10,RANGEKEYSET,@1,apples)} +e-f:{} +g-h:{(#8,RANGEKEYDEL)} +h-k:{(#5,RANGEKEYDEL)} +k-m:{} + + +iter +last +prev +prev +prev +prev +prev +prev +prev +---- +k-m:{} +h-k:{(#5,RANGEKEYDEL)} +g-h:{(#8,RANGEKEYDEL)} +e-f:{} +c-d:{(#10,RANGEKEYSET,@1,apples)} +b-c:{(#10,RANGEKEYSET,@1,apples) (#3,RANGEKEYUNSET,@1)} +a-b:{(#3,RANGEKEYUNSET,@1)} + diff --git a/pebble/internal/keyspan/testdata/seek b/pebble/internal/keyspan/testdata/seek new file mode 100644 index 0000000..e75a65c --- /dev/null +++ b/pebble/internal/keyspan/testdata/seek @@ -0,0 +1,309 @@ +build +1: b-d +---- +b-d:{(#1,RANGEDEL)} + +seek-ge +a 2 +b 2 +b 1 +d 2 +---- +b-d:{(#1,RANGEDEL)} +b-d:{(#1,RANGEDEL)} +b-d:{} + + +seek-le +a 2 +b 2 +b 1 +d 2 +---- + +b-d:{(#1,RANGEDEL)} +b-d:{} +b-d:{(#1,RANGEDEL)} + +build +3: b-d +2: b-d +1: b-d +---- +b-d:{(#3,RANGEDEL) (#2,RANGEDEL) (#1,RANGEDEL)} + +seek-ge +a 4 +b 4 +b 3 +b 2 +b 1 +d 4 +---- +b-d:{(#3,RANGEDEL) (#2,RANGEDEL) (#1,RANGEDEL)} +b-d:{(#3,RANGEDEL) (#2,RANGEDEL) (#1,RANGEDEL)} +b-d:{(#2,RANGEDEL) (#1,RANGEDEL)} +b-d:{(#1,RANGEDEL)} +b-d:{} + + +seek-le +a 4 +b 4 +b 3 +b 2 +b 1 +d 4 +---- + +b-d:{(#3,RANGEDEL) (#2,RANGEDEL) (#1,RANGEDEL)} +b-d:{(#2,RANGEDEL) (#1,RANGEDEL)} +b-d:{(#1,RANGEDEL)} +b-d:{} +b-d:{(#3,RANGEDEL) (#2,RANGEDEL) (#1,RANGEDEL)} + +build +1: b-d +2: d-f +---- +b-d:{(#1,RANGEDEL)} +d-f:{(#2,RANGEDEL)} + +seek-ge +b 2 +d 2 +d 3 +e 3 +---- +b-d:{(#1,RANGEDEL)} 
+d-f:{} +d-f:{(#2,RANGEDEL)} +d-f:{(#2,RANGEDEL)} + +seek-le +a 3 +b 2 +d 2 +d 3 +e 3 +f 3 +---- + +b-d:{(#1,RANGEDEL)} +d-f:{} +d-f:{(#2,RANGEDEL)} +d-f:{(#2,RANGEDEL)} +d-f:{(#2,RANGEDEL)} + +build +3: a-----------m +2: f------------s +1: j---------------z +---- +a-f:{(#3,RANGEDEL)} +f-j:{(#3,RANGEDEL) (#2,RANGEDEL)} +j-m:{(#3,RANGEDEL) (#2,RANGEDEL) (#1,RANGEDEL)} +m-s:{(#2,RANGEDEL) (#1,RANGEDEL)} +s-z:{(#1,RANGEDEL)} + +seek-ge +a 4 +a 3 +a 2 +a 1 +f 4 +f 3 +f 2 +f 1 +j 4 +j 3 +j 2 +j 1 +m 3 +m 2 +m 1 +s 2 +s 1 +z 2 +---- +a-f:{(#3,RANGEDEL)} +a-f:{} +a-f:{} +a-f:{} +f-j:{(#3,RANGEDEL) (#2,RANGEDEL)} +f-j:{(#2,RANGEDEL)} +f-j:{} +f-j:{} +j-m:{(#3,RANGEDEL) (#2,RANGEDEL) (#1,RANGEDEL)} +j-m:{(#2,RANGEDEL) (#1,RANGEDEL)} +j-m:{(#1,RANGEDEL)} +j-m:{} +m-s:{(#2,RANGEDEL) (#1,RANGEDEL)} +m-s:{(#1,RANGEDEL)} +m-s:{} +s-z:{(#1,RANGEDEL)} +s-z:{} + + +seek-le +a 4 +a 3 +a 2 +a 1 +f 4 +f 3 +f 2 +f 1 +j 4 +j 3 +j 2 +j 1 +m 3 +m 2 +m 1 +s 2 +s 1 +z 2 +---- +a-f:{(#3,RANGEDEL)} +a-f:{} +a-f:{} +a-f:{} +f-j:{(#3,RANGEDEL) (#2,RANGEDEL)} +f-j:{(#2,RANGEDEL)} +f-j:{} +f-j:{} +j-m:{(#3,RANGEDEL) (#2,RANGEDEL) (#1,RANGEDEL)} +j-m:{(#2,RANGEDEL) (#1,RANGEDEL)} +j-m:{(#1,RANGEDEL)} +j-m:{} +m-s:{(#2,RANGEDEL) (#1,RANGEDEL)} +m-s:{(#1,RANGEDEL)} +m-s:{} +s-z:{(#1,RANGEDEL)} +s-z:{} +s-z:{(#1,RANGEDEL)} + +build +1: a-----------m +2: f------------s +3: j---------------z +---- +a-f:{(#1,RANGEDEL)} +f-j:{(#2,RANGEDEL) (#1,RANGEDEL)} +j-m:{(#3,RANGEDEL) (#2,RANGEDEL) (#1,RANGEDEL)} +m-s:{(#3,RANGEDEL) (#2,RANGEDEL)} +s-z:{(#3,RANGEDEL)} + +seek-ge +a 2 +a 1 +f 3 +f 2 +f 1 +j 4 +j 3 +j 2 +j 1 +m 4 +m 3 +m 2 +m 1 +s 4 +s 3 +s 2 +s 1 +z 4 +---- +a-f:{(#1,RANGEDEL)} +a-f:{} +f-j:{(#2,RANGEDEL) (#1,RANGEDEL)} +f-j:{(#1,RANGEDEL)} +f-j:{} +j-m:{(#3,RANGEDEL) (#2,RANGEDEL) (#1,RANGEDEL)} +j-m:{(#2,RANGEDEL) (#1,RANGEDEL)} +j-m:{(#1,RANGEDEL)} +j-m:{} +m-s:{(#3,RANGEDEL) (#2,RANGEDEL)} +m-s:{(#2,RANGEDEL)} +m-s:{} +m-s:{} +s-z:{(#3,RANGEDEL)} +s-z:{} +s-z:{} +s-z:{} + + +seek-le +a 2 +a 1 +f 
3 +f 2 +f 1 +j 4 +j 3 +j 2 +j 1 +m 4 +m 3 +m 2 +m 1 +s 4 +s 3 +s 2 +s 1 +z 4 +z 3 +z 2 +---- +a-f:{(#1,RANGEDEL)} +a-f:{} +f-j:{(#2,RANGEDEL) (#1,RANGEDEL)} +f-j:{(#1,RANGEDEL)} +f-j:{} +j-m:{(#3,RANGEDEL) (#2,RANGEDEL) (#1,RANGEDEL)} +j-m:{(#2,RANGEDEL) (#1,RANGEDEL)} +j-m:{(#1,RANGEDEL)} +j-m:{} +m-s:{(#3,RANGEDEL) (#2,RANGEDEL)} +m-s:{(#2,RANGEDEL)} +m-s:{} +m-s:{} +s-z:{(#3,RANGEDEL)} +s-z:{} +s-z:{} +s-z:{} +s-z:{(#3,RANGEDEL)} +s-z:{} +s-z:{} + +build +1: a-c +3: a-c +5: a-c +5: c-e +---- +a-c:{(#5,RANGEDEL) (#3,RANGEDEL) (#1,RANGEDEL)} +c-e:{(#5,RANGEDEL)} + +# Regression test for a bug where seek-le was failing to find the most recent +# version of a tombstone. The bug existed when seek-{ge,le} performed snapshot +# filtering, and the problematic case was "seek-le c 4". The seeking code was +# finding the tombstone c-e#5, determining it wasn't visible and then return the +# immediately preceding tombstone a-c#1. Now we return c-e:{} immediately, +# because the span c-e covers c and contains no visible keys. + +seek-le +c 1 +c 2 +c 3 +c 4 +c 5 +c 6 +---- +c-e:{} +c-e:{} +c-e:{} +c-e:{} +c-e:{} +c-e:{(#5,RANGEDEL)} diff --git a/pebble/internal/keyspan/testdata/truncate b/pebble/internal/keyspan/testdata/truncate new file mode 100644 index 0000000..33ab3a5 --- /dev/null +++ b/pebble/internal/keyspan/testdata/truncate @@ -0,0 +1,318 @@ +build +1: b-d +2: d-f +3: f-h +---- +1: b-d +2: d-f +3: f-h + + +truncate a-b +---- + +truncate a-c +---- +1: bc + +truncate a-d +---- +1: b-d + +truncate a-e +---- +1: b-d +2: de + +# The second range tombstone should be elided, as it starts after the +# specified file end key. + +truncate a-e endKey=(d.SET.3) +---- +1: b-d + +# The second range tombstone should be back in the below example, as the +# specified end key has a trailer (RANGEDEL.2) exactly matching that of the +# rangedel tombstone's start key. 
+ +truncate a-e endKey=(d.RANGEDEL.2) +---- +1: b-d +2: de + +truncate a-e endKey=(d.SET.1) +---- +1: b-d +2: de + +# Similarly, truncate range tombstones that end before the start key. + +truncate a-e startKey=(d.SET.3) +---- +2: de + +truncate a-e startKey=(c.SET.3) +---- +1: b-d +2: de + +truncate a-f +---- +1: b-d +2: d-f + +truncate a-g +---- +1: b-d +2: d-f +3: fg + +truncate a-h +---- +1: b-d +2: d-f +3: f-h + + +truncate b-b +---- + +truncate b-c +---- +1: bc + +truncate b-d +---- +1: b-d + +truncate b-e +---- +1: b-d +2: de + +truncate b-f +---- +1: b-d +2: d-f + +truncate b-g +---- +1: b-d +2: d-f +3: fg + +truncate b-h +---- +1: b-d +2: d-f +3: f-h + + +truncate c-c +---- + +truncate c-d +---- +1: cd + +truncate c-e +---- +1: cd +2: de + +truncate c-f +---- +1: cd +2: d-f + +truncate c-g +---- +1: cd +2: d-f +3: fg + +truncate c-h +---- +1: cd +2: d-f +3: f-h + + +truncate d-d +---- + +truncate d-e +---- +2: de + +truncate d-f +---- +2: d-f + +truncate d-g +---- +2: d-f +3: fg + +truncate d-h +---- +2: d-f +3: f-h + + +truncate e-e +---- + +truncate e-f +---- +2: ef + +truncate e-g +---- +2: ef +3: fg + +truncate e-h +---- +2: ef +3: f-h + + +truncate f-f +---- + +truncate f-g +---- +3: fg + +truncate f-h +---- +3: f-h + + +truncate g-g +---- + +truncate g-h +---- +3: gh + +# Regression test for https://github.com/cockroachdb/cockroach/issues/113973. 
+ +truncate-and-save-iter a-dd +---- +ok + +saved-iter +first +next +next +next +---- +b-d:{(#1,RANGEDEL)} +d-dd:{(#2,RANGEDEL)} + + + +saved-iter +seek-ge e +next +next +---- + + + + +saved-iter +seek-ge e +prev +prev +---- + +d-dd:{(#2,RANGEDEL)} +b-d:{(#1,RANGEDEL)} + +saved-iter +seek-lt e +prev +prev +---- +d-dd:{(#2,RANGEDEL)} +b-d:{(#1,RANGEDEL)} + + +saved-iter +seek-lt e +next +next +---- +d-dd:{(#2,RANGEDEL)} + + + +truncate-and-save-iter ee-h +---- +ok + +saved-iter +first +next +next +next +---- +ee-f:{(#2,RANGEDEL)} +f-h:{(#3,RANGEDEL)} + + + +saved-iter +seek-ge e +next +next +---- +ee-f:{(#2,RANGEDEL)} +f-h:{(#3,RANGEDEL)} + + +saved-iter +seek-ge e +prev +prev +---- +ee-f:{(#2,RANGEDEL)} + + + +saved-iter +seek-lt e +prev +prev +---- + + + + +saved-iter +seek-lt e +next +next +---- + +ee-f:{(#2,RANGEDEL)} +f-h:{(#3,RANGEDEL)} + + +truncate-and-save-iter a-g +---- +ok + +saved-iter +seek-ge h +prev +seek-lt h +next +---- + +f-g:{(#3,RANGEDEL)} +f-g:{(#3,RANGEDEL)} + diff --git a/pebble/internal/keyspan/testdata/visible b/pebble/internal/keyspan/testdata/visible new file mode 100644 index 0000000..6a3b14b --- /dev/null +++ b/pebble/internal/keyspan/testdata/visible @@ -0,0 +1,58 @@ +define +a-b:{(#5,RANGEKEYSET) (#3,RANGEKEYSET)} +---- +a-b:{(#5,RANGEKEYSET) (#3,RANGEKEYSET)} + +visible +6 +5 +4 +3 +2 +1 +---- +6 : a-b:{(#5,RANGEKEYSET) (#3,RANGEKEYSET)} +5 : a-b:{(#3,RANGEKEYSET)} +4 : a-b:{(#3,RANGEKEYSET)} +3 : a-b:{} +2 : a-b:{} +1 : a-b:{} + +define +a-c:{(#36028797018963996,RANGEKEYSET) (#36028797018963995,RANGEKEYSET)} +---- +a-c:{(#36028797018963996,RANGEKEYSET) (#36028797018963995,RANGEKEYSET)} + +visible +5 +1 +---- +5 : a-c:{(#36028797018963996,RANGEKEYSET) (#36028797018963995,RANGEKEYSET)} +1 : a-c:{(#36028797018963996,RANGEKEYSET) (#36028797018963995,RANGEKEYSET)} + +define +a-c:{(#36028797018963996,RANGEKEYSET) (#36028797018963995,RANGEKEYSET) (#10,RANGEKEYSET) (#9,RANGEKEYSET) (#4,RANGEKEYSET) (#1,RANGEKEYSET)} +---- 
+a-c:{(#36028797018963996,RANGEKEYSET) (#36028797018963995,RANGEKEYSET) (#10,RANGEKEYSET) (#9,RANGEKEYSET) (#4,RANGEKEYSET) (#1,RANGEKEYSET)} + +# Test 'sandwich cases'. Eg, at snapshot=7 the keys at #10 and #9 are invisible, +# but the batch keys and the keys at #4 and #1 are visible. + +visible +12 +10 +8 +7 +4 +3 +2 +1 +---- +12: a-c:{(#36028797018963996,RANGEKEYSET) (#36028797018963995,RANGEKEYSET) (#10,RANGEKEYSET) (#9,RANGEKEYSET) (#4,RANGEKEYSET) (#1,RANGEKEYSET)} +10: a-c:{(#36028797018963996,RANGEKEYSET) (#36028797018963995,RANGEKEYSET) (#9,RANGEKEYSET) (#4,RANGEKEYSET) (#1,RANGEKEYSET)} +8 : a-c:{(#36028797018963996,RANGEKEYSET) (#36028797018963995,RANGEKEYSET) (#4,RANGEKEYSET) (#1,RANGEKEYSET)} +7 : a-c:{(#36028797018963996,RANGEKEYSET) (#36028797018963995,RANGEKEYSET) (#4,RANGEKEYSET) (#1,RANGEKEYSET)} +4 : a-c:{(#36028797018963996,RANGEKEYSET) (#36028797018963995,RANGEKEYSET) (#1,RANGEKEYSET)} +3 : a-c:{(#36028797018963996,RANGEKEYSET) (#36028797018963995,RANGEKEYSET) (#1,RANGEKEYSET)} +2 : a-c:{(#36028797018963996,RANGEKEYSET) (#36028797018963995,RANGEKEYSET) (#1,RANGEKEYSET)} +1 : a-c:{(#36028797018963996,RANGEKEYSET) (#36028797018963995,RANGEKEYSET)} diff --git a/pebble/internal/keyspan/testdata/visible_at b/pebble/internal/keyspan/testdata/visible_at new file mode 100644 index 0000000..6c8d56b --- /dev/null +++ b/pebble/internal/keyspan/testdata/visible_at @@ -0,0 +1,58 @@ +define +a-b:{(#5,RANGEKEYSET) (#3,RANGEKEYSET)} +---- +a-b:{(#5,RANGEKEYSET) (#3,RANGEKEYSET)} + +visible-at +6 +5 +4 +3 +2 +1 +---- +6 : true +5 : true +4 : true +3 : false +2 : false +1 : false + +# NB: #36028797018963996 and #36028797018963995 are sequence numbers with the +# batch bit set. These keys should always be visible. 
+ +define +a-c:{(#36028797018963996,RANGEKEYSET) (#36028797018963995,RANGEKEYSET)} +---- +a-c:{(#36028797018963996,RANGEKEYSET) (#36028797018963995,RANGEKEYSET)} + +visible-at +5 +1 +---- +5 : true +1 : true + +define +a-c:{(#36028797018963996,RANGEKEYSET) (#36028797018963995,RANGEKEYSET) (#10,RANGEKEYSET) (#9,RANGEKEYSET) (#4,RANGEKEYSET) (#1,RANGEKEYSET)} +---- +a-c:{(#36028797018963996,RANGEKEYSET) (#36028797018963995,RANGEKEYSET) (#10,RANGEKEYSET) (#9,RANGEKEYSET) (#4,RANGEKEYSET) (#1,RANGEKEYSET)} + +visible-at +12 +10 +8 +7 +4 +3 +2 +1 +---- +12: true +10: true +8 : true +7 : true +4 : true +3 : true +2 : true +1 : true diff --git a/pebble/internal/keyspan/transformer.go b/pebble/internal/keyspan/transformer.go new file mode 100644 index 0000000..b5e8735 --- /dev/null +++ b/pebble/internal/keyspan/transformer.go @@ -0,0 +1,50 @@ +// Copyright 2023 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package keyspan + +import "github.com/cockroachdb/pebble/internal/base" + +// Transformer defines a transformation to be applied to a Span. +type Transformer interface { + // Transform takes a Span as input and writes the transformed Span to the + // provided output *Span pointer. The output Span's Keys slice may be reused + // by Transform to reduce allocations. + Transform(cmp base.Compare, in Span, out *Span) error +} + +// The TransformerFunc type is an adapter to allow the use of ordinary functions +// as Transformers. If f is a function with the appropriate signature, +// TransformerFunc(f) is a Transformer that calls f. +type TransformerFunc func(base.Compare, Span, *Span) error + +// Transform calls f(cmp, in, out). 
+func (tf TransformerFunc) Transform(cmp base.Compare, in Span, out *Span) error { + return tf(cmp, in, out) +} + +var noopTransform Transformer = TransformerFunc(func(_ base.Compare, s Span, dst *Span) error { + dst.Start, dst.End = s.Start, s.End + dst.Keys = append(dst.Keys[:0], s.Keys...) + return nil +}) + +// VisibleTransform filters keys that are invisible at the provided snapshot +// sequence number. +func VisibleTransform(snapshot uint64) Transformer { + return TransformerFunc(func(_ base.Compare, s Span, dst *Span) error { + dst.Start, dst.End = s.Start, s.End + dst.Keys = dst.Keys[:0] + for _, k := range s.Keys { + // NB: The InternalKeySeqNumMax value is used for the batch snapshot + // because a batch's visible span keys are filtered when they're + // fragmented. There's no requirement to enforce visibility at + // iteration time. + if base.Visible(k.SeqNum(), snapshot, base.InternalKeySeqNumMax) { + dst.Keys = append(dst.Keys, k) + } + } + return nil + }) +} diff --git a/pebble/internal/keyspan/truncate.go b/pebble/internal/keyspan/truncate.go new file mode 100644 index 0000000..c0e609b --- /dev/null +++ b/pebble/internal/keyspan/truncate.go @@ -0,0 +1,73 @@ +// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package keyspan + +import "github.com/cockroachdb/pebble/internal/base" + +// Truncate creates a new iterator where every span in the supplied iterator is +// truncated to be contained within the range [lower, upper). If start and end +// are specified, filter out any spans that are completely outside those bounds. 
+func Truncate( + cmp base.Compare, + iter FragmentIterator, + lower, upper []byte, + start, end *base.InternalKey, + panicOnUpperTruncate bool, +) FragmentIterator { + return Filter(iter, func(in *Span, out *Span) (keep bool) { + out.Start, out.End = in.Start, in.End + out.Keys = append(out.Keys[:0], in.Keys...) + + // Ignore this span if it lies completely outside start, end. Note that + // end endInclusive indicated whether end is inclusive. + // + // The comparison between s.End and start is by user key only, as + // the span is exclusive at s.End, so comparing by user keys + // is sufficient. + if start != nil && cmp(in.End, start.UserKey) <= 0 { + return false + } + if end != nil { + v := cmp(in.Start, end.UserKey) + switch { + case v > 0: + // Wholly outside the end bound. Skip it. + return false + case v == 0: + // This span begins at the same user key as `end`. Whether or + // not any of the keys contained within the span are relevant is + // dependent on Trailers. Any keys contained within the span + // with trailers larger than end cover the small sliver of + // keyspace between [k#inf, k#]. Since keys are + // sorted descending by Trailer within the span, we need to find + // the prefix of keys with larger trailers. + for i := range in.Keys { + if in.Keys[i].Trailer < end.Trailer { + out.Keys = out.Keys[:i] + break + } + } + default: + // Wholly within the end bound. Keep it. + } + } + + var truncated bool + // Truncate the bounds to lower and upper. 
+ if cmp(in.Start, lower) < 0 { + out.Start = lower + } + if cmp(in.End, upper) > 0 { + truncated = true + out.End = upper + } + + if panicOnUpperTruncate && truncated { + panic("pebble: upper bound should not be truncated") + } + + return !out.Empty() && cmp(out.Start, out.End) < 0 + }, cmp) +} diff --git a/pebble/internal/keyspan/truncate_test.go b/pebble/internal/keyspan/truncate_test.go new file mode 100644 index 0000000..f2b2793 --- /dev/null +++ b/pebble/internal/keyspan/truncate_test.go @@ -0,0 +1,94 @@ +// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package keyspan + +import ( + "bytes" + "fmt" + "strings" + "testing" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/pebble/internal/base" +) + +func TestTruncate(t *testing.T) { + cmp := base.DefaultComparer.Compare + fmtKey := base.DefaultComparer.FormatKey + var iter FragmentIterator + var savedIter FragmentIterator + defer func() { + if savedIter != nil { + savedIter.Close() + savedIter = nil + } + }() + + datadriven.RunTest(t, "testdata/truncate", func(t *testing.T, d *datadriven.TestData) string { + doTruncate := func() FragmentIterator { + if len(d.Input) > 0 { + t.Fatalf("unexpected input: %s", d.Input) + } + if len(d.CmdArgs) < 1 || len(d.CmdArgs) > 3 { + t.Fatalf("expected 1-3 arguments: %s", d.CmdArgs) + } + parts := strings.Split(d.CmdArgs[0].String(), "-") + var startKey, endKey *base.InternalKey + if len(d.CmdArgs) > 1 { + for _, arg := range d.CmdArgs[1:] { + switch arg.Key { + case "startKey": + startKey = &base.InternalKey{} + *startKey = base.ParseInternalKey(arg.Vals[0]) + case "endKey": + endKey = &base.InternalKey{} + *endKey = base.ParseInternalKey(arg.Vals[0]) + } + } + } + if len(parts) != 2 { + t.Fatalf("malformed arg: %s", d.CmdArgs[0]) + } + lower := []byte(parts[0]) + upper := []byte(parts[1]) + + tIter := Truncate( + cmp, iter, 
lower, upper, startKey, endKey, false, + ) + return tIter + } + + switch d.Cmd { + case "build": + tombstones := buildSpans(t, cmp, fmtKey, d.Input, base.InternalKeyKindRangeDelete) + iter = NewIter(cmp, tombstones) + return formatAlphabeticSpans(tombstones) + + case "truncate": + tIter := doTruncate() + defer tIter.Close() + var truncated []Span + for s := tIter.First(); s != nil; s = tIter.Next() { + truncated = append(truncated, s.ShallowClone()) + } + return formatAlphabeticSpans(truncated) + + case "truncate-and-save-iter": + if savedIter != nil { + savedIter.Close() + } + savedIter = doTruncate() + return "ok" + + case "saved-iter": + var buf bytes.Buffer + runIterCmd(t, d, savedIter, &buf) + return buf.String() + + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} diff --git a/pebble/internal/lint/lint.go b/pebble/internal/lint/lint.go new file mode 100644 index 0000000..338a34a --- /dev/null +++ b/pebble/internal/lint/lint.go @@ -0,0 +1,5 @@ +// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package lint diff --git a/pebble/internal/lint/lint_test.go b/pebble/internal/lint/lint_test.go new file mode 100644 index 0000000..e088d69 --- /dev/null +++ b/pebble/internal/lint/lint_test.go @@ -0,0 +1,301 @@ +// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package lint + +import ( + "bytes" + "fmt" + "go/build" + "os/exec" + "regexp" + "runtime" + "strings" + "testing" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/invariants" + "github.com/ghemawat/stream" + "github.com/stretchr/testify/require" +) + +const ( + cmdGo = "go" + golint = "golang.org/x/lint/golint@6edffad5e6160f5949cdefc81710b2706fbcd4f6" + staticcheck = "honnef.co/go/tools/cmd/staticcheck@2023.1" + crlfmt = "github.com/cockroachdb/crlfmt@44a36ec7" +) + +func dirCmd(t *testing.T, dir string, name string, args ...string) stream.Filter { + cmd := exec.Command(name, args...) + cmd.Dir = dir + out, err := cmd.CombinedOutput() + switch err.(type) { + case nil: + case *exec.ExitError: + // Non-zero exit is expected. + default: + require.NoError(t, err) + } + return stream.ReadLines(bytes.NewReader(out)) +} + +func ignoreGoMod() stream.Filter { + return stream.GrepNot(`^go: (finding|extracting|downloading)`) +} + +func TestLint(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("lint checks skipped on Windows") + } + if invariants.RaceEnabled { + // We are not interested in race-testing the linters themselves. + t.Skip("lint checks skipped on race builds") + } + + const root = "github.com/cockroachdb/pebble" + + pkg, err := build.Import(root, "../..", 0) + require.NoError(t, err) + + var pkgs []string + if err := stream.ForEach( + stream.Sequence( + dirCmd(t, pkg.Dir, "go", "list", "./..."), + ignoreGoMod(), + ), func(s string) { + pkgs = append(pkgs, s) + }); err != nil { + require.NoError(t, err) + } + + t.Run("TestGolint", func(t *testing.T) { + t.Parallel() + + args := []string{"run", golint} + args = append(args, pkgs...) + + // This is overkill right now, but provides a structure for filtering out + // lint errors we don't care about. 
+ if err := stream.ForEach( + stream.Sequence( + dirCmd(t, pkg.Dir, cmdGo, args...), + stream.GrepNot("go: downloading"), + ), func(s string) { + t.Errorf("\n%s", s) + }); err != nil { + t.Error(err) + } + }) + + t.Run("TestStaticcheck", func(t *testing.T) { + t.Parallel() + + args := []string{"run", staticcheck} + args = append(args, pkgs...) + + if err := stream.ForEach( + stream.Sequence( + dirCmd(t, pkg.Dir, cmdGo, args...), + stream.GrepNot("go: downloading"), + ), func(s string) { + t.Errorf("\n%s", s) + }); err != nil { + t.Error(err) + } + }) + + t.Run("TestGoVet", func(t *testing.T) { + t.Parallel() + + if err := stream.ForEach( + stream.Sequence( + dirCmd(t, pkg.Dir, "go", "vet", "-all", "./..."), + stream.GrepNot(`^#`), // ignore comment lines + ignoreGoMod(), + ), func(s string) { + t.Errorf("\n%s", s) + }); err != nil { + t.Error(err) + } + }) + + t.Run("TestFmtErrorf", func(t *testing.T) { + t.Parallel() + + if err := stream.ForEach( + dirCmd(t, pkg.Dir, "git", "grep", "fmt\\.Errorf("), + func(s string) { + t.Errorf("\n%s <- please use \"errors.Errorf\" instead", s) + }); err != nil { + t.Error(err) + } + }) + + t.Run("TestOSIsErr", func(t *testing.T) { + t.Parallel() + + if err := stream.ForEach( + dirCmd(t, pkg.Dir, "git", "grep", "os\\.Is"), + func(s string) { + t.Errorf("\n%s <- please use the \"oserror\" equivalent instead", s) + }); err != nil { + t.Error(err) + } + }) + + t.Run("TestSetFinalizer", func(t *testing.T) { + t.Parallel() + + if err := stream.ForEach( + stream.Sequence( + dirCmd(t, pkg.Dir, "git", "grep", "-B1", "runtime\\.SetFinalizer("), + lintIgnore("lint:ignore SetFinalizer"), + stream.GrepNot(`^internal/invariants/finalizer_on.go`), + ), func(s string) { + t.Errorf("\n%s <- please use the \"invariants.SetFinalizer\" equivalent instead", s) + }); err != nil { + t.Error(err) + } + }) + + // Disallow "raw" atomics; wrappers like atomic.Int32 provide much better + // safety and alignment guarantees. 
+ t.Run("TestRawAtomics", func(t *testing.T) { + t.Parallel() + if err := stream.ForEach( + stream.Sequence( + dirCmd(t, pkg.Dir, "git", "grep", `atomic\.\(Load\|Store\|Add\|Swap\|Compare\)`), + lintIgnore("lint:ignore RawAtomics"), + ), func(s string) { + t.Errorf("\n%s <- please use atomic wrappers (like atomic.Int32) instead", s) + }); err != nil { + t.Error(err) + } + }) + + t.Run("TestForbiddenImports", func(t *testing.T) { + t.Parallel() + + // Forbidden-import-pkg -> permitted-replacement-pkg + forbiddenImports := map[string]string{ + "errors": "github.com/cockroachdb/errors", + "pkg/errors": "github.com/cockroachdb/errors", + } + + // grepBuf creates a grep string that matches any forbidden import pkgs. + var grepBuf bytes.Buffer + grepBuf.WriteByte('(') + for forbiddenPkg := range forbiddenImports { + grepBuf.WriteByte('|') + grepBuf.WriteString(regexp.QuoteMeta(forbiddenPkg)) + } + grepBuf.WriteString(")$") + + filter := stream.FilterFunc(func(arg stream.Arg) error { + for _, path := range pkgs { + buildContext := build.Default + buildContext.UseAllFiles = true + importPkg, err := buildContext.Import(path, pkg.Dir, 0) + if _, ok := err.(*build.MultiplePackageError); ok { + buildContext.UseAllFiles = false + importPkg, err = buildContext.Import(path, pkg.Dir, 0) + } + + switch err.(type) { + case nil: + for _, s := range importPkg.Imports { + arg.Out <- importPkg.ImportPath + ": " + s + } + for _, s := range importPkg.TestImports { + arg.Out <- importPkg.ImportPath + ": " + s + } + for _, s := range importPkg.XTestImports { + arg.Out <- importPkg.ImportPath + ": " + s + } + case *build.NoGoError: + default: + return errors.Wrapf(err, "error loading package %s", path) + } + } + return nil + }) + if err := stream.ForEach(stream.Sequence( + filter, + stream.Sort(), + stream.Uniq(), + stream.Grep(grepBuf.String()), + ), func(s string) { + pkgStr := strings.Split(s, ": ") + importedPkg := pkgStr[1] + + // Test that a disallowed package is not imported. 
+ if replPkg, ok := forbiddenImports[importedPkg]; ok { + t.Errorf("\n%s <- please use %q instead of %q", s, replPkg, importedPkg) + } + }); err != nil { + t.Error(err) + } + }) + + t.Run("TestCrlfmt", func(t *testing.T) { + t.Parallel() + + args := []string{"run", crlfmt, "-fast", "-tab", "2", "."} + var buf bytes.Buffer + if err := stream.ForEach( + stream.Sequence( + dirCmd(t, pkg.Dir, cmdGo, args...), + stream.GrepNot("go: downloading"), + ), + func(s string) { + fmt.Fprintln(&buf, s) + }); err != nil { + t.Error(err) + } + errs := buf.String() + if len(errs) > 0 { + t.Errorf("\n%s", errs) + } + + if t.Failed() { + reWriteCmd := []string{crlfmt, "-w"} + reWriteCmd = append(reWriteCmd, args...) + t.Logf("run the following to fix your formatting:\n"+ + "\n%s\n\n"+ + "Don't forget to add amend the result to the correct commits.", + strings.Join(reWriteCmd, " "), + ) + } + }) +} + +// lintIgnore is a stream.FilterFunc that filters out lines that are preceded by +// the given ignore directive. The function assumes the input stream receives a +// sequence of strings that are to be considered as pairs. If the first string +// in the sequence matches the ignore directive, the following string is +// dropped, else it is emitted. +// +// For example, given the sequence "foo", "bar", "baz", "bam", and an ignore +// directive "foo", the sequence "baz", "bam" would be emitted. If the directive +// was "baz", the sequence "foo", "bar" would be emitted. +func lintIgnore(ignore string) stream.FilterFunc { + return func(arg stream.Arg) error { + var prev string + var i int + for s := range arg.In { + if i%2 == 0 { + // Fist string in the pair is used as the filter. Store it. + prev = s + } else { + // Second string is emitted only if it _does not_ match the directive. 
+ if !strings.Contains(prev, ignore) { + arg.Out <- s + } + } + i++ + } + return nil + } +} diff --git a/pebble/internal/manifest/btree.go b/pebble/internal/manifest/btree.go new file mode 100644 index 0000000..dd17834 --- /dev/null +++ b/pebble/internal/manifest/btree.go @@ -0,0 +1,1304 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package manifest + +import ( + "bytes" + "fmt" + "strings" + "sync/atomic" + "unsafe" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/invariants" + stdcmp "github.com/cockroachdb/pebble/shims/cmp" +) + +// The Annotator type defined below is used by other packages to lazily +// compute a value over a B-Tree. Each node of the B-Tree stores one +// `annotation` per annotator, containing the result of the computation over +// the node's subtree. +// +// An annotation is marked as valid if it's current with the current subtree +// state. Annotations are marked as invalid whenever a node will be mutated +// (in mut). Annotators may also return `false` from `Accumulate` to signal +// that a computation for a file is not stable and may change in the future. +// Annotations that include these unstable values are also marked as invalid +// on the node, ensuring that future queries for the annotation will recompute +// the value. + +// An Annotator defines a computation over a level's FileMetadata. If the +// computation is stable and uses inputs that are fixed for the lifetime of +// a FileMetadata, the LevelMetadata's internal data structures are annotated +// with the intermediary computations. This allows the computation to be +// computed incrementally as edits are applied to a level. +type Annotator interface { + // Zero returns the zero value of an annotation. This value is returned + // when a LevelMetadata is empty. 
The dst argument, if non-nil, is an + // obsolete value previously returned by this Annotator and may be + // overwritten and reused to avoid a memory allocation. + Zero(dst interface{}) (v interface{}) + + // Accumulate computes the annotation for a single file in a level's + // metadata. It merges the file's value into dst and returns a bool flag + // indicating whether or not the value is stable and okay to cache as an + // annotation. If the file's value may change over the life of the file, + // the annotator must return false. + // + // Implementations may modify dst and return it to avoid an allocation. + Accumulate(m *FileMetadata, dst interface{}) (v interface{}, cacheOK bool) + + // Merge combines two values src and dst, returning the result. + // Implementations may modify dst and return it to avoid an allocation. + Merge(src interface{}, dst interface{}) interface{} +} + +type btreeCmp func(*FileMetadata, *FileMetadata) int + +func btreeCmpSeqNum(a, b *FileMetadata) int { + return a.cmpSeqNum(b) +} + +func btreeCmpSmallestKey(cmp Compare) btreeCmp { + return func(a, b *FileMetadata) int { + return a.cmpSmallestKey(b, cmp) + } +} + +// btreeCmpSpecificOrder is used in tests to construct a B-Tree with a +// specific ordering of FileMetadata within the tree. It's typically used to +// test consistency checking code that needs to construct a malformed B-Tree. +func btreeCmpSpecificOrder(files []*FileMetadata) btreeCmp { + m := map[*FileMetadata]int{} + for i, f := range files { + m[f] = i + } + return func(a, b *FileMetadata) int { + ai, aok := m[a] + bi, bok := m[b] + if !aok || !bok { + panic("btreeCmpSliceOrder called with unknown files") + } + return stdcmp.Compare(ai, bi) + } +} + +const ( + degree = 16 + maxItems = 2*degree - 1 + minItems = degree - 1 +) + +type annotation struct { + annotator Annotator + // v is an annotation value, the output of either + // annotator.Value or annotator.Merge. 
+ v interface{} + // valid indicates whether future reads of the annotation may use v as-is. + // If false, v will be zeroed and recalculated. + valid bool +} + +type leafNode struct { + ref atomic.Int32 + count int16 + leaf bool + // subtreeCount holds the count of files in the entire subtree formed by + // this node. For leaf nodes, subtreeCount is always equal to count. For + // non-leaf nodes, it's the sum of count plus all the children's + // subtreeCounts. + // + // NB: We could move this field to the end of the node struct, since leaf => + // count=subtreeCount, however the unsafe casting [leafToNode] performs make + // it risky and cumbersome. + subtreeCount int + items [maxItems]*FileMetadata + // annot contains one annotation per annotator, merged over the entire + // node's files (and all descendants for non-leaf nodes). + annot []annotation +} + +type node struct { + leafNode + children [maxItems + 1]*node +} + +//go:nocheckptr casts a ptr to a smaller struct to a ptr to a larger struct. +func leafToNode(ln *leafNode) *node { + return (*node)(unsafe.Pointer(ln)) +} + +func newLeafNode() *node { + n := leafToNode(new(leafNode)) + n.leaf = true + n.ref.Store(1) + return n +} + +func newNode() *node { + n := new(node) + n.ref.Store(1) + return n +} + +// mut creates and returns a mutable node reference. If the node is not shared +// with any other trees then it can be modified in place. Otherwise, it must be +// cloned to ensure unique ownership. In this way, we enforce a copy-on-write +// policy which transparently incorporates the idea of local mutations, like +// Clojure's transients or Haskell's ST monad, where nodes are only copied +// during the first time that they are modified between Clone operations. +// +// When a node is cloned, the provided pointer will be redirected to the new +// mutable node. +func mut(n **node) *node { + if (*n).ref.Load() == 1 { + // Exclusive ownership. Can mutate in place. 
+ + // Whenever a node will be mutated, reset its annotations to be marked + // as uncached. This ensures any future calls to (*node).annotation + // will recompute annotations on the modified subtree. + for i := range (*n).annot { + (*n).annot[i].valid = false + } + return *n + } + // If we do not have unique ownership over the node then we + // clone it to gain unique ownership. After doing so, we can + // release our reference to the old node. We pass recursive + // as true because even though we just observed the node's + // reference count to be greater than 1, we might be racing + // with another call to decRef on this node. + c := (*n).clone() + (*n).decRef(true /* contentsToo */, nil) + *n = c + // NB: We don't need to clear annotations, because (*node).clone does not + // copy them. + return *n +} + +// incRef acquires a reference to the node. +func (n *node) incRef() { + n.ref.Add(1) +} + +// decRef releases a reference to the node. If requested, the method will unref +// its items and recurse into child nodes and decrease their refcounts as well. +// Some internal codepaths that manually copy the node's items or children to +// new nodes pass contentsToo=false to preserve existing reference counts during +// operations that should yield a net-zero change to descendant refcounts. +// When a node is released, its contained files are dereferenced. +func (n *node) decRef(contentsToo bool, obsolete *[]*FileBacking) { + if n.ref.Add(-1) > 0 { + // Other references remain. Can't free. + return + } + + // Dereference the node's metadata and release child references if + // requested. Some internal callers may not want to propagate the deref + // because they're manually copying the filemetadata and children to other + // nodes, and they want to preserve the existing reference count. + if contentsToo { + for _, f := range n.items[:n.count] { + if f.Unref() == 0 { + // There are two sources of node dereferences: tree mutations + // and Version dereferences. 
Files should only be made obsolete + // during Version dereferences, during which `obsolete` will be + // non-nil. + if obsolete == nil { + panic(fmt.Sprintf("file metadata %s dereferenced to zero during tree mutation", f.FileNum)) + } + // Reference counting is performed on the FileBacking. In the case + // of a virtual sstable, this reference counting is performed on + // a FileBacking which is shared by every single virtual sstable + // with the same backing sstable. If the reference count hits 0, + // then we know that the FileBacking won't be required by any + // sstable in Pebble, and that the backing sstable can be deleted. + *obsolete = append(*obsolete, f.FileBacking) + } + } + if !n.leaf { + for i := int16(0); i <= n.count; i++ { + n.children[i].decRef(true /* contentsToo */, obsolete) + } + } + } +} + +// clone creates a clone of the receiver with a single reference count. +func (n *node) clone() *node { + var c *node + if n.leaf { + c = newLeafNode() + } else { + c = newNode() + } + // NB: copy field-by-field without touching n.ref to avoid + // triggering the race detector and looking like a data race. + c.count = n.count + c.items = n.items + c.subtreeCount = n.subtreeCount + // Increase the refcount of each contained item. + for _, f := range n.items[:n.count] { + f.Ref() + } + if !c.leaf { + // Copy children and increase each refcount. + c.children = n.children + for i := int16(0); i <= c.count; i++ { + c.children[i].incRef() + } + } + return c +} + +// insertAt inserts the provided file and node at the provided index. This +// function is for use only as a helper function for internal B-Tree code. +// Clients should not invoke it directly. 
+func (n *node) insertAt(index int, item *FileMetadata, nd *node) { + if index < int(n.count) { + copy(n.items[index+1:n.count+1], n.items[index:n.count]) + if !n.leaf { + copy(n.children[index+2:n.count+2], n.children[index+1:n.count+1]) + } + } + n.items[index] = item + if !n.leaf { + n.children[index+1] = nd + } + n.count++ +} + +// pushBack inserts the provided file and node at the tail of the node's items. +// This function is for use only as a helper function for internal B-Tree code. +// Clients should not invoke it directly. +func (n *node) pushBack(item *FileMetadata, nd *node) { + n.items[n.count] = item + if !n.leaf { + n.children[n.count+1] = nd + } + n.count++ +} + +// pushFront inserts the provided file and node at the head of the +// node's items. This function is for use only as a helper function for internal B-Tree +// code. Clients should not invoke it directly. +func (n *node) pushFront(item *FileMetadata, nd *node) { + if !n.leaf { + copy(n.children[1:n.count+2], n.children[:n.count+1]) + n.children[0] = nd + } + copy(n.items[1:n.count+1], n.items[:n.count]) + n.items[0] = item + n.count++ +} + +// removeAt removes a value at a given index, pulling all subsequent values +// back. This function is for use only as a helper function for internal B-Tree +// code. Clients should not invoke it directly. +func (n *node) removeAt(index int) (*FileMetadata, *node) { + var child *node + if !n.leaf { + child = n.children[index+1] + copy(n.children[index+1:n.count], n.children[index+2:n.count+1]) + n.children[n.count] = nil + } + n.count-- + out := n.items[index] + copy(n.items[index:n.count], n.items[index+1:n.count+1]) + n.items[n.count] = nil + return out, child +} + +// popBack removes and returns the last element in the list. This function is +// for use only as a helper function for internal B-Tree code. Clients should +// not invoke it directly. 
+func (n *node) popBack() (*FileMetadata, *node) { + n.count-- + out := n.items[n.count] + n.items[n.count] = nil + if n.leaf { + return out, nil + } + child := n.children[n.count+1] + n.children[n.count+1] = nil + return out, child +} + +// popFront removes and returns the first element in the list. This function is +// for use only as a helper function for internal B-Tree code. Clients should +// not invoke it directly. +func (n *node) popFront() (*FileMetadata, *node) { + n.count-- + var child *node + if !n.leaf { + child = n.children[0] + copy(n.children[:n.count+1], n.children[1:n.count+2]) + n.children[n.count+1] = nil + } + out := n.items[0] + copy(n.items[:n.count], n.items[1:n.count+1]) + n.items[n.count] = nil + return out, child +} + +// find returns the index where the given item should be inserted into this +// list. 'found' is true if the item already exists in the list at the given +// index. +// +// This function is for use only as a helper function for internal B-Tree code. +// Clients should not invoke it directly. +func (n *node) find(cmp btreeCmp, item *FileMetadata) (index int, found bool) { + // Logic copied from sort.Search. Inlining this gave + // an 11% speedup on BenchmarkBTreeDeleteInsert. + i, j := 0, int(n.count) + for i < j { + h := int(uint(i+j) >> 1) // avoid overflow when computing h + // i ≤ h < j + v := cmp(item, n.items[h]) + if v == 0 { + return h, true + } else if v > 0 { + i = h + 1 + } else { + j = h + } + } + return i, false +} + +// split splits the given node at the given index. The current node shrinks, +// and this function returns the item that existed at that index and a new +// node containing all items/children after it. +// +// split is called when we want to perform a transformation like the one +// depicted in the following diagram. 
+// +// Before: +// +-----------+ +// n *node | x y z | +// +--/-/-\-\--+ +// +// After: +// +-----------+ +// | y | n's parent +// +----/-\----+ +// / \ +// v v +// +-----------+ +-----------+ +// n *node | x | | z | next *node +// +-----------+ +-----------+ +// +// split does not perform the complete transformation; the caller is responsible +// for updating the parent appropriately. split splits `n` into two nodes, `n` +// and `next`, returning `next` and the file that separates them. In the diagram +// above, `n.split` removes y and z from `n`, returning y in the first return +// value and `next` in the second return value. The caller is responsible for +// updating n's parent to now contain `y` as the separator between nodes `n` and +// `next`. +// +// This function is for use only as a helper function for internal B-Tree code. +// Clients should not invoke it directly. +func (n *node) split(i int) (*FileMetadata, *node) { + out := n.items[i] + var next *node + if n.leaf { + next = newLeafNode() + } else { + next = newNode() + } + next.count = n.count - int16(i+1) + copy(next.items[:], n.items[i+1:n.count]) + for j := int16(i); j < n.count; j++ { + n.items[j] = nil + } + if !n.leaf { + copy(next.children[:], n.children[i+1:n.count+1]) + descendantsMoved := 0 + for j := int16(i + 1); j <= n.count; j++ { + descendantsMoved += n.children[j].subtreeCount + n.children[j] = nil + } + n.subtreeCount -= descendantsMoved + next.subtreeCount += descendantsMoved + } + n.count = int16(i) + // NB: We subtract one more than `next.count` from n's subtreeCount because + // the item at index `i` was removed from `n.items`. We'll return the item + // at index `i`, and the caller is responsible for updating the subtree + // count of whichever node adopts it. 
+ n.subtreeCount -= int(next.count) + 1 + next.subtreeCount += int(next.count) + return out, next +} + +// Insert inserts a item into the subtree rooted at this node, making sure no +// nodes in the subtree exceed maxItems items. +func (n *node) Insert(cmp btreeCmp, item *FileMetadata) error { + i, found := n.find(cmp, item) + if found { + // cmp provides a total ordering of the files within a level. + // If we're inserting a metadata that's equal to an existing item + // in the tree, we're inserting a file into a level twice. + return errors.Errorf("files %s and %s collided on sort keys", + errors.Safe(item.FileNum), errors.Safe(n.items[i].FileNum)) + } + if n.leaf { + n.insertAt(i, item, nil) + n.subtreeCount++ + return nil + } + if n.children[i].count >= maxItems { + splitLa, splitNode := mut(&n.children[i]).split(maxItems / 2) + n.insertAt(i, splitLa, splitNode) + + switch cmp := cmp(item, n.items[i]); { + case cmp < 0: + // no change, we want first split node + case cmp > 0: + i++ // we want second split node + default: + // cmp provides a total ordering of the files within a level. + // If we're inserting a metadata that's equal to an existing item + // in the tree, we're inserting a file into a level twice. + return errors.Errorf("files %s and %s collided on sort keys", + errors.Safe(item.FileNum), errors.Safe(n.items[i].FileNum)) + } + } + + err := mut(&n.children[i]).Insert(cmp, item) + if err == nil { + n.subtreeCount++ + } + return err +} + +// removeMax removes and returns the maximum item from the subtree rooted at +// this node. This function is for use only as a helper function for internal +// B-Tree code. Clients should not invoke it directly. 
+func (n *node) removeMax() *FileMetadata { + if n.leaf { + n.count-- + n.subtreeCount-- + out := n.items[n.count] + n.items[n.count] = nil + return out + } + child := mut(&n.children[n.count]) + if child.count <= minItems { + n.rebalanceOrMerge(int(n.count)) + return n.removeMax() + } + n.subtreeCount-- + return child.removeMax() +} + +// Remove removes a item from the subtree rooted at this node. Returns +// the item that was removed or nil if no matching item was found. +func (n *node) Remove(cmp btreeCmp, item *FileMetadata) (out *FileMetadata) { + i, found := n.find(cmp, item) + if n.leaf { + if found { + out, _ = n.removeAt(i) + n.subtreeCount-- + return out + } + return nil + } + if n.children[i].count <= minItems { + // Child not large enough to remove from. + n.rebalanceOrMerge(i) + return n.Remove(cmp, item) + } + child := mut(&n.children[i]) + if found { + // Replace the item being removed with the max item in our left child. + out = n.items[i] + n.items[i] = child.removeMax() + n.subtreeCount-- + return out + } + // File is not in this node and child is large enough to remove from. + out = child.Remove(cmp, item) + if out != nil { + n.subtreeCount-- + } + return out +} + +// rebalanceOrMerge grows child 'i' to ensure it has sufficient room to remove a +// item from it while keeping it at or above minItems. This function is for use +// only as a helper function for internal B-Tree code. Clients should not invoke +// it directly. +func (n *node) rebalanceOrMerge(i int) { + switch { + case i > 0 && n.children[i-1].count > minItems: + // Rebalance from left sibling. 
+ // + // +-----------+ + // | y | + // +----/-\----+ + // / \ + // v v + // +-----------+ +-----------+ + // | x | | | + // +----------\+ +-----------+ + // \ + // v + // a + // + // After: + // + // +-----------+ + // | x | + // +----/-\----+ + // / \ + // v v + // +-----------+ +-----------+ + // | | | y | + // +-----------+ +/----------+ + // / + // v + // a + // + left := mut(&n.children[i-1]) + child := mut(&n.children[i]) + xLa, grandChild := left.popBack() + yLa := n.items[i-1] + child.pushFront(yLa, grandChild) + n.items[i-1] = xLa + child.subtreeCount++ + left.subtreeCount-- + if grandChild != nil { + child.subtreeCount += grandChild.subtreeCount + left.subtreeCount -= grandChild.subtreeCount + } + + case i < int(n.count) && n.children[i+1].count > minItems: + // Rebalance from right sibling. + // + // +-----------+ + // | y | + // +----/-\----+ + // / \ + // v v + // +-----------+ +-----------+ + // | | | x | + // +-----------+ +/----------+ + // / + // v + // a + // + // After: + // + // +-----------+ + // | x | + // +----/-\----+ + // / \ + // v v + // +-----------+ +-----------+ + // | y | | | + // +----------\+ +-----------+ + // \ + // v + // a + // + right := mut(&n.children[i+1]) + child := mut(&n.children[i]) + xLa, grandChild := right.popFront() + yLa := n.items[i] + child.pushBack(yLa, grandChild) + child.subtreeCount++ + right.subtreeCount-- + if grandChild != nil { + child.subtreeCount += grandChild.subtreeCount + right.subtreeCount -= grandChild.subtreeCount + } + n.items[i] = xLa + + default: + // Merge with either the left or right sibling. 
+ // + // +-----------+ + // | u y v | + // +----/-\----+ + // / \ + // v v + // +-----------+ +-----------+ + // | x | | z | + // +-----------+ +-----------+ + // + // After: + // + // +-----------+ + // | u v | + // +-----|-----+ + // | + // v + // +-----------+ + // | x y z | + // +-----------+ + // + if i >= int(n.count) { + i = int(n.count - 1) + } + child := mut(&n.children[i]) + // Make mergeChild mutable, bumping the refcounts on its children if necessary. + _ = mut(&n.children[i+1]) + mergeLa, mergeChild := n.removeAt(i) + child.items[child.count] = mergeLa + copy(child.items[child.count+1:], mergeChild.items[:mergeChild.count]) + if !child.leaf { + copy(child.children[child.count+1:], mergeChild.children[:mergeChild.count+1]) + } + child.count += mergeChild.count + 1 + child.subtreeCount += mergeChild.subtreeCount + 1 + + mergeChild.decRef(false /* contentsToo */, nil) + } +} + +// InvalidateAnnotation removes any existing cached annotations for the provided +// annotator from this node's subtree. +func (n *node) InvalidateAnnotation(a Annotator) { + // Find this annotator's annotation on this node. + var annot *annotation + for i := range n.annot { + if n.annot[i].annotator == a { + annot = &n.annot[i] + } + } + + if annot != nil && annot.valid { + annot.valid = false + annot.v = a.Zero(annot.v) + } + if !n.leaf { + for i := int16(0); i <= n.count; i++ { + n.children[i].InvalidateAnnotation(a) + } + } +} + +// Annotation retrieves, computing if not already computed, the provided +// annotator's annotation of this node. The second return value indicates +// whether the future reads of this annotation may use the first return value +// as-is. If false, the annotation is not stable and may change on a subsequent +// computation. +func (n *node) Annotation(a Annotator) (interface{}, bool) { + // Find this annotator's annotation on this node. 
+ var annot *annotation + for i := range n.annot { + if n.annot[i].annotator == a { + annot = &n.annot[i] + } + } + + // If it exists and is marked as valid, we can return it without + // recomputing anything. + if annot != nil && annot.valid { + return annot.v, true + } + + if annot == nil { + // This is n's first time being annotated by a. + // Create a new zeroed annotation. + n.annot = append(n.annot, annotation{ + annotator: a, + v: a.Zero(nil), + }) + annot = &n.annot[len(n.annot)-1] + } else { + // There's an existing annotation that must be recomputed. + // Zero its value. + annot.v = a.Zero(annot.v) + } + + annot.valid = true + for i := int16(0); i <= n.count; i++ { + if !n.leaf { + v, ok := n.children[i].Annotation(a) + annot.v = a.Merge(v, annot.v) + annot.valid = annot.valid && ok + } + if i < n.count { + v, ok := a.Accumulate(n.items[i], annot.v) + annot.v = v + annot.valid = annot.valid && ok + } + } + return annot.v, annot.valid +} + +func (n *node) verifyInvariants() { + recomputedSubtreeCount := int(n.count) + if !n.leaf { + for i := int16(0); i <= n.count; i++ { + n.children[i].verifyInvariants() + recomputedSubtreeCount += n.children[i].subtreeCount + } + } + if recomputedSubtreeCount != n.subtreeCount { + panic(fmt.Sprintf("recomputed subtree count (%d) ≠ n.subtreeCount (%d)", + recomputedSubtreeCount, n.subtreeCount)) + } +} + +// btree is an implementation of a B-Tree. +// +// btree stores FileMetadata in an ordered structure, allowing easy insertion, +// removal, and iteration. The B-Tree stores items in order based on cmp. The +// first level of the LSM uses a cmp function that compares sequence numbers. +// All other levels compare using the FileMetadata.Smallest. +// +// Write operations are not safe for concurrent mutation by multiple +// goroutines, but Read operations are. +type btree struct { + root *node + cmp btreeCmp +} + +// Release dereferences and clears the root node of the btree, removing all +// items from the btree. 
In doing so, it decrements contained file counts. +// It returns a slice of newly obsolete backing files, if any. +func (t *btree) Release() (obsolete []*FileBacking) { + if t.root != nil { + t.root.decRef(true /* contentsToo */, &obsolete) + t.root = nil + } + return obsolete +} + +// Clone clones the btree, lazily. It does so in constant time. +func (t *btree) Clone() btree { + c := *t + if c.root != nil { + // Incrementing the reference count on the root node is sufficient to + // ensure that no node in the cloned tree can be mutated by an actor + // holding a reference to the original tree and vice versa. This + // property is upheld because the root node in the receiver btree and + // the returned btree will both necessarily have a reference count of at + // least 2 when this method returns. All tree mutations recursively + // acquire mutable node references (see mut) as they traverse down the + // tree. The act of acquiring a mutable node reference performs a clone + // if a node's reference count is greater than one. Cloning a node (see + // clone) increases the reference count on each of its children, + // ensuring that they have a reference count of at least 2. This, in + // turn, ensures that any of the child nodes that are modified will also + // be copied-on-write, recursively ensuring the immutability property + // over the entire tree. + c.root.incRef() + } + return c +} + +// Delete removes the provided file from the tree. +// It returns true if the file now has a zero reference count. 
+func (t *btree) Delete(item *FileMetadata) (obsolete bool) { + if t.root == nil || t.root.count == 0 { + return false + } + if out := mut(&t.root).Remove(t.cmp, item); out != nil { + obsolete = out.Unref() == 0 + } + if invariants.Enabled { + t.root.verifyInvariants() + } + if t.root.count == 0 { + old := t.root + if t.root.leaf { + t.root = nil + } else { + t.root = t.root.children[0] + } + old.decRef(false /* contentsToo */, nil) + } + return obsolete +} + +// Insert adds the given item to the tree. If a item in the tree already +// equals the given one, Insert panics. +func (t *btree) Insert(item *FileMetadata) error { + if t.root == nil { + t.root = newLeafNode() + } else if t.root.count >= maxItems { + splitLa, splitNode := mut(&t.root).split(maxItems / 2) + newRoot := newNode() + newRoot.count = 1 + newRoot.items[0] = splitLa + newRoot.children[0] = t.root + newRoot.children[1] = splitNode + newRoot.subtreeCount = t.root.subtreeCount + splitNode.subtreeCount + 1 + t.root = newRoot + } + item.Ref() + err := mut(&t.root).Insert(t.cmp, item) + if invariants.Enabled { + t.root.verifyInvariants() + } + return err +} + +// Iter returns a new iterator object. It is not safe to continue using an +// iterator after modifications are made to the tree. If modifications are made, +// create a new iterator. +func (t *btree) Iter() iterator { + return iterator{r: t.root, pos: -1, cmp: t.cmp} +} + +// Count returns the number of files contained within the B-Tree. +func (t *btree) Count() int { + if t.root == nil { + return 0 + } + return t.root.subtreeCount +} + +// String returns a string description of the tree. The format is +// similar to the https://en.wikipedia.org/wiki/Newick_format. 
+func (t *btree) String() string { + if t.Count() == 0 { + return ";" + } + var b strings.Builder + t.root.writeString(&b) + return b.String() +} + +func (n *node) writeString(b *strings.Builder) { + if n.leaf { + for i := int16(0); i < n.count; i++ { + if i != 0 { + b.WriteString(",") + } + b.WriteString(n.items[i].String()) + } + return + } + for i := int16(0); i <= n.count; i++ { + b.WriteString("(") + n.children[i].writeString(b) + b.WriteString(")") + if i < n.count { + b.WriteString(n.items[i].String()) + } + } +} + +// iterStack represents a stack of (node, pos) tuples, which captures +// iteration state as an iterator descends a btree. +type iterStack struct { + // a contains aLen stack frames when an iterator stack is short enough. + // If the iterator stack overflows the capacity of iterStackArr, the stack + // is moved to s and aLen is set to -1. + a iterStackArr + aLen int16 // -1 when using s + s []iterFrame +} + +// Used to avoid allocations for stacks below a certain size. +type iterStackArr [3]iterFrame + +type iterFrame struct { + n *node + pos int16 +} + +func (is *iterStack) push(f iterFrame) { + if is.aLen == -1 { + is.s = append(is.s, f) + } else if int(is.aLen) == len(is.a) { + is.s = make([]iterFrame, int(is.aLen)+1, 2*int(is.aLen)) + copy(is.s, is.a[:]) + is.s[int(is.aLen)] = f + is.aLen = -1 + } else { + is.a[is.aLen] = f + is.aLen++ + } +} + +func (is *iterStack) pop() iterFrame { + if is.aLen == -1 { + f := is.s[len(is.s)-1] + is.s = is.s[:len(is.s)-1] + return f + } + is.aLen-- + return is.a[is.aLen] +} + +func (is *iterStack) len() int { + if is.aLen == -1 { + return len(is.s) + } + return int(is.aLen) +} + +func (is *iterStack) clone() iterStack { + // If the iterator is using the embedded iterStackArr, we only need to + // copy the struct itself. 
+ if is.s == nil { + return *is + } + clone := *is + clone.s = make([]iterFrame, len(is.s)) + copy(clone.s, is.s) + return clone +} + +func (is *iterStack) nth(n int) (f iterFrame, ok bool) { + if is.aLen == -1 { + if n >= len(is.s) { + return f, false + } + return is.s[n], true + } + if int16(n) >= is.aLen { + return f, false + } + return is.a[n], true +} + +func (is *iterStack) reset() { + if is.aLen == -1 { + is.s = is.s[:0] + } else { + is.aLen = 0 + } +} + +// iterator is responsible for search and traversal within a btree. +type iterator struct { + // the root node of the B-Tree. + r *node + // n and pos make up the current position of the iterator. + // If valid, n.items[pos] is the current value of the iterator. + // + // n may be nil iff i.r is nil. + n *node + pos int16 + // cmp dictates the ordering of the FileMetadata. + cmp func(*FileMetadata, *FileMetadata) int + // a stack of n's ancestors within the B-Tree, alongside the position + // taken to arrive at n. If non-empty, the bottommost frame of the stack + // will always contain the B-Tree root. + s iterStack +} + +// countLeft returns the count of files that are to the left of the current +// iterator position. +func (i *iterator) countLeft() int { + if i.r == nil { + return 0 + } + + // Each iterator has a stack of frames marking the path from the root node + // to the current iterator position. All files (n.items) and all subtrees + // (n.children) with indexes less than [pos] are to the left of the current + // iterator position. + // + // +------------------------+ - + // | Root pos:5 | | + // +------------------------+ | stack + // | Root/5 pos:3 | | frames + // +------------------------+ | [i.s] + // | Root/5/3 pos:9 | | + // +========================+ - + // | | + // | i.n: Root/5/3/9 i.pos:2| + // +------------------------+ + // + var count int + // Walk all the ancestors in the iterator stack [i.s], tallying up all the + // files and subtrees to the left of the stack frame's position. 
+ f, ok := i.s.nth(0) + for fi := 0; ok; fi++ { + // There are [f.pos] files contained within [f.n.items] that sort to the + // left of the subtree the iterator has descended. + count += int(f.pos) + // Any subtrees that fall before the stack frame's position are entirely + // to the left of the iterator's current position. + for j := int16(0); j < f.pos; j++ { + count += f.n.children[j].subtreeCount + } + f, ok = i.s.nth(fi + 1) + } + + // The bottommost stack frame is inlined within the iterator struct. Again, + // [i.pos] files fall to the left of the current iterator position. + count += int(i.pos) + if !i.n.leaf { + // NB: Unlike above, we use a `<= i.pos` comparison. The iterator is + // positioned at item `i.n.items[i.pos]`, which sorts after everything + // in the subtree at `i.n.children[i.pos]`. + for j := int16(0); j <= i.pos; j++ { + count += i.n.children[j].subtreeCount + } + } + return count +} + +func (i *iterator) clone() iterator { + c := *i + c.s = i.s.clone() + return c +} + +func (i *iterator) reset() { + i.n = i.r + i.pos = -1 + i.s.reset() +} + +func (i iterator) String() string { + var buf bytes.Buffer + for n := 0; ; n++ { + f, ok := i.s.nth(n) + if !ok { + break + } + fmt.Fprintf(&buf, "%p: %02d/%02d\n", f.n, f.pos, f.n.count) + } + if i.r == nil { + fmt.Fprintf(&buf, ": %02d", i.pos) + } else { + fmt.Fprintf(&buf, "%p: %02d/%02d", i.n, i.pos, i.n.count) + } + return buf.String() +} + +func cmpIter(a, b iterator) int { + if a.r != b.r { + panic("compared iterators from different btrees") + } + + // Each iterator has a stack of frames marking the path from the root node + // to the current iterator position. We walk both paths formed by the + // iterators' stacks simultaneously, descending from the shared root node, + // always comparing nodes at the same level in the tree. 
+ // + // If the iterators' paths ever diverge and point to different nodes, the + // iterators are not equal and we use the node positions to evaluate the + // comparison. + // + // If an iterator's stack ends, we stop descending and use its current + // node and position for the final comparison. One iterator's stack may + // end before another's if one iterator is positioned deeper in the tree. + // + // a b + // +------------------------+ +--------------------------+ - + // | Root pos:5 | = | Root pos:5 | | + // +------------------------+ +--------------------------+ | stack + // | Root/5 pos:3 | = | Root/5 pos:3 | | frames + // +------------------------+ +--------------------------+ | + // | Root/5/3 pos:9 | > | Root/5/3 pos:1 | | + // +========================+ +==========================+ - + // | | | | + // | a.n: Root/5/3/9 a.pos:2| | b.n: Root/5/3/1, b.pos:5 | + // +------------------------+ +--------------------------+ + + // Initialize with the iterator's current node and position. These are + // conceptually the most-recent/current frame of the iterator stack. + an, apos := a.n, a.pos + bn, bpos := b.n, b.pos + + // aok, bok are set while traversing the iterator's path down the B-Tree. + // They're declared in the outer scope because they help distinguish the + // sentinel case when both iterators' first frame points to the last child + // of the root. If an iterator has no other frames in its stack, it's the + // end sentinel state which sorts after everything else. + var aok, bok bool + for i := 0; ; i++ { + var af, bf iterFrame + af, aok = a.s.nth(i) + bf, bok = b.s.nth(i) + if !aok || !bok { + if aok { + // Iterator a, unlike iterator b, still has a frame. Set an, + // apos so we compare using the frame from the stack. + an, apos = af.n, af.pos + } + if bok { + // Iterator b, unlike iterator a, still has a frame. Set bn, + // bpos so we compare using the frame from the stack. 
+ bn, bpos = bf.n, bf.pos + } + break + } + + // aok && bok + if af.n != bf.n { + panic("nonmatching nodes during btree iterator comparison") + } + if v := stdcmp.Compare(af.pos, bf.pos); v != 0 { + return v + } + // Otherwise continue up both iterators' stacks (equivalently, down the + // B-Tree away from the root). + } + + if aok && bok { + panic("expected one or more stacks to have been exhausted") + } + if an != bn { + panic("nonmatching nodes during btree iterator comparison") + } + if v := stdcmp.Compare(apos, bpos); v != 0 { + return v + } + switch { + case aok: + // a is positioned at a leaf child at this position and b is at an + // end sentinel state. + return -1 + case bok: + // b is positioned at a leaf child at this position and a is at an + // end sentinel state. + return +1 + default: + return 0 + } +} + +func (i *iterator) descend(n *node, pos int16) { + i.s.push(iterFrame{n: n, pos: pos}) + i.n = n.children[pos] + i.pos = 0 +} + +// ascend ascends up to the current node's parent and resets the position +// to the one previously set for this parent node. +func (i *iterator) ascend() { + f := i.s.pop() + i.n = f.n + i.pos = f.pos +} + +// seek repositions the iterator over the first file for which fn returns +// true, mirroring the semantics of the standard library's sort.Search +// function. Like sort.Search, seek requires the iterator's B-Tree to be +// ordered such that fn returns false for some (possibly empty) prefix of the +// tree's files, and then true for the (possibly empty) remainder. +func (i *iterator) seek(fn func(*FileMetadata) bool) { + i.reset() + if i.r == nil { + return + } + + for { + // Logic copied from sort.Search. 
+ j, k := 0, int(i.n.count) + for j < k { + h := int(uint(j+k) >> 1) // avoid overflow when computing h + + // j ≤ h < k + if !fn(i.n.items[h]) { + j = h + 1 // preserves f(j-1) == false + } else { + k = h // preserves f(k) == true + } + } + + i.pos = int16(j) + if i.n.leaf { + if i.pos == i.n.count { + i.next() + } + return + } + i.descend(i.n, i.pos) + } +} + +// first seeks to the first item in the btree. +func (i *iterator) first() { + i.reset() + if i.r == nil { + return + } + for !i.n.leaf { + i.descend(i.n, 0) + } + i.pos = 0 +} + +// last seeks to the last item in the btree. +func (i *iterator) last() { + i.reset() + if i.r == nil { + return + } + for !i.n.leaf { + i.descend(i.n, i.n.count) + } + i.pos = i.n.count - 1 +} + +// next positions the iterator to the item immediately following +// its current position. +func (i *iterator) next() { + if i.r == nil { + return + } + + if i.n.leaf { + if i.pos < i.n.count { + i.pos++ + } + if i.pos < i.n.count { + return + } + for i.s.len() > 0 && i.pos >= i.n.count { + i.ascend() + } + return + } + + i.descend(i.n, i.pos+1) + for !i.n.leaf { + i.descend(i.n, 0) + } + i.pos = 0 +} + +// prev positions the iterator to the item immediately preceding +// its current position. +func (i *iterator) prev() { + if i.r == nil { + return + } + + if i.n.leaf { + i.pos-- + if i.pos >= 0 { + return + } + for i.s.len() > 0 && i.pos < 0 { + i.ascend() + i.pos-- + } + return + } + + i.descend(i.n, i.pos) + for !i.n.leaf { + i.descend(i.n, i.n.count) + } + i.pos = i.n.count - 1 +} + +// valid returns whether the iterator is positioned at a valid position. +func (i *iterator) valid() bool { + return i.r != nil && i.pos >= 0 && i.pos < i.n.count +} + +// cur returns the item at the iterator's current position. It is illegal +// to call cur if the iterator is not valid. 
+func (i *iterator) cur() *FileMetadata { + if invariants.Enabled && !i.valid() { + panic("btree iterator.cur invoked on invalid iterator") + } + return i.n.items[i.pos] +} diff --git a/pebble/internal/manifest/btree_test.go b/pebble/internal/manifest/btree_test.go new file mode 100644 index 0000000..cce22a2 --- /dev/null +++ b/pebble/internal/manifest/btree_test.go @@ -0,0 +1,991 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package manifest + +import ( + stdcmp "cmp" + "fmt" + "math/rand" + "reflect" + "slices" + "sync" + "testing" + "time" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/invariants" + "github.com/stretchr/testify/require" +) + +func newItem(k InternalKey) *FileMetadata { + m := (&FileMetadata{}).ExtendPointKeyBounds( + base.DefaultComparer.Compare, k, k, + ) + m.InitPhysicalBacking() + return m +} + +func cmp(a, b *FileMetadata) int { + return cmpKey(a.Smallest, b.Smallest) +} + +func cmpKey(a, b InternalKey) int { + return base.InternalCompare(base.DefaultComparer.Compare, a, b) +} + +////////////////////////////////////////// +// Invariant verification // +////////////////////////////////////////// + +// Verify asserts that the tree's structural invariants all hold. 
+func (t *btree) Verify(tt *testing.T) { + if t.Count() == 0 { + require.Nil(tt, t.root) + return + } + t.verifyLeafSameDepth(tt) + t.verifyCountAllowed(tt) + t.isSorted(tt) + t.root.verifyInvariants() +} + +func (t *btree) verifyLeafSameDepth(tt *testing.T) { + h := t.height() + t.root.verifyDepthEqualToHeight(tt, 1, h) +} + +func (n *node) verifyDepthEqualToHeight(t *testing.T, depth, height int) { + if n.leaf { + require.Equal(t, height, depth, "all leaves should have the same depth as the tree height") + } + n.recurse(func(child *node, _ int16) { + child.verifyDepthEqualToHeight(t, depth+1, height) + }) +} + +func (t *btree) verifyCountAllowed(tt *testing.T) { + t.root.verifyCountAllowed(tt, true) +} + +// height returns the height of the tree. +func (t *btree) height() int { + if t.root == nil { + return 0 + } + h := 1 + n := t.root + for !n.leaf { + n = n.children[0] + h++ + } + return h +} + +func (n *node) verifyCountAllowed(t *testing.T, root bool) { + if !root { + require.GreaterOrEqual(t, n.count, int16(minItems), "item count %d must be in range [%d,%d]", n.count, minItems, maxItems) + require.LessOrEqual(t, n.count, int16(maxItems), "item count %d must be in range [%d,%d]", n.count, minItems, maxItems) + } + for i, item := range n.items { + if i < int(n.count) { + require.NotNil(t, item, "item below count") + } else { + require.Nil(t, item, "item above count") + } + } + if !n.leaf { + for i, child := range n.children { + if i <= int(n.count) { + require.NotNil(t, child, "node below count") + } else { + require.Nil(t, child, "node above count") + } + } + } + n.recurse(func(child *node, _ int16) { + child.verifyCountAllowed(t, false) + }) +} + +func (t *btree) isSorted(tt *testing.T) { + t.root.isSorted(tt, t.cmp) +} + +func (n *node) isSorted(t *testing.T, cmp func(*FileMetadata, *FileMetadata) int) { + for i := int16(1); i < n.count; i++ { + require.LessOrEqual(t, cmp(n.items[i-1], n.items[i]), 0) + } + if !n.leaf { + for i := int16(0); i < n.count; i++ 
{ + prev := n.children[i] + next := n.children[i+1] + + require.LessOrEqual(t, cmp(prev.items[prev.count-1], n.items[i]), 0) + require.LessOrEqual(t, cmp(n.items[i], next.items[0]), 0) + } + } + n.recurse(func(child *node, _ int16) { + child.isSorted(t, cmp) + }) +} + +func (n *node) recurse(f func(child *node, pos int16)) { + if !n.leaf { + for i := int16(0); i <= n.count; i++ { + f(n.children[i], i) + } + } +} + +////////////////////////////////////////// +// Unit Tests // +////////////////////////////////////////// + +func key(i int) InternalKey { + if i < 0 || i > 99999 { + panic("key out of bounds") + } + return base.MakeInternalKey([]byte(fmt.Sprintf("%05d", i)), 0, base.InternalKeyKindSet) +} + +func keyWithMemo(i int, memo map[int]InternalKey) InternalKey { + if s, ok := memo[i]; ok { + return s + } + s := key(i) + memo[i] = s + return s +} + +func checkIterRelative(t *testing.T, it *iterator, start, end int, keyMemo map[int]InternalKey) { + t.Helper() + i := start + for ; it.valid(); it.next() { + item := it.cur() + expected := keyWithMemo(i, keyMemo) + if cmpKey(expected, item.Smallest) != 0 { + t.Fatalf("expected %s, but found %s", expected, item.Smallest) + } + i++ + } + if i != end { + t.Fatalf("expected %d, but at %d", end, i) + } +} + +func checkIter(t *testing.T, it iterator, start, end int, keyMemo map[int]InternalKey) { + t.Helper() + i := start + for it.first(); it.valid(); it.next() { + item := it.cur() + expected := keyWithMemo(i, keyMemo) + if cmpKey(expected, item.Smallest) != 0 { + t.Fatalf("expected %s, but found %s", expected, item.Smallest) + } + require.Equal(t, i-start, it.countLeft()) + i++ + } + if i != end { + t.Fatalf("expected %d, but at %d", end, i) + } + + for it.last(); it.valid(); it.prev() { + i-- + item := it.cur() + expected := keyWithMemo(i, keyMemo) + if cmpKey(expected, item.Smallest) != 0 { + t.Fatalf("expected %s, but found %s", expected, item.Smallest) + } + require.Equal(t, i-start, it.countLeft()) + } + if i != start 
{ + t.Fatalf("expected %d, but at %d: %+v", start, i, it) + } +} + +// TestBTree tests basic btree operations. +func TestBTree(t *testing.T) { + var tr btree + tr.cmp = cmp + keyMemo := make(map[int]InternalKey) + + // With degree == 16 (max-items/node == 31) we need 513 items in order for + // there to be 3 levels in the tree. The count here is comfortably above + // that. + const count = 768 + items := rang(0, count-1) + + // Add keys in sorted order. + for i := 0; i < count; i++ { + require.NoError(t, tr.Insert(items[i])) + tr.Verify(t) + if e := i + 1; e != tr.Count() { + t.Fatalf("expected length %d, but found %d", e, tr.Count()) + } + checkIter(t, tr.Iter(), 0, i+1, keyMemo) + } + + // delete keys in sorted order. + for i := 0; i < count; i++ { + obsolete := tr.Delete(items[i]) + if !obsolete { + t.Fatalf("expected item %d to be obsolete", i) + } + tr.Verify(t) + if e := count - (i + 1); e != tr.Count() { + t.Fatalf("expected length %d, but found %d", e, tr.Count()) + } + checkIter(t, tr.Iter(), i+1, count, keyMemo) + } + + // Add keys in reverse sorted order. + for i := 1; i <= count; i++ { + require.NoError(t, tr.Insert(items[count-i])) + tr.Verify(t) + if i != tr.Count() { + t.Fatalf("expected length %d, but found %d", i, tr.Count()) + } + checkIter(t, tr.Iter(), count-i, count, keyMemo) + } + + // delete keys in reverse sorted order. 
+ for i := 1; i <= count; i++ { + obsolete := tr.Delete(items[count-i]) + if !obsolete { + t.Fatalf("expected item %d to be obsolete", i) + } + tr.Verify(t) + if e := count - i; e != tr.Count() { + t.Fatalf("expected length %d, but found %d", e, tr.Count()) + } + checkIter(t, tr.Iter(), 0, count-i, keyMemo) + } +} + +func TestIterClone(t *testing.T) { + const count = 65536 + + var tr btree + tr.cmp = cmp + keyMemo := make(map[int]InternalKey) + + for i := 0; i < count; i++ { + require.NoError(t, tr.Insert(newItem(key(i)))) + } + + it := tr.Iter() + i := 0 + for it.first(); it.valid(); it.next() { + if i%500 == 0 { + c := it.clone() + + require.Equal(t, 0, cmpIter(it, c)) + checkIterRelative(t, &c, i, count, keyMemo) + if i < count { + require.Equal(t, -1, cmpIter(it, c)) + require.Equal(t, +1, cmpIter(c, it)) + } + } + i++ + } +} + +func TestIterCmpEdgeCases(t *testing.T) { + var tr btree + tr.cmp = cmp + t.Run("empty", func(t *testing.T) { + a := tr.Iter() + b := tr.Iter() + require.Equal(t, 0, cmpIter(a, b)) + }) + require.NoError(t, tr.Insert(newItem(key(5)))) + t.Run("exhausted_next", func(t *testing.T) { + a := tr.Iter() + b := tr.Iter() + a.first() + b.first() + require.Equal(t, 0, cmpIter(a, b)) + b.next() + require.False(t, b.valid()) + require.Equal(t, -1, cmpIter(a, b)) + }) + t.Run("exhausted_prev", func(t *testing.T) { + a := tr.Iter() + b := tr.Iter() + a.first() + b.first() + b.prev() + require.False(t, b.valid()) + require.Equal(t, 1, cmpIter(a, b)) + b.next() + require.Equal(t, 0, cmpIter(a, b)) + }) +} + +func TestIterCmpRand(t *testing.T) { + const itemCount = 65536 + const iterCount = 1000 + + var tr btree + tr.cmp = cmp + for i := 0; i < itemCount; i++ { + require.NoError(t, tr.Insert(newItem(key(i)))) + } + + seed := time.Now().UnixNano() + rng := rand.New(rand.NewSource(seed)) + iters1 := make([]*LevelIterator, iterCount) + iters2 := make([]*LevelIterator, iterCount) + for i := 0; i < iterCount; i++ { + k := rng.Intn(itemCount) + iter := 
LevelIterator{iter: tr.Iter()} + iter.SeekGE(base.DefaultComparer.Compare, key(k).UserKey) + iters1[i] = &iter + iters2[i] = &iter + } + + // All the iterators should be positioned, so sorting them by items and by + // iterator comparisons should equal identical orderings. + slices.SortStableFunc(iters1, func(a, b *LevelIterator) int { return cmpIter(a.iter, b.iter) }) + slices.SortStableFunc(iters2, func(a, b *LevelIterator) int { return cmp(a.iter.cur(), b.iter.cur()) }) + for i := 0; i < iterCount; i++ { + if iters1[i] != iters2[i] { + t.Fatalf("seed %d: iters out of order at index %d:\n%s\n\n%s", + seed, i, iters1[i], iters2[i]) + } + } +} + +// TestBTreeSeek tests basic btree iterator operations on an iterator wrapped +// by a LevelIterator. +func TestBTreeSeek(t *testing.T) { + const count = 513 + + var tr btree + tr.cmp = cmp + for i := 0; i < count; i++ { + require.NoError(t, tr.Insert(newItem(key(i*2)))) + } + + it := LevelIterator{iter: tr.Iter()} + for i := 0; i < 2*count-1; i++ { + item := it.SeekGE(base.DefaultComparer.Compare, key(i).UserKey) + if item == nil { + t.Fatalf("%d: expected valid iterator", i) + } + expected := key(2 * ((i + 1) / 2)) + if cmpKey(expected, item.Smallest) != 0 { + t.Fatalf("%d: expected %s, but found %s", i, expected, item.Smallest) + } + } + it.SeekGE(base.DefaultComparer.Compare, key(2*count-1).UserKey) + if it.iter.valid() { + t.Fatalf("expected invalid iterator") + } + + for i := 1; i < 2*count; i++ { + item := it.SeekLT(base.DefaultComparer.Compare, key(i).UserKey) + if item == nil { + t.Fatalf("%d: expected valid iterator", i) + } + expected := key(2 * ((i - 1) / 2)) + if cmpKey(expected, item.Smallest) != 0 { + t.Fatalf("%d: expected %s, but found %s", i, expected, item.Smallest) + } + } + it.SeekLT(base.DefaultComparer.Compare, key(0).UserKey) + if it.iter.valid() { + t.Fatalf("expected invalid iterator") + } +} + +func TestBTreeInsertDuplicateError(t *testing.T) { + var tr btree + tr.cmp = cmp + require.NoError(t, 
tr.Insert(newItem(key(1)))) + require.NoError(t, tr.Insert(newItem(key(2)))) + require.NoError(t, tr.Insert(newItem(key(3)))) + wantErr := errors.Errorf("files %s and %s collided on sort keys", + errors.Safe(base.FileNum(000000)), errors.Safe(base.FileNum(000000))) + require.Error(t, wantErr, tr.Insert(newItem(key(2)))) +} + +// TestBTreeCloneConcurrentOperations tests that cloning a btree returns a new +// btree instance which is an exact logical copy of the original but that can be +// modified independently going forward. +func TestBTreeCloneConcurrentOperations(t *testing.T) { + const cloneTestSize = 1000 + p := perm(cloneTestSize) + + var trees []*btree + treeC, treeDone := make(chan *btree), make(chan struct{}) + go func() { + for b := range treeC { + trees = append(trees, b) + } + close(treeDone) + }() + + var wg sync.WaitGroup + var populate func(tr *btree, start int) + populate = func(tr *btree, start int) { + t.Logf("Starting new clone at %v", start) + treeC <- tr + for i := start; i < cloneTestSize; i++ { + require.NoError(t, tr.Insert(p[i])) + if i%(cloneTestSize/5) == 0 { + wg.Add(1) + c := tr.Clone() + go populate(&c, i+1) + } + } + wg.Done() + } + + wg.Add(1) + var tr btree + tr.cmp = cmp + go populate(&tr, 0) + wg.Wait() + close(treeC) + <-treeDone + + t.Logf("Starting equality checks on %d trees", len(trees)) + want := rang(0, cloneTestSize-1) + for i, tree := range trees { + if got := all(tree); !reflect.DeepEqual(strReprs(got), strReprs(want)) { + t.Errorf("tree %v mismatch", i) + } + } + + t.Log("Removing half of items from first half") + toRemove := want[cloneTestSize/2:] + for i := 0; i < len(trees)/2; i++ { + tree := trees[i] + wg.Add(1) + go func() { + for _, item := range toRemove { + tree.Delete(item) + } + wg.Done() + }() + } + wg.Wait() + + t.Log("Checking all values again") + for i, tree := range trees { + var wantpart []*FileMetadata + if i < len(trees)/2 { + wantpart = want[:cloneTestSize/2] + } else { + wantpart = want + } + if got 
:= all(tree); !reflect.DeepEqual(strReprs(got), strReprs(wantpart)) { + t.Errorf("tree %v mismatch, want %#v got %#v", i, strReprs(wantpart), strReprs(got)) + } + } + + var obsolete []*FileBacking + for i := range trees { + obsolete = append(obsolete, trees[i].Release()...) + } + if len(obsolete) != len(p) { + t.Errorf("got %d obsolete trees, expected %d", len(obsolete), len(p)) + } +} + +// TestIterStack tests the interface of the iterStack type. +func TestIterStack(t *testing.T) { + f := func(i int) iterFrame { return iterFrame{pos: int16(i)} } + var is iterStack + for i := 1; i <= 2*len(iterStackArr{}); i++ { + var j int + for j = 0; j < i; j++ { + is.push(f(j)) + } + require.Equal(t, j, is.len()) + for j--; j >= 0; j-- { + require.Equal(t, f(j), is.pop()) + } + is.reset() + } +} + +func TestIterEndSentinel(t *testing.T) { + var tr btree + tr.cmp = cmp + require.NoError(t, tr.Insert(newItem(key(1)))) + require.NoError(t, tr.Insert(newItem(key(2)))) + require.NoError(t, tr.Insert(newItem(key(3)))) + iter := LevelIterator{iter: tr.Iter()} + iter.SeekGE(base.DefaultComparer.Compare, key(3).UserKey) + require.True(t, iter.iter.valid()) + iter.Next() + require.False(t, iter.iter.valid()) + + // If we seek into the end sentinel, prev should return us to a valid + // position. 
+ iter.SeekGE(base.DefaultComparer.Compare, key(4).UserKey) + require.False(t, iter.iter.valid()) + iter.Prev() + require.True(t, iter.iter.valid()) +} + +type orderStatistic struct{} + +func (o orderStatistic) Zero(dst interface{}) interface{} { + if dst == nil { + return new(int) + } + v := dst.(*int) + *v = 0 + return v +} + +func (o orderStatistic) Accumulate(meta *FileMetadata, dst interface{}) (interface{}, bool) { + v := dst.(*int) + *v++ + return v, true +} + +func (o orderStatistic) Merge(src interface{}, dst interface{}) interface{} { + srcv := src.(*int) + dstv := dst.(*int) + *dstv = *dstv + *srcv + return dstv +} + +func TestAnnotationOrderStatistic(t *testing.T) { + const count = 1000 + ann := orderStatistic{} + + var tr btree + tr.cmp = cmp + for i := 1; i <= count; i++ { + require.NoError(t, tr.Insert(newItem(key(i)))) + + v, ok := tr.root.Annotation(ann) + require.True(t, ok) + vtyped := v.(*int) + require.Equal(t, i, *vtyped) + } + + v, ok := tr.root.Annotation(ann) + require.True(t, ok) + vtyped := v.(*int) + require.Equal(t, count, *vtyped) + + v, ok = tr.root.Annotation(ann) + vtyped = v.(*int) + require.True(t, ok) + require.Equal(t, count, *vtyped) +} + +// TestRandomizedBTree tests a random set of Insert, Delete and iteration +// operations, checking for equivalence with a map of filenums. +func TestRandomizedBTree(t *testing.T) { + const maxFileNum = 50_000 + + seed := time.Now().UnixNano() + t.Log("seed", seed) + rng := rand.New(rand.NewSource(seed)) + + var numOps int + if invariants.RaceEnabled { + // Reduce the number of ops in race mode so the test doesn't take very long. 
+ numOps = 1_000 + rng.Intn(4_000) + } else { + numOps = 10_000 + rng.Intn(40_000) + } + + var metadataAlloc [maxFileNum]FileMetadata + for i := 0; i < len(metadataAlloc); i++ { + metadataAlloc[i].FileNum = base.FileNum(i) + metadataAlloc[i].InitPhysicalBacking() + } + + // Use a btree comparator that sorts by file number to make it easier to + // prevent duplicates or overlaps. + tree := btree{ + cmp: func(a *FileMetadata, b *FileMetadata) int { + return stdcmp.Compare(a.FileNum, b.FileNum) + }, + } + + type opDecl struct { + fn func() + weight int + } + ref := map[base.FileNum]bool{} + ops := []opDecl{ + { + // Insert + fn: func() { + f := &metadataAlloc[rng.Intn(maxFileNum)] + err := tree.Insert(f) + if ref[f.FileNum] { + require.Error(t, err, "btree.Insert should error if file already exists") + } else { + ref[f.FileNum] = true + require.NoError(t, err) + } + }, + weight: 20, + }, + { + // Delete + fn: func() { + f := &metadataAlloc[rng.Intn(maxFileNum)] + tree.Delete(f) + delete(ref, f.FileNum) + }, + weight: 10, + }, + { + // Iterate + fn: func() { + iter := tree.Iter() + count := 0 + var prev base.FileNum + for iter.first(); iter.valid(); iter.next() { + fn := iter.cur().FileNum + require.True(t, ref[fn]) + if count > 0 { + require.Less(t, prev, fn) + } + count++ + } + require.Equal(t, count, len(ref)) + }, + weight: 1, + }, + } + weightSum := 0 + for i := range ops { + weightSum += ops[i].weight + } + + for i := 0; i < numOps; i++ { + w := rng.Intn(weightSum) + for j := range ops { + w -= ops[j].weight + if w < 0 { + ops[j].fn() + break + } + } + } +} + +////////////////////////////////////////// +// Benchmarks // +////////////////////////////////////////// + +// perm returns a random permutation of items with keys in the range [0, n). +func perm(n int) (out []*FileMetadata) { + for _, i := range rand.Perm(n) { + out = append(out, newItem(key(i))) + } + return out +} + +// rang returns an ordered list of items with keys in the range [m, n]. 
+func rang(m, n int) (out []*FileMetadata) { + for i := m; i <= n; i++ { + out = append(out, newItem(key(i))) + } + return out +} + +func strReprs(items []*FileMetadata) []string { + s := make([]string, len(items)) + for i := range items { + s[i] = items[i].String() + } + return s +} + +// all extracts all items from a tree in order as a slice. +func all(tr *btree) (out []*FileMetadata) { + it := tr.Iter() + it.first() + for it.valid() { + out = append(out, it.cur()) + it.next() + } + return out +} + +func forBenchmarkSizes(b *testing.B, f func(b *testing.B, count int)) { + for _, count := range []int{16, 128, 1024, 8192, 65536} { + b.Run(fmt.Sprintf("count=%d", count), func(b *testing.B) { + f(b, count) + }) + } +} + +// BenchmarkBTreeInsert measures btree insertion performance. +func BenchmarkBTreeInsert(b *testing.B) { + forBenchmarkSizes(b, func(b *testing.B, count int) { + insertP := perm(count) + b.ResetTimer() + for i := 0; i < b.N; { + var tr btree + tr.cmp = cmp + for _, item := range insertP { + if err := tr.Insert(item); err != nil { + b.Fatal(err) + } + i++ + if i >= b.N { + return + } + } + } + }) +} + +// BenchmarkBTreeDelete measures btree deletion performance. +func BenchmarkBTreeDelete(b *testing.B) { + forBenchmarkSizes(b, func(b *testing.B, count int) { + insertP, removeP := perm(count), perm(count) + b.ResetTimer() + for i := 0; i < b.N; { + b.StopTimer() + var tr btree + tr.cmp = cmp + for _, item := range insertP { + if err := tr.Insert(item); err != nil { + b.Fatal(err) + } + } + b.StartTimer() + for _, item := range removeP { + tr.Delete(item) + i++ + if i >= b.N { + return + } + } + if tr.Count() > 0 { + b.Fatalf("tree not empty: %s", &tr) + } + } + }) +} + +// BenchmarkBTreeDeleteInsert measures btree deletion and insertion performance. 
+func BenchmarkBTreeDeleteInsert(b *testing.B) { + forBenchmarkSizes(b, func(b *testing.B, count int) { + insertP := perm(count) + var tr btree + tr.cmp = cmp + for _, item := range insertP { + if err := tr.Insert(item); err != nil { + b.Fatal(err) + } + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + item := insertP[i%count] + tr.Delete(item) + if err := tr.Insert(item); err != nil { + b.Fatal(err) + } + } + }) +} + +// BenchmarkBTreeDeleteInsertCloneOnce measures btree deletion and insertion +// performance after the tree has been copy-on-write cloned once. +func BenchmarkBTreeDeleteInsertCloneOnce(b *testing.B) { + forBenchmarkSizes(b, func(b *testing.B, count int) { + insertP := perm(count) + var tr btree + tr.cmp = cmp + for _, item := range insertP { + if err := tr.Insert(item); err != nil { + b.Fatal(err) + } + } + tr = tr.Clone() + b.ResetTimer() + for i := 0; i < b.N; i++ { + item := insertP[i%count] + tr.Delete(item) + if err := tr.Insert(item); err != nil { + b.Fatal(err) + } + } + }) +} + +// BenchmarkBTreeDeleteInsertCloneEachTime measures btree deletion and insertion +// performance while the tree is repeatedly copy-on-write cloned. +func BenchmarkBTreeDeleteInsertCloneEachTime(b *testing.B) { + for _, release := range []bool{false, true} { + b.Run(fmt.Sprintf("release=%t", release), func(b *testing.B) { + forBenchmarkSizes(b, func(b *testing.B, count int) { + insertP := perm(count) + var tr, trRelease btree + tr.cmp = cmp + trRelease.cmp = cmp + for _, item := range insertP { + if err := tr.Insert(item); err != nil { + b.Fatal(err) + } + } + b.ResetTimer() + for i := 0; i < b.N; i++ { + item := insertP[i%count] + if release { + trRelease.Release() + trRelease = tr + } + tr = tr.Clone() + tr.Delete(item) + if err := tr.Insert(item); err != nil { + b.Fatal(err) + } + } + }) + }) + } +} + +// BenchmarkBTreeIter measures the cost of creating a btree iterator. 
+func BenchmarkBTreeIter(b *testing.B) { + var tr btree + tr.cmp = cmp + for i := 0; i < b.N; i++ { + it := tr.Iter() + it.first() + } +} + +// BenchmarkBTreeIterSeekGE measures the cost of seeking a btree iterator +// forward. +func BenchmarkBTreeIterSeekGE(b *testing.B) { + rng := rand.New(rand.NewSource(time.Now().UnixNano())) + forBenchmarkSizes(b, func(b *testing.B, count int) { + var keys []InternalKey + var tr btree + tr.cmp = cmp + + for i := 0; i < count; i++ { + s := key(i) + keys = append(keys, s) + if err := tr.Insert(newItem(s)); err != nil { + b.Fatal(err) + } + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + k := keys[rng.Intn(len(keys))] + it := LevelIterator{iter: tr.Iter()} + f := it.SeekGE(base.DefaultComparer.Compare, k.UserKey) + if testing.Verbose() { + if f == nil { + b.Fatal("expected to find key") + } + if cmpKey(k, f.Smallest) != 0 { + b.Fatalf("expected %s, but found %s", k, f.Smallest) + } + } + } + }) +} + +// BenchmarkBTreeIterSeekLT measures the cost of seeking a btree iterator +// backward. +func BenchmarkBTreeIterSeekLT(b *testing.B) { + rng := rand.New(rand.NewSource(time.Now().UnixNano())) + forBenchmarkSizes(b, func(b *testing.B, count int) { + var keys []InternalKey + var tr btree + tr.cmp = cmp + + for i := 0; i < count; i++ { + k := key(i) + keys = append(keys, k) + if err := tr.Insert(newItem(k)); err != nil { + b.Fatal(err) + } + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + j := rng.Intn(len(keys)) + k := keys[j] + it := LevelIterator{iter: tr.Iter()} + f := it.SeekLT(base.DefaultComparer.Compare, k.UserKey) + if testing.Verbose() { + if j == 0 { + if f != nil { + b.Fatal("unexpected key") + } + } else { + if f == nil { + b.Fatal("expected to find key") + } + k := keys[j-1] + if cmpKey(k, f.Smallest) != 0 { + b.Fatalf("expected %s, but found %s", k, f.Smallest) + } + } + } + } + }) +} + +// BenchmarkBTreeIterNext measures the cost of seeking a btree iterator to the +// next item in the tree. 
+func BenchmarkBTreeIterNext(b *testing.B) { + var tr btree + tr.cmp = cmp + + const count = 8 << 10 + for i := 0; i < count; i++ { + item := newItem(key(i)) + if err := tr.Insert(item); err != nil { + b.Fatal(err) + } + } + + it := tr.Iter() + b.ResetTimer() + for i := 0; i < b.N; i++ { + if !it.valid() { + it.first() + } + it.next() + } +} + +// BenchmarkBTreeIterPrev measures the cost of seeking a btree iterator to the +// previous item in the tree. +func BenchmarkBTreeIterPrev(b *testing.B) { + var tr btree + tr.cmp = cmp + + const count = 8 << 10 + for i := 0; i < count; i++ { + item := newItem(key(i)) + if err := tr.Insert(item); err != nil { + b.Fatal(err) + } + } + + it := tr.Iter() + b.ResetTimer() + for i := 0; i < b.N; i++ { + if !it.valid() { + it.first() + } + it.prev() + } +} diff --git a/pebble/internal/manifest/l0_sublevels.go b/pebble/internal/manifest/l0_sublevels.go new file mode 100644 index 0000000..3857045 --- /dev/null +++ b/pebble/internal/manifest/l0_sublevels.go @@ -0,0 +1,2042 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package manifest + +import ( + "bytes" + "fmt" + "math" + "sort" + "strings" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/invariants" + stdcmp "github.com/cockroachdb/pebble/shims/cmp" + "github.com/cockroachdb/pebble/shims/slices" +) + +// errInvalidL0SublevelsOpt is for use in AddL0Files when the incremental +// sublevel generation optimization failed, and NewL0Sublevels must be called. +var errInvalidL0SublevelsOpt = errors.New("pebble: L0 sublevel generation optimization cannot be used") + +// Intervals are of the form [start, end) with no gap between intervals. Each +// file overlaps perfectly with a sequence of intervals. 
This perfect overlap +// occurs because the union of file boundary keys is used to pick intervals. +// However the largest key in a file is inclusive, so when it is used as +// an interval, the actual key is ImmediateSuccessor(key). We don't have the +// ImmediateSuccessor function to do this computation, so we instead keep an +// isLargest bool to remind the code about this fact. This is used for +// comparisons in the following manner: +// - intervalKey{k, false} < intervalKey{k, true} +// - k1 < k2 -> intervalKey{k1, _} < intervalKey{k2, _}. +// +// Note that the file's largest key is exclusive if the internal key +// has a trailer matching the rangedel sentinel key. In this case, we set +// isLargest to false for end interval computation. +// +// For example, consider three files with bounds [a,e], [b,g], and [e,j]. The +// interval keys produced would be intervalKey{a, false}, intervalKey{b, false}, +// intervalKey{e, false}, intervalKey{e, true}, intervalKey{g, true} and +// intervalKey{j, true}, resulting in intervals +// [a, b), [b, (e, false)), [(e,false), (e, true)), [(e, true), (g, true)) and +// [(g, true), (j, true)). The first file overlaps with the first three +// perfectly, the second file overlaps with the second through to fourth +// intervals, and the third file overlaps with the last three. +// +// The intervals are indexed starting from 0, with the index of the interval +// being the index of the start key of the interval. +// +// In addition to helping with compaction picking, we use interval indices +// to assign each file an interval range once. Subsequent operations, say +// picking overlapping files for a compaction, only need to use the index +// numbers and so avoid expensive byte slice comparisons. +type intervalKey struct { + key []byte + isLargest bool +} + +// intervalKeyTemp is used in the sortAndSweep step. It contains additional metadata +// which is used to generate the {min,max}IntervalIndex for files. 
+type intervalKeyTemp struct { + intervalKey intervalKey + fileMeta *FileMetadata + isEndKey bool +} + +func (i *intervalKeyTemp) setFileIntervalIndex(idx int) { + if i.isEndKey { + // This is the right endpoint of some file interval, so the + // file.maxIntervalIndex must be j - 1 as maxIntervalIndex is + // inclusive. + i.fileMeta.maxIntervalIndex = idx - 1 + return + } + // This is the left endpoint for some file interval, so the + // file.minIntervalIndex must be j. + i.fileMeta.minIntervalIndex = idx +} + +func intervalKeyCompare(cmp Compare, a, b intervalKey) int { + rv := cmp(a.key, b.key) + if rv == 0 { + if a.isLargest && !b.isLargest { + return +1 + } + if !a.isLargest && b.isLargest { + return -1 + } + } + return rv +} + +type intervalKeySorter struct { + keys []intervalKeyTemp + cmp Compare +} + +func (s intervalKeySorter) Len() int { return len(s.keys) } +func (s intervalKeySorter) Less(i, j int) bool { + return intervalKeyCompare(s.cmp, s.keys[i].intervalKey, s.keys[j].intervalKey) < 0 +} +func (s intervalKeySorter) Swap(i, j int) { + s.keys[i], s.keys[j] = s.keys[j], s.keys[i] +} + +// sortAndSweep will sort the intervalKeys using intervalKeySorter, remove the +// duplicate fileIntervals, and set the {min, max}IntervalIndex for the files. +func sortAndSweep(keys []intervalKeyTemp, cmp Compare) []intervalKeyTemp { + if len(keys) == 0 { + return nil + } + sorter := intervalKeySorter{keys: keys, cmp: cmp} + sort.Sort(sorter) + + // intervalKeys are generated using the file bounds. Specifically, there are + // 2 intervalKeys for each file, and len(keys) = 2 * number of files. Each + // `intervalKeyTemp` stores information about which file it was generated + // from, and whether the key represents the end key of the file. So, as + // we're deduplicating the `keys` slice, we're guaranteed to iterate over + // the interval keys belonging to each of the files. 
Since the + // file.{min,max}IntervalIndex points to the position of the files bounds in + // the deduplicated `keys` slice, we can determine + // file.{min,max}IntervalIndex during the iteration. + i := 0 + j := 0 + for i < len(keys) { + // loop invariant: j <= i + currKey := keys[i] + keys[j] = keys[i] + + for { + keys[i].setFileIntervalIndex(j) + i++ + if i >= len(keys) || intervalKeyCompare(cmp, currKey.intervalKey, keys[i].intervalKey) != 0 { + break + } + } + j++ + } + return keys[:j] +} + +// A key interval of the form [start, end). The end is not represented here +// since it is implicit in the start of the next interval. The last interval is +// an exception but we don't need to ever lookup the end of that interval; the +// last fileInterval will only act as an end key marker. The set of intervals +// is const after initialization. +type fileInterval struct { + index int + startKey intervalKey + + // True iff some file in this interval is compacting to base. Such intervals + // cannot have any files participate in L0 -> Lbase compactions. + isBaseCompacting bool + + // The min and max intervals index across all the files that overlap with + // this interval. Inclusive on both sides. + filesMinIntervalIndex int + filesMaxIntervalIndex int + + // True if another interval that has a file extending into this interval is + // undergoing a compaction into Lbase. In other words, this bool is true if + // any interval in [filesMinIntervalIndex, filesMaxIntervalIndex] has + // isBaseCompacting set to true. This lets the compaction picker + // de-prioritize this interval for picking compactions, since there's a high + // chance that a base compaction with a sufficient height of sublevels + // rooted at this interval could not be chosen due to the ongoing base + // compaction in the other interval. 
If the file straddling the two + // intervals is at a sufficiently high sublevel (with enough compactible + // files below it to satisfy minCompactionDepth), this is not an issue, but + // to optimize for quickly picking base compactions far away from other base + // compactions, this bool is used as a heuristic (but not as a complete + // disqualifier). + intervalRangeIsBaseCompacting bool + + // All files in this interval, in increasing sublevel order. + files []*FileMetadata + + // len(files) - compactingFileCount is the stack depth that requires + // starting new compactions. This metric is not precise since the + // compactingFileCount can include files that are part of N (where N > 1) + // intra-L0 compactions, so the stack depth after those complete will be + // len(files) - compactingFileCount + N. We ignore this imprecision since we + // don't want to track which files are part of which intra-L0 compaction. + compactingFileCount int + + // Interpolated from files in this interval. For files spanning multiple + // intervals, we assume an equal distribution of bytes across all those + // intervals. + estimatedBytes uint64 +} + +// Helper type for any cases requiring a bool slice. +type bitSet []bool + +func newBitSet(n int) bitSet { + return make([]bool, n) +} + +func (b *bitSet) markBit(i int) { + (*b)[i] = true +} + +func (b *bitSet) markBits(start, end int) { + for i := start; i < end; i++ { + (*b)[i] = true + } +} + +func (b *bitSet) clearAllBits() { + for i := range *b { + (*b)[i] = false + } +} + +// L0Compaction describes an active compaction with inputs from L0. +type L0Compaction struct { + Smallest InternalKey + Largest InternalKey + IsIntraL0 bool +} + +// L0Sublevels represents a sublevel view of SSTables in L0. Tables in one +// sublevel are non-overlapping in key ranges, and keys in higher-indexed +// sublevels shadow older versions in lower-indexed sublevels. 
These invariants +// are similar to the regular level invariants, except with higher indexed +// sublevels having newer keys as opposed to lower indexed levels. +// +// There is no limit to the number of sublevels that can exist in L0 at any +// time, however read and compaction performance is best when there are as few +// sublevels as possible. +type L0Sublevels struct { + // Levels are ordered from oldest sublevel to youngest sublevel in the + // outer slice, and the inner slice contains non-overlapping files for + // that sublevel in increasing key order. Levels is constructed from + // levelFiles and is used by callers that require a LevelSlice. The below two + // fields are treated as immutable once created in NewL0Sublevels. + Levels []LevelSlice + levelFiles [][]*FileMetadata + + cmp Compare + formatKey base.FormatKey + + fileBytes uint64 + // All the L0 files, ordered from oldest to youngest. + levelMetadata *LevelMetadata + + // The file intervals in increasing key order. + orderedIntervals []fileInterval + + // Keys to break flushes at. + flushSplitUserKeys [][]byte + + // Only used to check invariants. + addL0FilesCalled bool +} + +type sublevelSorter []*FileMetadata + +// Len implements sort.Interface. +func (sl sublevelSorter) Len() int { + return len(sl) +} + +// Less implements sort.Interface. +func (sl sublevelSorter) Less(i, j int) bool { + return sl[i].minIntervalIndex < sl[j].minIntervalIndex +} + +// Swap implements sort.Interface. +func (sl sublevelSorter) Swap(i, j int) { + sl[i], sl[j] = sl[j], sl[i] +} + +// NewL0Sublevels creates an L0Sublevels instance for a given set of L0 files. +// These files must all be in L0 and must be sorted by seqnum (see +// SortBySeqNum). During interval iteration, when flushSplitMaxBytes bytes are +// exceeded in the range of intervals since the last flush split key, a flush +// split key is added. 
+// +// This method can be called without DB.mu being held, so any DB.mu protected +// fields in FileMetadata cannot be accessed here, such as Compacting and +// IsIntraL0Compacting. Those fields are accessed in InitCompactingFileInfo +// instead. +func NewL0Sublevels( + levelMetadata *LevelMetadata, cmp Compare, formatKey base.FormatKey, flushSplitMaxBytes int64, +) (*L0Sublevels, error) { + s := &L0Sublevels{cmp: cmp, formatKey: formatKey} + s.levelMetadata = levelMetadata + keys := make([]intervalKeyTemp, 0, 2*s.levelMetadata.Len()) + iter := levelMetadata.Iter() + for i, f := 0, iter.First(); f != nil; i, f = i+1, iter.Next() { + f.L0Index = i + keys = append(keys, intervalKeyTemp{ + intervalKey: intervalKey{key: f.Smallest.UserKey}, + fileMeta: f, + isEndKey: false, + }) + keys = append(keys, intervalKeyTemp{ + intervalKey: intervalKey{ + key: f.Largest.UserKey, + isLargest: !f.Largest.IsExclusiveSentinel(), + }, + fileMeta: f, + isEndKey: true, + }) + } + keys = sortAndSweep(keys, cmp) + // All interval indices reference s.orderedIntervals. + s.orderedIntervals = make([]fileInterval, len(keys)) + for i := range keys { + s.orderedIntervals[i] = fileInterval{ + index: i, + startKey: keys[i].intervalKey, + filesMinIntervalIndex: i, + filesMaxIntervalIndex: i, + } + } + // Initialize minIntervalIndex and maxIntervalIndex for each file, and use that + // to update intervals. + for f := iter.First(); f != nil; f = iter.Next() { + if err := s.addFileToSublevels(f, false /* checkInvariant */); err != nil { + return nil, err + } + } + // Sort each sublevel in increasing key order. + for i := range s.levelFiles { + sort.Sort(sublevelSorter(s.levelFiles[i])) + } + + // Construct a parallel slice of sublevel B-Trees. + // TODO(jackson): Consolidate and only use the B-Trees. 
+ for _, sublevelFiles := range s.levelFiles { + tr, ls := makeBTree(btreeCmpSmallestKey(cmp), sublevelFiles) + s.Levels = append(s.Levels, ls) + tr.Release() + } + + s.calculateFlushSplitKeys(flushSplitMaxBytes) + return s, nil +} + +// Helper function to merge new intervalKeys into an existing slice of old +// fileIntervals, into result. Returns the new result and a slice of ints +// mapping old interval indices to new ones. The added intervalKeys do not need +// to be sorted; they get sorted and deduped in this function. +func mergeIntervals( + old, result []fileInterval, added []intervalKeyTemp, compare Compare, +) ([]fileInterval, []int) { + sorter := intervalKeySorter{keys: added, cmp: compare} + sort.Sort(sorter) + + oldToNewMap := make([]int, len(old)) + i := 0 + j := 0 + + for i < len(old) || j < len(added) { + for j > 0 && j < len(added) && intervalKeyCompare(compare, added[j-1].intervalKey, added[j].intervalKey) == 0 { + added[j].setFileIntervalIndex(len(result) - 1) + j++ + } + if i >= len(old) && j >= len(added) { + break + } + var cmp int + if i >= len(old) { + cmp = +1 + } + if j >= len(added) { + cmp = -1 + } + if cmp == 0 { + cmp = intervalKeyCompare(compare, old[i].startKey, added[j].intervalKey) + } + switch { + case cmp <= 0: + // Shallow-copy the existing interval. + newInterval := old[i] + result = append(result, newInterval) + oldToNewMap[i] = len(result) - 1 + i++ + if cmp == 0 { + added[j].setFileIntervalIndex(len(result) - 1) + j++ + } + case cmp > 0: + var prevInterval fileInterval + // Insert a new interval for a newly-added file. prevInterval, if + // non-zero, will be "inherited"; we copy its files as those extend + // into this interval. 
+ if len(result) > 0 { + prevInterval = result[len(result)-1] + } + newInterval := fileInterval{ + index: len(result), + startKey: added[j].intervalKey, + filesMinIntervalIndex: len(result), + filesMaxIntervalIndex: len(result), + + // estimatedBytes gets recalculated later on, as the number of intervals + // the file bytes are interpolated over has changed. + estimatedBytes: 0, + // Copy the below attributes from prevInterval. + files: append([]*FileMetadata(nil), prevInterval.files...), + isBaseCompacting: prevInterval.isBaseCompacting, + intervalRangeIsBaseCompacting: prevInterval.intervalRangeIsBaseCompacting, + compactingFileCount: prevInterval.compactingFileCount, + } + result = append(result, newInterval) + added[j].setFileIntervalIndex(len(result) - 1) + j++ + } + } + return result, oldToNewMap +} + +// AddL0Files incrementally builds a new L0Sublevels for when the only change +// since the receiver L0Sublevels was an addition of the specified files, with +// no L0 deletions. The common case of this is an ingestion or a flush. These +// files can "sit on top" of existing sublevels, creating at most one new +// sublevel for a flush (and possibly multiple for an ingestion), and at most +// 2*len(files) additions to s.orderedIntervals. No files must have been deleted +// from L0, and the added files must all be newer in sequence numbers than +// existing files in L0Sublevels. The files parameter must be sorted in seqnum +// order. The levelMetadata parameter corresponds to the new L0 post addition of +// files. This method is meant to be significantly more performant than +// NewL0Sublevels. +// +// Note that this function can only be called once on a given receiver; it +// appends to some slices in s which is only safe when done once. This is okay, +// as the common case (generating a new L0Sublevels after a flush/ingestion) is +// only going to necessitate one call of this method on a given receiver. 
The +// returned value, if non-nil, can then have [*L0Sublevels.AddL0Files] called on +// it again, and so on. If [errInvalidL0SublevelsOpt] is returned as an error, +// it likely means the optimization could not be applied (i.e. files added were +// older than files already in the sublevels, which is possible around +// ingestions and in tests). Eg. it can happen when an ingested file was +// ingested without queueing a flush since it did not actually overlap with any +// keys in the memtable. Later on the memtable was flushed, and the memtable had +// keys spanning around the ingested file, producing a flushed file that +// overlapped with the ingested file in file bounds but not in keys. It's +// possible for that flushed file to have a lower LargestSeqNum than the +// ingested file if all the additions after the ingestion were to another +// flushed file that was split into a separate sstable during flush. Any other +// non-nil error means [L0Sublevels] generation failed in the same way as +// [NewL0Sublevels] would likely fail. +func (s *L0Sublevels) AddL0Files( + files []*FileMetadata, flushSplitMaxBytes int64, levelMetadata *LevelMetadata, +) (*L0Sublevels, error) { + if invariants.Enabled && s.addL0FilesCalled { + panic("AddL0Files called twice on the same receiver") + } + s.addL0FilesCalled = true + + // Start with a shallow copy of s. + newVal := &L0Sublevels{} + *newVal = *s + + newVal.addL0FilesCalled = false + newVal.levelMetadata = levelMetadata + // Deep copy levelFiles and Levels, as they are mutated and sorted below. + // Shallow copies of slices that we just append to, are okay. 
+ newVal.levelFiles = make([][]*FileMetadata, len(s.levelFiles)) + for i := range s.levelFiles { + newVal.levelFiles[i] = make([]*FileMetadata, len(s.levelFiles[i])) + copy(newVal.levelFiles[i], s.levelFiles[i]) + } + newVal.Levels = make([]LevelSlice, len(s.Levels)) + copy(newVal.Levels, s.Levels) + + fileKeys := make([]intervalKeyTemp, 0, 2*len(files)) + for _, f := range files { + left := intervalKeyTemp{ + intervalKey: intervalKey{key: f.Smallest.UserKey}, + fileMeta: f, + } + right := intervalKeyTemp{ + intervalKey: intervalKey{ + key: f.Largest.UserKey, + isLargest: !f.Largest.IsExclusiveSentinel(), + }, + fileMeta: f, + isEndKey: true, + } + fileKeys = append(fileKeys, left, right) + } + keys := make([]fileInterval, 0, 2*levelMetadata.Len()) + var oldToNewMap []int + // We can avoid the sortAndSweep step on the combined length of + // s.orderedIntervals and fileKeys by treating this as a merge of two sorted + // runs, fileKeys and s.orderedIntervals, into `keys` which will form + // newVal.orderedIntervals. + keys, oldToNewMap = mergeIntervals(s.orderedIntervals, keys, fileKeys, s.cmp) + if invariants.Enabled { + for i := 1; i < len(keys); i++ { + if intervalKeyCompare(newVal.cmp, keys[i-1].startKey, keys[i].startKey) >= 0 { + panic("keys not sorted correctly") + } + } + } + newVal.orderedIntervals = keys + // Update indices in s.orderedIntervals for fileIntervals we retained. + for _, newIdx := range oldToNewMap { + newInterval := &keys[newIdx] + newInterval.index = newIdx + // This code, and related code in the for loop below, adjusts + // files{Min,Max}IntervalIndex just for interval indices shifting due to + // new intervals, and not for any of the new files being added to the + // same intervals. The goal is to produce a state of the system that's + // accurate for all existing files, and has all the new intervals to + // support new files. Once that's done, we can just call + // addFileToSublevel to adjust all relevant intervals for new files. 
+ newInterval.filesMinIntervalIndex = oldToNewMap[newInterval.filesMinIntervalIndex] + // maxIntervalIndexes are special. Since it's an inclusive end bound, we + // actually have to map it to the _next_ old interval's new previous + // interval. This logic is easier to understand if you see + // [f.minIntervalIndex, f.maxIntervalIndex] as [f.minIntervalIndex, + // f.maxIntervalIndex+1). The other case to remember is when the + // interval is completely empty (i.e. len(newInterval.files) == 0); in + // that case we want to refer back to ourselves regardless of additions + // to the right of us. + if newInterval.filesMaxIntervalIndex < len(oldToNewMap)-1 && len(newInterval.files) > 0 { + newInterval.filesMaxIntervalIndex = oldToNewMap[newInterval.filesMaxIntervalIndex+1] - 1 + } else { + // newInterval.filesMaxIntervalIndex == len(oldToNewMap)-1. + newInterval.filesMaxIntervalIndex = oldToNewMap[newInterval.filesMaxIntervalIndex] + } + } + // Loop through all instances of new intervals added between two old + // intervals and expand [filesMinIntervalIndex, filesMaxIntervalIndex] of + // new intervals to reflect that of adjacent old intervals. + { + // We can skip cases where new intervals were added to the left of all + // existing intervals (eg. if the first entry in oldToNewMap is + // oldToNewMap[0] >= 1). Those intervals will only contain newly added + // files and will have their parameters adjusted down in + // addFileToSublevels. The same can also be said about new intervals + // that are to the right of all existing intervals. + lastIdx := 0 + for _, newIdx := range oldToNewMap { + for i := lastIdx + 1; i < newIdx; i++ { + minIntervalIndex := i + maxIntervalIndex := i + if keys[lastIdx].filesMaxIntervalIndex != lastIdx { + // Last old interval has files extending into keys[i]. 
+ minIntervalIndex = keys[lastIdx].filesMinIntervalIndex + maxIntervalIndex = keys[lastIdx].filesMaxIntervalIndex + } + + keys[i].filesMinIntervalIndex = minIntervalIndex + keys[i].filesMaxIntervalIndex = maxIntervalIndex + } + lastIdx = newIdx + } + } + // Go through old files and update interval indices. + // + // TODO(bilal): This is the only place in this method where we loop through + // all existing files, which could be much more in number than newly added + // files. See if we can avoid the need for this, either by getting rid of + // f.minIntervalIndex and f.maxIntervalIndex and calculating them on the fly + // with a binary search, or by only looping through files to the right of + // the first interval touched by this method. + for sublevel := range s.Levels { + s.Levels[sublevel].Each(func(f *FileMetadata) { + oldIntervalDelta := f.maxIntervalIndex - f.minIntervalIndex + 1 + oldMinIntervalIndex := f.minIntervalIndex + f.minIntervalIndex = oldToNewMap[f.minIntervalIndex] + // maxIntervalIndex is special. Since it's an inclusive end bound, + // we actually have to map it to the _next_ old interval's new + // previous interval. This logic is easier to understand if you see + // [f.minIntervalIndex, f.maxIntervalIndex] as [f.minIntervalIndex, + // f.maxIntervalIndex+1). + f.maxIntervalIndex = oldToNewMap[f.maxIntervalIndex+1] - 1 + newIntervalDelta := f.maxIntervalIndex - f.minIntervalIndex + 1 + // Recalculate estimatedBytes for all old files across new + // intervals, but only if new intervals were added in between. + if oldIntervalDelta != newIntervalDelta { + // j is incremented so that oldToNewMap[j] points to the next + // old interval. This is used to distinguish between old + // intervals (i.e. ones where we need to subtract + // f.Size/oldIntervalDelta) from new ones (where we don't need + // to subtract). In both cases we need to add + // f.Size/newIntervalDelta. 
+ j := oldMinIntervalIndex + for i := f.minIntervalIndex; i <= f.maxIntervalIndex; i++ { + if oldToNewMap[j] == i { + newVal.orderedIntervals[i].estimatedBytes -= f.Size / uint64(oldIntervalDelta) + j++ + } + newVal.orderedIntervals[i].estimatedBytes += f.Size / uint64(newIntervalDelta) + } + } + }) + } + updatedSublevels := make([]int, 0) + // Update interval indices for new files. + for i, f := range files { + f.L0Index = s.levelMetadata.Len() + i + if err := newVal.addFileToSublevels(f, true /* checkInvariant */); err != nil { + return nil, err + } + updatedSublevels = append(updatedSublevels, f.SubLevel) + } + + // Sort and deduplicate updatedSublevels. + sort.Ints(updatedSublevels) + { + j := 0 + for i := 1; i < len(updatedSublevels); i++ { + if updatedSublevels[i] != updatedSublevels[j] { + j++ + updatedSublevels[j] = updatedSublevels[i] + } + } + updatedSublevels = updatedSublevels[:j+1] + } + + // Sort each updated sublevel in increasing key order. + for _, sublevel := range updatedSublevels { + sort.Sort(sublevelSorter(newVal.levelFiles[sublevel])) + } + + // Construct a parallel slice of sublevel B-Trees. + // TODO(jackson): Consolidate and only use the B-Trees. + for _, sublevel := range updatedSublevels { + tr, ls := makeBTree(btreeCmpSmallestKey(newVal.cmp), newVal.levelFiles[sublevel]) + if sublevel == len(newVal.Levels) { + newVal.Levels = append(newVal.Levels, ls) + } else { + // sublevel < len(s.Levels). If this panics, updatedSublevels was not + // populated correctly. + newVal.Levels[sublevel] = ls + } + tr.Release() + } + + newVal.flushSplitUserKeys = nil + newVal.calculateFlushSplitKeys(flushSplitMaxBytes) + return newVal, nil +} + +// addFileToSublevels is called during L0Sublevels generation, and adds f to the +// correct sublevel's levelFiles, the relevant intervals' files slices, and sets +// interval indices on f. This method, if called successively on multiple files, +// _must_ be called on successively newer files (by seqnum). 
If checkInvariant +// is true, it could check for this in some cases and return +// [errInvalidL0SublevelsOpt] if that invariant isn't held. +func (s *L0Sublevels) addFileToSublevels(f *FileMetadata, checkInvariant bool) error { + // This is a simple and not very accurate estimate of the number of + // bytes this SSTable contributes to the intervals it is a part of. + // + // TODO(bilal): Call EstimateDiskUsage in sstable.Reader with interval + // bounds to get a better estimate for each interval. + interpolatedBytes := f.Size / uint64(f.maxIntervalIndex-f.minIntervalIndex+1) + s.fileBytes += f.Size + subLevel := 0 + // Update state in every fileInterval for this file. + for i := f.minIntervalIndex; i <= f.maxIntervalIndex; i++ { + interval := &s.orderedIntervals[i] + if len(interval.files) > 0 { + if checkInvariant && interval.files[len(interval.files)-1].LargestSeqNum > f.LargestSeqNum { + // We are sliding this file "underneath" an existing file. Throw away + // and start over in NewL0Sublevels. + return errInvalidL0SublevelsOpt + } + // interval.files is sorted by sublevels, from lowest to highest. + // AddL0Files can only add files at sublevels higher than existing files + // in the same key intervals. 
+ if maxSublevel := interval.files[len(interval.files)-1].SubLevel; subLevel <= maxSublevel { + subLevel = maxSublevel + 1 + } + } + interval.estimatedBytes += interpolatedBytes + if f.minIntervalIndex < interval.filesMinIntervalIndex { + interval.filesMinIntervalIndex = f.minIntervalIndex + } + if f.maxIntervalIndex > interval.filesMaxIntervalIndex { + interval.filesMaxIntervalIndex = f.maxIntervalIndex + } + interval.files = append(interval.files, f) + } + f.SubLevel = subLevel + if subLevel > len(s.levelFiles) { + return errors.Errorf("chose a sublevel beyond allowed range of sublevels: %d vs 0-%d", subLevel, len(s.levelFiles)) + } + if subLevel == len(s.levelFiles) { + s.levelFiles = append(s.levelFiles, []*FileMetadata{f}) + } else { + s.levelFiles[subLevel] = append(s.levelFiles[subLevel], f) + } + return nil +} + +func (s *L0Sublevels) calculateFlushSplitKeys(flushSplitMaxBytes int64) { + var cumulativeBytes uint64 + // Multiply flushSplitMaxBytes by the number of sublevels. This prevents + // excessive flush splitting when the number of sublevels increases. + flushSplitMaxBytes *= int64(len(s.levelFiles)) + for i := 0; i < len(s.orderedIntervals); i++ { + interval := &s.orderedIntervals[i] + if flushSplitMaxBytes > 0 && cumulativeBytes > uint64(flushSplitMaxBytes) && + (len(s.flushSplitUserKeys) == 0 || + !bytes.Equal(interval.startKey.key, s.flushSplitUserKeys[len(s.flushSplitUserKeys)-1])) { + s.flushSplitUserKeys = append(s.flushSplitUserKeys, interval.startKey.key) + cumulativeBytes = 0 + } + cumulativeBytes += s.orderedIntervals[i].estimatedBytes + } +} + +// InitCompactingFileInfo initializes internal flags relating to compacting +// files. Must be called after sublevel initialization. +// +// Requires DB.mu *and* the manifest lock to be held. 
+func (s *L0Sublevels) InitCompactingFileInfo(inProgress []L0Compaction) { + for i := range s.orderedIntervals { + s.orderedIntervals[i].compactingFileCount = 0 + s.orderedIntervals[i].isBaseCompacting = false + s.orderedIntervals[i].intervalRangeIsBaseCompacting = false + } + + iter := s.levelMetadata.Iter() + for f := iter.First(); f != nil; f = iter.Next() { + if invariants.Enabled { + if !bytes.Equal(s.orderedIntervals[f.minIntervalIndex].startKey.key, f.Smallest.UserKey) { + panic(fmt.Sprintf("f.minIntervalIndex in FileMetadata out of sync with intervals in L0Sublevels: %s != %s", + s.formatKey(s.orderedIntervals[f.minIntervalIndex].startKey.key), s.formatKey(f.Smallest.UserKey))) + } + if !bytes.Equal(s.orderedIntervals[f.maxIntervalIndex+1].startKey.key, f.Largest.UserKey) { + panic(fmt.Sprintf("f.maxIntervalIndex in FileMetadata out of sync with intervals in L0Sublevels: %s != %s", + s.formatKey(s.orderedIntervals[f.maxIntervalIndex+1].startKey.key), s.formatKey(f.Smallest.UserKey))) + } + } + if !f.IsCompacting() { + continue + } + if invariants.Enabled { + if s.cmp(s.orderedIntervals[f.minIntervalIndex].startKey.key, f.Smallest.UserKey) != 0 || s.cmp(s.orderedIntervals[f.maxIntervalIndex+1].startKey.key, f.Largest.UserKey) != 0 { + panic(fmt.Sprintf("file %s has inconsistent L0 Sublevel interval bounds: %s-%s, %s-%s", f.FileNum, + s.orderedIntervals[f.minIntervalIndex].startKey.key, s.orderedIntervals[f.maxIntervalIndex+1].startKey.key, + f.Smallest.UserKey, f.Largest.UserKey)) + } + } + for i := f.minIntervalIndex; i <= f.maxIntervalIndex; i++ { + interval := &s.orderedIntervals[i] + interval.compactingFileCount++ + if !f.IsIntraL0Compacting { + // If f.Compacting && !f.IsIntraL0Compacting, this file is + // being compacted to Lbase. + interval.isBaseCompacting = true + } + } + } + + // Some intervals may be base compacting without the files contained within + // those intervals being marked as compacting. 
This is possible if the files + // were added after the compaction initiated, and the active compaction + // files straddle the input file. Mark these intervals as base compacting. + for _, c := range inProgress { + startIK := intervalKey{key: c.Smallest.UserKey, isLargest: false} + endIK := intervalKey{key: c.Largest.UserKey, isLargest: !c.Largest.IsExclusiveSentinel()} + start, _ := slices.BinarySearchFunc(s.orderedIntervals, startIK, func(a fileInterval, b intervalKey) int { + return intervalKeyCompare(s.cmp, a.startKey, b) + }) + end, _ := slices.BinarySearchFunc(s.orderedIntervals, endIK, func(a fileInterval, b intervalKey) int { + return intervalKeyCompare(s.cmp, a.startKey, b) + }) + for i := start; i < end && i < len(s.orderedIntervals); i++ { + interval := &s.orderedIntervals[i] + if !c.IsIntraL0 { + interval.isBaseCompacting = true + } + } + } + + min := 0 + for i := range s.orderedIntervals { + interval := &s.orderedIntervals[i] + if interval.isBaseCompacting { + minIndex := interval.filesMinIntervalIndex + if minIndex < min { + minIndex = min + } + for j := minIndex; j <= interval.filesMaxIntervalIndex; j++ { + min = j + s.orderedIntervals[j].intervalRangeIsBaseCompacting = true + } + } + } +} + +// String produces a string containing useful debug information. Useful in test +// code and debugging. 
+func (s *L0Sublevels) String() string { + return s.describe(false) +} + +func (s *L0Sublevels) describe(verbose bool) string { + var buf strings.Builder + fmt.Fprintf(&buf, "file count: %d, sublevels: %d, intervals: %d\nflush split keys(%d): [", + s.levelMetadata.Len(), len(s.levelFiles), len(s.orderedIntervals), len(s.flushSplitUserKeys)) + for i := range s.flushSplitUserKeys { + fmt.Fprintf(&buf, "%s", s.formatKey(s.flushSplitUserKeys[i])) + if i < len(s.flushSplitUserKeys)-1 { + fmt.Fprintf(&buf, ", ") + } + } + fmt.Fprintln(&buf, "]") + numCompactingFiles := 0 + for i := len(s.levelFiles) - 1; i >= 0; i-- { + maxIntervals := 0 + sumIntervals := 0 + var totalBytes uint64 + for _, f := range s.levelFiles[i] { + intervals := f.maxIntervalIndex - f.minIntervalIndex + 1 + if intervals > maxIntervals { + maxIntervals = intervals + } + sumIntervals += intervals + totalBytes += f.Size + if f.IsCompacting() { + numCompactingFiles++ + } + } + fmt.Fprintf(&buf, "0.%d: file count: %d, bytes: %d, width (mean, max): %0.1f, %d, interval range: [%d, %d]\n", + i, len(s.levelFiles[i]), totalBytes, float64(sumIntervals)/float64(len(s.levelFiles[i])), maxIntervals, s.levelFiles[i][0].minIntervalIndex, + s.levelFiles[i][len(s.levelFiles[i])-1].maxIntervalIndex) + for _, f := range s.levelFiles[i] { + intervals := f.maxIntervalIndex - f.minIntervalIndex + 1 + if verbose { + fmt.Fprintf(&buf, "\t%s\n", f) + } + if s.levelMetadata.Len() > 50 && intervals*3 > len(s.orderedIntervals) { + var intervalsBytes uint64 + for k := f.minIntervalIndex; k <= f.maxIntervalIndex; k++ { + intervalsBytes += s.orderedIntervals[k].estimatedBytes + } + fmt.Fprintf(&buf, "wide file: %d, [%d, %d], byte fraction: %f\n", + f.FileNum, f.minIntervalIndex, f.maxIntervalIndex, + float64(intervalsBytes)/float64(s.fileBytes)) + } + } + } + + lastCompactingIntervalStart := -1 + fmt.Fprintf(&buf, "compacting file count: %d, base compacting intervals: ", numCompactingFiles) + i := 0 + foundBaseCompactingIntervals 
:= false + for ; i < len(s.orderedIntervals); i++ { + interval := &s.orderedIntervals[i] + if len(interval.files) == 0 { + continue + } + if !interval.isBaseCompacting { + if lastCompactingIntervalStart != -1 { + if foundBaseCompactingIntervals { + buf.WriteString(", ") + } + fmt.Fprintf(&buf, "[%d, %d]", lastCompactingIntervalStart, i-1) + foundBaseCompactingIntervals = true + } + lastCompactingIntervalStart = -1 + } else { + if lastCompactingIntervalStart == -1 { + lastCompactingIntervalStart = i + } + } + } + if lastCompactingIntervalStart != -1 { + if foundBaseCompactingIntervals { + buf.WriteString(", ") + } + fmt.Fprintf(&buf, "[%d, %d]", lastCompactingIntervalStart, i-1) + } else if !foundBaseCompactingIntervals { + fmt.Fprintf(&buf, "none") + } + fmt.Fprintln(&buf, "") + return buf.String() +} + +// ReadAmplification returns the contribution of L0Sublevels to the read +// amplification for any particular point key. It is the maximum height of any +// tracked fileInterval. This is always less than or equal to the number of +// sublevels. +func (s *L0Sublevels) ReadAmplification() int { + amp := 0 + for i := range s.orderedIntervals { + interval := &s.orderedIntervals[i] + fileCount := len(interval.files) + if amp < fileCount { + amp = fileCount + } + } + return amp +} + +// UserKeyRange encodes a key range in user key space. A UserKeyRange's Start +// and End boundaries are both inclusive. +type UserKeyRange struct { + Start, End []byte +} + +// InUseKeyRanges returns the merged table bounds of L0 files overlapping the +// provided user key range. The returned key ranges are sorted and +// nonoverlapping. +func (s *L0Sublevels) InUseKeyRanges(smallest, largest []byte) []UserKeyRange { + // Binary search to find the provided keys within the intervals. 
+ startIK := intervalKey{key: smallest, isLargest: false} + endIK := intervalKey{key: largest, isLargest: true} + start := sort.Search(len(s.orderedIntervals), func(i int) bool { + return intervalKeyCompare(s.cmp, s.orderedIntervals[i].startKey, startIK) > 0 + }) + if start > 0 { + // Back up to the first interval with a start key <= startIK. + start-- + } + end := sort.Search(len(s.orderedIntervals), func(i int) bool { + return intervalKeyCompare(s.cmp, s.orderedIntervals[i].startKey, endIK) > 0 + }) + + var keyRanges []UserKeyRange + var curr *UserKeyRange + for i := start; i < end; { + // Intervals with no files are not in use and can be skipped, once we + // end the current UserKeyRange. + if len(s.orderedIntervals[i].files) == 0 { + curr = nil + i++ + continue + } + + // If curr is nil, start a new in-use key range. + if curr == nil { + keyRanges = append(keyRanges, UserKeyRange{ + Start: s.orderedIntervals[i].startKey.key, + }) + curr = &keyRanges[len(keyRanges)-1] + } + + // If the filesMaxIntervalIndex is not the current index, we can jump to + // the max index, knowing that all intermediary intervals are overlapped + // by some file. + if maxIdx := s.orderedIntervals[i].filesMaxIntervalIndex; maxIdx != i { + // Note that end may be less than or equal to maxIdx if we're + // concerned with a key range that ends before the interval at + // maxIdx starts. We must set curr.End now, before making that leap, + // because this iteration may be the last. + i = maxIdx + curr.End = s.orderedIntervals[i+1].startKey.key + continue + } + + // No files overlapping with this interval overlap with the next + // interval. Update the current end to be the next interval's start key. + // Note that curr is not necessarily finished, because there may be an + // abutting non-empty interval. + curr.End = s.orderedIntervals[i+1].startKey.key + i++ + } + return keyRanges +} + +// FlushSplitKeys returns a slice of user keys to split flushes at. 
Used by +// flushes to avoid writing sstables that straddle these split keys. These +// should be interpreted as the keys to start the next sstable (not the last key +// to include in the prev sstable). These are user keys so that range tombstones +// can be properly truncated (untruncated range tombstones are not permitted for +// L0 files). +func (s *L0Sublevels) FlushSplitKeys() [][]byte { + return s.flushSplitUserKeys +} + +// MaxDepthAfterOngoingCompactions returns an estimate of maximum depth of +// sublevels after all ongoing compactions run to completion. Used by compaction +// picker to decide compaction score for L0. There is no scoring for intra-L0 +// compactions -- they only run if L0 score is high but we're unable to pick an +// L0 -> Lbase compaction. +func (s *L0Sublevels) MaxDepthAfterOngoingCompactions() int { + depth := 0 + for i := range s.orderedIntervals { + interval := &s.orderedIntervals[i] + intervalDepth := len(interval.files) - interval.compactingFileCount + if depth < intervalDepth { + depth = intervalDepth + } + } + return depth +} + +// Only for temporary debugging in the absence of proper tests. +// +// TODO(bilal): Simplify away the debugging statements in this method, and make +// this a pure sanity checker. 
+// +//lint:ignore U1000 - useful for debugging +func (s *L0Sublevels) checkCompaction(c *L0CompactionFiles) error { + includedFiles := newBitSet(s.levelMetadata.Len()) + fileIntervalsByLevel := make([]struct { + min int + max int + }, len(s.levelFiles)) + for i := range fileIntervalsByLevel { + fileIntervalsByLevel[i].min = math.MaxInt32 + fileIntervalsByLevel[i].max = 0 + } + var topLevel int + var increment int + var limitReached func(int) bool + if c.isIntraL0 { + topLevel = len(s.levelFiles) - 1 + increment = +1 + limitReached = func(level int) bool { + return level == len(s.levelFiles) + } + } else { + topLevel = 0 + increment = -1 + limitReached = func(level int) bool { + return level < 0 + } + } + for _, f := range c.Files { + if fileIntervalsByLevel[f.SubLevel].min > f.minIntervalIndex { + fileIntervalsByLevel[f.SubLevel].min = f.minIntervalIndex + } + if fileIntervalsByLevel[f.SubLevel].max < f.maxIntervalIndex { + fileIntervalsByLevel[f.SubLevel].max = f.maxIntervalIndex + } + includedFiles.markBit(f.L0Index) + if c.isIntraL0 { + if topLevel > f.SubLevel { + topLevel = f.SubLevel + } + } else { + if topLevel < f.SubLevel { + topLevel = f.SubLevel + } + } + } + min := fileIntervalsByLevel[topLevel].min + max := fileIntervalsByLevel[topLevel].max + for level := topLevel; !limitReached(level); level += increment { + if fileIntervalsByLevel[level].min < min { + min = fileIntervalsByLevel[level].min + } + if fileIntervalsByLevel[level].max > max { + max = fileIntervalsByLevel[level].max + } + index, _ := slices.BinarySearchFunc(s.levelFiles[level], min, func(a *FileMetadata, b int) int { + return stdcmp.Compare(a.maxIntervalIndex, b) + }) + // start := index + for ; index < len(s.levelFiles[level]); index++ { + f := s.levelFiles[level][index] + if f.minIntervalIndex > max { + break + } + if c.isIntraL0 && f.LargestSeqNum >= c.earliestUnflushedSeqNum { + return errors.Errorf( + "sstable %s in compaction has sequence numbers higher than the earliest unflushed 
seqnum %d: %d-%d", + f.FileNum, c.earliestUnflushedSeqNum, f.SmallestSeqNum, + f.LargestSeqNum) + } + if !includedFiles[f.L0Index] { + var buf strings.Builder + fmt.Fprintf(&buf, "bug %t, seed interval: %d: level %d, sl index %d, f.index %d, min %d, max %d, pre-min %d, pre-max %d, f.min %d, f.max %d, filenum: %d, isCompacting: %t\n%s\n", + c.isIntraL0, c.seedInterval, level, index, f.L0Index, min, max, c.preExtensionMinInterval, c.preExtensionMaxInterval, + f.minIntervalIndex, f.maxIntervalIndex, + f.FileNum, f.IsCompacting(), s) + fmt.Fprintf(&buf, "files included:\n") + for _, f := range c.Files { + fmt.Fprintf(&buf, "filenum: %d, sl: %d, index: %d, [%d, %d]\n", + f.FileNum, f.SubLevel, f.L0Index, f.minIntervalIndex, f.maxIntervalIndex) + } + fmt.Fprintf(&buf, "files added:\n") + for _, f := range c.filesAdded { + fmt.Fprintf(&buf, "filenum: %d, sl: %d, index: %d, [%d, %d]\n", + f.FileNum, f.SubLevel, f.L0Index, f.minIntervalIndex, f.maxIntervalIndex) + } + return errors.New(buf.String()) + } + } + } + return nil +} + +// UpdateStateForStartedCompaction updates internal L0Sublevels state for a +// recently started compaction. isBase specifies if this is a base compaction; +// if false, this is assumed to be an intra-L0 compaction. The specified +// compaction must be involving L0 SSTables. It's assumed that the Compacting +// and IsIntraL0Compacting fields are already set on all [FileMetadata]s passed +// in. 
+func (s *L0Sublevels) UpdateStateForStartedCompaction(inputs []LevelSlice, isBase bool) error { + minIntervalIndex := -1 + maxIntervalIndex := 0 + for i := range inputs { + iter := inputs[i].Iter() + for f := iter.First(); f != nil; f = iter.Next() { + for i := f.minIntervalIndex; i <= f.maxIntervalIndex; i++ { + interval := &s.orderedIntervals[i] + interval.compactingFileCount++ + } + if f.minIntervalIndex < minIntervalIndex || minIntervalIndex == -1 { + minIntervalIndex = f.minIntervalIndex + } + if f.maxIntervalIndex > maxIntervalIndex { + maxIntervalIndex = f.maxIntervalIndex + } + } + } + if isBase { + for i := minIntervalIndex; i <= maxIntervalIndex; i++ { + interval := &s.orderedIntervals[i] + interval.isBaseCompacting = isBase + for j := interval.filesMinIntervalIndex; j <= interval.filesMaxIntervalIndex; j++ { + s.orderedIntervals[j].intervalRangeIsBaseCompacting = true + } + } + } + return nil +} + +// L0CompactionFiles represents a candidate set of L0 files for compaction. Also +// referred to as "lcf". Contains state information useful for generating the +// compaction (such as Files), as well as for picking between candidate +// compactions (eg. fileBytes and seedIntervalStackDepthReduction). +type L0CompactionFiles struct { + Files []*FileMetadata + + FilesIncluded bitSet + // A "seed interval" is an interval with a high stack depth that was chosen + // to bootstrap this compaction candidate. seedIntervalStackDepthReduction + // is the number of sublevels that have a file in the seed interval that is + // a part of this compaction. + seedIntervalStackDepthReduction int + // For base compactions, seedIntervalMinLevel is 0, and for intra-L0 + // compactions, seedIntervalMaxLevel is len(s.Files)-1 i.e. the highest + // sublevel. + seedIntervalMinLevel int + seedIntervalMaxLevel int + // Index of the seed interval. + seedInterval int + // Sum of file sizes for all files in this compaction. 
+ fileBytes uint64 + // Intervals with index [minIntervalIndex, maxIntervalIndex] are + // participating in this compaction; it's the union set of all intervals + // overlapped by participating files. + minIntervalIndex int + maxIntervalIndex int + + // Set for intra-L0 compactions. SSTables with sequence numbers greater + // than earliestUnflushedSeqNum cannot be a part of intra-L0 compactions. + isIntraL0 bool + earliestUnflushedSeqNum uint64 + + // For debugging purposes only. Used in checkCompaction(). + preExtensionMinInterval int + preExtensionMaxInterval int + filesAdded []*FileMetadata +} + +// Clone allocates a new L0CompactionFiles, with the same underlying data. Note +// that the two fileMetadata slices contain values that point to the same +// underlying fileMetadata object. This is safe because these objects are read +// only. +func (l *L0CompactionFiles) Clone() *L0CompactionFiles { + oldLcf := *l + return &oldLcf +} + +// String merely prints the starting address of the first file, if it exists. +func (l *L0CompactionFiles) String() string { + if len(l.Files) > 0 { + return fmt.Sprintf("First File Address: %p", &l.Files[0]) + } + return "" +} + +// addFile adds the specified file to the LCF. +func (l *L0CompactionFiles) addFile(f *FileMetadata) { + if l.FilesIncluded[f.L0Index] { + return + } + l.FilesIncluded.markBit(f.L0Index) + l.Files = append(l.Files, f) + l.filesAdded = append(l.filesAdded, f) + l.fileBytes += f.Size + if f.minIntervalIndex < l.minIntervalIndex { + l.minIntervalIndex = f.minIntervalIndex + } + if f.maxIntervalIndex > l.maxIntervalIndex { + l.maxIntervalIndex = f.maxIntervalIndex + } +} + +// Helper to order intervals being considered for compaction. 
+type intervalAndScore struct { + interval int + score int +} +type intervalSorterByDecreasingScore []intervalAndScore + +func (is intervalSorterByDecreasingScore) Len() int { return len(is) } +func (is intervalSorterByDecreasingScore) Less(i, j int) bool { + return is[i].score > is[j].score +} +func (is intervalSorterByDecreasingScore) Swap(i, j int) { + is[i], is[j] = is[j], is[i] +} + +// Compactions: +// +// The sub-levels and intervals can be visualized in 2 dimensions as the X axis +// containing intervals in increasing order and the Y axis containing sub-levels +// (older to younger). The intervals can be sparse wrt sub-levels. We observe +// that the system is typically under severe pressure in L0 during large numbers +// of ingestions where most files added to L0 are narrow and non-overlapping. +// +// L0.1 d---g +// L0.0 c--e g--j o--s u--x +// +// As opposed to a case with a lot of wide, overlapping L0 files: +// +// L0.3 d-----------r +// L0.2 c--------o +// L0.1 b-----------q +// L0.0 a----------------x +// +// In that case we expect the rectangle represented in the good visualization +// above (i.e. the first one) to be wide and short, and not too sparse (most +// intervals will have fileCount close to the sub-level count), which would make +// it amenable to concurrent L0 -> Lbase compactions. +// +// L0 -> Lbase: The high-level goal of a L0 -> Lbase compaction is to reduce +// stack depth, by compacting files in the intervals with the highest (fileCount +// - compactingCount). Additionally, we would like compactions to not involve a +// huge number of files, so that they finish quickly, and to allow for +// concurrent L0 -> Lbase compactions when needed. In order to achieve these +// goals we would like compactions to visualize as capturing thin and tall +// rectangles. The approach below is to consider intervals in some order and +// then try to construct a compaction using the interval. 
The first interval we +// can construct a compaction for is the compaction that is started. There can +// be multiple heuristics in choosing the ordering of the intervals -- the code +// uses one heuristic that worked well for a large ingestion stemming from a +// cockroachdb import, but additional experimentation is necessary to pick a +// general heuristic. Additionally, the compaction that gets picked may be not +// as desirable as one that could be constructed later in terms of reducing +// stack depth (since adding more files to the compaction can get blocked by +// needing to encompass files that are already being compacted). So an +// alternative would be to try to construct more than one compaction and pick +// the best one. +// +// Here's a visualization of an ideal L0->LBase compaction selection: +// +// L0.3 a--d g-j +// L0.2 f--j r-t +// L0.1 b-d e---j +// L0.0 a--d f--j l--o p-----x +// +// Lbase a--------i m---------w +// +// The [g,j] interval has the highest stack depth, so it would have the highest +// priority for selecting a base compaction candidate. Assuming none of the +// files are already compacting, this is the compaction that will be chosen: +// +// _______ +// L0.3 a--d | g-j| +// L0.2 | f--j| r-t +// L0.1 b-d |e---j| +// L0.0 a--d | f--j| l--o p-----x +// +// Lbase a--------i m---------w +// +// Note that running this compaction will mark the a--i file in Lbase as +// compacting, and when ExtendL0ForBaseCompactionTo is called with the bounds of +// that base file, it'll expand the compaction to also include all L0 files in +// the a-d interval. The resultant compaction would then be: +// +// _____________ +// L0.3 |a--d g-j| +// L0.2 | f--j| r-t +// L0.1 | b-d e---j| +// L0.0 |a--d f--j| l--o p-----x +// +// Lbase a--------i m---------w +// +// The next best interval for base compaction would therefore be the one +// including r--t in L0.2 and p--x in L0.0, and both this compaction and the one +// picked earlier can run in parallel. 
This is assuming minCompactionDepth >= 2, +// otherwise the second compaction has too little depth to pick. +// +// _____________ +// L0.3 |a--d g-j| _________ +// L0.2 | f--j| | r-t | +// L0.1 | b-d e---j| | | +// L0.0 |a--d f--j| l--o |p-----x| +// +// Lbase a--------i m---------w +// +// Note that when ExtendL0ForBaseCompactionTo is called, the compaction expands +// to the following, given that the [l,o] file can be added without including +// additional files in Lbase: +// +// _____________ +// L0.3 |a--d g-j| _________ +// L0.2 | f--j| | r-t | +// L0.1 | b-d e---j|______| | +// L0.0 |a--d f--j||l--o p-----x| +// +// Lbase a--------i m---------w +// +// If an additional file existed in LBase that overlapped with [l,o], it would +// be excluded from the compaction. Concretely: +// +// _____________ +// L0.3 |a--d g-j| _________ +// L0.2 | f--j| | r-t | +// L0.1 | b-d e---j| | | +// L0.0 |a--d f--j| l--o |p-----x| +// +// Lbase a--------ij--lm---------w +// +// Intra-L0: If the L0 score is high, but PickBaseCompaction() is unable to pick +// a compaction, PickIntraL0Compaction will be used to pick an intra-L0 +// compaction. Similar to L0 -> Lbase compactions, we want to allow for multiple +// intra-L0 compactions and not generate wide output files that hinder later +// concurrency of L0 -> Lbase compactions. Also compactions that produce wide +// files don't reduce stack depth -- they represent wide rectangles in our +// visualization, which means many intervals have their depth reduced by a small +// amount. Typically, L0 files have non-overlapping sequence numbers, and +// sticking to that invariant would require us to consider intra-L0 compactions +// that proceed from youngest to oldest files, which could result in the +// aforementioned undesirable wide rectangle shape. But this non-overlapping +// sequence number is already relaxed in RocksDB -- sstables are primarily +// ordered by their largest sequence number. 
So we can arrange for intra-L0 +// compactions to capture thin and tall rectangles starting with the top of the +// stack (youngest files). Like the L0 -> Lbase case we order the intervals +// using a heuristic and consider each in turn. The same comment about better L0 +// -> Lbase heuristics and not being greedy applies here. +// +// Going back to a modified version of our example from earlier, let's say these +// are the base compactions in progress: +// _______ +// L0.3 a--d | g-j| _________ +// L0.2 | f--j| | r-t | +// L0.1 b-d |e---j| | | +// L0.0 a--d | f--j| l--o |p-----x| +// +// Lbase a---------i m---------w +// +// Since both LBase files are compacting, the only L0 compaction that can be +// picked is an intra-L0 compaction. For this, the b--d interval has the highest +// stack depth (3), and starting with a--d in L0.3 as the seed file, we can +// iterate downward and build this compaction, assuming all files in that +// interval are not compacting and have a highest sequence number less than +// earliestUnflushedSeqNum: +// +// _______ +// L0.3 |a--d| | g-j| _________ +// L0.2 | | | f--j| | r-t | +// L0.1 | b-d| |e---j| | | +// L0.0 |a--d| | f--j| l--o |p-----x| +// ------ +// Lbase a---------i m---------w +// + +// PickBaseCompaction picks a base compaction based on the above specified +// heuristics, for the specified Lbase files and a minimum depth of overlapping +// files that can be selected for compaction. Returns nil if no compaction is +// possible. +func (s *L0Sublevels) PickBaseCompaction( + minCompactionDepth int, baseFiles LevelSlice, +) (*L0CompactionFiles, error) { + // For LBase compactions, we consider intervals in a greedy manner in the + // following order: + // - Intervals that are unlikely to be blocked due + // to ongoing L0 -> Lbase compactions. These are the ones with + // !isBaseCompacting && !intervalRangeIsBaseCompacting. + // - Intervals that are !isBaseCompacting && intervalRangeIsBaseCompacting. 
+ // + // The ordering heuristic exists just to avoid wasted work. Ideally, + // we would consider all intervals with isBaseCompacting = false and + // construct a compaction for it and compare the constructed compactions + // and pick the best one. If microbenchmarks show that we can afford + // this cost we can eliminate this heuristic. + scoredIntervals := make([]intervalAndScore, 0, len(s.orderedIntervals)) + sublevelCount := len(s.levelFiles) + for i := range s.orderedIntervals { + interval := &s.orderedIntervals[i] + depth := len(interval.files) - interval.compactingFileCount + if interval.isBaseCompacting || minCompactionDepth > depth { + continue + } + if interval.intervalRangeIsBaseCompacting { + scoredIntervals = append(scoredIntervals, intervalAndScore{interval: i, score: depth}) + } else { + // Prioritize this interval by incrementing the score by the number + // of sublevels. + scoredIntervals = append(scoredIntervals, intervalAndScore{interval: i, score: depth + sublevelCount}) + } + } + sort.Sort(intervalSorterByDecreasingScore(scoredIntervals)) + + // Optimization to avoid considering different intervals that + // are likely to choose the same seed file. Again this is just + // to reduce wasted work. + consideredIntervals := newBitSet(len(s.orderedIntervals)) + for _, scoredInterval := range scoredIntervals { + interval := &s.orderedIntervals[scoredInterval.interval] + if consideredIntervals[interval.index] { + continue + } + + // Pick the seed file for the interval as the file + // in the lowest sub-level. + f := interval.files[0] + // Don't bother considering the intervals that are covered by the seed + // file since they are likely nearby. Note that it is possible that + // those intervals have seed files at lower sub-levels so could be + // viable for compaction. 
+ if f == nil { + return nil, errors.New("no seed file found in sublevel intervals") + } + consideredIntervals.markBits(f.minIntervalIndex, f.maxIntervalIndex+1) + if f.IsCompacting() { + if f.IsIntraL0Compacting { + // If we're picking a base compaction and we came across a seed + // file candidate that's being intra-L0 compacted, skip the + // interval instead of erroring out. + continue + } + // We chose a compaction seed file that should not be compacting. + // Usually means the score is not accurately accounting for files + // already compacting, or internal state is inconsistent. + return nil, errors.Errorf("file %s chosen as seed file for compaction should not be compacting", f.FileNum) + } + + c := s.baseCompactionUsingSeed(f, interval.index, minCompactionDepth) + if c != nil { + // Check if the chosen compaction overlaps with any files in Lbase + // that have Compacting = true. If that's the case, this compaction + // cannot be chosen. + baseIter := baseFiles.Iter() + // An interval starting at ImmediateSuccessor(key) can never be the + // first interval of a compaction since no file can start at that + // interval. + m := baseIter.SeekGE(s.cmp, s.orderedIntervals[c.minIntervalIndex].startKey.key) + + var baseCompacting bool + for ; m != nil && !baseCompacting; m = baseIter.Next() { + cmp := s.cmp(m.Smallest.UserKey, s.orderedIntervals[c.maxIntervalIndex+1].startKey.key) + // Compaction is ending at exclusive bound of c.maxIntervalIndex+1 + if cmp > 0 || (cmp == 0 && !s.orderedIntervals[c.maxIntervalIndex+1].startKey.isLargest) { + break + } + baseCompacting = baseCompacting || m.IsCompacting() + } + if baseCompacting { + continue + } + return c, nil + } + } + return nil, nil +} + +// Helper function for building an L0 -> Lbase compaction using a seed interval +// and seed file in that seed interval. 
+func (s *L0Sublevels) baseCompactionUsingSeed( + f *FileMetadata, intervalIndex int, minCompactionDepth int, +) *L0CompactionFiles { + c := &L0CompactionFiles{ + FilesIncluded: newBitSet(s.levelMetadata.Len()), + seedInterval: intervalIndex, + seedIntervalMinLevel: 0, + minIntervalIndex: f.minIntervalIndex, + maxIntervalIndex: f.maxIntervalIndex, + } + c.addFile(f) + + // The first iteration of this loop builds the compaction at the seed file's + // sublevel. Future iterations expand on this compaction by stacking more + // files from intervalIndex and repeating. This is an optional activity so + // when it fails we can fallback to the last successful candidate. + var lastCandidate *L0CompactionFiles + interval := &s.orderedIntervals[intervalIndex] + + for i := 0; i < len(interval.files); i++ { + f2 := interval.files[i] + sl := f2.SubLevel + c.seedIntervalStackDepthReduction++ + c.seedIntervalMaxLevel = sl + c.addFile(f2) + // The seed file is in the lowest sublevel in the seed interval, but it + // may overlap with other files in even lower sublevels. For correctness + // we need to grow our interval to include those files, and capture all + // files in the next level that fall in this extended interval and so + // on. This can result in a triangular shape like the following where + // again the X axis is the key intervals and the Y axis is oldest to + // youngest. Note that it is not necessary for correctness to fill out + // the shape at the higher sub-levels to make it more rectangular since + // the invariant only requires that younger versions of a key not be + // moved to Lbase while leaving behind older versions. + // - + // --- + // ----- + // It may be better for performance to have a more rectangular shape + // since the files being left behind will overlap with the same Lbase + // key range as that of this compaction. 
But there is also the danger + // that in trying to construct a more rectangular shape we will be + // forced to pull in a file that is already compacting. We expect + // extendCandidateToRectangle to eventually be called on this compaction + // if it's chosen, at which point we would iterate backward and choose + // those files. This logic is similar to compaction.grow for non-L0 + // compactions. + done := false + for currLevel := sl - 1; currLevel >= 0; currLevel-- { + if !s.extendFiles(currLevel, math.MaxUint64, c) { + // Failed to extend due to ongoing compaction. + done = true + break + } + } + if done { + break + } + // Observed some compactions using > 1GB from L0 in an import + // experiment. Very long running compactions are not great as they + // reduce concurrency while they run, and take a while to produce + // results, though they're sometimes unavoidable. There is a tradeoff + // here in that adding more depth is more efficient in reducing stack + // depth, but long running compactions reduce flexibility in what can + // run concurrently in L0 and even Lbase -> Lbase+1. An increase more + // than 150% in bytes since the last candidate compaction (along with a + // total compaction size in excess of 100mb), or a total compaction size + // beyond a hard limit of 500mb, is criteria for rejecting this + // candidate. This lets us prefer slow growths as we add files, while + // still having a hard limit. Note that if this is the first compaction + // candidate to reach a stack depth reduction of minCompactionDepth or + // higher, this candidate will be chosen regardless. 
+ if lastCandidate == nil { + lastCandidate = &L0CompactionFiles{} + } else if lastCandidate.seedIntervalStackDepthReduction >= minCompactionDepth && + c.fileBytes > 100<<20 && + (float64(c.fileBytes)/float64(lastCandidate.fileBytes) > 1.5 || c.fileBytes > 500<<20) { + break + } + *lastCandidate = *c + } + if lastCandidate != nil && lastCandidate.seedIntervalStackDepthReduction >= minCompactionDepth { + lastCandidate.FilesIncluded.clearAllBits() + for _, f := range lastCandidate.Files { + lastCandidate.FilesIncluded.markBit(f.L0Index) + } + return lastCandidate + } + return nil +} + +// Expands fields in the provided L0CompactionFiles instance (cFiles) to +// include overlapping files in the specified sublevel. Returns true if the +// compaction is possible (i.e. does not conflict with any base/intra-L0 +// compacting files). +func (s *L0Sublevels) extendFiles( + sl int, earliestUnflushedSeqNum uint64, cFiles *L0CompactionFiles, +) bool { + index, _ := slices.BinarySearchFunc(s.levelFiles[sl], cFiles.minIntervalIndex, func(a *FileMetadata, b int) int { + return stdcmp.Compare(a.maxIntervalIndex, b) + }) + for ; index < len(s.levelFiles[sl]); index++ { + f := s.levelFiles[sl][index] + if f.minIntervalIndex > cFiles.maxIntervalIndex { + break + } + if f.IsCompacting() { + return false + } + // Skip over files that are newer than earliestUnflushedSeqNum. This is + // okay because this compaction can just pretend these files are not in + // L0 yet. These files must be in higher sublevels than any overlapping + // files with f.LargestSeqNum < earliestUnflushedSeqNum, and the output + // of the compaction will also go in a lower (older) sublevel than this + // file by definition. + if f.LargestSeqNum >= earliestUnflushedSeqNum { + continue + } + cFiles.addFile(f) + } + return true +} + +// PickIntraL0Compaction picks an intra-L0 compaction for files in this +// sublevel. This method is only called when a base compaction cannot be chosen. 
+// See comment above [PickBaseCompaction] for heuristics involved in this +// selection. +func (s *L0Sublevels) PickIntraL0Compaction( + earliestUnflushedSeqNum uint64, minCompactionDepth int, +) (*L0CompactionFiles, error) { + scoredIntervals := make([]intervalAndScore, len(s.orderedIntervals)) + for i := range s.orderedIntervals { + interval := &s.orderedIntervals[i] + depth := len(interval.files) - interval.compactingFileCount + if minCompactionDepth > depth { + continue + } + scoredIntervals[i] = intervalAndScore{interval: i, score: depth} + } + sort.Sort(intervalSorterByDecreasingScore(scoredIntervals)) + + // Optimization to avoid considering different intervals that are likely to + // choose the same seed file. Again this is just to reduce wasted work. + consideredIntervals := newBitSet(len(s.orderedIntervals)) + for _, scoredInterval := range scoredIntervals { + interval := &s.orderedIntervals[scoredInterval.interval] + if consideredIntervals[interval.index] { + continue + } + + var f *FileMetadata + // Pick the seed file for the interval as the file in the highest + // sub-level. + stackDepthReduction := scoredInterval.score + for i := len(interval.files) - 1; i >= 0; i-- { + f = interval.files[i] + if f.IsCompacting() { + break + } + consideredIntervals.markBits(f.minIntervalIndex, f.maxIntervalIndex+1) + // Can this be the seed file? Files with newer sequence numbers than + // earliestUnflushedSeqNum cannot be in the compaction. + if f.LargestSeqNum >= earliestUnflushedSeqNum { + stackDepthReduction-- + if stackDepthReduction == 0 { + break + } + } else { + break + } + } + if stackDepthReduction < minCompactionDepth { + // Can't use this interval. + continue + } + + if f == nil { + return nil, errors.New("no seed file found in sublevel intervals") + } + if f.IsCompacting() { + // This file could be in a concurrent intra-L0 or base compaction. + // Try another interval. + continue + } + + // We have a seed file. Build a compaction off of that seed. 
+ c := s.intraL0CompactionUsingSeed( + f, interval.index, earliestUnflushedSeqNum, minCompactionDepth) + if c != nil { + return c, nil + } + } + return nil, nil +} + +func (s *L0Sublevels) intraL0CompactionUsingSeed( + f *FileMetadata, intervalIndex int, earliestUnflushedSeqNum uint64, minCompactionDepth int, +) *L0CompactionFiles { + // We know that all the files that overlap with intervalIndex have + // LargestSeqNum < earliestUnflushedSeqNum, but for other intervals + // we need to exclude files >= earliestUnflushedSeqNum + + c := &L0CompactionFiles{ + FilesIncluded: newBitSet(s.levelMetadata.Len()), + seedInterval: intervalIndex, + seedIntervalMaxLevel: len(s.levelFiles) - 1, + minIntervalIndex: f.minIntervalIndex, + maxIntervalIndex: f.maxIntervalIndex, + isIntraL0: true, + earliestUnflushedSeqNum: earliestUnflushedSeqNum, + } + c.addFile(f) + + var lastCandidate *L0CompactionFiles + interval := &s.orderedIntervals[intervalIndex] + slIndex := len(interval.files) - 1 + for { + if interval.files[slIndex] == f { + break + } + slIndex-- + } + // The first iteration of this loop produces an intra-L0 compaction at the + // seed level. Iterations after that optionally add to the compaction by + // stacking more files from intervalIndex and repeating. This is an optional + // activity so when it fails we can fallback to the last successful + // candidate. The code stops adding when it can't add more, or when + // fileBytes grows too large. + for ; slIndex >= 0; slIndex-- { + f2 := interval.files[slIndex] + sl := f2.SubLevel + if f2.IsCompacting() { + break + } + c.seedIntervalStackDepthReduction++ + c.seedIntervalMinLevel = sl + c.addFile(f2) + // The seed file captures all files in the higher level that fall in the + // range of intervals. That may extend the range of intervals so for + // correctness we need to capture all files in the next higher level + // that fall in this extended interval and so on. 
This can result in an + // inverted triangular shape like the following where again the X axis + // is the key intervals and the Y axis is oldest to youngest. Note that + // it is not necessary for correctness to fill out the shape at lower + // sub-levels to make it more rectangular since the invariant only + // requires that if we move an older seqnum for key k into a file that + // has a higher seqnum, we also move all younger seqnums for that key k + // into that file. + // ----- + // --- + // - + // It may be better for performance to have a more rectangular shape + // since it will reduce the stack depth for more intervals. But there is + // also the danger that in explicitly trying to construct a more + // rectangular shape we will be forced to pull in a file that is already + // compacting. We assume that the performance concern is not a practical + // issue. + done := false + for currLevel := sl + 1; currLevel < len(s.levelFiles); currLevel++ { + if !s.extendFiles(currLevel, earliestUnflushedSeqNum, c) { + // Failed to extend due to ongoing compaction. 
+ done = true + break + } + } + if done { + break + } + if lastCandidate == nil { + lastCandidate = &L0CompactionFiles{} + } else if lastCandidate.seedIntervalStackDepthReduction >= minCompactionDepth && + c.fileBytes > 100<<20 && + (float64(c.fileBytes)/float64(lastCandidate.fileBytes) > 1.5 || c.fileBytes > 500<<20) { + break + } + *lastCandidate = *c + } + if lastCandidate != nil && lastCandidate.seedIntervalStackDepthReduction >= minCompactionDepth { + lastCandidate.FilesIncluded.clearAllBits() + for _, f := range lastCandidate.Files { + lastCandidate.FilesIncluded.markBit(f.L0Index) + } + s.extendCandidateToRectangle( + lastCandidate.minIntervalIndex, lastCandidate.maxIntervalIndex, lastCandidate, false) + return lastCandidate + } + return nil +} + +// ExtendL0ForBaseCompactionTo extends the specified base compaction candidate +// L0CompactionFiles to optionally cover more files in L0 without "touching" any +// of the passed-in keys (i.e. the smallest/largest bounds are exclusive), as +// including any user keys for those internal keys could require choosing more +// files in LBase which is undesirable. Unbounded start/end keys are indicated +// by passing in the InvalidInternalKey. +func (s *L0Sublevels) ExtendL0ForBaseCompactionTo( + smallest, largest InternalKey, candidate *L0CompactionFiles, +) bool { + firstIntervalIndex := 0 + lastIntervalIndex := len(s.orderedIntervals) - 1 + if smallest.Kind() != base.InternalKeyKindInvalid { + if smallest.Trailer == base.InternalKeyRangeDeleteSentinel { + // Starting at smallest.UserKey == interval.startKey is okay. 
+ firstIntervalIndex = sort.Search(len(s.orderedIntervals), func(i int) bool { + return s.cmp(smallest.UserKey, s.orderedIntervals[i].startKey.key) <= 0 + }) + } else { + firstIntervalIndex = sort.Search(len(s.orderedIntervals), func(i int) bool { + // Need to start at >= smallest since if we widen too much we may miss + // an Lbase file that overlaps with an L0 file that will get picked in + // this widening, which would be bad. This interval will not start with + // an immediate successor key. + return s.cmp(smallest.UserKey, s.orderedIntervals[i].startKey.key) < 0 + }) + } + } + if largest.Kind() != base.InternalKeyKindInvalid { + // First interval that starts at or beyond the largest. This interval will not + // start with an immediate successor key. + lastIntervalIndex = sort.Search(len(s.orderedIntervals), func(i int) bool { + return s.cmp(largest.UserKey, s.orderedIntervals[i].startKey.key) <= 0 + }) + // Right now, lastIntervalIndex has a startKey that extends beyond largest. + // The previous interval, by definition, has an end key higher than largest. + // Iterate back twice to get the last interval that's completely within + // (smallest, largest). Except in the case where we went past the end of the + // list; in that case, the last interval to include is the very last + // interval in the list. + if lastIntervalIndex < len(s.orderedIntervals) { + lastIntervalIndex-- + } + lastIntervalIndex-- + } + if lastIntervalIndex < firstIntervalIndex { + return false + } + return s.extendCandidateToRectangle(firstIntervalIndex, lastIntervalIndex, candidate, true) +} + +// Best-effort attempt to make the compaction include more files in the +// rectangle defined by [minIntervalIndex, maxIntervalIndex] on the X axis and +// bounded on the Y axis by seedIntervalMinLevel and seedIntervalMaxLevel. 
+// +// This is strictly an optional extension; at any point where we can't feasibly +// add more files, the sublevel iteration can be halted early and candidate will +// still be a correct compaction candidate. +// +// Consider this scenario (original candidate is inside the rectangle), with +// isBase = true and interval bounds a-j (from the union of base file bounds and +// that of compaction candidate): +// +// _______ +// L0.3 a--d | g-j| +// L0.2 | f--j| r-t +// L0.1 b-d |e---j| +// L0.0 a--d | f--j| l--o p-----x +// +// Lbase a--------i m---------w +// +// This method will iterate from the bottom up. At L0.0, it will add a--d since +// it's in the bounds, then add b-d, then a--d, and so on, to produce this: +// +// _____________ +// L0.3 |a--d g-j| +// L0.2 | f--j| r-t +// L0.1 | b-d e---j| +// L0.0 |a--d f--j| l--o p-----x +// +// Lbase a-------i m---------w +// +// Let's assume that, instead of a--d in the top sublevel, we had 3 files, a-b, +// bb-c, and cc-d, of which bb-c is compacting. Let's also add another sublevel +// L0.4 with some files, all of which aren't compacting: +// +// L0.4 a------c ca--d _______ +// L0.3 a-b bb-c cc-d | g-j| +// L0.2 | f--j| r-t +// L0.1 b----------d |e---j| +// L0.0 a------------d | f--j| l--o p-----x +// +// Lbase a------------------i m---------w +// +// This method then needs to choose between the left side of L0.3 bb-c (i.e. +// a-b), or the right side (i.e. cc-d and g-j) for inclusion in this compaction. 
+// Since the right side has more files as well as one file that has already been +// picked, it gets chosen at that sublevel, resulting in this intermediate +// compaction: +// +// L0.4 a------c ca--d +// ______________ +// L0.3 a-b bb-c| cc-d g-j| +// L0.2 _________| f--j| r-t +// L0.1 | b----------d e---j| +// L0.0 |a------------d f--j| l--o p-----x +// +// Lbase a------------------i m---------w +// +// Since bb-c had to be excluded at L0.3, the interval bounds for L0.4 are +// actually ca-j, since ca is the next interval start key after the end interval +// of bb-c. This would result in only ca-d being chosen at that sublevel, even +// though a--c is also not compacting. This is the final result: +// +// ______________ +// L0.4 a------c|ca--d | +// L0.3 a-b bb-c| cc-d g-j| +// L0.2 _________| f--j| r-t +// L0.1 | b----------d e---j| +// L0.0 |a------------d f--j| l--o p-----x +// +// Lbase a------------------i m---------w +// +// TODO(bilal): Add more targeted tests for this method, through +// ExtendL0ForBaseCompactionTo and intraL0CompactionUsingSeed. +func (s *L0Sublevels) extendCandidateToRectangle( + minIntervalIndex int, maxIntervalIndex int, candidate *L0CompactionFiles, isBase bool, +) bool { + candidate.preExtensionMinInterval = candidate.minIntervalIndex + candidate.preExtensionMaxInterval = candidate.maxIntervalIndex + // Extend {min,max}IntervalIndex to include all of the candidate's current + // bounds. + if minIntervalIndex > candidate.minIntervalIndex { + minIntervalIndex = candidate.minIntervalIndex + } + if maxIntervalIndex < candidate.maxIntervalIndex { + maxIntervalIndex = candidate.maxIntervalIndex + } + var startLevel, increment, endLevel int + if isBase { + startLevel = 0 + increment = +1 + // seedIntervalMaxLevel is inclusive, while endLevel is exclusive. + endLevel = candidate.seedIntervalMaxLevel + 1 + } else { + startLevel = len(s.levelFiles) - 1 + increment = -1 + // seedIntervalMinLevel is inclusive, while endLevel is exclusive. 
+ endLevel = candidate.seedIntervalMinLevel - 1 + } + // Stats for files. + addedCount := 0 + // Iterate from the oldest sub-level for L0 -> Lbase and youngest sub-level + // for intra-L0. The idea here is that anything that can't be included from + // that level constrains what can be included from the next level. This + // change in constraint is directly incorporated into minIntervalIndex, + // maxIntervalIndex. + for sl := startLevel; sl != endLevel; sl += increment { + files := s.levelFiles[sl] + // Find the first file that overlaps with minIntervalIndex. + index := sort.Search(len(files), func(i int) bool { + return minIntervalIndex <= files[i].maxIntervalIndex + }) + // Track the files that are fully within the current constraint of + // [minIntervalIndex, maxIntervalIndex]. + firstIndex := -1 + lastIndex := -1 + for ; index < len(files); index++ { + f := files[index] + if f.minIntervalIndex > maxIntervalIndex { + break + } + include := true + // Extends out on the left so can't be included. This narrows what + // we can include in the next level. + if f.minIntervalIndex < minIntervalIndex { + include = false + minIntervalIndex = f.maxIntervalIndex + 1 + } + // Extends out on the right so can't be included. + if f.maxIntervalIndex > maxIntervalIndex { + include = false + maxIntervalIndex = f.minIntervalIndex - 1 + } + if !include { + continue + } + if firstIndex == -1 { + firstIndex = index + } + lastIndex = index + } + if minIntervalIndex > maxIntervalIndex { + // We excluded files that prevent continuation. + break + } + if firstIndex < 0 { + // No files to add in this sub-level. + continue + } + // We have the files in [firstIndex, lastIndex] as potential for + // inclusion. Some of these may already have been picked. Some of them + // may be already compacting. The latter is tricky since we have to + // decide whether to contract minIntervalIndex or maxIntervalIndex when + // we encounter an already compacting file. 
We pick the longest sequence + // between firstIndex and lastIndex of non-compacting files -- this is + // represented by [candidateNonCompactingFirst, + // candidateNonCompactingLast]. + nonCompactingFirst := -1 + currentRunHasAlreadyPickedFiles := false + candidateNonCompactingFirst := -1 + candidateNonCompactingLast := -1 + candidateHasAlreadyPickedFiles := false + for index = firstIndex; index <= lastIndex; index++ { + f := files[index] + if f.IsCompacting() { + if nonCompactingFirst != -1 { + last := index - 1 + // Prioritize runs of consecutive non-compacting files that + // have files that have already been picked. That is to say, + // if candidateHasAlreadyPickedFiles == true, we stick with + // it, and if currentRunHasAlreadyPickedFiles == true, we + // pick that run even if it contains fewer files than the + // previous candidate. + if !candidateHasAlreadyPickedFiles && (candidateNonCompactingFirst == -1 || + currentRunHasAlreadyPickedFiles || + (last-nonCompactingFirst) > (candidateNonCompactingLast-candidateNonCompactingFirst)) { + candidateNonCompactingFirst = nonCompactingFirst + candidateNonCompactingLast = last + candidateHasAlreadyPickedFiles = currentRunHasAlreadyPickedFiles + } + } + nonCompactingFirst = -1 + currentRunHasAlreadyPickedFiles = false + continue + } + if nonCompactingFirst == -1 { + nonCompactingFirst = index + } + if candidate.FilesIncluded[f.L0Index] { + currentRunHasAlreadyPickedFiles = true + } + } + // Logic duplicated from inside the for loop above. + if nonCompactingFirst != -1 { + last := index - 1 + if !candidateHasAlreadyPickedFiles && (candidateNonCompactingFirst == -1 || + currentRunHasAlreadyPickedFiles || + (last-nonCompactingFirst) > (candidateNonCompactingLast-candidateNonCompactingFirst)) { + candidateNonCompactingFirst = nonCompactingFirst + candidateNonCompactingLast = last + } + } + if candidateNonCompactingFirst == -1 { + // All files are compacting. 
There will be gaps that we could + // exploit to continue, but don't bother. + break + } + // May need to shrink [minIntervalIndex, maxIntervalIndex] for the next level. + if candidateNonCompactingFirst > firstIndex { + minIntervalIndex = files[candidateNonCompactingFirst-1].maxIntervalIndex + 1 + } + if candidateNonCompactingLast < lastIndex { + maxIntervalIndex = files[candidateNonCompactingLast+1].minIntervalIndex - 1 + } + for index := candidateNonCompactingFirst; index <= candidateNonCompactingLast; index++ { + f := files[index] + if f.IsCompacting() { + // TODO(bilal): Do a logger.Fatalf instead of a panic, for + // cleaner unwinding and error messages. + panic(fmt.Sprintf("expected %s to not be compacting", f.FileNum)) + } + if candidate.isIntraL0 && f.LargestSeqNum >= candidate.earliestUnflushedSeqNum { + continue + } + if !candidate.FilesIncluded[f.L0Index] { + addedCount++ + candidate.addFile(f) + } + } + } + return addedCount > 0 +} diff --git a/pebble/internal/manifest/l0_sublevels_test.go b/pebble/internal/manifest/l0_sublevels_test.go new file mode 100644 index 0000000..8cedb87 --- /dev/null +++ b/pebble/internal/manifest/l0_sublevels_test.go @@ -0,0 +1,620 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package manifest + +import ( + "bytes" + "fmt" + "io" + "math" + "os" + "slices" + "sort" + "strconv" + "strings" + "testing" + "time" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/testkeys" + "github.com/cockroachdb/pebble/record" + "github.com/stretchr/testify/require" + "golang.org/x/exp/rand" +) + +func readManifest(filename string) (*Version, error) { + f, err := os.Open(filename) + if err != nil { + return nil, err + } + defer f.Close() + rr := record.NewReader(f, 0 /* logNum */) + var v *Version + addedByFileNum := make(map[base.FileNum]*FileMetadata) + for { + r, err := rr.Next() + if err == io.EOF { + break + } + if err != nil { + return nil, err + } + var ve VersionEdit + if err = ve.Decode(r); err != nil { + return nil, err + } + var bve BulkVersionEdit + bve.AddedByFileNum = addedByFileNum + if err := bve.Accumulate(&ve); err != nil { + return nil, err + } + if v, err = bve.Apply(v, base.DefaultComparer.Compare, base.DefaultFormatter, 10<<20, 32000, nil, ProhibitSplitUserKeys); err != nil { + return nil, err + } + } + return v, nil +} + +func visualizeSublevels( + s *L0Sublevels, compactionFiles bitSet, otherLevels [][]*FileMetadata, +) string { + var buf strings.Builder + if compactionFiles == nil { + compactionFiles = newBitSet(s.levelMetadata.Len()) + } + largestChar := byte('a') + printLevel := func(files []*FileMetadata, level string, isL0 bool) { + lastChar := byte('a') + fmt.Fprintf(&buf, "L%s:", level) + for i := 0; i < 5-len(level); i++ { + buf.WriteByte(' ') + } + for j, f := range files { + for lastChar < f.Smallest.UserKey[0] { + buf.WriteString(" ") + lastChar++ + } + buf.WriteByte(f.Smallest.UserKey[0]) + middleChar := byte('-') + if isL0 { + if compactionFiles[f.L0Index] { + middleChar = '+' + } else if f.IsCompacting() { + if f.IsIntraL0Compacting { + middleChar = '^' + } else { + middleChar = 'v' + } + } + } else if f.IsCompacting() { + middleChar = 
'=' + } + if largestChar < f.Largest.UserKey[0] { + largestChar = f.Largest.UserKey[0] + } + if f.Smallest.UserKey[0] == f.Largest.UserKey[0] { + buf.WriteByte(f.Largest.UserKey[0]) + if compactionFiles[f.L0Index] { + buf.WriteByte('+') + } else if j < len(files)-1 { + buf.WriteByte(' ') + } + lastChar++ + continue + } + buf.WriteByte(middleChar) + buf.WriteByte(middleChar) + lastChar++ + for lastChar < f.Largest.UserKey[0] { + buf.WriteByte(middleChar) + buf.WriteByte(middleChar) + buf.WriteByte(middleChar) + lastChar++ + } + if f.Largest.IsExclusiveSentinel() && + j < len(files)-1 && files[j+1].Smallest.UserKey[0] == f.Largest.UserKey[0] { + // This case happens where two successive files have + // matching end/start user keys but where the left-side file + // has the sentinel key as its end key trailer. In this case + // we print the sstables as: + // + // a------d------g + // + continue + } + buf.WriteByte(middleChar) + buf.WriteByte(f.Largest.UserKey[0]) + if j < len(files)-1 { + buf.WriteByte(' ') + } + lastChar++ + } + fmt.Fprintf(&buf, "\n") + } + for i := len(s.levelFiles) - 1; i >= 0; i-- { + printLevel(s.levelFiles[i], fmt.Sprintf("0.%d", i), true) + } + for i := range otherLevels { + if len(otherLevels[i]) == 0 { + continue + } + printLevel(otherLevels[i], strconv.Itoa(i+1), false) + } + buf.WriteString(" ") + for b := byte('a'); b <= largestChar; b++ { + buf.WriteByte(b) + buf.WriteByte(b) + if b < largestChar { + buf.WriteByte(' ') + } + } + buf.WriteByte('\n') + return buf.String() +} + +func TestL0Sublevels(t *testing.T) { + parseMeta := func(s string) (*FileMetadata, error) { + parts := strings.Split(s, ":") + if len(parts) != 2 { + t.Fatalf("malformed table spec: %s", s) + } + fileNum, err := strconv.Atoi(strings.TrimSpace(parts[0])) + if err != nil { + return nil, err + } + fields := strings.Fields(parts[1]) + keyRange := strings.Split(strings.TrimSpace(fields[0]), "-") + m := (&FileMetadata{}).ExtendPointKeyBounds( + 
base.DefaultComparer.Compare, + base.ParseInternalKey(strings.TrimSpace(keyRange[0])), + base.ParseInternalKey(strings.TrimSpace(keyRange[1])), + ) + m.SmallestSeqNum = m.Smallest.SeqNum() + m.LargestSeqNum = m.Largest.SeqNum() + if m.Largest.IsExclusiveSentinel() { + m.LargestSeqNum = m.SmallestSeqNum + } + m.FileNum = base.FileNum(fileNum) + m.Size = uint64(256) + m.InitPhysicalBacking() + if len(fields) > 1 { + for _, field := range fields[1:] { + parts := strings.Split(field, "=") + switch parts[0] { + case "base_compacting": + m.IsIntraL0Compacting = false + m.CompactionState = CompactionStateCompacting + case "intra_l0_compacting": + m.IsIntraL0Compacting = true + m.CompactionState = CompactionStateCompacting + case "compacting": + m.CompactionState = CompactionStateCompacting + case "size": + sizeInt, err := strconv.Atoi(parts[1]) + if err != nil { + return nil, err + } + m.Size = uint64(sizeInt) + } + } + } + + return m, nil + } + + var err error + var fileMetas [NumLevels][]*FileMetadata + var explicitSublevels [][]*FileMetadata + var activeCompactions []L0Compaction + var sublevels *L0Sublevels + baseLevel := NumLevels - 1 + + datadriven.RunTest(t, "testdata/l0_sublevels", func(t *testing.T, td *datadriven.TestData) string { + pickBaseCompaction := false + level := 0 + addL0FilesOpt := false + switch td.Cmd { + case "add-l0-files": + addL0FilesOpt = true + level = 0 + fallthrough + case "define": + if !addL0FilesOpt { + fileMetas = [NumLevels][]*FileMetadata{} + baseLevel = NumLevels - 1 + activeCompactions = nil + } + explicitSublevels = [][]*FileMetadata{} + sublevel := -1 + addedL0Files := make([]*FileMetadata, 0) + for _, data := range strings.Split(td.Input, "\n") { + data = strings.TrimSpace(data) + switch data[:2] { + case "L0", "L1", "L2", "L3", "L4", "L5", "L6": + level, err = strconv.Atoi(data[1:2]) + if err != nil { + return err.Error() + } + if level == 0 && len(data) > 3 { + // Sublevel was specified. 
+ sublevel, err = strconv.Atoi(data[3:]) + if err != nil { + return err.Error() + } + } else { + sublevel = -1 + } + default: + meta, err := parseMeta(data) + if err != nil { + return err.Error() + } + if level != 0 && level < baseLevel { + baseLevel = level + } + fileMetas[level] = append(fileMetas[level], meta) + if level == 0 { + addedL0Files = append(addedL0Files, meta) + } + if sublevel != -1 { + for len(explicitSublevels) <= sublevel { + explicitSublevels = append(explicitSublevels, []*FileMetadata{}) + } + explicitSublevels[sublevel] = append(explicitSublevels[sublevel], meta) + } + } + } + + flushSplitMaxBytes := 64 + initialize := true + for _, arg := range td.CmdArgs { + switch arg.Key { + case "flush_split_max_bytes": + flushSplitMaxBytes, err = strconv.Atoi(arg.Vals[0]) + if err != nil { + t.Fatal(err) + } + case "no_initialize": + // This case is for use with explicitly-specified sublevels + // only. + initialize = false + } + } + SortBySeqNum(fileMetas[0]) + for i := 1; i < NumLevels; i++ { + SortBySmallest(fileMetas[i], base.DefaultComparer.Compare) + } + + levelMetadata := makeLevelMetadata(base.DefaultComparer.Compare, 0, fileMetas[0]) + if initialize { + if addL0FilesOpt { + SortBySeqNum(addedL0Files) + sublevels, err = sublevels.AddL0Files(addedL0Files, int64(flushSplitMaxBytes), &levelMetadata) + // Check if the output matches a full initialization. 
+ sublevels2, _ := NewL0Sublevels(&levelMetadata, base.DefaultComparer.Compare, base.DefaultFormatter, int64(flushSplitMaxBytes)) + if sublevels != nil && sublevels2 != nil { + require.Equal(t, sublevels.flushSplitUserKeys, sublevels2.flushSplitUserKeys) + require.Equal(t, sublevels.levelFiles, sublevels2.levelFiles) + } + } else { + sublevels, err = NewL0Sublevels( + &levelMetadata, + base.DefaultComparer.Compare, + base.DefaultFormatter, + int64(flushSplitMaxBytes)) + } + if err != nil { + return err.Error() + } + sublevels.InitCompactingFileInfo(nil) + } else { + // This case is for use with explicitly-specified sublevels + // only. + sublevels = &L0Sublevels{ + levelFiles: explicitSublevels, + cmp: base.DefaultComparer.Compare, + formatKey: base.DefaultFormatter, + levelMetadata: &levelMetadata, + } + for _, files := range explicitSublevels { + sublevels.Levels = append(sublevels.Levels, NewLevelSliceSpecificOrder(files)) + } + } + + if err != nil { + t.Fatal(err) + } + + var builder strings.Builder + builder.WriteString(sublevels.describe(true)) + builder.WriteString(visualizeSublevels(sublevels, nil, fileMetas[1:])) + return builder.String() + case "pick-base-compaction": + pickBaseCompaction = true + fallthrough + case "pick-intra-l0-compaction": + minCompactionDepth := 3 + earliestUnflushedSeqNum := uint64(math.MaxUint64) + for _, arg := range td.CmdArgs { + switch arg.Key { + case "min_depth": + minCompactionDepth, err = strconv.Atoi(arg.Vals[0]) + if err != nil { + t.Fatal(err) + } + case "earliest_unflushed_seqnum": + eusnInt, err := strconv.Atoi(arg.Vals[0]) + if err != nil { + t.Fatal(err) + } + earliestUnflushedSeqNum = uint64(eusnInt) + } + } + + var lcf *L0CompactionFiles + if pickBaseCompaction { + baseFiles := NewLevelSliceKeySorted(base.DefaultComparer.Compare, fileMetas[baseLevel]) + lcf, err = sublevels.PickBaseCompaction(minCompactionDepth, baseFiles) + if err == nil && lcf != nil { + // Try to extend the base compaction into a more 
rectangular + // shape, using the smallest/largest keys of the files before + // and after overlapping base files. This mimics the logic + // the compactor is expected to implement. + baseFiles := fileMetas[baseLevel] + firstFile := sort.Search(len(baseFiles), func(i int) bool { + return sublevels.cmp(baseFiles[i].Largest.UserKey, sublevels.orderedIntervals[lcf.minIntervalIndex].startKey.key) >= 0 + }) + lastFile := sort.Search(len(baseFiles), func(i int) bool { + return sublevels.cmp(baseFiles[i].Smallest.UserKey, sublevels.orderedIntervals[lcf.maxIntervalIndex+1].startKey.key) >= 0 + }) + startKey := base.InvalidInternalKey + endKey := base.InvalidInternalKey + if firstFile > 0 { + startKey = baseFiles[firstFile-1].Largest + } + if lastFile < len(baseFiles) { + endKey = baseFiles[lastFile].Smallest + } + sublevels.ExtendL0ForBaseCompactionTo( + startKey, + endKey, + lcf) + } + } else { + lcf, err = sublevels.PickIntraL0Compaction(earliestUnflushedSeqNum, minCompactionDepth) + } + if err != nil { + return fmt.Sprintf("error: %s", err.Error()) + } + if lcf == nil { + return "no compaction picked" + } + var builder strings.Builder + builder.WriteString(fmt.Sprintf("compaction picked with stack depth reduction %d\n", lcf.seedIntervalStackDepthReduction)) + for i, file := range lcf.Files { + builder.WriteString(file.FileNum.String()) + if i < len(lcf.Files)-1 { + builder.WriteByte(',') + } + } + startKey := sublevels.orderedIntervals[lcf.seedInterval].startKey + endKey := sublevels.orderedIntervals[lcf.seedInterval+1].startKey + builder.WriteString(fmt.Sprintf("\nseed interval: %s-%s\n", startKey.key, endKey.key)) + builder.WriteString(visualizeSublevels(sublevels, lcf.FilesIncluded, fileMetas[1:])) + + return builder.String() + case "read-amp": + return strconv.Itoa(sublevels.ReadAmplification()) + case "in-use-key-ranges": + var buf bytes.Buffer + for _, data := range strings.Split(strings.TrimSpace(td.Input), "\n") { + keyRange := 
strings.Split(strings.TrimSpace(data), "-") + smallest := []byte(strings.TrimSpace(keyRange[0])) + largest := []byte(strings.TrimSpace(keyRange[1])) + + keyRanges := sublevels.InUseKeyRanges(smallest, largest) + for i, r := range keyRanges { + fmt.Fprintf(&buf, "%s-%s", sublevels.formatKey(r.Start), sublevels.formatKey(r.End)) + if i < len(keyRanges)-1 { + fmt.Fprint(&buf, ", ") + } + } + if len(keyRanges) == 0 { + fmt.Fprint(&buf, ".") + } + fmt.Fprintln(&buf) + } + return buf.String() + case "flush-split-keys": + var builder strings.Builder + builder.WriteString("flush user split keys: ") + flushSplitKeys := sublevels.FlushSplitKeys() + for i, key := range flushSplitKeys { + builder.Write(key) + if i < len(flushSplitKeys)-1 { + builder.WriteString(", ") + } + } + if len(flushSplitKeys) == 0 { + builder.WriteString("none") + } + return builder.String() + case "max-depth-after-ongoing-compactions": + return strconv.Itoa(sublevels.MaxDepthAfterOngoingCompactions()) + case "l0-check-ordering": + for sublevel, files := range sublevels.levelFiles { + slice := NewLevelSliceSpecificOrder(files) + err := CheckOrdering(base.DefaultComparer.Compare, base.DefaultFormatter, + L0Sublevel(sublevel), slice.Iter(), ProhibitSplitUserKeys) + if err != nil { + return err.Error() + } + } + return "OK" + case "update-state-for-compaction": + var fileNums []base.FileNum + for _, arg := range td.CmdArgs { + switch arg.Key { + case "files": + for _, val := range arg.Vals { + fileNum, err := strconv.ParseUint(val, 10, 64) + if err != nil { + return err.Error() + } + fileNums = append(fileNums, base.FileNum(fileNum)) + } + } + } + files := make([]*FileMetadata, 0, len(fileNums)) + for _, num := range fileNums { + for _, f := range fileMetas[0] { + if f.FileNum == num { + f.CompactionState = CompactionStateCompacting + files = append(files, f) + break + } + } + } + slice := NewLevelSliceSeqSorted(files) + sm, la := KeyRange(base.DefaultComparer.Compare, slice.Iter()) + activeCompactions = 
append(activeCompactions, L0Compaction{Smallest: sm, Largest: la}) + if err := sublevels.UpdateStateForStartedCompaction([]LevelSlice{slice}, true); err != nil { + return err.Error() + } + return "OK" + case "describe": + var builder strings.Builder + builder.WriteString(sublevels.describe(true)) + builder.WriteString(visualizeSublevels(sublevels, nil, fileMetas[1:])) + return builder.String() + } + return fmt.Sprintf("unrecognized command: %s", td.Cmd) + }) +} + +func TestAddL0FilesEquivalence(t *testing.T) { + seed := uint64(time.Now().UnixNano()) + rng := rand.New(rand.NewSource(seed)) + t.Logf("seed: %d", seed) + + var inUseKeys [][]byte + const keyReusePct = 0.15 + var fileMetas []*FileMetadata + var s, s2 *L0Sublevels + keySpace := testkeys.Alpha(8) + + flushSplitMaxBytes := rng.Int63n(1 << 20) + + // The outer loop runs once for each version edit. The inner loop(s) run + // once for each file, or each file bound. + for i := 0; i < 100; i++ { + var filesToAdd []*FileMetadata + numFiles := 1 + rng.Intn(9) + keys := make([][]byte, 0, 2*numFiles) + for j := 0; j < 2*numFiles; j++ { + if rng.Float64() <= keyReusePct && len(inUseKeys) > 0 { + keys = append(keys, inUseKeys[rng.Intn(len(inUseKeys))]) + } else { + newKey := testkeys.Key(keySpace, rng.Int63n(keySpace.Count())) + inUseKeys = append(inUseKeys, newKey) + keys = append(keys, newKey) + } + } + slices.SortFunc(keys, bytes.Compare) + for j := 0; j < numFiles; j++ { + startKey := keys[j*2] + endKey := keys[j*2+1] + if bytes.Equal(startKey, endKey) { + continue + } + meta := (&FileMetadata{ + FileNum: base.FileNum(i*10 + j + 1), + Size: rng.Uint64n(1 << 20), + SmallestSeqNum: uint64(2*i + 1), + LargestSeqNum: uint64(2*i + 2), + }).ExtendPointKeyBounds( + base.DefaultComparer.Compare, + base.MakeInternalKey(startKey, uint64(2*i+1), base.InternalKeyKindSet), + base.MakeRangeDeleteSentinelKey(endKey), + ) + meta.InitPhysicalBacking() + fileMetas = append(fileMetas, meta) + filesToAdd = append(filesToAdd, meta) + 
} + if len(filesToAdd) == 0 { + continue + } + + levelMetadata := makeLevelMetadata(testkeys.Comparer.Compare, 0, fileMetas) + var err error + + if s2 == nil { + s2, err = NewL0Sublevels(&levelMetadata, testkeys.Comparer.Compare, testkeys.Comparer.FormatKey, flushSplitMaxBytes) + require.NoError(t, err) + } else { + // AddL0Files relies on the indices in FileMetadatas pointing to that of + // the previous L0Sublevels. So it must be called before NewL0Sublevels; + // calling it the other way around results in out-of-bounds panics. + SortBySeqNum(filesToAdd) + s2, err = s2.AddL0Files(filesToAdd, flushSplitMaxBytes, &levelMetadata) + require.NoError(t, err) + } + + s, err = NewL0Sublevels(&levelMetadata, testkeys.Comparer.Compare, testkeys.Comparer.FormatKey, flushSplitMaxBytes) + require.NoError(t, err) + + // Check for equivalence. + require.Equal(t, s.flushSplitUserKeys, s2.flushSplitUserKeys) + require.Equal(t, s.orderedIntervals, s2.orderedIntervals) + require.Equal(t, s.levelFiles, s2.levelFiles) + } +} + +func BenchmarkManifestApplyWithL0Sublevels(b *testing.B) { + b.ResetTimer() + for n := 0; n < b.N; n++ { + v, err := readManifest("testdata/MANIFEST_import") + require.NotNil(b, v) + require.NoError(b, err) + } +} + +func BenchmarkL0SublevelsInit(b *testing.B) { + v, err := readManifest("testdata/MANIFEST_import") + if err != nil { + b.Fatal(err) + } + b.ResetTimer() + for n := 0; n < b.N; n++ { + sl, err := NewL0Sublevels(&v.Levels[0], + base.DefaultComparer.Compare, base.DefaultFormatter, 5<<20) + require.NoError(b, err) + if sl == nil { + b.Fatal("expected non-nil L0Sublevels to be generated") + } + } +} + +func BenchmarkL0SublevelsInitAndPick(b *testing.B) { + v, err := readManifest("testdata/MANIFEST_import") + if err != nil { + b.Fatal(err) + } + b.ResetTimer() + for n := 0; n < b.N; n++ { + sl, err := NewL0Sublevels(&v.Levels[0], + base.DefaultComparer.Compare, base.DefaultFormatter, 5<<20) + require.NoError(b, err) + if sl == nil { + b.Fatal("expected 
non-nil L0Sublevels to be generated") + } + c, err := sl.PickBaseCompaction(2, LevelSlice{}) + require.NoError(b, err) + if c == nil { + b.Fatal("expected non-nil compaction to be generated") + } + } +} diff --git a/pebble/internal/manifest/level.go b/pebble/internal/manifest/level.go new file mode 100644 index 0000000..1a971f6 --- /dev/null +++ b/pebble/internal/manifest/level.go @@ -0,0 +1,46 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package manifest + +import "fmt" + +const ( + // 3 bits are necessary to represent level values from 0-6. + levelBits = 3 + levelMask = (1 << levelBits) - 1 + // invalidSublevel denotes an invalid or non-applicable sublevel. + invalidSublevel = -1 +) + +// Level encodes a level and optional sublevel for use in log and error +// messages. The encoding has the property that Level(0) == +// L0Sublevel(invalidSublevel). +type Level uint32 + +func makeLevel(level, sublevel int) Level { + return Level(((sublevel + 1) << levelBits) | level) +} + +// LevelToInt returns the int representation of a Level +func LevelToInt(l Level) int { + return int(l) & levelMask +} + +// L0Sublevel returns a Level representing the specified L0 sublevel. +func L0Sublevel(sublevel int) Level { + if sublevel < 0 { + panic(fmt.Sprintf("invalid L0 sublevel: %d", sublevel)) + } + return makeLevel(0, sublevel) +} + +func (l Level) String() string { + level := int(l) & levelMask + sublevel := (int(l) >> levelBits) - 1 + if sublevel != invalidSublevel { + return fmt.Sprintf("L%d.%d", level, sublevel) + } + return fmt.Sprintf("L%d", level) +} diff --git a/pebble/internal/manifest/level_metadata.go b/pebble/internal/manifest/level_metadata.go new file mode 100644 index 0000000..d48e277 --- /dev/null +++ b/pebble/internal/manifest/level_metadata.go @@ -0,0 +1,748 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. 
All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package manifest + +import ( + "bytes" + "fmt" + + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/invariants" +) + +// LevelMetadata contains metadata for all of the files within +// a level of the LSM. +type LevelMetadata struct { + level int + totalSize uint64 + // NumVirtual is the number of virtual sstables in the level. + NumVirtual uint64 + // VirtualSize is the size of the virtual sstables in the level. + VirtualSize uint64 + tree btree +} + +// clone makes a copy of the level metadata, implicitly increasing the ref +// count of every file contained within lm. +func (lm *LevelMetadata) clone() LevelMetadata { + return LevelMetadata{ + level: lm.level, + totalSize: lm.totalSize, + NumVirtual: lm.NumVirtual, + VirtualSize: lm.VirtualSize, + tree: lm.tree.Clone(), + } +} + +func (lm *LevelMetadata) release() (obsolete []*FileBacking) { + return lm.tree.Release() +} + +func makeLevelMetadata(cmp Compare, level int, files []*FileMetadata) LevelMetadata { + bcmp := btreeCmpSeqNum + if level > 0 { + bcmp = btreeCmpSmallestKey(cmp) + } + var lm LevelMetadata + lm.level = level + lm.tree, _ = makeBTree(bcmp, files) + for _, f := range files { + lm.totalSize += f.Size + if f.Virtual { + lm.NumVirtual++ + lm.VirtualSize += f.Size + } + } + return lm +} + +func makeBTree(cmp btreeCmp, files []*FileMetadata) (btree, LevelSlice) { + var t btree + t.cmp = cmp + for _, f := range files { + t.Insert(f) + } + return t, newLevelSlice(t.Iter()) +} + +func (lm *LevelMetadata) insert(f *FileMetadata) error { + if err := lm.tree.Insert(f); err != nil { + return err + } + lm.totalSize += f.Size + if f.Virtual { + lm.NumVirtual++ + lm.VirtualSize += f.Size + } + return nil +} + +func (lm *LevelMetadata) remove(f *FileMetadata) bool { + lm.totalSize -= f.Size + if f.Virtual { + lm.NumVirtual-- + lm.VirtualSize -= 
f.Size + } + return lm.tree.Delete(f) +} + +// Empty indicates whether there are any files in the level. +func (lm *LevelMetadata) Empty() bool { + return lm.tree.Count() == 0 +} + +// Len returns the number of files within the level. +func (lm *LevelMetadata) Len() int { + return lm.tree.Count() +} + +// Size returns the cumulative size of all the files within the level. +func (lm *LevelMetadata) Size() uint64 { + return lm.totalSize +} + +// Iter constructs a LevelIterator over the entire level. +func (lm *LevelMetadata) Iter() LevelIterator { + return LevelIterator{iter: lm.tree.Iter()} +} + +// Slice constructs a slice containing the entire level. +func (lm *LevelMetadata) Slice() LevelSlice { + return newLevelSlice(lm.tree.Iter()) +} + +// Find finds the provided file in the level if it exists. +func (lm *LevelMetadata) Find(cmp base.Compare, m *FileMetadata) *LevelFile { + iter := lm.Iter() + if lm.level != 0 { + // If lm holds files for levels >0, we can narrow our search by binary + // searching by bounds. + o := overlaps(iter, cmp, m.Smallest.UserKey, + m.Largest.UserKey, m.Largest.IsExclusiveSentinel()) + iter = o.Iter() + } + for f := iter.First(); f != nil; f = iter.Next() { + if f == m { + lf := iter.Take() + return &lf + } + } + return nil +} + +// Annotation lazily calculates and returns the annotation defined by +// Annotator. The Annotator is used as the key for pre-calculated +// values, so equal Annotators must be used to avoid duplicate computations +// and cached annotations. Annotation must not be called concurrently, and in +// practice this is achieved by requiring callers to hold DB.mu. +func (lm *LevelMetadata) Annotation(annotator Annotator) interface{} { + if lm.Empty() { + return annotator.Zero(nil) + } + v, _ := lm.tree.root.Annotation(annotator) + return v +} + +// InvalidateAnnotation clears any cached annotations defined by Annotator. 
The +// Annotator is used as the key for pre-calculated values, so equal Annotators +// must be used to clear the appropriate cached annotation. InvalidateAnnotation +// must not be called concurrently, and in practice this is achieved by +// requiring callers to hold DB.mu. +func (lm *LevelMetadata) InvalidateAnnotation(annotator Annotator) { + if lm.Empty() { + return + } + lm.tree.root.InvalidateAnnotation(annotator) +} + +// LevelFile holds a file's metadata along with its position +// within a level of the LSM. +type LevelFile struct { + *FileMetadata + slice LevelSlice +} + +// Slice constructs a LevelSlice containing only this file. +func (lf LevelFile) Slice() LevelSlice { + return lf.slice +} + +// NewLevelSliceSeqSorted constructs a LevelSlice over the provided files, +// sorted by the L0 sequence number sort order. +// TODO(jackson): Can we improve this interface or avoid needing to export +// a slice constructor like this? +func NewLevelSliceSeqSorted(files []*FileMetadata) LevelSlice { + tr, slice := makeBTree(btreeCmpSeqNum, files) + tr.Release() + slice.verifyInvariants() + return slice +} + +// NewLevelSliceKeySorted constructs a LevelSlice over the provided files, +// sorted by the files smallest keys. +// TODO(jackson): Can we improve this interface or avoid needing to export +// a slice constructor like this? +func NewLevelSliceKeySorted(cmp base.Compare, files []*FileMetadata) LevelSlice { + tr, slice := makeBTree(btreeCmpSmallestKey(cmp), files) + tr.Release() + slice.verifyInvariants() + return slice +} + +// NewLevelSliceSpecificOrder constructs a LevelSlice over the provided files, +// ordering the files by their order in the provided slice. It's used in +// tests. +// TODO(jackson): Update tests to avoid requiring this and remove it. 
+func NewLevelSliceSpecificOrder(files []*FileMetadata) LevelSlice { + tr, slice := makeBTree(btreeCmpSpecificOrder(files), files) + tr.Release() + slice.verifyInvariants() + return slice +} + +// newLevelSlice constructs a new LevelSlice backed by iter. +func newLevelSlice(iter iterator) LevelSlice { + s := LevelSlice{iter: iter} + if iter.r != nil { + s.length = iter.r.subtreeCount + } + s.verifyInvariants() + return s +} + +// newBoundedLevelSlice constructs a new LevelSlice backed by iter and bounded +// by the provided start and end bounds. The provided startBound and endBound +// iterators must be iterators over the same B-Tree. Both start and end bounds +// are inclusive. +func newBoundedLevelSlice(iter iterator, startBound, endBound *iterator) LevelSlice { + s := LevelSlice{ + iter: iter, + start: startBound, + end: endBound, + } + if iter.valid() { + s.length = endBound.countLeft() - startBound.countLeft() + // NB: The +1 is a consequence of the end bound being inclusive. + if endBound.valid() { + s.length++ + } + // NB: A slice that's empty due to its bounds may have an endBound + // positioned before the startBound due to the inclusive bounds. + // TODO(jackson): Consider refactoring the end boundary to be exclusive; + // it would simplify some areas (eg, here) and complicate others (eg, + // Reslice-ing to grow compactions). + if s.length < 0 { + s.length = 0 + } + } + s.verifyInvariants() + return s +} + +// LevelSlice contains a slice of the files within a level of the LSM. +// A LevelSlice is immutable once created, but may be used to construct a +// mutable LevelIterator over the slice's files. +// +// LevelSlices should be constructed through one of the existing constructors, +// not manually initialized. +type LevelSlice struct { + iter iterator + length int + // start and end form the inclusive bounds of a slice of files within a + // level of the LSM. They may be nil if the entire B-Tree backing iter is + // accessible. 
+ start *iterator + end *iterator +} + +func (ls LevelSlice) verifyInvariants() { + if invariants.Enabled { + i := ls.Iter() + var length int + for f := i.First(); f != nil; f = i.Next() { + length++ + } + if ls.length != length { + panic(fmt.Sprintf("LevelSlice %s has length %d value; actual length is %d", ls, ls.length, length)) + } + } +} + +// Each invokes fn for each element in the slice. +func (ls LevelSlice) Each(fn func(*FileMetadata)) { + iter := ls.Iter() + for f := iter.First(); f != nil; f = iter.Next() { + fn(f) + } +} + +// String implements fmt.Stringer. +func (ls LevelSlice) String() string { + var buf bytes.Buffer + fmt.Fprintf(&buf, "%d files: ", ls.length) + ls.Each(func(f *FileMetadata) { + if buf.Len() > 0 { + fmt.Fprintf(&buf, " ") + } + fmt.Fprint(&buf, f) + }) + return buf.String() +} + +// Empty indicates whether the slice contains any files. +func (ls *LevelSlice) Empty() bool { + return emptyWithBounds(ls.iter, ls.start, ls.end) +} + +// Iter constructs a LevelIterator that iterates over the slice. +func (ls *LevelSlice) Iter() LevelIterator { + return LevelIterator{ + start: ls.start, + end: ls.end, + iter: ls.iter.clone(), + } +} + +// Len returns the number of files in the slice. Its runtime is constant. +func (ls *LevelSlice) Len() int { + return ls.length +} + +// SizeSum sums the size of all files in the slice. Its runtime is linear in +// the length of the slice. +func (ls *LevelSlice) SizeSum() uint64 { + var sum uint64 + iter := ls.Iter() + for f := iter.First(); f != nil; f = iter.Next() { + sum += f.Size + } + return sum +} + +// NumVirtual returns the number of virtual sstables in the level. Its runtime is +// linear in the length of the slice. +func (ls *LevelSlice) NumVirtual() uint64 { + var n uint64 + iter := ls.Iter() + for f := iter.First(); f != nil; f = iter.Next() { + if f.Virtual { + n++ + } + } + return n +} + +// VirtualSizeSum returns the sum of the sizes of the virtual sstables in the +// level. 
+func (ls *LevelSlice) VirtualSizeSum() uint64 { + var sum uint64 + iter := ls.Iter() + for f := iter.First(); f != nil; f = iter.Next() { + if f.Virtual { + sum += f.Size + } + } + return sum +} + +// Reslice constructs a new slice backed by the same underlying level, with +// new start and end positions. Reslice invokes the provided function, passing +// two LevelIterators: one positioned to i's inclusive start and one +// positioned to i's inclusive end. The resliceFunc may move either iterator +// forward or backwards, including beyond the callee's original bounds to +// capture additional files from the underlying level. Reslice constructs and +// returns a new LevelSlice with the final bounds of the iterators after +// calling resliceFunc. +func (ls LevelSlice) Reslice(resliceFunc func(start, end *LevelIterator)) LevelSlice { + if ls.iter.r == nil { + return ls + } + var start, end LevelIterator + if ls.start == nil { + start.iter = ls.iter.clone() + start.iter.first() + } else { + start.iter = ls.start.clone() + } + if ls.end == nil { + end.iter = ls.iter.clone() + end.iter.last() + } else { + end.iter = ls.end.clone() + } + resliceFunc(&start, &end) + return newBoundedLevelSlice(start.iter.clone(), &start.iter, &end.iter) +} + +// KeyType is used to specify the type of keys we're looking for in +// LevelIterator positioning operations. Files not containing any keys of the +// desired type are skipped. +type KeyType int8 + +const ( + // KeyTypePointAndRange denotes a search among the entire keyspace, including + // both point keys and range keys. No sstables are skipped. + KeyTypePointAndRange KeyType = iota + // KeyTypePoint denotes a search among the point keyspace. SSTables with no + // point keys will be skipped. Note that the point keyspace includes rangedels. + KeyTypePoint + // KeyTypeRange denotes a search among the range keyspace. SSTables with no + // range keys will be skipped. 
+ KeyTypeRange +) + +type keyTypeAnnotator struct{} + +var _ Annotator = keyTypeAnnotator{} + +func (k keyTypeAnnotator) Zero(dst interface{}) interface{} { + var val *KeyType + if dst != nil { + val = dst.(*KeyType) + } else { + val = new(KeyType) + } + *val = KeyTypePoint + return val +} + +func (k keyTypeAnnotator) Accumulate(m *FileMetadata, dst interface{}) (interface{}, bool) { + v := dst.(*KeyType) + switch *v { + case KeyTypePoint: + if m.HasRangeKeys { + *v = KeyTypePointAndRange + } + case KeyTypePointAndRange: + // Do nothing. + default: + panic("unexpected key type") + } + return v, true +} + +func (k keyTypeAnnotator) Merge(src interface{}, dst interface{}) interface{} { + v := dst.(*KeyType) + srcVal := src.(*KeyType) + switch *v { + case KeyTypePoint: + if *srcVal == KeyTypePointAndRange { + *v = KeyTypePointAndRange + } + case KeyTypePointAndRange: + // Do nothing. + default: + panic("unexpected key type") + } + return v +} + +// LevelIterator iterates over a set of files' metadata. Its zero value is an +// empty iterator. 
+type LevelIterator struct { + iter iterator + start *iterator + end *iterator + filter KeyType +} + +func (i LevelIterator) String() string { + var buf bytes.Buffer + iter := i.iter.clone() + iter.first() + iter.prev() + if i.iter.pos == -1 { + fmt.Fprint(&buf, "()*") + } + iter.next() + for ; iter.valid(); iter.next() { + if buf.Len() > 0 { + fmt.Fprint(&buf, " ") + } + + if i.start != nil && cmpIter(iter, *i.start) == 0 { + fmt.Fprintf(&buf, " [ ") + } + isCurrentPos := cmpIter(iter, i.iter) == 0 + if isCurrentPos { + fmt.Fprint(&buf, " ( ") + } + fmt.Fprint(&buf, iter.cur().String()) + if isCurrentPos { + fmt.Fprint(&buf, " )*") + } + if i.end != nil && cmpIter(iter, *i.end) == 0 { + fmt.Fprintf(&buf, " ]") + } + } + if i.iter.n != nil && i.iter.pos >= i.iter.n.count { + if buf.Len() > 0 { + fmt.Fprint(&buf, " ") + } + fmt.Fprint(&buf, "()*") + } + return buf.String() +} + +// Clone copies the iterator, returning an independent iterator at the same +// position. +func (i *LevelIterator) Clone() LevelIterator { + if i.iter.r == nil { + return *i + } + // The start and end iterators are not cloned and are treated as + // immutable. + return LevelIterator{ + iter: i.iter.clone(), + start: i.start, + end: i.end, + filter: i.filter, + } +} + +// Current returns the item at the current iterator position. +// +// Current is deprecated. Callers should instead use the return value of a +// positioning operation. +func (i *LevelIterator) Current() *FileMetadata { + if !i.iter.valid() || + (i.end != nil && cmpIter(i.iter, *i.end) > 0) || + (i.start != nil && cmpIter(i.iter, *i.start) < 0) { + return nil + } + return i.iter.cur() +} + +func (i *LevelIterator) empty() bool { + return emptyWithBounds(i.iter, i.start, i.end) +} + +// Filter clones the iterator and sets the desired KeyType as the key to filter +// files on. 
+func (i *LevelIterator) Filter(keyType KeyType) LevelIterator { + l := i.Clone() + l.filter = keyType + return l +} + +func emptyWithBounds(i iterator, start, end *iterator) bool { + // If i.r is nil, the iterator was constructed from an empty btree. + // If the end bound is before the start bound, the bounds represent an + // empty slice of the B-Tree. + return i.r == nil || (start != nil && end != nil && cmpIter(*end, *start) < 0) +} + +// First seeks to the first file in the iterator and returns it. +func (i *LevelIterator) First() *FileMetadata { + if i.empty() { + return nil + } + if i.start != nil { + i.iter = i.start.clone() + } else { + i.iter.first() + } + if !i.iter.valid() { + return nil + } + return i.skipFilteredForward(i.iter.cur()) +} + +// Last seeks to the last file in the iterator and returns it. +func (i *LevelIterator) Last() *FileMetadata { + if i.empty() { + return nil + } + if i.end != nil { + i.iter = i.end.clone() + } else { + i.iter.last() + } + if !i.iter.valid() { + return nil + } + return i.skipFilteredBackward(i.iter.cur()) +} + +// Next advances the iterator to the next file and returns it. +func (i *LevelIterator) Next() *FileMetadata { + if i.iter.r == nil { + return nil + } + if invariants.Enabled && (i.iter.pos >= i.iter.n.count || (i.end != nil && cmpIter(i.iter, *i.end) > 0)) { + panic("pebble: cannot next forward-exhausted iterator") + } + i.iter.next() + if !i.iter.valid() { + return nil + } + return i.skipFilteredForward(i.iter.cur()) +} + +// Prev moves the iterator the previous file and returns it. 
+func (i *LevelIterator) Prev() *FileMetadata { + if i.iter.r == nil { + return nil + } + if invariants.Enabled && (i.iter.pos < 0 || (i.start != nil && cmpIter(i.iter, *i.start) < 0)) { + panic("pebble: cannot prev backward-exhausted iterator") + } + i.iter.prev() + if !i.iter.valid() { + return nil + } + return i.skipFilteredBackward(i.iter.cur()) +} + +// SeekGE seeks to the first file in the iterator's file set with a largest +// user key greater than or equal to the provided user key. The iterator must +// have been constructed from L1+, because it requires the underlying files to +// be sorted by user keys and non-overlapping. +func (i *LevelIterator) SeekGE(cmp Compare, userKey []byte) *FileMetadata { + // TODO(jackson): Assert that i.iter.cmp == btreeCmpSmallestKey. + if i.iter.r == nil { + return nil + } + m := i.seek(func(m *FileMetadata) bool { + return cmp(m.Largest.UserKey, userKey) >= 0 + }) + if i.filter != KeyTypePointAndRange && m != nil { + b, ok := m.LargestBound(i.filter) + if !ok { + m = i.Next() + } else if c := cmp(b.UserKey, userKey); c < 0 || c == 0 && b.IsExclusiveSentinel() { + // This file does not contain any keys of the type ≥ lower. It + // should be filtered, even though it does contain point keys. + m = i.Next() + } + } + return i.skipFilteredForward(m) +} + +// SeekLT seeks to the last file in the iterator's file set with a smallest +// user key less than the provided user key. The iterator must have been +// constructed from L1+, because it requires the underlying files to be sorted +// by user keys and non-overlapping. +func (i *LevelIterator) SeekLT(cmp Compare, userKey []byte) *FileMetadata { + // TODO(jackson): Assert that i.iter.cmp == btreeCmpSmallestKey. 
+ if i.iter.r == nil { + return nil + } + i.seek(func(m *FileMetadata) bool { + return cmp(m.Smallest.UserKey, userKey) >= 0 + }) + m := i.Prev() + // Although i.Prev() guarantees that the current file contains keys of the + // relevant type, it doesn't guarantee that the keys of the relevant type + // are < userKey. + if i.filter != KeyTypePointAndRange && m != nil { + b, ok := m.SmallestBound(i.filter) + if !ok { + panic("unreachable") + } + if c := cmp(b.UserKey, userKey); c >= 0 { + // This file does not contain any keys of the type ≥ lower. It + // should be filtered, even though it does contain point keys. + m = i.Prev() + } + } + return i.skipFilteredBackward(m) +} + +// skipFilteredForward takes the file metadata at the iterator's current +// position, and skips forward if the current key-type filter (i.filter) +// excludes the file. It skips until it finds an unfiltered file or exhausts the +// level. If lower is != nil, skipFilteredForward skips any files that do not +// contain keys with the provided key-type ≥ lower. +// +// skipFilteredForward also enforces the upper bound, returning nil if at any +// point the upper bound is exceeded. +func (i *LevelIterator) skipFilteredForward(meta *FileMetadata) *FileMetadata { + for meta != nil && !meta.ContainsKeyType(i.filter) { + i.iter.next() + if !i.iter.valid() { + meta = nil + } else { + meta = i.iter.cur() + } + } + if meta != nil && i.end != nil && cmpIter(i.iter, *i.end) > 0 { + // Exceeded upper bound. + meta = nil + } + return meta +} + +// skipFilteredBackward takes the file metadata at the iterator's current +// position, and skips backward if the current key-type filter (i.filter) +// excludes the file. It skips until it finds an unfiltered file or exhausts the +// level. If upper is != nil, skipFilteredBackward skips any files that do not +// contain keys with the provided key-type < upper. 
+// +// skipFilteredBackward also enforces the lower bound, returning nil if at any +// point the lower bound is exceeded. +func (i *LevelIterator) skipFilteredBackward(meta *FileMetadata) *FileMetadata { + for meta != nil && !meta.ContainsKeyType(i.filter) { + i.iter.prev() + if !i.iter.valid() { + meta = nil + } else { + meta = i.iter.cur() + } + } + if meta != nil && i.start != nil && cmpIter(i.iter, *i.start) < 0 { + // Exceeded lower bound. + meta = nil + } + return meta +} + +func (i *LevelIterator) seek(fn func(*FileMetadata) bool) *FileMetadata { + i.iter.seek(fn) + + // i.iter.seek seeked in the unbounded underlying B-Tree. If the iterator + // has start or end bounds, we may have exceeded them. Reset to the bounds + // if necessary. + // + // NB: The LevelIterator and LevelSlice semantics require that a bounded + // LevelIterator/LevelSlice containing files x0, x1, ..., xn behave + // identically to an unbounded LevelIterator/LevelSlice of a B-Tree + // containing x0, x1, ..., xn. In other words, any files outside the + // LevelIterator's bounds should not influence the iterator's behavior. + // When seeking, this means a SeekGE that seeks beyond the end bound, + // followed by a Prev should return the last element within bounds. + if i.end != nil && cmpIter(i.iter, *i.end) > 0 { + i.iter = i.end.clone() + // Since seek(fn) positioned beyond i.end, we know there is nothing to + // return within bounds. + i.iter.next() + return nil + } else if i.start != nil && cmpIter(i.iter, *i.start) < 0 { + i.iter = i.start.clone() + } + if !i.iter.valid() { + return nil + } + return i.iter.cur() +} + +// Take constructs a LevelFile containing the file at the iterator's current +// position. Take panics if the iterator is not currently positioned over a +// file. 
+func (i *LevelIterator) Take() LevelFile { + m := i.Current() + if m == nil { + panic("Take called on invalid LevelIterator") + } + // LevelSlice's start and end fields are immutable and are positioned to + // the same position for a LevelFile because they're inclusive, so we can + // share one iterator stack between the two bounds. + boundsIter := i.iter.clone() + s := newBoundedLevelSlice(i.iter.clone(), &boundsIter, &boundsIter) + return LevelFile{ + FileMetadata: m, + slice: s, + } +} diff --git a/pebble/internal/manifest/level_metadata_test.go b/pebble/internal/manifest/level_metadata_test.go new file mode 100644 index 0000000..95ef91a --- /dev/null +++ b/pebble/internal/manifest/level_metadata_test.go @@ -0,0 +1,144 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package manifest + +import ( + "bytes" + "fmt" + "strings" + "testing" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/pebble/internal/base" + "github.com/stretchr/testify/require" +) + +func TestLevelIterator(t *testing.T) { + var level LevelSlice + datadriven.RunTest(t, "testdata/level_iterator", + func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "define": + var files []*FileMetadata + var startReslice int + var endReslice int + for _, metaStr := range strings.Split(d.Input, " ") { + switch metaStr { + case "[": + startReslice = len(files) + continue + case "]": + endReslice = len(files) + continue + case " ", "": + continue + default: + parts := strings.Split(metaStr, "-") + if len(parts) != 2 { + t.Fatalf("malformed table spec: %q", metaStr) + } + m := &FileMetadata{FileNum: base.FileNum(len(files) + 1)} + m.ExtendPointKeyBounds( + base.DefaultComparer.Compare, + base.ParseInternalKey(strings.TrimSpace(parts[0])), + base.ParseInternalKey(strings.TrimSpace(parts[1])), + ) + m.SmallestSeqNum = m.Smallest.SeqNum() + 
m.LargestSeqNum = m.Largest.SeqNum() + m.InitPhysicalBacking() + files = append(files, m) + } + } + level = NewLevelSliceKeySorted(base.DefaultComparer.Compare, files) + level = level.Reslice(func(start, end *LevelIterator) { + for i := 0; i < startReslice; i++ { + start.Next() + } + for i := len(files); i > endReslice; i-- { + end.Prev() + } + }) + return "" + + case "iter": + return runIterCmd(t, d, level.Iter(), false /* verbose */) + + default: + return fmt.Sprintf("unknown command %q", d.Cmd) + } + }) +} + +func TestLevelIteratorFiltered(t *testing.T) { + var level LevelSlice + datadriven.RunTest(t, "testdata/level_iterator_filtered", + func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "define": + var files []*FileMetadata + for _, metaStr := range strings.Split(d.Input, "\n") { + m, err := ParseFileMetadataDebug(metaStr) + require.NoError(t, err) + files = append(files, m) + } + level = NewLevelSliceKeySorted(base.DefaultComparer.Compare, files) + return "" + + case "iter": + var keyType string + d.ScanArgs(t, "key-type", &keyType) + iter := level.Iter() + switch keyType { + case "both": + // noop + case "points": + iter = iter.Filter(KeyTypePoint) + case "ranges": + iter = iter.Filter(KeyTypeRange) + } + return runIterCmd(t, d, iter, true /* verbose */) + + default: + return fmt.Sprintf("unknown command %q", d.Cmd) + } + }) +} + +func runIterCmd(t *testing.T, d *datadriven.TestData, iter LevelIterator, verbose bool) string { + var buf bytes.Buffer + for _, line := range strings.Split(d.Input, "\n") { + parts := strings.Fields(line) + if len(parts) == 0 { + continue + } + var m *FileMetadata + switch parts[0] { + case "first": + m = iter.First() + case "last": + m = iter.Last() + case "next": + m = iter.Next() + case "prev": + m = iter.Prev() + case "seek-ge": + m = iter.SeekGE(base.DefaultComparer.Compare, []byte(parts[1])) + case "seek-lt": + m = iter.SeekLT(base.DefaultComparer.Compare, []byte(parts[1])) + default: + return 
fmt.Sprintf("unknown command %q", parts[0]) + } + if m == nil { + fmt.Fprintln(&buf, ".") + } else { + if verbose { + fmt.Fprintln(&buf, m.DebugString(base.DefaultComparer.FormatKey, verbose)) + } else { + fmt.Fprintln(&buf, m) + } + } + } + return buf.String() +} diff --git a/pebble/internal/manifest/level_test.go b/pebble/internal/manifest/level_test.go new file mode 100644 index 0000000..0b9aa7f --- /dev/null +++ b/pebble/internal/manifest/level_test.go @@ -0,0 +1,64 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package manifest + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestLevel(t *testing.T) { + testCases := []struct { + level int + expected string + }{ + {0, "L0"}, + {1, "L1"}, + {2, "L2"}, + {3, "L3"}, + {4, "L4"}, + {5, "L5"}, + {6, "L6"}, + {7, "L7"}, + } + + for _, c := range testCases { + t.Run("", func(t *testing.T) { + s := Level(c.level).String() + require.EqualValues(t, c.expected, s) + }) + } +} + +func TestL0Sublevel(t *testing.T) { + testCases := []struct { + level int + sublevel int + expected string + }{ + {0, 0, "L0.0"}, + {0, 1, "L0.1"}, + {0, 2, "L0.2"}, + {0, 1000, "L0.1000"}, + {0, -1, "invalid L0 sublevel: -1"}, + {0, -2, "invalid L0 sublevel: -2"}, + } + + for _, c := range testCases { + t.Run("", func(t *testing.T) { + s := func() (result string) { + defer func() { + if r := recover(); r != nil { + result = fmt.Sprint(r) + } + }() + return L0Sublevel(c.sublevel).String() + }() + require.EqualValues(t, c.expected, s) + }) + } +} diff --git a/pebble/internal/manifest/testdata/MANIFEST_import b/pebble/internal/manifest/testdata/MANIFEST_import new file mode 100644 index 0000000..3c5a010 Binary files /dev/null and b/pebble/internal/manifest/testdata/MANIFEST_import differ diff --git a/pebble/internal/manifest/testdata/file_metadata_bounds 
b/pebble/internal/manifest/testdata/file_metadata_bounds new file mode 100644 index 0000000..f849d44 --- /dev/null +++ b/pebble/internal/manifest/testdata/file_metadata_bounds @@ -0,0 +1,81 @@ +# Points only (single update). + +extend-point-key-bounds +a.SET.0 - z.DEL.42 +---- +000000:[a#0,SET-z#42,DEL] seqnums:[0-0] points:[a#0,SET-z#42,DEL] + bounds: (smallest=point,largest=point) (0x00000111) + +# Rangedels only (single update). + +reset +---- + +extend-point-key-bounds +a.RANGEDEL.0:z +---- +000000:[a#0,RANGEDEL-z#inf,RANGEDEL] seqnums:[0-0] points:[a#0,RANGEDEL-z#inf,RANGEDEL] + bounds: (smallest=point,largest=point) (0x00000111) + +# Range keys only (single update). + +reset +---- + +extend-range-key-bounds +a.RANGEKEYSET.0:z +---- +000000:[a#0,RANGEKEYSET-z#inf,RANGEKEYSET] seqnums:[0-0] ranges:[a#0,RANGEKEYSET-z#inf,RANGEKEYSET] + bounds: (smallest=range,largest=range) (0x00000000) + +# Multiple updates with various key kinds. + +reset +---- + +extend-point-key-bounds +m.SET.0 - n.SET.0 +---- +000000:[m#0,SET-n#0,SET] seqnums:[0-0] points:[m#0,SET-n#0,SET] + bounds: (smallest=point,largest=point) (0x00000111) + +# Extend the lower point key bound. + +extend-point-key-bounds +j.SET.0 - k.SET.0 +---- +000000:[j#0,SET-n#0,SET] seqnums:[0-0] points:[j#0,SET-n#0,SET] + bounds: (smallest=point,largest=point) (0x00000111) + +# Extend the upper point key bound with a rangedel. + +extend-point-key-bounds +k.RANGEDEL.0:o +---- +000000:[j#0,SET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] + bounds: (smallest=point,largest=point) (0x00000111) + +# Extend the lower bounds bound with a range key. 
+ +extend-range-key-bounds +a.RANGEKEYSET.42:m +---- +000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] + bounds: (smallest=range,largest=point) (0x00000101) + +# Extend again with a wide range key (equal keys tiebreak on seqnums descending, +# so the overall lower bound is unchanged). + +extend-range-key-bounds +a.RANGEKEYSET.0:z +---- +000000:[a#42,RANGEKEYSET-z#inf,RANGEKEYSET] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-z#inf,RANGEKEYSET] + bounds: (smallest=range,largest=range) (0x00000001) + +# Extend again with a wide rangedel over the same range. + +extend-point-key-bounds +A.RANGEDEL.0:y +---- +000000:[A#0,RANGEDEL-z#inf,RANGEKEYSET] seqnums:[0-0] points:[A#0,RANGEDEL-y#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-z#inf,RANGEKEYSET] + bounds: (smallest=point,largest=range) (0x00000011) diff --git a/pebble/internal/manifest/testdata/l0_sublevels b/pebble/internal/manifest/testdata/l0_sublevels new file mode 100644 index 0000000..f190ed2 --- /dev/null +++ b/pebble/internal/manifest/testdata/l0_sublevels @@ -0,0 +1,1766 @@ + +define +L0 + 000009:a.SET.10-b.SET.10 + 000007:c.SET.6-d.SET.8 + 000003:e.SET.5-j.SET.7 +---- +file count: 3, sublevels: 1, intervals: 6 +flush split keys(3): [b, d, j] +0.0: file count: 3, bytes: 768, width (mean, max): 1.0, 1, interval range: [0, 4] + 000009:[a#10,1-b#10,1] + 000007:[c#6,1-d#8,1] + 000003:[e#5,1-j#7,1] +compacting file count: 0, base compacting intervals: none +L0.0: a---b c---d e---------------j + aa bb cc dd ee ff gg hh ii jj + +in-use-key-ranges +a-z +a-c +aa-cc +f-g +e-j +---- +a-b, c-d, e-j +a-b, c-d +a-b, c-d +e-j +e-j + +define +L0 + 000009:a.SET.10-b.SET.10 + 000007:b.SET.6-j.SET.8 + 000003:e.SET.5-j.SET.7 +---- +file count: 3, sublevels: 3, intervals: 5 +flush split keys(2): [b, j] +0.2: file count: 1, bytes: 256, width (mean, max): 2.0, 2, interval range: [0, 1] + 000009:[a#10,1-b#10,1] +0.1: file count: 1, 
bytes: 256, width (mean, max): 3.0, 3, interval range: [1, 3] + 000007:[b#6,1-j#8,1] +0.0: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [3, 3] + 000003:[e#5,1-j#7,1] +compacting file count: 0, base compacting intervals: none +L0.2: a---b +L0.1: b------------------------j +L0.0: e---------------j + aa bb cc dd ee ff gg hh ii jj + +in-use-key-ranges +a-z +a-b +a-aa +b-bb +b-j +j-j +---- +a-j +a-j +a-b +b-j +b-j +e-j + +define no_initialize +L0.2 + 000009:a.SET.10-b.SET.10 +L0.1 + 000003:e.SET.5-j.SET.7 +L0.0 + 000007:b.SET.6-j.SET.8 +---- +file count: 3, sublevels: 3, intervals: 0 +flush split keys(0): [] +0.2: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [0, 0] + 000009:[a#10,1-b#10,1] +0.1: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [0, 0] + 000003:[e#5,1-j#7,1] +0.0: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [0, 0] + 000007:[b#6,1-j#8,1] +compacting file count: 0, base compacting intervals: none +L0.2: a---b +L0.1: e---------------j +L0.0: b------------------------j + aa bb cc dd ee ff gg hh ii jj + +l0-check-ordering +---- +OK + +define no_initialize +L0.1 + 000009:a.SET.10-b.SET.10 +L0.0 + 000007:b.SET.6-j.SET.8 + 000003:e.SET.5-j.SET.7 +---- +file count: 3, sublevels: 2, intervals: 0 +flush split keys(0): [] +0.1: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [0, 0] + 000009:[a#10,1-b#10,1] +0.0: file count: 2, bytes: 512, width (mean, max): 1.0, 1, interval range: [0, 0] + 000007:[b#6,1-j#8,1] + 000003:[e#5,1-j#7,1] +compacting file count: 0, base compacting intervals: none +L0.1: a---b +L0.0: b------------------------j e---j + aa bb cc dd ee ff gg hh ii jj + +l0-check-ordering +---- +L0.0 files 000007 and 000003 have overlapping ranges: [b#6,SET-j#8,SET] vs [e#5,SET-j#7,SET] + +define +L0 + 000001:a.SET.2-b.SET.3 + 000002:c.SET.3-d.SET.5 + 000003:e.SET.5-f.SET.7 + 000005:f.SET.6-h.SET.9 + 000006:f.SET.4-g.SET.5 + 
000009:f.SET.10-i.SET.10 + 000010:f.SET.11-g.SET.11 +L6 + 000007:a.SET.0-f.SET.0 + 000008:g.SET.0-s.SET.0 +---- +file count: 7, sublevels: 5, intervals: 10 +flush split keys(3): [d, f, g] +0.4: file count: 1, bytes: 256, width (mean, max): 2.0, 2, interval range: [5, 6] + 000010:[f#11,1-g#11,1] +0.3: file count: 1, bytes: 256, width (mean, max): 4.0, 4, interval range: [5, 8] + 000009:[f#10,1-i#10,1] +0.2: file count: 1, bytes: 256, width (mean, max): 3.0, 3, interval range: [5, 7] + 000005:[f#6,1-h#9,1] +0.1: file count: 1, bytes: 256, width (mean, max): 2.0, 2, interval range: [4, 5] + 000003:[e#5,1-f#7,1] +0.0: file count: 3, bytes: 768, width (mean, max): 1.3, 2, interval range: [0, 6] + 000001:[a#2,1-b#3,1] + 000002:[c#3,1-d#5,1] + 000006:[f#4,1-g#5,1] +compacting file count: 0, base compacting intervals: none +L0.4: f---g +L0.3: f---------i +L0.2: f------h +L0.1: e---f +L0.0: a---b c---d f---g +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +max-depth-after-ongoing-compactions +---- +5 + +pick-base-compaction min_depth=3 +---- +compaction picked with stack depth reduction 5 +000006,000003,000005,000009,000010,000001,000002 +seed interval: f-f +L0.4: f+++g +L0.3: f+++++++++i +L0.2: f++++++h +L0.1: e+++f +L0.0: a+++b c+++d f+++g +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +# SSTables 000001 and 000002 are optional additions to the above compaction, as they +# overlap with base files that overlap with L0 files in the seed interval. +# Marking 0002 as compacting should be enough to exclude both from the +# chosen compaction. 
+ +in-use-key-ranges +a-z +---- +a-b, c-d, e-i + +define +L0 + 000001:a.SET.2-b.SET.3 + 000002:c.SET.3-d.SET.5 intra_l0_compacting + 000003:e.SET.5-f.SET.7 + 000005:f.SET.6-h.SET.9 + 000006:f.SET.4-g.SET.5 + 000009:f.SET.10-i.SET.10 + 000010:f.SET.11-g.SET.11 +L6 + 000007:a.SET.0-f.SET.0 + 000008:g.SET.0-s.SET.0 +---- +file count: 7, sublevels: 5, intervals: 10 +flush split keys(3): [d, f, g] +0.4: file count: 1, bytes: 256, width (mean, max): 2.0, 2, interval range: [5, 6] + 000010:[f#11,1-g#11,1] +0.3: file count: 1, bytes: 256, width (mean, max): 4.0, 4, interval range: [5, 8] + 000009:[f#10,1-i#10,1] +0.2: file count: 1, bytes: 256, width (mean, max): 3.0, 3, interval range: [5, 7] + 000005:[f#6,1-h#9,1] +0.1: file count: 1, bytes: 256, width (mean, max): 2.0, 2, interval range: [4, 5] + 000003:[e#5,1-f#7,1] +0.0: file count: 3, bytes: 768, width (mean, max): 1.3, 2, interval range: [0, 6] + 000001:[a#2,1-b#3,1] + 000002:[c#3,1-d#5,1] + 000006:[f#4,1-g#5,1] +compacting file count: 1, base compacting intervals: none +L0.4: f---g +L0.3: f---------i +L0.2: f------h +L0.1: e---f +L0.0: a---b c^^^d f---g +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + + +pick-base-compaction min_depth=3 +---- +compaction picked with stack depth reduction 5 +000006,000003,000005,000009,000010 +seed interval: f-f +L0.4: f+++g +L0.3: f+++++++++i +L0.2: f++++++h +L0.1: e+++f +L0.0: a---b c^^^d f+++g +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +# Mark the above compaction as started. 
+ +update-state-for-compaction files=(000006,000003,000005,000009,000010) +---- +OK + +describe +---- +file count: 7, sublevels: 5, intervals: 10 +flush split keys(3): [d, f, g] +0.4: file count: 1, bytes: 256, width (mean, max): 2.0, 2, interval range: [5, 6] + 000010:[f#11,1-g#11,1] +0.3: file count: 1, bytes: 256, width (mean, max): 4.0, 4, interval range: [5, 8] + 000009:[f#10,1-i#10,1] +0.2: file count: 1, bytes: 256, width (mean, max): 3.0, 3, interval range: [5, 7] + 000005:[f#6,1-h#9,1] +0.1: file count: 1, bytes: 256, width (mean, max): 2.0, 2, interval range: [4, 5] + 000003:[e#5,1-f#7,1] +0.0: file count: 3, bytes: 768, width (mean, max): 1.3, 2, interval range: [0, 6] + 000001:[a#2,1-b#3,1] + 000002:[c#3,1-d#5,1] + 000006:[f#4,1-g#5,1] +compacting file count: 6, base compacting intervals: [4, 9] +L0.4: fvvvg +L0.3: fvvvvvvvvvi +L0.2: fvvvvvvh +L0.1: evvvf +L0.0: a---b c^^^d fvvvg +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +pick-base-compaction min_depth=3 +---- +no compaction picked + +# Extend one of the SSTables (000009) to the right, and place an SSTable "under" +# the extension (000011). This adds it to the compaction. 
+ +define +L0 + 000005:f.SET.6-h.SET.9 + 000006:f.SET.4-g.SET.5 + 000009:f.SET.10-p.SET.10 + 000010:f.SET.11-g.SET.11 + 000011:n.SET.8-p.SET.10 +L6 + 000007:a.SET.0-f.SET.0 + 000008:g.SET.0-s.SET.0 +---- +file count: 5, sublevels: 4, intervals: 5 +flush split keys(2): [g, p] +0.3: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [0, 0] + 000010:[f#11,1-g#11,1] +0.2: file count: 1, bytes: 256, width (mean, max): 4.0, 4, interval range: [0, 3] + 000009:[f#10,1-p#10,1] +0.1: file count: 1, bytes: 256, width (mean, max): 2.0, 2, interval range: [0, 1] + 000005:[f#6,1-h#9,1] +0.0: file count: 2, bytes: 512, width (mean, max): 1.0, 1, interval range: [0, 3] + 000006:[f#4,1-g#5,1] + 000011:[n#8,1-p#10,1] +compacting file count: 0, base compacting intervals: none +L0.3: f---g +L0.2: f------------------------------p +L0.1: f------h +L0.0: f---g n------p +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +in-use-key-ranges +a-z +---- +f-p + +pick-base-compaction min_depth=3 +---- +compaction picked with stack depth reduction 4 +000006,000005,000009,000011,000010 +seed interval: f-g +L0.3: f+++g +L0.2: f++++++++++++++++++++++++++++++p +L0.1: f++++++h +L0.0: f+++g n++++++p +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +# Set SSTable 000011 which is under/older SSTable 000009 to IsBaseCompacting = true. +# This should prevent SSTable 000009 from participating in a base compaction. 
+ +define +L0 + 000005:f.SET.6-h.SET.9 + 000006:f.SET.4-g.SET.5 + 000009:f.SET.10-p.SET.10 + 000010:f.SET.11-g.SET.11 + 000011:n.SET.8-p.SET.9 base_compacting +L6 + 000007:a.SET.0-f.SET.0 + 000008:g.SET.0-s.SET.0 +---- +file count: 5, sublevels: 4, intervals: 5 +flush split keys(2): [g, p] +0.3: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [0, 0] + 000010:[f#11,1-g#11,1] +0.2: file count: 1, bytes: 256, width (mean, max): 4.0, 4, interval range: [0, 3] + 000009:[f#10,1-p#10,1] +0.1: file count: 1, bytes: 256, width (mean, max): 2.0, 2, interval range: [0, 1] + 000005:[f#6,1-h#9,1] +0.0: file count: 2, bytes: 512, width (mean, max): 1.0, 1, interval range: [0, 3] + 000006:[f#4,1-g#5,1] + 000011:[n#8,1-p#9,1] +compacting file count: 1, base compacting intervals: [3, 4] +L0.3: f---g +L0.2: f------------------------------p +L0.1: f------h +L0.0: f---g nvvvvvvp +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +pick-base-compaction min_depth=3 +---- +no compaction picked + +pick-intra-l0-compaction min_depth=3 +---- +compaction picked with stack depth reduction 4 +000010,000009,000005,000006 +seed interval: f-g +L0.3: f+++g +L0.2: f++++++++++++++++++++++++++++++p +L0.1: f++++++h +L0.0: f+++g nvvvvvvp +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +# Raise 000009 to a higher level, so that there's still a stack depth of 3 below +# it. This should make f-g a candidate for base compaction again. 
+ +define +L0 + 000005:f.SET.6-h.SET.9 + 000006:f.SET.4-g.SET.5 + 000009:f.SET.12-p.SET.12 + 000010:f.SET.11-g.SET.11 + 000011:n.SET.8-p.SET.10 base_compacting +L6 + 000007:a.SET.0-f.SET.0 + 000008:g.SET.0-s.SET.0 +---- +file count: 5, sublevels: 4, intervals: 5 +flush split keys(2): [g, p] +0.3: file count: 1, bytes: 256, width (mean, max): 4.0, 4, interval range: [0, 3] + 000009:[f#12,1-p#12,1] +0.2: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [0, 0] + 000010:[f#11,1-g#11,1] +0.1: file count: 1, bytes: 256, width (mean, max): 2.0, 2, interval range: [0, 1] + 000005:[f#6,1-h#9,1] +0.0: file count: 2, bytes: 512, width (mean, max): 1.0, 1, interval range: [0, 3] + 000006:[f#4,1-g#5,1] + 000011:[n#8,1-p#10,1] +compacting file count: 1, base compacting intervals: [3, 4] +L0.3: f------------------------------p +L0.2: f---g +L0.1: f------h +L0.0: f---g nvvvvvvp +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +pick-base-compaction min_depth=3 +---- +compaction picked with stack depth reduction 3 +000006,000005,000010 +seed interval: f-g +L0.3: f------------------------------p +L0.2: f+++g +L0.1: f++++++h +L0.0: f+++g nvvvvvvp +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +pick-intra-l0-compaction min_depth=3 +---- +compaction picked with stack depth reduction 4 +000009,000010,000005,000006 +seed interval: f-g +L0.3: f++++++++++++++++++++++++++++++p +L0.2: f+++g +L0.1: f++++++h +L0.0: f+++g nvvvvvvp +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +max-depth-after-ongoing-compactions +---- +4 + +# Assume the above base compaction is chosen. This should reduce max depth after +# ongoing compactions. 
+ +define +L0 + 000005:f.SET.6-h.SET.9 base_compacting + 000006:f.SET.4-g.SET.5 base_compacting + 000009:f.SET.12-p.SET.12 + 000010:f.SET.11-g.SET.11 base_compacting + 000011:n.SET.8-p.SET.10 base_compacting +L6 + 000007:a.SET.0-f.SET.0 + 000008:g.SET.0-s.SET.0 +---- +file count: 5, sublevels: 4, intervals: 5 +flush split keys(2): [g, p] +0.3: file count: 1, bytes: 256, width (mean, max): 4.0, 4, interval range: [0, 3] + 000009:[f#12,1-p#12,1] +0.2: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [0, 0] + 000010:[f#11,1-g#11,1] +0.1: file count: 1, bytes: 256, width (mean, max): 2.0, 2, interval range: [0, 1] + 000005:[f#6,1-h#9,1] +0.0: file count: 2, bytes: 512, width (mean, max): 1.0, 1, interval range: [0, 3] + 000006:[f#4,1-g#5,1] + 000011:[n#8,1-p#10,1] +compacting file count: 4, base compacting intervals: [0, 1], [3, 4] +L0.3: f------------------------------p +L0.2: fvvvg +L0.1: fvvvvvvh +L0.0: fvvvg nvvvvvvp +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +pick-base-compaction min_depth=3 +---- +no compaction picked + +pick-intra-l0-compaction min_depth=3 +---- +no compaction picked + +max-depth-after-ongoing-compactions +---- +1 + +# Ensure that when 000011 is not base compacting, it's chosen for compactions +# along with 000009. 
+ +define +L0 + 000005:f.SET.6-h.SET.9 + 000006:f.SET.4-g.SET.5 + 000009:f.SET.12-p.SET.12 + 000010:f.SET.11-g.SET.11 + 000011:n.SET.8-p.SET.10 +L6 + 000007:a.SET.0-f.SET.0 + 000008:g.SET.0-s.SET.0 +---- +file count: 5, sublevels: 4, intervals: 5 +flush split keys(2): [g, p] +0.3: file count: 1, bytes: 256, width (mean, max): 4.0, 4, interval range: [0, 3] + 000009:[f#12,1-p#12,1] +0.2: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [0, 0] + 000010:[f#11,1-g#11,1] +0.1: file count: 1, bytes: 256, width (mean, max): 2.0, 2, interval range: [0, 1] + 000005:[f#6,1-h#9,1] +0.0: file count: 2, bytes: 512, width (mean, max): 1.0, 1, interval range: [0, 3] + 000006:[f#4,1-g#5,1] + 000011:[n#8,1-p#10,1] +compacting file count: 0, base compacting intervals: none +L0.3: f------------------------------p +L0.2: f---g +L0.1: f------h +L0.0: f---g n------p +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +pick-base-compaction min_depth=3 +---- +compaction picked with stack depth reduction 4 +000006,000005,000010,000009,000011 +seed interval: f-g +L0.3: f++++++++++++++++++++++++++++++p +L0.2: f+++g +L0.1: f++++++h +L0.0: f+++g n++++++p +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +pick-intra-l0-compaction min_depth=3 +---- +compaction picked with stack depth reduction 4 +000009,000010,000005,000006,000011 +seed interval: f-g +L0.3: f++++++++++++++++++++++++++++++p +L0.2: f+++g +L0.1: f++++++h +L0.0: f+++g n++++++p +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +# Don't pick a base compaction if the overlapping Lbase files are marked as +# compacting. 
+ +define +L0 + 000005:f.SET.6-h.SET.9 + 000006:f.SET.4-g.SET.5 + 000009:f.SET.12-p.SET.12 + 000010:f.SET.11-g.SET.11 + 000011:n.SET.8-p.SET.10 +L6 + 000007:a.SET.0-f.SET.0 + 000008:g.SET.0-s.SET.0 compacting +---- +file count: 5, sublevels: 4, intervals: 5 +flush split keys(2): [g, p] +0.3: file count: 1, bytes: 256, width (mean, max): 4.0, 4, interval range: [0, 3] + 000009:[f#12,1-p#12,1] +0.2: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [0, 0] + 000010:[f#11,1-g#11,1] +0.1: file count: 1, bytes: 256, width (mean, max): 2.0, 2, interval range: [0, 1] + 000005:[f#6,1-h#9,1] +0.0: file count: 2, bytes: 512, width (mean, max): 1.0, 1, interval range: [0, 3] + 000006:[f#4,1-g#5,1] + 000011:[n#8,1-p#10,1] +compacting file count: 0, base compacting intervals: none +L0.3: f------------------------------p +L0.2: f---g +L0.1: f------h +L0.0: f---g n------p +L6: a---------------f g====================================s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +pick-base-compaction min_depth=3 +---- +no compaction picked + +# Greatly increase the size of SSTable 000009, past 100 << 20. This should make +# it no longer a candidate for base compaction. 
+ +define +L0 + 000005:f.SET.6-h.SET.9 + 000006:f.SET.4-g.SET.5 + 000009:f.SET.12-p.SET.12 size=104859600 + 000010:f.SET.11-g.SET.11 + 000011:n.SET.8-p.SET.10 +L6 + 000007:a.SET.0-f.SET.0 + 000008:g.SET.0-s.SET.0 +---- +file count: 5, sublevels: 4, intervals: 5 +flush split keys(4): [g, h, n, p] +0.3: file count: 1, bytes: 104859600, width (mean, max): 4.0, 4, interval range: [0, 3] + 000009:[f#12,1-p#12,1] +0.2: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [0, 0] + 000010:[f#11,1-g#11,1] +0.1: file count: 1, bytes: 256, width (mean, max): 2.0, 2, interval range: [0, 1] + 000005:[f#6,1-h#9,1] +0.0: file count: 2, bytes: 512, width (mean, max): 1.0, 1, interval range: [0, 3] + 000006:[f#4,1-g#5,1] + 000011:[n#8,1-p#10,1] +compacting file count: 0, base compacting intervals: none +L0.3: f------------------------------p +L0.2: f---g +L0.1: f------h +L0.0: f---g n------p +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +pick-base-compaction min_depth=3 +---- +compaction picked with stack depth reduction 3 +000006,000005,000010,000011 +seed interval: f-g +L0.3: f------------------------------p +L0.2: f+++g +L0.1: f++++++h +L0.0: f+++g n++++++p +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +pick-intra-l0-compaction min_depth=3 +---- +compaction picked with stack depth reduction 4 +000009,000010,000005,000006,000011 +seed interval: f-g +L0.3: f++++++++++++++++++++++++++++++p +L0.2: f+++g +L0.1: f++++++h +L0.0: f+++g n++++++p +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +# However, when the size increase is applied to a lower sublevel that is +# necessary to include to meet the minimum stack depth reduction, we overlook +# the size difference and choose the file for compaction anyway. 
+ +define +L0 + 000005:f.SET.6-h.SET.9 + 000006:f.SET.4-g.SET.5 + 000009:f.SET.12-p.SET.12 + 000010:f.SET.11-g.SET.11 size=104859600 + 000011:n.SET.8-p.SET.10 +L6 + 000007:a.SET.0-f.SET.0 + 000008:g.SET.0-s.SET.0 +---- +file count: 5, sublevels: 4, intervals: 5 +flush split keys(2): [g, p] +0.3: file count: 1, bytes: 256, width (mean, max): 4.0, 4, interval range: [0, 3] + 000009:[f#12,1-p#12,1] +0.2: file count: 1, bytes: 104859600, width (mean, max): 1.0, 1, interval range: [0, 0] + 000010:[f#11,1-g#11,1] +0.1: file count: 1, bytes: 256, width (mean, max): 2.0, 2, interval range: [0, 1] + 000005:[f#6,1-h#9,1] +0.0: file count: 2, bytes: 512, width (mean, max): 1.0, 1, interval range: [0, 3] + 000006:[f#4,1-g#5,1] + 000011:[n#8,1-p#10,1] +compacting file count: 0, base compacting intervals: none +L0.3: f------------------------------p +L0.2: f---g +L0.1: f------h +L0.0: f---g n------p +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +pick-base-compaction min_depth=3 +---- +compaction picked with stack depth reduction 4 +000006,000005,000010,000009,000011 +seed interval: f-g +L0.3: f++++++++++++++++++++++++++++++p +L0.2: f+++g +L0.1: f++++++h +L0.0: f+++g n++++++p +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +pick-intra-l0-compaction min_depth=3 +---- +compaction picked with stack depth reduction 4 +000009,000010,000005,000006,000011 +seed interval: f-g +L0.3: f++++++++++++++++++++++++++++++p +L0.2: f+++g +L0.1: f++++++h +L0.0: f+++g n++++++p +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +read-amp +---- +4 + +# In L0.0, SST 000007 is marked as base compacting. There are two SSTs to the left +# of it in the sublevel, and one to its right. The ones to its left should be +# chosen by extendCandidateToRectangle. 
+ +define +L0 + 000004:h.SET.2-j.SET.4 + 000005:f.SET.6-h.SET.9 + 000006:f.SET.4-g.SET.5 + 000007:k.SET.2-l.SET.4 base_compacting + 000009:f.SET.12-p.SET.12 + 000010:f.SET.11-g.SET.11 + 000011:n.SET.8-p.SET.10 +L6 + 000012:a.SET.0-f.SET.0 + 000008:g.SET.0-s.SET.0 +---- +file count: 7, sublevels: 4, intervals: 9 +flush split keys(4): [g, h, l, p] +0.3: file count: 1, bytes: 256, width (mean, max): 8.0, 8, interval range: [0, 7] + 000009:[f#12,1-p#12,1] +0.2: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [0, 0] + 000010:[f#11,1-g#11,1] +0.1: file count: 1, bytes: 256, width (mean, max): 3.0, 3, interval range: [0, 2] + 000005:[f#6,1-h#9,1] +0.0: file count: 4, bytes: 1024, width (mean, max): 1.2, 2, interval range: [0, 7] + 000006:[f#4,1-g#5,1] + 000004:[h#2,1-j#4,1] + 000007:[k#2,1-l#4,1] + 000011:[n#8,1-p#10,1] +compacting file count: 1, base compacting intervals: [5, 5] +L0.3: f------------------------------p +L0.2: f---g +L0.1: f------h +L0.0: f---g h------j kvvvl n------p +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +pick-intra-l0-compaction min_depth=3 +---- +compaction picked with stack depth reduction 4 +000009,000010,000005,000006,000004 +seed interval: f-g +L0.3: f++++++++++++++++++++++++++++++p +L0.2: f+++g +L0.1: f++++++h +L0.0: f+++g h++++++j kvvvl n------p +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +pick-base-compaction min_depth=3 +---- +compaction picked with stack depth reduction 3 +000006,000005,000004,000010 +seed interval: f-g +L0.3: f------------------------------p +L0.2: f+++g +L0.1: f++++++h +L0.0: f+++g h++++++j kvvvl n------p +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + + +# Now shift the base_compacting marker one SST to the left. 
But since file 6 +# was already chosen as part of the seed compaction construction, we still +# prefer to choose it over files 7 and 11. + +define +L0 + 000004:h.SET.2-j.SET.4 base_compacting + 000005:f.SET.6-h.SET.9 + 000006:f.SET.4-g.SET.5 + 000007:k.SET.2-l.SET.4 + 000009:f.SET.12-p.SET.12 + 000010:f.SET.11-g.SET.11 + 000011:n.SET.8-p.SET.10 +L6 + 000012:a.SET.0-f.SET.0 + 000008:g.SET.0-s.SET.0 +---- +file count: 7, sublevels: 4, intervals: 9 +flush split keys(4): [g, h, l, p] +0.3: file count: 1, bytes: 256, width (mean, max): 8.0, 8, interval range: [0, 7] + 000009:[f#12,1-p#12,1] +0.2: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [0, 0] + 000010:[f#11,1-g#11,1] +0.1: file count: 1, bytes: 256, width (mean, max): 3.0, 3, interval range: [0, 2] + 000005:[f#6,1-h#9,1] +0.0: file count: 4, bytes: 1024, width (mean, max): 1.2, 2, interval range: [0, 7] + 000006:[f#4,1-g#5,1] + 000004:[h#2,1-j#4,1] + 000007:[k#2,1-l#4,1] + 000011:[n#8,1-p#10,1] +compacting file count: 1, base compacting intervals: [2, 3] +L0.3: f------------------------------p +L0.2: f---g +L0.1: f------h +L0.0: f---g hvvvvvvj k---l n------p +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +pick-intra-l0-compaction min_depth=3 +---- +compaction picked with stack depth reduction 4 +000009,000010,000005,000006 +seed interval: f-g +L0.3: f++++++++++++++++++++++++++++++p +L0.2: f+++g +L0.1: f++++++h +L0.0: f+++g hvvvvvvj k---l n------p +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +# Without any base_compacting markers, all SSTs in the bottom sublevel should +# be chosen for an intra-L0 compaction. 
+ +define +L0 + 000004:h.SET.2-j.SET.4 + 000005:f.SET.6-h.SET.9 + 000006:f.SET.4-g.SET.5 + 000007:k.SET.2-l.SET.4 + 000009:f.SET.12-p.SET.12 + 000010:f.SET.11-g.SET.11 + 000011:n.SET.8-p.SET.10 +L6 + 000012:a.SET.0-f.SET.0 + 000008:g.SET.0-s.SET.0 +---- +file count: 7, sublevels: 4, intervals: 9 +flush split keys(4): [g, h, l, p] +0.3: file count: 1, bytes: 256, width (mean, max): 8.0, 8, interval range: [0, 7] + 000009:[f#12,1-p#12,1] +0.2: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [0, 0] + 000010:[f#11,1-g#11,1] +0.1: file count: 1, bytes: 256, width (mean, max): 3.0, 3, interval range: [0, 2] + 000005:[f#6,1-h#9,1] +0.0: file count: 4, bytes: 1024, width (mean, max): 1.2, 2, interval range: [0, 7] + 000006:[f#4,1-g#5,1] + 000004:[h#2,1-j#4,1] + 000007:[k#2,1-l#4,1] + 000011:[n#8,1-p#10,1] +compacting file count: 0, base compacting intervals: none +L0.3: f------------------------------p +L0.2: f---g +L0.1: f------h +L0.0: f---g h------j k---l n------p +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +pick-intra-l0-compaction min_depth=3 +---- +compaction picked with stack depth reduction 4 +000009,000010,000005,000006,000004,000007,000011 +seed interval: f-g +L0.3: f++++++++++++++++++++++++++++++p +L0.2: f+++g +L0.1: f++++++h +L0.0: f+++g h++++++j k+++l n++++++p +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +define flush_split_max_bytes=32 +L0 + 000001:a.SET.2-e.SET.5 size=64 + 000002:c.SET.6-g.SET.8 size=16 + 000003:f.SET.9-j.SET.11 size=16 +L6 + 000007:a.SET.0-f.SET.0 + 000008:g.SET.0-s.SET.0 +---- +file count: 3, sublevels: 3, intervals: 6 +flush split keys(0): [] +0.2: file count: 1, bytes: 16, width (mean, max): 2.0, 2, interval range: [3, 4] + 000003:[f#9,1-j#11,1] +0.1: file count: 1, bytes: 16, width (mean, max): 3.0, 3, interval range: [1, 3] + 000002:[c#6,1-g#8,1] +0.0: file 
count: 1, bytes: 64, width (mean, max): 2.0, 2, interval range: [0, 1] + 000001:[a#2,1-e#5,1] +compacting file count: 0, base compacting intervals: none +L0.2: f------------j +L0.1: c------------g +L0.0: a------------e +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +# Check that read amplification is the sublevel height of the tallest key +# interval, not the overall count of sublevels. + +read-amp +---- +2 + +in-use-key-ranges +a-z +---- +a-j + +# The comparison of a cumulative count of interpolated bytes and +# flushSplitMaxBytes is a <, so even though the cumulative count equals 32 after +# a-c, we do not emit a flush split key until the end of the next interval, c-e. + +flush-split-keys +---- +flush user split keys: none + +# Reduce flush_split_max_bytes by 1, and there should also be a split key at c. + +define flush_split_max_bytes=31 +L0 + 000001:a.SET.2-e.SET.5 size=64 + 000002:c.SET.6-g.SET.8 size=16 + 000003:f.SET.9-j.SET.11 size=16 +L6 + 000007:a.SET.0-f.SET.0 + 000008:g.SET.0-s.SET.0 +---- +file count: 3, sublevels: 3, intervals: 6 +flush split keys(1): [j] +0.2: file count: 1, bytes: 16, width (mean, max): 2.0, 2, interval range: [3, 4] + 000003:[f#9,1-j#11,1] +0.1: file count: 1, bytes: 16, width (mean, max): 3.0, 3, interval range: [1, 3] + 000002:[c#6,1-g#8,1] +0.0: file count: 1, bytes: 64, width (mean, max): 2.0, 2, interval range: [0, 1] + 000001:[a#2,1-e#5,1] +compacting file count: 0, base compacting intervals: none +L0.2: f------------j +L0.1: c------------g +L0.0: a------------e +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +flush-split-keys +---- +flush user split keys: j + +max-depth-after-ongoing-compactions +---- +2 + +define flush_split_max_bytes=64 +L0 + 000001:a.SET.2-d.SET.5 size=64 + 000002:e.SET.6-g.SET.8 size=64 + 000003:h.SET.9-j.SET.11 size=16 +L6 + 000007:a.SET.0-f.SET.0 + 
000008:g.SET.0-s.SET.0 +---- +file count: 3, sublevels: 1, intervals: 6 +flush split keys(1): [g] +0.0: file count: 3, bytes: 144, width (mean, max): 1.0, 1, interval range: [0, 4] + 000001:[a#2,1-d#5,1] + 000002:[e#6,1-g#8,1] + 000003:[h#9,1-j#11,1] +compacting file count: 0, base compacting intervals: none +L0.0: a---------d e------g h------j +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +flush-split-keys +---- +flush user split keys: g + +# The calculation for flush split bytes multiplies the specified max bytes +# parameter with the number of sublevels. In the case below, that should mean +# a flush split key would not be emitted at d despite the estimated bytes tally +# exceeding 64 bytes. Instead, it would be emitted when 64 * 2 = 128 bytes have +# been exceeded. + +define flush_split_max_bytes=64 +L0 + 000001:a.SET.2-d.SET.5 size=64 + 000004:d.SET.12-e.SET.12 size=64 + 000002:e.SET.6-g.SET.8 size=64 + 000003:h.SET.9-j.SET.11 size=16 +L6 + 000007:a.SET.0-f.SET.0 + 000008:g.SET.0-s.SET.0 +---- +file count: 4, sublevels: 2, intervals: 8 +flush split keys(1): [e] +0.1: file count: 1, bytes: 64, width (mean, max): 3.0, 3, interval range: [1, 3] + 000004:[d#12,1-e#12,1] +0.0: file count: 3, bytes: 144, width (mean, max): 1.7, 2, interval range: [0, 6] + 000001:[a#2,1-d#5,1] + 000002:[e#6,1-g#8,1] + 000003:[h#9,1-j#11,1] +compacting file count: 0, base compacting intervals: none +L0.1: d---e +L0.0: a---------d e------g h------j +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +in-use-key-ranges +b-b +dd-e +dd-i +dd-h +dd-j +dd-s +---- +a-d +d-g +d-g, h-j +d-g, h-j +d-g, h-j +d-g, h-j + +flush-split-keys +---- +flush user split keys: e + +# Ensure that the compaction picker doesn't error out when all seed files are +# compacting. 
+ +define +L0 + 000004:h.SET.2-j.SET.4 base_compacting + 000005:f.SET.6-h.SET.9 + 000006:f.SET.4-g.SET.5 base_compacting + 000007:k.SET.2-l.SET.4 base_compacting + 000009:f.SET.12-p.SET.12 intra_l0_compacting + 000010:f.SET.11-g.SET.11 + 000011:n.SET.8-p.SET.10 base_compacting +L6 + 000012:a.SET.0-f.SET.0 + 000008:g.SET.0-s.SET.0 +---- +file count: 7, sublevels: 4, intervals: 9 +flush split keys(4): [g, h, l, p] +0.3: file count: 1, bytes: 256, width (mean, max): 8.0, 8, interval range: [0, 7] + 000009:[f#12,1-p#12,1] +0.2: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [0, 0] + 000010:[f#11,1-g#11,1] +0.1: file count: 1, bytes: 256, width (mean, max): 3.0, 3, interval range: [0, 2] + 000005:[f#6,1-h#9,1] +0.0: file count: 4, bytes: 1024, width (mean, max): 1.2, 2, interval range: [0, 7] + 000006:[f#4,1-g#5,1] + 000004:[h#2,1-j#4,1] + 000007:[k#2,1-l#4,1] + 000011:[n#8,1-p#10,1] +compacting file count: 5, base compacting intervals: [0, 0], [2, 3], [5, 5], [7, 8] +L0.3: f^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^p +L0.2: f---g +L0.1: f------h +L0.0: fvvvg hvvvvvvj kvvvl nvvvvvvp +L6: a---------------f g------------------------------------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +pick-base-compaction min_depth=2 +---- +no compaction picked + +pick-intra-l0-compaction min_depth=2 +---- +no compaction picked + +# Ensure that base files with largest key set to the rangedel sentinel key are +# treated as not containing the largest user key. If L0 files containing that +# user key get added to that compaction, it could trigger a +# "files have overlapping ranges" error in Lbase as one of the outputs of the +# compaction would overlap with an Lbase file not in the compaction. +# Compare the output of the next two calls to PickBaseCompaction below; as the +# base file's end key is changed to the range deletion sentinel, L0 files +# overlapping with it are no longer chosen for compaction. 
+ +define +L0 + 000004:h.SET.2-j.SET.4 + 000005:f.SET.6-h.SET.9 + 000006:f.SET.4-g.SET.5 + 000007:k.SET.2-l.SET.4 + 000009:n.SET.12-o.SET.12 + 000010:f.SET.11-g.SET.11 + 000011:n.SET.8-o.SET.10 +L6 + 000001:a.SET.0-o.SET.0 + 000008:p.SET.0-s.SET.0 +---- +file count: 7, sublevels: 3, intervals: 9 +flush split keys(4): [g, h, l, o] +0.2: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [0, 0] + 000010:[f#11,1-g#11,1] +0.1: file count: 2, bytes: 512, width (mean, max): 2.0, 3, interval range: [0, 7] + 000005:[f#6,1-h#9,1] + 000009:[n#12,1-o#12,1] +0.0: file count: 4, bytes: 1024, width (mean, max): 1.2, 2, interval range: [0, 7] + 000006:[f#4,1-g#5,1] + 000004:[h#2,1-j#4,1] + 000007:[k#2,1-l#4,1] + 000011:[n#8,1-o#10,1] +compacting file count: 0, base compacting intervals: none +L0.2: f---g +L0.1: f------h n---o +L0.0: f---g h------j k---l n---o +L6: a------------------------------------------o p---------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +pick-base-compaction min_depth=2 +---- +compaction picked with stack depth reduction 3 +000006,000005,000004,000010,000007,000011,000009 +seed interval: f-g +L0.2: f+++g +L0.1: f++++++h n+++o +L0.0: f+++g h++++++j k+++l n+++o +L6: a------------------------------------------o p---------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +define +L0 + 000004:h.SET.2-j.SET.4 + 000005:f.SET.6-h.SET.9 + 000006:f.SET.4-g.SET.5 + 000007:k.SET.2-l.SET.4 + 000009:n.SET.12-o.SET.12 + 000010:f.SET.11-g.SET.11 + 000011:n.SET.8-o.SET.10 +L6 + 000001:a.SET.0-o.RANGEDEL.72057594037927935 + 000008:p.SET.0-s.SET.0 +---- +file count: 7, sublevels: 3, intervals: 9 +flush split keys(4): [g, h, l, o] +0.2: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [0, 0] + 000010:[f#11,1-g#11,1] +0.1: file count: 2, bytes: 512, width (mean, max): 2.0, 3, interval range: [0, 7] + 000005:[f#6,1-h#9,1] + 000009:[n#12,1-o#12,1] +0.0: file count: 4, bytes: 1024, width (mean, max): 1.2, 2, 
interval range: [0, 7] + 000006:[f#4,1-g#5,1] + 000004:[h#2,1-j#4,1] + 000007:[k#2,1-l#4,1] + 000011:[n#8,1-o#10,1] +compacting file count: 0, base compacting intervals: none +L0.2: f---g +L0.1: f------h n---o +L0.0: f---g h------j k---l n---o +L6: a------------------------------------------o p---------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +pick-base-compaction min_depth=2 +---- +compaction picked with stack depth reduction 3 +000006,000005,000004,000010,000007,000011,000009 +seed interval: f-g +L0.2: f+++g +L0.1: f++++++h n+++o +L0.0: f+++g h++++++j k+++l n+++o +L6: a------------------------------------------o p---------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +in-use-key-ranges +a-z +n-o +---- +f-j, k-l, n-o +n-o + +# Ensure that two L0 sstables where one ends at a rangedel sentinel key and +# the other starts at the same user key occupy the same sublevel. + +define +L0 + 000004:a.SET.2-d.RANGEDEL.72057594037927935 + 000005:d.SET.3-g.SET.5 +L6 + 000001:a.SET.0-o.SET.0 + 000008:p.SET.0-s.SET.0 +---- +file count: 2, sublevels: 1, intervals: 3 +flush split keys(2): [d, g] +0.0: file count: 2, bytes: 512, width (mean, max): 1.0, 1, interval range: [0, 1] + 000004:[a#2,1-d#72057594037927935,15] + 000005:[d#3,1-g#5,1] +compacting file count: 0, base compacting intervals: none +L0.0: a--------d---------g +L6: a------------------------------------------o p---------s + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss + +in-use-key-ranges +a-z +a-g +b-c +---- +a-g +a-g +a-d + +define +L0 + 000004:a.SET.2-d.RANGEDEL.72057594037927935 + 000005:d.SET.3-g.SET.5 + 000006:f.SET.6-i.SET.6 + 000007:h.SET.7-m.SET.7 + 000009:q.SET.7-r.SET.7 + 000010:g.SET.10-i.SET.10 +---- +file count: 6, sublevels: 4, intervals: 10 +flush split keys(4): [f, g, i, r] +0.3: file count: 1, bytes: 256, width (mean, max): 3.0, 3, interval range: [3, 5] + 000010:[g#10,1-i#10,1] +0.2: file count: 1, bytes: 256, width (mean, max): 2.0, 2, interval 
range: [5, 6] + 000007:[h#7,1-m#7,1] +0.1: file count: 1, bytes: 256, width (mean, max): 4.0, 4, interval range: [2, 5] + 000006:[f#6,1-i#6,1] +0.0: file count: 3, bytes: 768, width (mean, max): 1.7, 3, interval range: [0, 8] + 000004:[a#2,1-d#72057594037927935,15] + 000005:[d#3,1-g#5,1] + 000009:[q#7,1-r#7,1] +compacting file count: 0, base compacting intervals: none +L0.3: g------i +L0.2: h---------------m +L0.1: f---------i +L0.0: a--------d---------g q---r + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr + +in-use-key-ranges +f-m +f-n +f-l +ff-m +ff-n +ff-l +---- +f-m +f-m +f-m +f-m +f-m +f-m + +in-use-key-ranges +n-o +m-q +l-qq +---- +. +i-m, q-r +i-m, q-r + +in-use-key-ranges +a-z +g-l +---- +a-m, q-r +g-m + +in-use-key-ranges +a-ff +a-gg +a-i +d-d +---- +a-g +a-i +a-m +d-g + +# Same example as above, except we incrementally add the sublevels. The output +# of in-use-key-ranges must be the same. + +define +L0 + 000004:a.SET.2-d.RANGEDEL.72057594037927935 +---- +file count: 1, sublevels: 1, intervals: 2 +flush split keys(1): [d] +0.0: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [0, 0] + 000004:[a#2,1-d#72057594037927935,15] +compacting file count: 0, base compacting intervals: none +L0.0: a---------d + aa bb cc dd + +add-l0-files + 000005:d.SET.3-g.SET.5 +---- +file count: 2, sublevels: 1, intervals: 3 +flush split keys(2): [d, g] +0.0: file count: 2, bytes: 512, width (mean, max): 1.0, 1, interval range: [0, 1] + 000004:[a#2,1-d#72057594037927935,15] + 000005:[d#3,1-g#5,1] +compacting file count: 0, base compacting intervals: none +L0.0: a--------d---------g + aa bb cc dd ee ff gg + +add-l0-files + 000006:f.SET.6-i.SET.6 +---- +file count: 3, sublevels: 2, intervals: 5 +flush split keys(2): [d, g] +0.1: file count: 1, bytes: 256, width (mean, max): 2.0, 2, interval range: [2, 3] + 000006:[f#6,1-i#6,1] +0.0: file count: 2, bytes: 512, width (mean, max): 1.5, 2, interval range: [0, 2] + 000004:[a#2,1-d#72057594037927935,15] + 
000005:[d#3,1-g#5,1] +compacting file count: 0, base compacting intervals: none +L0.1: f---------i +L0.0: a--------d---------g + aa bb cc dd ee ff gg hh ii + +add-l0-files + 000007:h.SET.7-m.SET.7 + 000009:q.SET.8-r.SET.8 +---- +file count: 5, sublevels: 3, intervals: 9 +flush split keys(4): [d, g, i, r] +0.2: file count: 1, bytes: 256, width (mean, max): 2.0, 2, interval range: [4, 5] + 000007:[h#7,1-m#7,1] +0.1: file count: 1, bytes: 256, width (mean, max): 3.0, 3, interval range: [2, 4] + 000006:[f#6,1-i#6,1] +0.0: file count: 3, bytes: 768, width (mean, max): 1.3, 2, interval range: [0, 7] + 000004:[a#2,1-d#72057594037927935,15] + 000005:[d#3,1-g#5,1] + 000009:[q#8,1-r#8,1] +compacting file count: 0, base compacting intervals: none +L0.2: h---------------m +L0.1: f---------i +L0.0: a--------d---------g q---r + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr + +# The output below should exactly match the output of the second last define. + +add-l0-files + 000010:g.SET.10-i.SET.10 +---- +file count: 6, sublevels: 4, intervals: 10 +flush split keys(4): [f, g, i, r] +0.3: file count: 1, bytes: 256, width (mean, max): 3.0, 3, interval range: [3, 5] + 000010:[g#10,1-i#10,1] +0.2: file count: 1, bytes: 256, width (mean, max): 2.0, 2, interval range: [5, 6] + 000007:[h#7,1-m#7,1] +0.1: file count: 1, bytes: 256, width (mean, max): 4.0, 4, interval range: [2, 5] + 000006:[f#6,1-i#6,1] +0.0: file count: 3, bytes: 768, width (mean, max): 1.7, 3, interval range: [0, 8] + 000004:[a#2,1-d#72057594037927935,15] + 000005:[d#3,1-g#5,1] + 000009:[q#8,1-r#8,1] +compacting file count: 0, base compacting intervals: none +L0.3: g------i +L0.2: h---------------m +L0.1: f---------i +L0.0: a--------d---------g q---r + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr + +in-use-key-ranges +f-m +f-n +f-l +ff-m +ff-n +ff-l +---- +f-m +f-m +f-m +f-m +f-m +f-m + +in-use-key-ranges +n-o +m-q +l-qq +---- +. 
+i-m, q-r +i-m, q-r + +in-use-key-ranges +a-z +g-l +---- +a-m, q-r +g-m + +in-use-key-ranges +a-ff +a-gg +a-i +d-d +---- +a-g +a-i +a-m +d-g + +pick-base-compaction min_depth=3 +---- +compaction picked with stack depth reduction 3 +000005,000006,000010,000007,000004,000009 +seed interval: g-g +L0.3: g++++++i +L0.2: h+++++++++++++++m +L0.1: f+++++++++i +L0.0: a++++++++d+++++++++g q+++r + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr + +# Adding two overlapping L0 files is supported too, as long as they're disjoint +# in sequence number ranges. + +add-l0-files + 000011:b.SET.13-e.SET.15 + 000012:c.SET.16-e.SET.17 +---- +file count: 8, sublevels: 4, intervals: 13 +flush split keys(5): [d, e, g, i, r] +0.3: file count: 1, bytes: 256, width (mean, max): 3.0, 3, interval range: [6, 8] + 000010:[g#10,1-i#10,1] +0.2: file count: 2, bytes: 512, width (mean, max): 2.0, 2, interval range: [2, 9] + 000012:[c#16,1-e#17,1] + 000007:[h#7,1-m#7,1] +0.1: file count: 2, bytes: 512, width (mean, max): 3.5, 4, interval range: [1, 8] + 000011:[b#13,1-e#15,1] + 000006:[f#6,1-i#6,1] +0.0: file count: 3, bytes: 768, width (mean, max): 2.7, 4, interval range: [0, 11] + 000004:[a#2,1-d#72057594037927935,15] + 000005:[d#3,1-g#5,1] + 000009:[q#8,1-r#8,1] +compacting file count: 0, base compacting intervals: none +L0.3: g------i +L0.2: c------e h---------------m +L0.1: b---------e f---------i +L0.0: a--------d---------g q---r + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr + +# Multiple sublevels can also be added in one add-l0-files. 
+ +add-l0-files + 000013:h.SET.18-i.SET.19 + 000014:g.SET.20-i.SET.21 +---- +file count: 10, sublevels: 6, intervals: 13 +flush split keys(4): [d, g, h, i] +0.5: file count: 1, bytes: 256, width (mean, max): 3.0, 3, interval range: [6, 8] + 000014:[g#20,1-i#21,1] +0.4: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [8, 8] + 000013:[h#18,1-i#19,1] +0.3: file count: 1, bytes: 256, width (mean, max): 3.0, 3, interval range: [6, 8] + 000010:[g#10,1-i#10,1] +0.2: file count: 2, bytes: 512, width (mean, max): 2.0, 2, interval range: [2, 9] + 000012:[c#16,1-e#17,1] + 000007:[h#7,1-m#7,1] +0.1: file count: 2, bytes: 512, width (mean, max): 3.5, 4, interval range: [1, 8] + 000011:[b#13,1-e#15,1] + 000006:[f#6,1-i#6,1] +0.0: file count: 3, bytes: 768, width (mean, max): 2.7, 4, interval range: [0, 11] + 000004:[a#2,1-d#72057594037927935,15] + 000005:[d#3,1-g#5,1] + 000009:[q#8,1-r#8,1] +compacting file count: 0, base compacting intervals: none +L0.5: g------i +L0.4: h---i +L0.3: g------i +L0.2: c------e h---------------m +L0.1: b---------e f---------i +L0.0: a--------d---------g q---r + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr + +# Adding an old L0 file returns an error. + +add-l0-files + 000015:h.SET.17-i.SET.17 +---- +pebble: L0 sublevel generation optimization cannot be used + +# The following test cases cover the examples provided in the documentation. +# NOTE: following initialization, some of the files fall down into lower levels +# where there is space. + +# Example 1. No in-progress L0 -> LBase compaction. 
+ +define +L0.3 + 000011:a.SET.18-d.SET.19 + 000012:g.SET.20-j.SET.21 +L0.2 + 000009:f.SET.14-j.SET.15 + 000010:r.SET.16-t.SET.17 +L0.1 + 000007:b.SET.10-d.SET.11 + 000008:e.SET.12-j.SET.13 +L0.0 + 000003:a.SET.2-d.SET.3 + 000004:f.SET.4-j.SET.5 + 000005:l.SET.6-o.SET.7 + 000006:p.SET.8-x.SET.9 +L6 + 000001:a.SET.0-i.SET.0 + 000002:m.SET.0-w.SET.0 +---- +file count: 10, sublevels: 4, intervals: 13 +flush split keys(5): [d, g, j, r, t] +0.3: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [5, 5] + 000012:[g#20,1-j#21,1] +0.2: file count: 2, bytes: 512, width (mean, max): 2.0, 2, interval range: [0, 5] + 000011:[a#18,1-d#19,1] + 000009:[f#14,1-j#15,1] +0.1: file count: 3, bytes: 768, width (mean, max): 1.7, 3, interval range: [1, 10] + 000007:[b#10,1-d#11,1] + 000008:[e#12,1-j#13,1] + 000010:[r#16,1-t#17,1] +0.0: file count: 4, bytes: 1024, width (mean, max): 2.0, 3, interval range: [0, 11] + 000003:[a#2,1-d#3,1] + 000004:[f#4,1-j#5,1] + 000005:[l#6,1-o#7,1] + 000006:[p#8,1-x#9,1] +compacting file count: 0, base compacting intervals: none +L0.3: g---------j +L0.2: a---------d f------------j +L0.1: b------d e---------------j r------t +L0.0: a---------d f------------j l---------o p------------------------x +L6: a------------------------i m------------------------------w + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss tt uu vv ww xx + +pick-base-compaction min_depth=3 +---- +compaction picked with stack depth reduction 4 +000004,000008,000009,000012,000003,000007,000011 +seed interval: g-j +L0.3: g+++++++++j +L0.2: a+++++++++d f++++++++++++j +L0.1: b++++++d e+++++++++++++++j r------t +L0.0: a+++++++++d f++++++++++++j l---------o p------------------------x +L6: a------------------------i m------------------------------w + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss tt uu vv ww xx + +# Example 2. Left half of the keyspace compacting. Select the "next best" +# compaction. 
+ +define +L0.3 + 000011:a.SET.18-d.SET.19 base_compacting + 000012:g.SET.20-j.SET.21 base_compacting +L0.2 + 000009:f.SET.14-j.SET.15 base_compacting + 000010:r.SET.16-t.SET.17 +L0.1 + 000007:b.SET.10-d.SET.11 base_compacting + 000008:e.SET.12-j.SET.13 base_compacting +L0.0 + 000003:a.SET.2-d.SET.3 base_compacting + 000004:f.SET.4-j.SET.5 base_compacting + 000005:l.SET.6-o.SET.7 + 000006:p.SET.8-x.SET.9 +L6 + 000001:a.SET.0-i.SET.0 + 000002:m.SET.0-w.SET.0 +---- +file count: 10, sublevels: 4, intervals: 13 +flush split keys(5): [d, g, j, r, t] +0.3: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [5, 5] + 000012:[g#20,1-j#21,1] +0.2: file count: 2, bytes: 512, width (mean, max): 2.0, 2, interval range: [0, 5] + 000011:[a#18,1-d#19,1] + 000009:[f#14,1-j#15,1] +0.1: file count: 3, bytes: 768, width (mean, max): 1.7, 3, interval range: [1, 10] + 000007:[b#10,1-d#11,1] + 000008:[e#12,1-j#13,1] + 000010:[r#16,1-t#17,1] +0.0: file count: 4, bytes: 1024, width (mean, max): 2.0, 3, interval range: [0, 11] + 000003:[a#2,1-d#3,1] + 000004:[f#4,1-j#5,1] + 000005:[l#6,1-o#7,1] + 000006:[p#8,1-x#9,1] +compacting file count: 7, base compacting intervals: [0, 6] +L0.3: gvvvvvvvvvj +L0.2: avvvvvvvvvd fvvvvvvvvvvvvj +L0.1: bvvvvvvd evvvvvvvvvvvvvvvj r------t +L0.0: avvvvvvvvvd fvvvvvvvvvvvvj l---------o p------------------------x +L6: a------------------------i m------------------------------w + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss tt uu vv ww xx + +pick-base-compaction min_depth=3 +---- +no compaction picked + +pick-base-compaction min_depth=2 +---- +compaction picked with stack depth reduction 2 +000006,000010,000005 +seed interval: r-t +L0.3: gvvvvvvvvvj +L0.2: avvvvvvvvvd fvvvvvvvvvvvvj +L0.1: bvvvvvvd evvvvvvvvvvvvvvvj r++++++t +L0.0: avvvvvvvvvd fvvvvvvvvvvvvj l+++++++++o p++++++++++++++++++++++++x +L6: a------------------------i m------------------------------w + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss tt uu vv ww 
xx + +# Example 3. The same as Example 2, except there is now an additional file in +# LBase that overlaps with the [l,o] file in L0.0. + +define +L0.3 + 000011:a.SET.18-d.SET.19 base_compacting + 000012:g.SET.20-j.SET.21 base_compacting +L0.2 + 000009:f.SET.14-j.SET.15 base_compacting + 000010:r.SET.16-t.SET.17 +L0.1 + 000007:b.SET.10-d.SET.11 base_compacting + 000008:e.SET.12-j.SET.13 base_compacting +L0.0 + 000003:a.SET.2-d.SET.3 base_compacting + 000004:f.SET.4-j.SET.5 base_compacting + 000005:l.SET.6-o.SET.7 + 000006:p.SET.8-x.SET.9 +L6 + 000001:a.SET.0-i.SET.0 + 000013:j.SET.0-l.SET.0 + 000002:m.SET.0-w.SET.0 +---- +file count: 10, sublevels: 4, intervals: 13 +flush split keys(5): [d, g, j, r, t] +0.3: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [5, 5] + 000012:[g#20,1-j#21,1] +0.2: file count: 2, bytes: 512, width (mean, max): 2.0, 2, interval range: [0, 5] + 000011:[a#18,1-d#19,1] + 000009:[f#14,1-j#15,1] +0.1: file count: 3, bytes: 768, width (mean, max): 1.7, 3, interval range: [1, 10] + 000007:[b#10,1-d#11,1] + 000008:[e#12,1-j#13,1] + 000010:[r#16,1-t#17,1] +0.0: file count: 4, bytes: 1024, width (mean, max): 2.0, 3, interval range: [0, 11] + 000003:[a#2,1-d#3,1] + 000004:[f#4,1-j#5,1] + 000005:[l#6,1-o#7,1] + 000006:[p#8,1-x#9,1] +compacting file count: 7, base compacting intervals: [0, 6] +L0.3: gvvvvvvvvvj +L0.2: avvvvvvvvvd fvvvvvvvvvvvvj +L0.1: bvvvvvvd evvvvvvvvvvvvvvvj r------t +L0.0: avvvvvvvvvd fvvvvvvvvvvvvj l---------o p------------------------x +L6: a------------------------i j------l m------------------------------w + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss tt uu vv ww xx + +pick-base-compaction min_depth=2 +---- +compaction picked with stack depth reduction 2 +000006,000010 +seed interval: r-t +L0.3: gvvvvvvvvvj +L0.2: avvvvvvvvvd fvvvvvvvvvvvvj +L0.1: bvvvvvvd evvvvvvvvvvvvvvvj r++++++t +L0.0: avvvvvvvvvd fvvvvvvvvvvvvj l---------o p++++++++++++++++++++++++x +L6: a------------------------i 
j------l m------------------------------w + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss tt uu vv ww xx + +# Example 4. Intra-L0 compactions. + +define +L0.3 + 000011:a.SET.18-d.SET.19 + 000012:g.SET.20-j.SET.21 base_compacting +L0.2 + 000009:f.SET.14-j.SET.15 base_compacting + 000010:r.SET.16-t.SET.17 base_compacting +L0.1 + 000007:b.SET.10-d.SET.11 + 000008:e.SET.12-j.SET.13 base_compacting +L0.0 + 000003:a.SET.2-d.SET.3 + 000004:f.SET.4-j.SET.5 base_compacting + 000005:l.SET.6-o.SET.7 + 000006:p.SET.8-x.SET.9 base_compacting +L6 + 000001:a.SET.0-i.SET.0 + 000002:m.SET.0-w.SET.0 +---- +file count: 10, sublevels: 4, intervals: 13 +flush split keys(5): [d, g, j, r, t] +0.3: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [5, 5] + 000012:[g#20,1-j#21,1] +0.2: file count: 2, bytes: 512, width (mean, max): 2.0, 2, interval range: [0, 5] + 000011:[a#18,1-d#19,1] + 000009:[f#14,1-j#15,1] +0.1: file count: 3, bytes: 768, width (mean, max): 1.7, 3, interval range: [1, 10] + 000007:[b#10,1-d#11,1] + 000008:[e#12,1-j#13,1] + 000010:[r#16,1-t#17,1] +0.0: file count: 4, bytes: 1024, width (mean, max): 2.0, 3, interval range: [0, 11] + 000003:[a#2,1-d#3,1] + 000004:[f#4,1-j#5,1] + 000005:[l#6,1-o#7,1] + 000006:[p#8,1-x#9,1] +compacting file count: 6, base compacting intervals: [3, 6], [9, 12] +L0.3: gvvvvvvvvvj +L0.2: a---------d fvvvvvvvvvvvvj +L0.1: b------d evvvvvvvvvvvvvvvj rvvvvvvt +L0.0: a---------d fvvvvvvvvvvvvj l---------o pvvvvvvvvvvvvvvvvvvvvvvvvx +L6: a------------------------i m------------------------------w + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss tt uu vv ww xx + +pick-intra-l0-compaction min_depth=2 +---- +compaction picked with stack depth reduction 3 +000011,000007,000003 +seed interval: b-d +L0.3: gvvvvvvvvvj +L0.2: a+++++++++d fvvvvvvvvvvvvj +L0.1: b++++++d evvvvvvvvvvvvvvvj rvvvvvvt +L0.0: a+++++++++d fvvvvvvvvvvvvj l---------o pvvvvvvvvvvvvvvvvvvvvvvvvx +L6: a------------------------i 
m------------------------------w + aa bb cc dd ee ff gg hh ii jj kk ll mm nn oo pp qq rr ss tt uu vv ww xx + +# Regression test for cockroachdb/cockroach#101896. We must return +# errInvalidL0SublevelOpt in any case where a new L0 file is being AddL0File'd +# with a largest sequence number below an existing file in the same interval. + +define +L0 + 000004:a.SET.2-e.SET.3 + 000006:a.SET.7-b.SET.8 + 000007:d.SET.12-f.SET.12 +---- +file count: 3, sublevels: 2, intervals: 5 +flush split keys(2): [b, e] +0.1: file count: 2, bytes: 512, width (mean, max): 1.5, 2, interval range: [0, 3] + 000006:[a#7,1-b#8,1] + 000007:[d#12,1-f#12,1] +0.0: file count: 1, bytes: 256, width (mean, max): 3.0, 3, interval range: [0, 2] + 000004:[a#2,1-e#3,1] +compacting file count: 0, base compacting intervals: none +L0.1: a---b d------f +L0.0: a------------e + aa bb cc dd ee ff + +# Note that 000006 will bump the sublevel for the incoming file to 2. We +# should still realize that it's slotting below 000007 and return an error. + +add-l0-files + 000015:a.SET.9-g.SET.10 +---- +pebble: L0 sublevel generation optimization cannot be used + +# Fully-regenerated L0 sublevels allow us to pick an intra-L0 compaction that +# does not violate sublevel ordering. 
+ +define +L0 + 000004:a.SET.2-e.SET.3 + 000006:a.SET.7-b.SET.8 + 000007:d.SET.12-f.SET.12 + 000015:a.SET.9-g.SET.10 +---- +file count: 4, sublevels: 4, intervals: 6 +flush split keys(2): [b, e] +0.3: file count: 1, bytes: 256, width (mean, max): 2.0, 2, interval range: [2, 3] + 000007:[d#12,1-f#12,1] +0.2: file count: 1, bytes: 256, width (mean, max): 5.0, 5, interval range: [0, 4] + 000015:[a#9,1-g#10,1] +0.1: file count: 1, bytes: 256, width (mean, max): 1.0, 1, interval range: [0, 0] + 000006:[a#7,1-b#8,1] +0.0: file count: 1, bytes: 256, width (mean, max): 3.0, 3, interval range: [0, 2] + 000004:[a#2,1-e#3,1] +compacting file count: 0, base compacting intervals: none +L0.3: d------f +L0.2: a------------------g +L0.1: a---b +L0.0: a------------e + aa bb cc dd ee ff gg + +# Exclude the d-f file through earliest_unflushed_seqnum. + +pick-intra-l0-compaction min_depth=2 earliest_unflushed_seqnum=11 +---- +compaction picked with stack depth reduction 3 +000015,000006,000004 +seed interval: a-b +L0.3: d------f +L0.2: a++++++++++++++++++g +L0.1: a+++b +L0.0: a++++++++++++e + aa bb cc dd ee ff gg + +pick-intra-l0-compaction min_depth=2 +---- +compaction picked with stack depth reduction 3 +000015,000007,000006,000004 +seed interval: a-b +L0.3: d++++++f +L0.2: a++++++++++++++++++g +L0.1: a+++b +L0.0: a++++++++++++e + aa bb cc dd ee ff gg diff --git a/pebble/internal/manifest/testdata/level_iterator b/pebble/internal/manifest/testdata/level_iterator new file mode 100644 index 0000000..dca3e8e --- /dev/null +++ b/pebble/internal/manifest/testdata/level_iterator @@ -0,0 +1,128 @@ +define +[ ] +---- + +iter +first +last +seek-lt a +seek-lt z +seek-ge a +seek-ge z +---- +. +. +. +. +. +. + +define +[ a.SET.1-b.SET.2 ] +---- + +iter +last +---- +000001:[a#1,1-b#2,1] + +iter +first +next +prev +prev +---- +000001:[a#1,1-b#2,1] +. +000001:[a#1,1-b#2,1] +. + +iter +seek-ge a +seek-ge b +seek-ge c +---- +000001:[a#1,1-b#2,1] +000001:[a#1,1-b#2,1] +. 
+ +iter +seek-lt a +seek-lt b +seek-lt z +---- +. +000001:[a#1,1-b#2,1] +000001:[a#1,1-b#2,1] + +define +[ b.SET.1-c.SET.2 ] +---- + +iter +seek-ge a +seek-ge d +seek-lt a +seek-lt z +---- +000001:[b#1,1-c#2,1] +. +. +000001:[b#1,1-c#2,1] + + +define +a.SET.1-b.SET.2 [ c.SET.3-d.SET.4 e.SET.5-f.SET.6 ] g.SET.7-h.SET.8 +---- + +iter +first +prev +last +next +---- +000002:[c#3,1-d#4,1] +. +000003:[e#5,1-f#6,1] +. + +iter +seek-ge a +seek-ge b +seek-ge c +seek-ge h +prev +---- +000002:[c#3,1-d#4,1] +000002:[c#3,1-d#4,1] +000002:[c#3,1-d#4,1] +. +000003:[e#5,1-f#6,1] + +iter +seek-lt b +next +seek-lt a +next +seek-lt z +---- +. +000002:[c#3,1-d#4,1] +. +000002:[c#3,1-d#4,1] +000003:[e#5,1-f#6,1] + +define +a.SET.1-b.SET.2 c.SET.3-d.SET.4 e.SET.5-f.SET.6 g.SET.7-h.SET.8 [ ] +---- + +iter +seek-ge cat +seek-lt cat +first +last +---- +. +. +. +. diff --git a/pebble/internal/manifest/testdata/level_iterator_filtered b/pebble/internal/manifest/testdata/level_iterator_filtered new file mode 100644 index 0000000..066207c --- /dev/null +++ b/pebble/internal/manifest/testdata/level_iterator_filtered @@ -0,0 +1,537 @@ +define +000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] +---- + +iter key-type=points +seek-ge a +seek-ge m +seek-ge n +seek-ge o +seek-ge p +---- +000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] +000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] +000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] +. +. + +iter key-type=ranges +seek-ge a +seek-ge m +seek-ge n +seek-ge o +seek-ge p +---- +000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] +. +. +. +. 
+ +iter key-type=points +seek-lt a +seek-lt b +seek-lt c +seek-lt j +seek-lt k +seek-lt l +seek-lt m +seek-lt n +seek-lt o +seek-lt p +---- +. +. +. +. +000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] +000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] +000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] +000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] +000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] +000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] + +iter key-type=ranges +seek-lt a +seek-lt b +seek-lt c +seek-lt j +seek-lt k +seek-lt l +seek-lt m +seek-lt n +seek-lt o +seek-lt p +---- +. 
+000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] +000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] +000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] +000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] +000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] +000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] +000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] +000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] +000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] + +iter key-type=points +seek-lt a +next +next +seek-ge o +prev +prev +---- +. +000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] +. +. +000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] +. + +iter key-type=ranges +seek-lt a +next +next +seek-ge m +prev +prev +---- +. +000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] +. +. +000000:[a#42,RANGEKEYSET-o#inf,RANGEDEL] seqnums:[0-0] points:[j#0,SET-o#inf,RANGEDEL] ranges:[a#42,RANGEKEYSET-m#inf,RANGEKEYSET] +. 
+ +define +000000:[a#9,SET-b#2,DEL] points:[a#9,SET-b#2,DEL] +000001:[c#9,SET-d#2,DEL] points:[c#9,SET-d#2,DEL] +000002:[e#9,SET-f#2,DEL] points:[e#9,SET-f#2,DEL] +000003:[g#9,SET-g#2,DEL] points:[g#9,SET-g#2,DEL] +000004:[i#9,SET-j#2,DEL] points:[i#9,SET-j#2,DEL] +000005:[k#9,SET-k#2,DEL] points:[k#9,SET-k#2,DEL] +---- + +iter key-type=points +seek-ge a +seek-ge apple +seek-ge b +seek-ge banana +seek-ge c +seek-ge cantalope +seek-ge d +seek-ge dragonfruit +---- +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000001:[c#9,SET-d#2,DEL] seqnums:[0-0] points:[c#9,SET-d#2,DEL] +000001:[c#9,SET-d#2,DEL] seqnums:[0-0] points:[c#9,SET-d#2,DEL] +000001:[c#9,SET-d#2,DEL] seqnums:[0-0] points:[c#9,SET-d#2,DEL] +000001:[c#9,SET-d#2,DEL] seqnums:[0-0] points:[c#9,SET-d#2,DEL] +000002:[e#9,SET-f#2,DEL] seqnums:[0-0] points:[e#9,SET-f#2,DEL] + +iter key-type=points +seek-lt a +seek-lt apple +seek-lt b +seek-lt banana +seek-lt c +seek-lt cantalope +seek-lt d +seek-lt dragonfruit +---- +. +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000001:[c#9,SET-d#2,DEL] seqnums:[0-0] points:[c#9,SET-d#2,DEL] +000001:[c#9,SET-d#2,DEL] seqnums:[0-0] points:[c#9,SET-d#2,DEL] +000001:[c#9,SET-d#2,DEL] seqnums:[0-0] points:[c#9,SET-d#2,DEL] + +iter key-type=ranges +seek-ge a +seek-ge apple +seek-ge b +seek-ge banana +seek-ge c +seek-ge cantalope +seek-ge d +seek-ge dragonfruit +---- +. +. +. +. +. +. +. +. + +iter key-type=ranges +seek-lt a +seek-lt apple +seek-lt b +seek-lt banana +seek-lt c +seek-lt cantalope +seek-lt d +seek-lt dragonfruit +---- +. +. +. +. +. +. +. +. 
+ +define +000000:[a#9,SET-b#2,DEL] points:[a#9,SET-b#2,DEL] +000001:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] ranges:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] +000002:[e#9,SET-f#inf,RANGEKEYDEL] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000003:[g#9,SET-g#2,DEL] points:[g#9,SET-g#2,DEL] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000005:[k#9,SET-k#2,DEL] points:[k#9,SET-k#2,DEL] +---- + +iter key-type=both +seek-ge a +seek-ge apple +seek-ge b +seek-ge banana +seek-ge c +seek-ge cantalope +seek-ge d +seek-ge dragonfruit +seek-ge e +seek-ge elderberry +seek-ge f +seek-ge figs +seek-ge g +seek-ge guava +seek-ge h +seek-ge huckleberry +seek-ge i +seek-ge incaberry +seek-ge j +seek-ge jujube +seek-ge k +seek-ge kiwi +seek-ge l +---- +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000001:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] seqnums:[0-0] ranges:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] +000001:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] seqnums:[0-0] ranges:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] +000001:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] seqnums:[0-0] ranges:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] +000001:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] seqnums:[0-0] ranges:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000003:[g#9,SET-g#2,DEL] seqnums:[0-0] 
points:[g#9,SET-g#2,DEL] +000003:[g#9,SET-g#2,DEL] seqnums:[0-0] points:[g#9,SET-g#2,DEL] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000005:[k#9,SET-k#2,DEL] seqnums:[0-0] points:[k#9,SET-k#2,DEL] +000005:[k#9,SET-k#2,DEL] seqnums:[0-0] points:[k#9,SET-k#2,DEL] +. +. + +iter key-type=both +seek-lt a +seek-lt apple +seek-lt b +seek-lt banana +seek-lt c +seek-lt cantalope +seek-lt d +seek-lt dragonfruit +seek-lt e +seek-lt elderberry +seek-lt f +seek-lt figs +seek-lt g +seek-lt guava +seek-lt h +seek-lt huckleberry +seek-lt i +seek-lt incaberry +seek-lt j +seek-lt jujube +seek-lt k +seek-lt kiwi +seek-lt l +---- +. 
+000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000001:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] seqnums:[0-0] ranges:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] +000001:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] seqnums:[0-0] ranges:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] +000001:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] seqnums:[0-0] ranges:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] +000001:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] seqnums:[0-0] ranges:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000003:[g#9,SET-g#2,DEL] seqnums:[0-0] points:[g#9,SET-g#2,DEL] +000003:[g#9,SET-g#2,DEL] seqnums:[0-0] points:[g#9,SET-g#2,DEL] +000003:[g#9,SET-g#2,DEL] seqnums:[0-0] points:[g#9,SET-g#2,DEL] +000003:[g#9,SET-g#2,DEL] seqnums:[0-0] points:[g#9,SET-g#2,DEL] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] 
+000005:[k#9,SET-k#2,DEL] seqnums:[0-0] points:[k#9,SET-k#2,DEL] +000005:[k#9,SET-k#2,DEL] seqnums:[0-0] points:[k#9,SET-k#2,DEL] + + +iter key-type=points +seek-ge a +seek-ge apple +seek-ge b +seek-ge banana +seek-ge c +seek-ge cantalope +seek-ge d +seek-ge dragonfruit +seek-ge e +seek-ge elderberry +seek-ge f +seek-ge figs +seek-ge g +seek-ge guava +seek-ge h +seek-ge huckleberry +seek-ge i +seek-ge incaberry +seek-ge j +seek-ge jujube +seek-ge k +seek-ge kiwi +seek-ge l +---- +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000003:[g#9,SET-g#2,DEL] seqnums:[0-0] points:[g#9,SET-g#2,DEL] +000003:[g#9,SET-g#2,DEL] seqnums:[0-0] points:[g#9,SET-g#2,DEL] +000003:[g#9,SET-g#2,DEL] seqnums:[0-0] points:[g#9,SET-g#2,DEL] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] 
ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000005:[k#9,SET-k#2,DEL] seqnums:[0-0] points:[k#9,SET-k#2,DEL] +000005:[k#9,SET-k#2,DEL] seqnums:[0-0] points:[k#9,SET-k#2,DEL] +000005:[k#9,SET-k#2,DEL] seqnums:[0-0] points:[k#9,SET-k#2,DEL] +. +. + +iter key-type=points +seek-lt a +seek-lt apple +seek-lt b +seek-lt banana +seek-lt c +seek-lt cantalope +seek-lt d +seek-lt dragonfruit +seek-lt e +seek-lt elderberry +seek-lt f +seek-lt figs +seek-lt g +seek-lt guava +seek-lt h +seek-lt huckleberry +seek-lt i +seek-lt incaberry +seek-lt j +seek-lt jujube +seek-lt k +seek-lt kiwi +seek-lt l +---- +. 
+000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000003:[g#9,SET-g#2,DEL] seqnums:[0-0] points:[g#9,SET-g#2,DEL] +000003:[g#9,SET-g#2,DEL] seqnums:[0-0] points:[g#9,SET-g#2,DEL] +000003:[g#9,SET-g#2,DEL] seqnums:[0-0] points:[g#9,SET-g#2,DEL] +000003:[g#9,SET-g#2,DEL] seqnums:[0-0] points:[g#9,SET-g#2,DEL] +000003:[g#9,SET-g#2,DEL] seqnums:[0-0] points:[g#9,SET-g#2,DEL] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000005:[k#9,SET-k#2,DEL] seqnums:[0-0] points:[k#9,SET-k#2,DEL] +000005:[k#9,SET-k#2,DEL] seqnums:[0-0] points:[k#9,SET-k#2,DEL] + +iter key-type=ranges +seek-ge a +seek-ge apple +seek-ge b +seek-ge banana +seek-ge c +seek-ge 
cantalope +seek-ge d +seek-ge dragonfruit +seek-ge e +seek-ge elderberry +seek-ge f +seek-ge figs +seek-ge g +seek-ge guava +seek-ge h +seek-ge huckleberry +seek-ge i +seek-ge incaberry +seek-ge j +seek-ge jujube +seek-ge k +seek-ge kiwi +seek-ge l +---- +000001:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] seqnums:[0-0] ranges:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] +000001:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] seqnums:[0-0] ranges:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] +000001:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] seqnums:[0-0] ranges:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] +000001:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] seqnums:[0-0] ranges:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] +000001:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] seqnums:[0-0] ranges:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] +000001:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] seqnums:[0-0] ranges:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] 
points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +. +. +. +. + +iter key-type=ranges +seek-lt a +seek-lt apple +seek-lt b +seek-lt banana +seek-lt c +seek-lt cantalope +seek-lt d +seek-lt dragonfruit +seek-lt e +seek-lt elderberry +seek-lt f +seek-lt figs +seek-lt g +seek-lt guava +seek-lt h +seek-lt huckleberry +seek-lt i +seek-lt incaberry +seek-lt j +seek-lt jujube +seek-lt k +seek-lt kiwi +seek-lt l +---- +. +. +. +. +. 
+000001:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] seqnums:[0-0] ranges:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] +000001:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] seqnums:[0-0] ranges:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] +000001:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] seqnums:[0-0] ranges:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] +000001:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] seqnums:[0-0] ranges:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] 
+000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] + +iter key-type=both +first +next +next +next +next +next +next +---- +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000001:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] seqnums:[0-0] ranges:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000003:[g#9,SET-g#2,DEL] seqnums:[0-0] points:[g#9,SET-g#2,DEL] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000005:[k#9,SET-k#2,DEL] seqnums:[0-0] points:[k#9,SET-k#2,DEL] +. + +iter key-type=points +first +next +next +next +next +next +---- +000000:[a#9,SET-b#2,DEL] seqnums:[0-0] points:[a#9,SET-b#2,DEL] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000003:[g#9,SET-g#2,DEL] seqnums:[0-0] points:[g#9,SET-g#2,DEL] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +000005:[k#9,SET-k#2,DEL] seqnums:[0-0] points:[k#9,SET-k#2,DEL] +. + +iter key-type=ranges +first +next +next +next +---- +000001:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] seqnums:[0-0] ranges:[c#9,RANGEKEYSET-d#inf,RANGEKEYSET] +000002:[e#9,SET-f#inf,RANGEKEYDEL] seqnums:[0-0] points:[e#9,SET-elderberry#2,DEL] ranges:[e#3,RANGEKEYSET-f#inf,RANGEKEYDEL] +000004:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] seqnums:[0-0] points:[incaberry#9,SET-incaettry#9,SET] ranges:[i#9,RANGEKEYSET-j#2,RANGEKEYSET] +. 
diff --git a/pebble/internal/manifest/testdata/overlaps b/pebble/internal/manifest/testdata/overlaps new file mode 100644 index 0000000..e584991 --- /dev/null +++ b/pebble/internal/manifest/testdata/overlaps @@ -0,0 +1,566 @@ +define +0: + 000700:[b#7008,SET-e#7009,SET] + 000701:[c#7018,SET-f#7019,SET] + 000702:[f#7028,SET-g#7029,SET] + 000703:[x#7038,SET-y#7039,SET] + 000704:[n#7048,SET-p#7049,SET] + 000705:[p#7058,SET-p#7059,SET] + 000706:[p#7068,SET-u#7069,SET] + 000707:[r#7078,SET-s#7079,SET] +1: + 000710:[a#7140,SET-d#inf,RANGEDEL] + 000711:[d#7108,SET-g#7109,SET] + 000712:[g#7118,SET-j#7119,SET] + 000713:[n#7128,SET-p#7129,SET] + 000714:[p#7148,SET-p#7149,SET] + 000715:[p#7138,SET-u#7139,SET] +---- +0.3: + 000704:[n#7048,SET-p#7049,SET] +0.2: + 000700:[b#7008,SET-e#7009,SET] + 000705:[p#7058,SET-p#7059,SET] +0.1: + 000701:[c#7018,SET-f#7019,SET] + 000706:[p#7068,SET-u#7069,SET] +0.0: + 000702:[f#7028,SET-g#7029,SET] + 000707:[r#7078,SET-s#7079,SET] + 000703:[x#7038,SET-y#7039,SET] +1: + 000710:[a#7140,SET-d#inf,RANGEDEL] + 000711:[d#7108,SET-g#7109,SET] + 000712:[g#7118,SET-j#7119,SET] + 000713:[n#7128,SET-p#7129,SET] + 000714:[p#7148,SET-p#7149,SET] + 000715:[p#7138,SET-u#7139,SET] + +# Level 0 + +overlaps level=0 start=a end=a exclusive-end=false +---- +0 files: + +overlaps level=0 start=a end=b exclusive-end=false +---- +3 files: +000700:[b#7008,SET-e#7009,SET] +000701:[c#7018,SET-f#7019,SET] +000702:[f#7028,SET-g#7029,SET] + +overlaps level=0 start=a end=d exclusive-end=false +---- +3 files: +000700:[b#7008,SET-e#7009,SET] +000701:[c#7018,SET-f#7019,SET] +000702:[f#7028,SET-g#7029,SET] + +overlaps level=0 start=a end=e exclusive-end=false +---- +3 files: +000700:[b#7008,SET-e#7009,SET] +000701:[c#7018,SET-f#7019,SET] +000702:[f#7028,SET-g#7029,SET] + +overlaps level=0 start=a end=g exclusive-end=false +---- +3 files: +000700:[b#7008,SET-e#7009,SET] +000701:[c#7018,SET-f#7019,SET] +000702:[f#7028,SET-g#7029,SET] + +overlaps level=0 start=a end=z 
exclusive-end=false +---- +8 files: +000700:[b#7008,SET-e#7009,SET] +000701:[c#7018,SET-f#7019,SET] +000702:[f#7028,SET-g#7029,SET] +000703:[x#7038,SET-y#7039,SET] +000704:[n#7048,SET-p#7049,SET] +000705:[p#7058,SET-p#7059,SET] +000706:[p#7068,SET-u#7069,SET] +000707:[r#7078,SET-s#7079,SET] + +overlaps level=0 start=c end=e exclusive-end=false +---- +3 files: +000700:[b#7008,SET-e#7009,SET] +000701:[c#7018,SET-f#7019,SET] +000702:[f#7028,SET-g#7029,SET] + +overlaps level=0 start=d end=d exclusive-end=false +---- +3 files: +000700:[b#7008,SET-e#7009,SET] +000701:[c#7018,SET-f#7019,SET] +000702:[f#7028,SET-g#7029,SET] + +# The below case relies on exclusive-end changing to false after picking some file. + +overlaps level=0 start=b end=f exclusive-end=true +---- +3 files: +000700:[b#7008,SET-e#7009,SET] +000701:[c#7018,SET-f#7019,SET] +000702:[f#7028,SET-g#7029,SET] + +overlaps level=0 start=g end=n exclusive-end=false +---- +7 files: +000700:[b#7008,SET-e#7009,SET] +000701:[c#7018,SET-f#7019,SET] +000702:[f#7028,SET-g#7029,SET] +000704:[n#7048,SET-p#7049,SET] +000705:[p#7058,SET-p#7059,SET] +000706:[p#7068,SET-u#7069,SET] +000707:[r#7078,SET-s#7079,SET] + +overlaps level=0 start=h end=i exclusive-end=false +---- +0 files: + +overlaps level=0 start=h end=o exclusive-end=false +---- +4 files: +000704:[n#7048,SET-p#7049,SET] +000705:[p#7058,SET-p#7059,SET] +000706:[p#7068,SET-u#7069,SET] +000707:[r#7078,SET-s#7079,SET] + +overlaps level=0 start=h end=u exclusive-end=false +---- +4 files: +000704:[n#7048,SET-p#7049,SET] +000705:[p#7058,SET-p#7059,SET] +000706:[p#7068,SET-u#7069,SET] +000707:[r#7078,SET-s#7079,SET] + +overlaps level=0 start=k end=l exclusive-end=false +---- +0 files: + +overlaps level=0 start=k end=o exclusive-end=false +---- +4 files: +000704:[n#7048,SET-p#7049,SET] +000705:[p#7058,SET-p#7059,SET] +000706:[p#7068,SET-u#7069,SET] +000707:[r#7078,SET-s#7079,SET] + +overlaps level=0 start=k end=p exclusive-end=false +---- +4 files: 
+000704:[n#7048,SET-p#7049,SET] +000705:[p#7058,SET-p#7059,SET] +000706:[p#7068,SET-u#7069,SET] +000707:[r#7078,SET-s#7079,SET] + +overlaps level=0 start=n end=o exclusive-end=false +---- +4 files: +000704:[n#7048,SET-p#7049,SET] +000705:[p#7058,SET-p#7059,SET] +000706:[p#7068,SET-u#7069,SET] +000707:[r#7078,SET-s#7079,SET] + +overlaps level=0 start=n end=z exclusive-end=false +---- +5 files: +000703:[x#7038,SET-y#7039,SET] +000704:[n#7048,SET-p#7049,SET] +000705:[p#7058,SET-p#7059,SET] +000706:[p#7068,SET-u#7069,SET] +000707:[r#7078,SET-s#7079,SET] + +overlaps level=0 start=o end=z exclusive-end=false +---- +5 files: +000703:[x#7038,SET-y#7039,SET] +000704:[n#7048,SET-p#7049,SET] +000705:[p#7058,SET-p#7059,SET] +000706:[p#7068,SET-u#7069,SET] +000707:[r#7078,SET-s#7079,SET] + +overlaps level=0 start=p end=z exclusive-end=false +---- +5 files: +000703:[x#7038,SET-y#7039,SET] +000704:[n#7048,SET-p#7049,SET] +000705:[p#7058,SET-p#7059,SET] +000706:[p#7068,SET-u#7069,SET] +000707:[r#7078,SET-s#7079,SET] + +overlaps level=0 start=q end=z exclusive-end=false +---- +5 files: +000703:[x#7038,SET-y#7039,SET] +000704:[n#7048,SET-p#7049,SET] +000705:[p#7058,SET-p#7059,SET] +000706:[p#7068,SET-u#7069,SET] +000707:[r#7078,SET-s#7079,SET] + +overlaps level=0 start=r end=s exclusive-end=false +---- +4 files: +000704:[n#7048,SET-p#7049,SET] +000705:[p#7058,SET-p#7059,SET] +000706:[p#7068,SET-u#7069,SET] +000707:[r#7078,SET-s#7079,SET] + +overlaps level=0 start=r end=z exclusive-end=false +---- +5 files: +000703:[x#7038,SET-y#7039,SET] +000704:[n#7048,SET-p#7049,SET] +000705:[p#7058,SET-p#7059,SET] +000706:[p#7068,SET-u#7069,SET] +000707:[r#7078,SET-s#7079,SET] + +overlaps level=0 start=s end=z exclusive-end=false +---- +5 files: +000703:[x#7038,SET-y#7039,SET] +000704:[n#7048,SET-p#7049,SET] +000705:[p#7058,SET-p#7059,SET] +000706:[p#7068,SET-u#7069,SET] +000707:[r#7078,SET-s#7079,SET] + +overlaps level=0 start=u end=z exclusive-end=false +---- +5 files: 
+000703:[x#7038,SET-y#7039,SET] +000704:[n#7048,SET-p#7049,SET] +000705:[p#7058,SET-p#7059,SET] +000706:[p#7068,SET-u#7069,SET] +000707:[r#7078,SET-s#7079,SET] + +overlaps level=0 start=y end=z exclusive-end=false +---- +1 files: +000703:[x#7038,SET-y#7039,SET] + +overlaps level=0 start=z end=z exclusive-end=false +---- +0 files: + +# Level 1 + +overlaps level=1 start=a end=a exclusive-end=false +---- +1 files: +000710:[a#7140,SET-d#inf,RANGEDEL] + +overlaps level=1 start=a end=b exclusive-end=false +---- +1 files: +000710:[a#7140,SET-d#inf,RANGEDEL] + +overlaps level=1 start=a end=d exclusive-end=false +---- +2 files: +000710:[a#7140,SET-d#inf,RANGEDEL] +000711:[d#7108,SET-g#7109,SET] + +overlaps level=1 start=a end=e exclusive-end=false +---- +2 files: +000710:[a#7140,SET-d#inf,RANGEDEL] +000711:[d#7108,SET-g#7109,SET] + +overlaps level=1 start=a end=g exclusive-end=false +---- +3 files: +000710:[a#7140,SET-d#inf,RANGEDEL] +000711:[d#7108,SET-g#7109,SET] +000712:[g#7118,SET-j#7119,SET] + +overlaps level=1 start=a end=g exclusive-end=true +---- +2 files: +000710:[a#7140,SET-d#inf,RANGEDEL] +000711:[d#7108,SET-g#7109,SET] + +overlaps level=1 start=a end=z exclusive-end=false +---- +6 files: +000710:[a#7140,SET-d#inf,RANGEDEL] +000711:[d#7108,SET-g#7109,SET] +000712:[g#7118,SET-j#7119,SET] +000713:[n#7128,SET-p#7129,SET] +000714:[p#7148,SET-p#7149,SET] +000715:[p#7138,SET-u#7139,SET] + +overlaps level=1 start=a end=z exclusive-end=true +---- +6 files: +000710:[a#7140,SET-d#inf,RANGEDEL] +000711:[d#7108,SET-g#7109,SET] +000712:[g#7118,SET-j#7119,SET] +000713:[n#7128,SET-p#7129,SET] +000714:[p#7148,SET-p#7149,SET] +000715:[p#7138,SET-u#7139,SET] + +overlaps level=1 start=c end=e exclusive-end=false +---- +2 files: +000710:[a#7140,SET-d#inf,RANGEDEL] +000711:[d#7108,SET-g#7109,SET] + +overlaps level=1 start=d end=d exclusive-end=false +---- +1 files: +000711:[d#7108,SET-g#7109,SET] + +overlaps level=1 start=g end=n exclusive-end=false +---- +3 files: 
+000711:[d#7108,SET-g#7109,SET] +000712:[g#7118,SET-j#7119,SET] +000713:[n#7128,SET-p#7129,SET] + +overlaps level=1 start=h end=i exclusive-end=false +---- +1 files: +000712:[g#7118,SET-j#7119,SET] + +overlaps level=1 start=h end=n exclusive-end=true +---- +1 files: +000712:[g#7118,SET-j#7119,SET] + +overlaps level=1 start=h end=n exclusive-end=false +---- +2 files: +000712:[g#7118,SET-j#7119,SET] +000713:[n#7128,SET-p#7129,SET] + +overlaps level=1 start=h end=o exclusive-end=false +---- +2 files: +000712:[g#7118,SET-j#7119,SET] +000713:[n#7128,SET-p#7129,SET] + +overlaps level=1 start=h end=u exclusive-end=false +---- +4 files: +000712:[g#7118,SET-j#7119,SET] +000713:[n#7128,SET-p#7129,SET] +000714:[p#7148,SET-p#7149,SET] +000715:[p#7138,SET-u#7139,SET] + +overlaps level=1 start=k end=l exclusive-end=false +---- +0 files: + +overlaps level=1 start=k end=o exclusive-end=false +---- +1 files: +000713:[n#7128,SET-p#7129,SET] + +overlaps level=1 start=k end=p exclusive-end=false +---- +3 files: +000713:[n#7128,SET-p#7129,SET] +000714:[p#7148,SET-p#7149,SET] +000715:[p#7138,SET-u#7139,SET] + +overlaps level=1 start=k end=p exclusive-end=true +---- +1 files: +000713:[n#7128,SET-p#7129,SET] + +overlaps level=1 start=n end=o exclusive-end=false +---- +1 files: +000713:[n#7128,SET-p#7129,SET] + +overlaps level=1 start=n end=z exclusive-end=false +---- +3 files: +000713:[n#7128,SET-p#7129,SET] +000714:[p#7148,SET-p#7149,SET] +000715:[p#7138,SET-u#7139,SET] + +overlaps level=1 start=o end=z exclusive-end=false +---- +3 files: +000713:[n#7128,SET-p#7129,SET] +000714:[p#7148,SET-p#7149,SET] +000715:[p#7138,SET-u#7139,SET] + +overlaps level=1 start=p end=z exclusive-end=false +---- +3 files: +000713:[n#7128,SET-p#7129,SET] +000714:[p#7148,SET-p#7149,SET] +000715:[p#7138,SET-u#7139,SET] + +overlaps level=1 start=q end=z exclusive-end=false +---- +1 files: +000715:[p#7138,SET-u#7139,SET] + +overlaps level=1 start=r end=s exclusive-end=false +---- +1 files: 
+000715:[p#7138,SET-u#7139,SET] + +overlaps level=1 start=r end=z exclusive-end=false +---- +1 files: +000715:[p#7138,SET-u#7139,SET] + +overlaps level=1 start=s end=z exclusive-end=false +---- +1 files: +000715:[p#7138,SET-u#7139,SET] + +overlaps level=1 start=u end=z exclusive-end=false +---- +1 files: +000715:[p#7138,SET-u#7139,SET] + +overlaps level=1 start=y end=z exclusive-end=false +---- +0 files: + +overlaps level=1 start=z end=z exclusive-end=false +---- +0 files: + +# Level 2 is empty. + +overlaps level=2 start=a end=z exclusive-end=false +---- +0 files: + +# Test a scenario where an originally exclusive-end must be promoted to +# inclusive during the iterative expansion of L0 overlaps. +# +# 000003 with the f largest bound must be included. + +define +0: + 000001:[a#1,SET-d#2,SET] + 000002:[c#3,SET-f#4,SET] + 000003:[f#5,SET-f#5,SET] +---- +0.2: + 000001:[a#1,SET-d#2,SET] +0.1: + 000002:[c#3,SET-f#4,SET] +0.0: + 000003:[f#5,SET-f#5,SET] + +overlaps level=0 start=a end=b exclusive-end=true +---- +3 files: +000001:[a#1,SET-d#2,SET] +000002:[c#3,SET-f#4,SET] +000003:[f#5,SET-f#5,SET] + +# The below is a verbatim reproduction of the case detected by the +# metamorphic tests in pebble#1459: The above case is already a +# simplified version of the same condition. The verbatim reproduction is +# included for completeness. 
+ +define +0.4: + 000987:[aiinjp@20#4667,SET-fcklu@5#inf,RANGEDEL] + 000988:[fcklu@5#4668,MERGE-glpw@1#inf,RANGEDEL] + 000989:[glpw@1#4662,RANGEDEL-mlgxnog@19#inf,RANGEDEL] + 000990:[mlgxnog@19#4662,RANGEDEL-nwnmqtyvjt@5#inf,RANGEDEL] + 000991:[nwnmqtyvjt@5#4662,RANGEDEL-wmkrrxp@6#inf,RANGEDEL] +0.3: + 000978:[dygfdczcax@15#4609,DEL-vtocgpw@18#4609,DEL] + 000992:[wmkrrxp@6#4657,MERGE-yyquzcd@21#4624,SET] + 000993:[zslykqao@12#4636,SINGLEDEL-zzqwavxgrec@12#4627,DEL] +0.2: + 000981:[fhcykuix@5#4601,MERGE-kiati@10#4595,MERGE] + 000977:[mgksrvk@15#4598,DEL-mgksrvk@15#4598,DEL] + 000982:[nirnrarzktp@12#4600,MERGE-zaowx@3#4602,SET] + 000828:[zzqwavxgrec@12#4092,SINGLEDEL-zzqwavxgrec@12#4092,SINGLEDEL] +0.1: + 000980:[dusu@10#4603,SET-duyeldgvnll@21#4605,SET] + 000973:[ewqqtp@15#4591,RANGEDEL-zaygjmy@1#inf,RANGEDEL] + 000605:[zzqwavxgrec@12#2894,SET-zzqwavxgrec@12#2894,SET] +0.0: + 000910:[abddymplk@20#4370,MERGE-abddymplk@20#4370,MERGE] + 000939:[abvukibeofb@13#4439,SET-abvukibeofb@13#4439,SET] + 000975:[ajoqjxr@16#4578,MERGE-zjyqka@1#4544,DEL] + 000983:[znnoar@20#4604,SINGLEDEL-znnoar@20#4604,SINGLEDEL] + 000535:[zzqwavxgrec@12#2657,SINGLEDEL-zzqwavxgrec@12#2526,SET] +5: + 000971:[acutc@6#4227,SET-zzhra@12#inf,RANGEDEL] +6: + 000806:[gourk@18#0,SET-zzhra@2#0,SET] +---- +0.4: + 000987:[aiinjp@20#4667,SET-fcklu@5#inf,RANGEDEL] + 000988:[fcklu@5#4668,MERGE-glpw@1#inf,RANGEDEL] + 000989:[glpw@1#4662,RANGEDEL-mlgxnog@19#inf,RANGEDEL] + 000990:[mlgxnog@19#4662,RANGEDEL-nwnmqtyvjt@5#inf,RANGEDEL] + 000991:[nwnmqtyvjt@5#4662,RANGEDEL-wmkrrxp@6#inf,RANGEDEL] +0.3: + 000978:[dygfdczcax@15#4609,DEL-vtocgpw@18#4609,DEL] + 000992:[wmkrrxp@6#4657,MERGE-yyquzcd@21#4624,SET] + 000993:[zslykqao@12#4636,SINGLEDEL-zzqwavxgrec@12#4627,DEL] +0.2: + 000981:[fhcykuix@5#4601,MERGE-kiati@10#4595,MERGE] + 000977:[mgksrvk@15#4598,DEL-mgksrvk@15#4598,DEL] + 000982:[nirnrarzktp@12#4600,MERGE-zaowx@3#4602,SET] + 000828:[zzqwavxgrec@12#4092,SINGLEDEL-zzqwavxgrec@12#4092,SINGLEDEL] +0.1: + 
000980:[dusu@10#4603,SET-duyeldgvnll@21#4605,SET] + 000973:[ewqqtp@15#4591,RANGEDEL-zaygjmy@1#inf,RANGEDEL] + 000605:[zzqwavxgrec@12#2894,SET-zzqwavxgrec@12#2894,SET] +0.0: + 000910:[abddymplk@20#4370,MERGE-abddymplk@20#4370,MERGE] + 000939:[abvukibeofb@13#4439,SET-abvukibeofb@13#4439,SET] + 000975:[ajoqjxr@16#4578,MERGE-zjyqka@1#4544,DEL] + 000983:[znnoar@20#4604,SINGLEDEL-znnoar@20#4604,SINGLEDEL] + 000535:[zzqwavxgrec@12#2657,SINGLEDEL-zzqwavxgrec@12#2526,SET] +5: + 000971:[acutc@6#4227,SET-zzhra@12#inf,RANGEDEL] +6: + 000806:[gourk@18#0,SET-zzhra@2#0,SET] + +overlaps level=0 start=heacptnep@12 end=kiicbzwtpe@16 exclusive-end=false +---- +13 files: +000973:[ewqqtp@15#4591,RANGEDEL-zaygjmy@1#inf,RANGEDEL] +000975:[ajoqjxr@16#4578,MERGE-zjyqka@1#4544,DEL] +000977:[mgksrvk@15#4598,DEL-mgksrvk@15#4598,DEL] +000978:[dygfdczcax@15#4609,DEL-vtocgpw@18#4609,DEL] +000980:[dusu@10#4603,SET-duyeldgvnll@21#4605,SET] +000981:[fhcykuix@5#4601,MERGE-kiati@10#4595,MERGE] +000982:[nirnrarzktp@12#4600,MERGE-zaowx@3#4602,SET] +000987:[aiinjp@20#4667,SET-fcklu@5#inf,RANGEDEL] +000988:[fcklu@5#4668,MERGE-glpw@1#inf,RANGEDEL] +000989:[glpw@1#4662,RANGEDEL-mlgxnog@19#inf,RANGEDEL] +000990:[mlgxnog@19#4662,RANGEDEL-nwnmqtyvjt@5#inf,RANGEDEL] +000991:[nwnmqtyvjt@5#4662,RANGEDEL-wmkrrxp@6#inf,RANGEDEL] +000992:[wmkrrxp@6#4657,MERGE-yyquzcd@21#4624,SET] + +overlaps level=0 start=acutc@6 end=zzhra@12 exclusive-end=true +---- +18 files: +000535:[zzqwavxgrec@12#2657,SINGLEDEL-zzqwavxgrec@12#2526,SET] +000605:[zzqwavxgrec@12#2894,SET-zzqwavxgrec@12#2894,SET] +000828:[zzqwavxgrec@12#4092,SINGLEDEL-zzqwavxgrec@12#4092,SINGLEDEL] +000973:[ewqqtp@15#4591,RANGEDEL-zaygjmy@1#inf,RANGEDEL] +000975:[ajoqjxr@16#4578,MERGE-zjyqka@1#4544,DEL] +000977:[mgksrvk@15#4598,DEL-mgksrvk@15#4598,DEL] +000978:[dygfdczcax@15#4609,DEL-vtocgpw@18#4609,DEL] +000980:[dusu@10#4603,SET-duyeldgvnll@21#4605,SET] +000981:[fhcykuix@5#4601,MERGE-kiati@10#4595,MERGE] +000982:[nirnrarzktp@12#4600,MERGE-zaowx@3#4602,SET] 
+000983:[znnoar@20#4604,SINGLEDEL-znnoar@20#4604,SINGLEDEL] +000987:[aiinjp@20#4667,SET-fcklu@5#inf,RANGEDEL] +000988:[fcklu@5#4668,MERGE-glpw@1#inf,RANGEDEL] +000989:[glpw@1#4662,RANGEDEL-mlgxnog@19#inf,RANGEDEL] +000990:[mlgxnog@19#4662,RANGEDEL-nwnmqtyvjt@5#inf,RANGEDEL] +000991:[nwnmqtyvjt@5#4662,RANGEDEL-wmkrrxp@6#inf,RANGEDEL] +000992:[wmkrrxp@6#4657,MERGE-yyquzcd@21#4624,SET] +000993:[zslykqao@12#4636,SINGLEDEL-zzqwavxgrec@12#4627,DEL] diff --git a/pebble/internal/manifest/testdata/version_check_ordering b/pebble/internal/manifest/testdata/version_check_ordering new file mode 100644 index 0000000..9f8f710 --- /dev/null +++ b/pebble/internal/manifest/testdata/version_check_ordering @@ -0,0 +1,302 @@ +# Note: when specifying test cases with tables in L0, the L0 files should be +# specified in seqnum descending order, as the test case input is parsed as the +# inverse of `(*FileMetadata).DebugString`. + +check-ordering +0: + 000001:[a#1,SET-b#2,SET] +---- +OK + +check-ordering +0: + 000002:[c#3,SET-d#4,SET] + 000001:[a#1,SET-b#2,SET] +---- +OK + +check-ordering +0: + 000002:[a#1,SET-b#2,SET] + 000001:[c#3,SET-d#4,SET] +---- +L0 files 000001 and 000002 are not properly ordered: <#3-#4> vs <#1-#2> +0.0: + 000002:[a#1,SET-b#2,SET] seqnums:[1-2] points:[a#1,SET-b#2,SET] + 000001:[c#3,SET-d#4,SET] seqnums:[3-4] points:[c#3,SET-d#4,SET] + +check-ordering +0: + 000008:[k#16,SET-n#19,SET] + 000007:[a#14,SET-j#17,SET] + 000006:[b#15,SET-d#15,SET] + 000005:[i#8,SET-j#13,SET] + 000004:[g#6,SET-h#12,SET] + 000003:[e#2,SET-f#7,SET] + 000002:[a#1,SET-b#5,SET] + 000001:[c#3,SET-d#4,SET] +---- +OK + +# Add some ingested SSTables around the 14-19 seqnum cases. 
+check-ordering +0: + 000010:[m#20,SET-n#20,SET] + 000009:[k#16,SET-n#19,SET] + 000008:[m#18,SET-n#18,SET] + 000007:[a#14,SET-j#17,SET] + 000006:[b#15,SET-d#15,SET] + 000005:[i#8,SET-j#13,SET] + 000004:[g#6,SET-h#12,SET] + 000003:[e#2,SET-f#7,SET] + 000002:[a#1,SET-b#5,SET] + 000001:[c#3,SET-d#4,SET] +---- +OK + +# Coincident sequence numbers around sstables with overlapping sequence numbers +# are possible due to flush splitting, so this is acceptable. +check-ordering +0: + 000010:[m#20,SET-n#20,SET] + 000009:[k#16,SET-n#19,SET] + 000008:[m#18,SET-n#18,SET] + 000007:[a#15,SET-j#17,SET] + 000006:[b#15,SET-d#15,SET] + 000005:[i#8,SET-j#13,SET] + 000004:[g#6,SET-h#12,SET] + 000003:[e#2,SET-f#7,SET] + 000002:[a#1,SET-b#5,SET] + 000001:[c#3,SET-d#4,SET] +---- +OK + +# Ensure that sstables passed in a non-sorted order are detected. +check-ordering +0: + 000002:[a#1,SET-b#2,SET] + 000001:[a#3,SET-d#3,SET] +---- +L0 files 000001 and 000002 are not properly ordered: <#3-#3> vs <#1-#2> +0.1: + 000002:[a#1,SET-b#2,SET] seqnums:[1-2] points:[a#1,SET-b#2,SET] +0.0: + 000001:[a#3,SET-d#3,SET] seqnums:[3-3] points:[a#3,SET-d#3,SET] + +check-ordering +0: + 000002:[a#3,SET-b#3,SET] + 000001:[a#2,SET-d#4,SET] +---- +L0 files 000001 and 000002 are not properly ordered: <#2-#4> vs <#3-#3> +0.1: + 000002:[a#3,SET-b#3,SET] seqnums:[3-3] points:[a#3,SET-b#3,SET] +0.0: + 000001:[a#2,SET-d#4,SET] seqnums:[2-4] points:[a#2,SET-d#4,SET] + +check-ordering +0: + 000002:[a#3,SET-b#3,SET] + 000001:[a#3,SET-d#3,SET] +---- +OK + +check-ordering +0: + 000002:[a#3,SET-d#5,SET] + 000001:[a#3,SET-d#3,SET] +---- +OK + +check-ordering +0: + 000002:[a#3,SET-d#5,SET] + 000001:[a#4,SET-d#4,SET] +---- +OK + +check-ordering +0: + 000002:[a#5,SET-d#5,SET] + 000001:[a#3,SET-d#5,SET] +---- +OK + +check-ordering +0: + 000003:[a#4,SET-d#6,SET] + 000002:[a#5,SET-d#5,SET] + 000001:[a#4,SET-d#4,SET] +---- +OK + +check-ordering +0: + 000003:[a#0,SET-d#3,SET] + 000002:[a#0,SET-d#0,SET] + 000001:[a#0,SET-d#0,SET] 
+---- +OK + +check-ordering +1: + 000001:[a#1,SET-b#2,SET] +---- +OK + +check-ordering +1: + 000001:[b#1,SET-a#2,SET] +---- +L1 : file 000001 has inconsistent bounds: b#1,SET vs a#2,SET +1: + 000001:[b#1,SET-a#2,SET] seqnums:[0-0] points:[b#1,SET-a#2,SET] + +check-ordering +1: + 000001:[a#1,SET-b#2,SET] + 000002:[c#3,SET-d#4,SET] +---- +OK + +check-ordering +1: + 000001:[a#1,SET-b#2,SET] + 000002:[d#3,SET-c#4,SET] +---- +L1 : file 000002 has inconsistent bounds: d#3,SET vs c#4,SET +1: + 000001:[a#1,SET-b#2,SET] seqnums:[0-0] points:[a#1,SET-b#2,SET] + 000002:[d#3,SET-c#4,SET] seqnums:[0-0] points:[d#3,SET-c#4,SET] + +check-ordering +1: + 000001:[a#1,SET-b#2,SET] + 000002:[b#1,SET-d#4,SET] +---- +L1 files 000001 and 000002 have overlapping ranges: [a#1,SET-b#2,SET] vs [b#1,SET-d#4,SET] +1: + 000001:[a#1,SET-b#2,SET] seqnums:[0-0] points:[a#1,SET-b#2,SET] + 000002:[b#1,SET-d#4,SET] seqnums:[0-0] points:[b#1,SET-d#4,SET] + +check-ordering allow-split-user-keys +1: + 000001:[a#1,SET-b#2,SET] + 000002:[b#1,SET-d#4,SET] +---- +OK + +check-ordering +1: + 000001:[a#1,SET-b#2,SET] + 000002:[b#2,SET-d#4,SET] +---- +L1 files 000001 and 000002 have overlapping ranges: [a#1,SET-b#2,SET] vs [b#2,SET-d#4,SET] +1: + 000001:[a#1,SET-b#2,SET] seqnums:[0-0] points:[a#1,SET-b#2,SET] + 000002:[b#2,SET-d#4,SET] seqnums:[0-0] points:[b#2,SET-d#4,SET] + +check-ordering +1: + 000001:[a#1,SET-c#2,SET] + 000002:[b#3,SET-d#4,SET] +---- +L1 files 000001 and 000002 have overlapping ranges: [a#1,SET-c#2,SET] vs [b#3,SET-d#4,SET] +1: + 000001:[a#1,SET-c#2,SET] seqnums:[0-0] points:[a#1,SET-c#2,SET] + 000002:[b#3,SET-d#4,SET] seqnums:[0-0] points:[b#3,SET-d#4,SET] + +check-ordering +1: + 000001:[a#1,SET-c#2,SET] +2: + 000002:[b#3,SET-d#4,SET] +---- +OK + +check-ordering +1: + 000001:[a#1,SET-c#2,SET] +2: + 000002:[b#3,SET-d#4,SET] + 000003:[c#5,SET-e#6,SET] +---- +L2 files 000002 and 000003 have overlapping ranges: [b#3,SET-d#4,SET] vs [c#5,SET-e#6,SET] +1: + 000001:[a#1,SET-c#2,SET] seqnums:[0-0] 
points:[a#1,SET-c#2,SET] +2: + 000002:[b#3,SET-d#4,SET] seqnums:[0-0] points:[b#3,SET-d#4,SET] + 000003:[c#5,SET-e#6,SET] seqnums:[0-0] points:[c#5,SET-e#6,SET] + +# Ordering considers tables with just range keys. + +check-ordering +0: + 000002:[c#3,RANGEKEYSET-d#inf,RANGEKEYSET] ranges:[c#3,RANGEKEYSET-d#inf,RANGEKEYSET] + 000001:[a#1,RANGEKEYSET-b#inf,RANGEKEYSET] ranges:[a#1,RANGEKEYSET-b#inf,RANGEKEYSET] +---- +OK + +check-ordering +0: + 000002:[c#1,RANGEKEYSET-d#inf,RANGEKEYSET] ranges:[c#1,RANGEKEYSET-d#inf,RANGEKEYSET] + 000001:[a#3,RANGEKEYSET-b#inf,RANGEKEYSET] ranges:[a#3,RANGEKEYSET-b#inf,RANGEKEYSET] +---- +L0 files 000001 and 000002 are not properly ordered: <#3-#72057594037927935> vs <#1-#72057594037927935> +0.0: + 000001:[a#3,RANGEKEYSET-b#inf,RANGEKEYSET] seqnums:[3-72057594037927935] ranges:[a#3,RANGEKEYSET-b#inf,RANGEKEYSET] + 000002:[c#1,RANGEKEYSET-d#inf,RANGEKEYSET] seqnums:[1-72057594037927935] ranges:[c#1,RANGEKEYSET-d#inf,RANGEKEYSET] + +check-ordering +1: + 000001:[a#1,RANGEKEYSET-b#inf,RANGEKEYSET] ranges:[a#1,RANGEKEYSET-b#inf,RANGEKEYSET] + 000002:[c#3,RANGEKEYSET-d#inf,RANGEKEYSET] ranges:[c#3,RANGEKEYSET-d#inf,RANGEKEYSET] +---- +OK + +check-ordering +1: + 000001:[c#3,RANGEKEYSET-d#inf,RANGEKEYSET] ranges:[c#3,RANGEKEYSET-d#inf,RANGEKEYSET] + 000002:[a#1,RANGEKEYSET-b#inf,RANGEKEYSET] ranges:[a#1,RANGEKEYSET-b#inf,RANGEKEYSET] +---- +L1 files 000001 and 000002 are not properly ordered: [c#3,RANGEKEYSET-d#inf,RANGEKEYSET] vs [a#1,RANGEKEYSET-b#inf,RANGEKEYSET] +1: + 000001:[c#3,RANGEKEYSET-d#inf,RANGEKEYSET] seqnums:[0-0] ranges:[c#3,RANGEKEYSET-d#inf,RANGEKEYSET] + 000002:[a#1,RANGEKEYSET-b#inf,RANGEKEYSET] seqnums:[0-0] ranges:[a#1,RANGEKEYSET-b#inf,RANGEKEYSET] + +# Ordering considers tables with both point and range keys. 
+ +check-ordering +0: + 000002:[c#1,RANGEKEYSET-e#4,SET] points:[d#3,SET-e#4,SET] ranges:[c#1,RANGEKEYSET-d#inf,RANGEKEYSET] + 000001:[a#1,RANGEKEYSET-c#2,SET] points:[b#1,SET-c#2,SET] ranges:[a#1,RANGEKEYSET-b#inf,RANGEKEYSET] +---- +OK + +check-ordering +0: + 000002:[c#1,RANGEKEYSET-e#2,SET] points:[d#3,SET-e#2,SET] ranges:[c#1,RANGEKEYSET-d#inf,RANGEKEYSET] + 000001:[a#1,RANGEKEYSET-c#4,SET] points:[b#1,SET-c#4,SET] ranges:[a#1,RANGEKEYSET-b#inf,RANGEKEYSET] +---- +L0 files 000001 and 000002 are not properly ordered: <#1-#4> vs <#1-#2> +0.1: + 000002:[c#1,RANGEKEYSET-e#2,SET] seqnums:[1-2] points:[d#3,SET-e#2,SET] ranges:[c#1,RANGEKEYSET-d#inf,RANGEKEYSET] +0.0: + 000001:[a#1,RANGEKEYSET-c#4,SET] seqnums:[1-4] points:[b#1,SET-c#4,SET] ranges:[a#1,RANGEKEYSET-b#inf,RANGEKEYSET] + +check-ordering +1: + 000001:[a#1,RANGEKEYSET-c#2,SET] points:[b#1,SET-c#2,SET] ranges:[a#1,RANGEKEYSET-b#inf,RANGEKEYSET] + 000002:[d#3,RANGEKEYSET-f#4,SET] points:[e#3,SET-f#4,SET] ranges:[d#3,RANGEKEYSET-e#inf,RANGEKEYSET] +---- +OK + +check-ordering +1: + 000001:[a#1,RANGEKEYSET-c#2,SET] points:[b#1,SET-c#2,SET] ranges:[a#1,RANGEKEYSET-b#inf,RANGEKEYSET] + 000002:[c#3,RANGEKEYSET-f#4,SET] points:[e#3,SET-f#4,SET] ranges:[c#3,RANGEKEYSET-e#inf,RANGEKEYSET] +---- +L1 files 000001 and 000002 have overlapping ranges: [a#1,RANGEKEYSET-c#2,SET] vs [c#3,RANGEKEYSET-f#4,SET] +1: + 000001:[a#1,RANGEKEYSET-c#2,SET] seqnums:[0-0] points:[b#1,SET-c#2,SET] ranges:[a#1,RANGEKEYSET-b#inf,RANGEKEYSET] + 000002:[c#3,RANGEKEYSET-f#4,SET] seqnums:[0-0] points:[e#3,SET-f#4,SET] ranges:[c#3,RANGEKEYSET-e#inf,RANGEKEYSET] diff --git a/pebble/internal/manifest/testdata/version_edit_apply b/pebble/internal/manifest/testdata/version_edit_apply new file mode 100644 index 0000000..94df012 --- /dev/null +++ b/pebble/internal/manifest/testdata/version_edit_apply @@ -0,0 +1,191 @@ +apply + L0 + 1:[a#1,SET-b#2,SET] + 2:[c#3,SET-d#4,SET] +edit + delete + L0 + 1 + add + L2 + 1:[a#1,SET-b#2,SET] + 4:[c#3,SET-d#4,SET] 
+---- +0.0: + 000002:[c#3,SET-d#4,SET] +2: + 000001:[a#1,SET-b#2,SET] + 000004:[c#3,SET-d#4,SET] +zombies [] + +apply + L0 + 1:[a#1,SET-b#2,SET] + 2:[c#3,SET-d#4,SET] +edit + delete + L1 + 1 +---- +pebble: internal error: No current or added files but have deleted files: 1 + +apply + L0 + 1:[a#1,SET-c#2,SET] + 2:[c#3,SET-d#4,SET] +edit + delete + L0 + 1 + add + L2 + 1:[a#1,SET-c#2,SET] + 4:[b#3,SET-d#4,SET] +---- +pebble: internal error: L2 files 000001 and 000004 have overlapping ranges: [a#1,SET-c#2,SET] vs [b#3,SET-d#4,SET] + +apply + L0 + 1:[a#1,SET-c#2,SET] + 2:[c#3,SET-d#4,SET] +edit + add + L0 + 4:[b#3,SET-d#5,SET] +---- +0.2: + 000004:[b#3,SET-d#5,SET] +0.1: + 000002:[c#3,SET-d#4,SET] +0.0: + 000001:[a#1,SET-c#2,SET] +zombies [] + +apply + L0 + 1:[a#1,SET-c#2,SET] + 2:[c#3,SET-d#4,SET] +edit + add + L0 + 4:[b#0,SET-d#0,SET] +---- +0.2: + 000002:[c#3,SET-d#4,SET] +0.1: + 000001:[a#1,SET-c#2,SET] +0.0: + 000004:[b#0,SET-d#0,SET] +zombies [] + + +apply +edit + add + L0 + 1:[a#1,SET-c#2,SET] + 4:[b#3,SET-d#5,SET] +---- +0.1: + 000004:[b#3,SET-d#5,SET] +0.0: + 000001:[a#1,SET-c#2,SET] +zombies [] + +apply + L0 + 1:[a#1,SET-c#2,SET] +---- +0.0: + 000001:[a#1,SET-c#2,SET] +zombies [] + +apply + L2 + 3:[b#1,SET-c#2,SET] + 4:[d#3,SET-f#4,SET] + 5:[h#3,SET-h#2,SET] + 2:[n#5,SET-q#3,SET] + 1:[r#2,SET-t#1,SET] +edit + delete + L2 + 4 + 1 + add + L2 + 6:[a#10,SET-a#7,SET] + 7:[e#1,SET-g#2,SET] + 10:[j#3,SET-m#2,SET] +---- +2: + 000006:[a#10,SET-a#7,SET] + 000003:[b#1,SET-c#2,SET] + 000007:[e#1,SET-g#2,SET] + 000005:[h#3,SET-h#2,SET] + 000010:[j#3,SET-m#2,SET] + 000002:[n#5,SET-q#3,SET] +zombies [1 4] + +apply +edit + add + L2 + 10:[j#3,SET-m#2,SET] + 6:[a#10,SET-a#7,SET] +---- +2: + 000006:[a#10,SET-a#7,SET] + 000010:[j#3,SET-m#2,SET] +zombies [] + +# Verify that the zombies map is populated correctly. 
+ +apply + L0 + 1:[a#1,SET-b#2,SET] + L1 + 2:[c#3,SET-d#2,SET] +edit + delete + L0 + 1 + L1 + 2 +---- +zombies [1 2] + +# Deletion of a non-existent table results in an error. + +apply + L0 + 1:[a#1,SET-b#2,SET] +edit + delete + L0 + 2 +---- +pebble: file deleted L0.000002 before it was inserted + +apply + L0 + 1:[a#1,SET-b#2,SET] +edit + delete + L0 + 1 + add + L2 + 1:[a#1,SET-b#2,SET] + 4:[c#3,SET-d#4,SET] + 5:[s#3,SET-z#4,SET] +edit + delete + L2 + 1 + L2 + 4 +---- +2: + 000005:[s#3,SET-z#4,SET] +zombies [] diff --git a/pebble/internal/manifest/version.go b/pebble/internal/manifest/version.go new file mode 100644 index 0000000..549aa22 --- /dev/null +++ b/pebble/internal/manifest/version.go @@ -0,0 +1,1561 @@ +// Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package manifest + +import ( + "bytes" + "fmt" + "sort" + "strconv" + "strings" + "sync" + "sync/atomic" + "unicode" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/invariants" + stdcmp "github.com/cockroachdb/pebble/shims/cmp" +) + +// Compare exports the base.Compare type. +type Compare = base.Compare + +// InternalKey exports the base.InternalKey type. +type InternalKey = base.InternalKey + +// TableInfo contains the common information for table related events. +type TableInfo struct { + // FileNum is the internal DB identifier for the table. + FileNum base.FileNum + // Size is the size of the file in bytes. + Size uint64 + // Smallest is the smallest internal key in the table. + Smallest InternalKey + // Largest is the largest internal key in the table. + Largest InternalKey + // SmallestSeqNum is the smallest sequence number in the table. + SmallestSeqNum uint64 + // LargestSeqNum is the largest sequence number in the table. 
+ LargestSeqNum uint64 +} + +// TableStats contains statistics on a table used for compaction heuristics, +// and export via Metrics. +type TableStats struct { + // The total number of entries in the table. + NumEntries uint64 + // The number of point and range deletion entries in the table. + NumDeletions uint64 + // NumRangeKeySets is the total number of range key sets in the table. + // + // NB: If there's a chance that the sstable contains any range key sets, + // then NumRangeKeySets must be > 0. + NumRangeKeySets uint64 + // Estimate of the total disk space that may be dropped by this table's + // point deletions by compacting them. + PointDeletionsBytesEstimate uint64 + // Estimate of the total disk space that may be dropped by this table's + // range deletions by compacting them. This estimate is at data-block + // granularity and is not updated if compactions beneath the table reduce + // the amount of reclaimable disk space. It also does not account for + // overlapping data in L0 and ignores L0 sublevels, but the error that + // introduces is expected to be small. + // + // Tables in the bottommost level of the LSM may have a nonzero estimate if + // snapshots or move compactions prevented the elision of their range + // tombstones. A table in the bottommost level that was ingested into L6 + // will have a zero estimate, because the file's sequence numbers indicate + // that the tombstone cannot drop any data contained within the file itself. + RangeDeletionsBytesEstimate uint64 + // Total size of value blocks and value index block. + ValueBlocksSize uint64 +} + +// boundType represents the type of key (point or range) present as the smallest +// and largest keys. +type boundType uint8 + +const ( + boundTypePointKey boundType = iota + 1 + boundTypeRangeKey +) + +// CompactionState is the compaction state of a file. 
+// +// The following shows the valid state transitions: +// +// NotCompacting --> Compacting --> Compacted +// ^ | +// | | +// +-------<-------+ +// +// Input files to a compaction transition to Compacting when a compaction is +// picked. A file that has finished compacting typically transitions into the +// Compacted state, at which point it is effectively obsolete ("zombied") and +// will eventually be removed from the LSM. A file that has been move-compacted +// will transition from Compacting back into the NotCompacting state, signaling +// that the file may be selected for a subsequent compaction. A failed +// compaction will result in all input tables transitioning from Compacting to +// NotCompacting. +// +// This state is in-memory only. It is not persisted to the manifest. +type CompactionState uint8 + +// CompactionStates. +const ( + CompactionStateNotCompacting CompactionState = iota + CompactionStateCompacting + CompactionStateCompacted +) + +// String implements fmt.Stringer. +func (s CompactionState) String() string { + switch s { + case CompactionStateNotCompacting: + return "NotCompacting" + case CompactionStateCompacting: + return "Compacting" + case CompactionStateCompacted: + return "Compacted" + default: + panic(fmt.Sprintf("pebble: unknown compaction state %d", s)) + } +} + +// FileMetadata is maintained for leveled-ssts, i.e., they belong to a level of +// some version. FileMetadata does not contain the actual level of the sst, +// since such leveled-ssts can move across levels in different versions, while +// sharing the same FileMetadata. There are two kinds of leveled-ssts, physical +// and virtual. Underlying both leveled-ssts is a backing-sst, for which the +// only state is FileBacking. A backing-sst is level-less. It is possible for a +// backing-sst to be referred to by a physical sst in one version and by one or +// more virtual ssts in one or more versions. 
A backing-sst becomes obsolete +// and can be deleted once it is no longer required by any physical or virtual +// sst in any version. +// +// We maintain some invariants: +// +// 1. Each physical and virtual sst will have a unique FileMetadata.FileNum, +// and there will be exactly one FileMetadata associated with the FileNum. +// +// 2. Within a version, a backing-sst is either only referred to by one +// physical sst or one or more virtual ssts. +// +// 3. Once a backing-sst is referred to by a virtual sst in the latest version, +// it cannot go back to being referred to by a physical sst in any future +// version. +// +// Once a physical sst is no longer needed by any version, we will no longer +// maintain the file metadata associated with it. We will still maintain the +// FileBacking associated with the physical sst if the backing sst is required +// by any virtual ssts in any version. +type FileMetadata struct { + // AllowedSeeks is used to determine if a file should be picked for + // a read triggered compaction. It is decremented when read sampling + // in pebble.Iterator after every after every positioning operation + // that returns a user key (eg. Next, Prev, SeekGE, SeekLT, etc). + AllowedSeeks atomic.Int64 + + // statsValid indicates if stats have been loaded for the table. The + // TableStats structure is populated only if valid is true. + statsValid atomic.Bool + + // FileBacking is the state which backs either a physical or virtual + // sstables. + FileBacking *FileBacking + + // InitAllowedSeeks is the inital value of allowed seeks. This is used + // to re-set allowed seeks on a file once it hits 0. + InitAllowedSeeks int64 + // FileNum is the file number. + // + // INVARIANT: when !FileMetadata.Virtual, FileNum == FileBacking.DiskFileNum. + FileNum base.FileNum + // Size is the size of the file, in bytes. Size is an approximate value for + // virtual sstables. + // + // INVARIANTS: + // - When !FileMetadata.Virtual, Size == FileBacking.Size. 
+ // - Size should be non-zero. Size 0 virtual sstables must not be created. + Size uint64 + // File creation time in seconds since the epoch (1970-01-01 00:00:00 + // UTC). For ingested sstables, this corresponds to the time the file was + // ingested. For virtual sstables, this corresponds to the wall clock time + // when the FileMetadata for the virtual sstable was first created. + CreationTime int64 + // Lower and upper bounds for the smallest and largest sequence numbers in + // the table, across both point and range keys. For physical sstables, these + // values are tight bounds. For virtual sstables, there is no guarantee that + // there will be keys with SmallestSeqNum or LargestSeqNum within virtual + // sstable bounds. + SmallestSeqNum uint64 + LargestSeqNum uint64 + // SmallestPointKey and LargestPointKey are the inclusive bounds for the + // internal point keys stored in the table. This includes RANGEDELs, which + // alter point keys. + // NB: these field should be set using ExtendPointKeyBounds. They are left + // exported for reads as an optimization. + SmallestPointKey InternalKey + LargestPointKey InternalKey + // SmallestRangeKey and LargestRangeKey are the inclusive bounds for the + // internal range keys stored in the table. + // NB: these field should be set using ExtendRangeKeyBounds. They are left + // exported for reads as an optimization. + SmallestRangeKey InternalKey + LargestRangeKey InternalKey + // Smallest and Largest are the inclusive bounds for the internal keys stored + // in the table, across both point and range keys. + // NB: these fields are derived from their point and range key equivalents, + // and are updated via the MaybeExtend{Point,Range}KeyBounds methods. + Smallest InternalKey + Largest InternalKey + // Stats describe table statistics. Protected by DB.mu. + // + // For virtual sstables, set stats upon virtual sstable creation as + // asynchronous computation of stats is not currently supported. 
+ // + // TODO(bananabrick): To support manifest replay for virtual sstables, we + // probably need to compute virtual sstable stats asynchronously. Otherwise, + // we'd have to write virtual sstable stats to the version edit. + Stats TableStats + + // For L0 files only. Protected by DB.mu. Used to generate L0 sublevels and + // pick L0 compactions. Only accurate for the most recent Version. + SubLevel int + L0Index int + minIntervalIndex int + maxIntervalIndex int + + // NB: the alignment of this struct is 8 bytes. We pack all the bools to + // ensure an optimal packing. + + // IsIntraL0Compacting is set to True if this file is part of an intra-L0 + // compaction. When it's true, IsCompacting must also return true. If + // Compacting is true and IsIntraL0Compacting is false for an L0 file, the + // file must be part of a compaction to Lbase. + IsIntraL0Compacting bool + CompactionState CompactionState + // True if compaction of this file has been explicitly requested. + // Previously, RocksDB and earlier versions of Pebble allowed this + // flag to be set by a user table property collector. Some earlier + // versions of Pebble respected this flag, while other more recent + // versions ignored this flag. + // + // More recently this flag has been repurposed to facilitate the + // compaction of 'atomic compaction units'. Files marked for + // compaction are compacted in a rewrite compaction at the lowest + // possible compaction priority. + // + // NB: A count of files marked for compaction is maintained on + // Version, and compaction picking reads cached annotations + // determined by this field. + // + // Protected by DB.mu. + MarkedForCompaction bool + // HasPointKeys tracks whether the table contains point keys (including + // RANGEDELs). If a table contains only range deletions, HasPointsKeys is + // still true. + HasPointKeys bool + // HasRangeKeys tracks whether the table contains any range keys. 
+ HasRangeKeys bool + // smallestSet and largestSet track whether the overall bounds have been set. + boundsSet bool + // boundTypeSmallest and boundTypeLargest provide an indication as to which + // key type (point or range) corresponds to the smallest and largest overall + // table bounds. + boundTypeSmallest, boundTypeLargest boundType + // Virtual is true if the FileMetadata belongs to a virtual sstable. + Virtual bool +} + +// PhysicalFileMeta is used by functions which want a guarantee that their input +// belongs to a physical sst and not a virtual sst. +// +// NB: This type should only be constructed by calling +// FileMetadata.PhysicalMeta. +type PhysicalFileMeta struct { + *FileMetadata +} + +// VirtualFileMeta is used by functions which want a guarantee that their input +// belongs to a virtual sst and not a physical sst. +// +// A VirtualFileMeta inherits all the same fields as a FileMetadata. These +// fields have additional invariants imposed on them, and/or slightly varying +// meanings: +// - Smallest and Largest (and their counterparts +// {Smallest, Largest}{Point,Range}Key) remain tight bounds that represent a +// key at that exact bound. We make the effort to determine the next smallest +// or largest key in an sstable after virtualizing it, to maintain this +// tightness. If the largest is a sentinel key (IsExclusiveSentinel()), it +// could mean that a rangedel or range key ends at that user key, or has been +// truncated to that user key. +// - One invariant is that if a rangedel or range key is truncated on its +// upper bound, the virtual sstable *must* have a rangedel or range key +// sentinel key as its upper bound. This is because truncation yields +// an exclusive upper bound for the rangedel/rangekey, and if there are +// any points at that exclusive upper bound within the same virtual +// sstable, those could get uncovered by this truncation. We enforce this +// invariant in calls to keyspan.Truncate. 
+// - Size is an estimate of the size of the virtualized portion of this sstable. +// The underlying file's size is stored in FileBacking.Size, though it could +// also be estimated or could correspond to just the referenced portion of +// a file (eg. if the file originated on another node). +// - Size must be > 0. +// - SmallestSeqNum and LargestSeqNum are loose bounds for virtual sstables. +// This means that all keys in the virtual sstable must have seqnums within +// [SmallestSeqNum, LargestSeqNum], however there's no guarantee that there's +// a key with a seqnum at either of the bounds. Calculating tight seqnum +// bounds would be too expensive and deliver little value. +// +// NB: This type should only be constructed by calling FileMetadata.VirtualMeta. +type VirtualFileMeta struct { + *FileMetadata +} + +// PhysicalMeta should be the only source of creating the PhysicalFileMeta +// wrapper type. +func (m *FileMetadata) PhysicalMeta() PhysicalFileMeta { + if m.Virtual { + panic("pebble: file metadata does not belong to a physical sstable") + } + return PhysicalFileMeta{ + m, + } +} + +// VirtualMeta should be the only source of creating the VirtualFileMeta wrapper +// type. +func (m *FileMetadata) VirtualMeta() VirtualFileMeta { + if !m.Virtual { + panic("pebble: file metadata does not belong to a virtual sstable") + } + return VirtualFileMeta{ + m, + } +} + +// FileBacking either backs a single physical sstable, or one or more virtual +// sstables. +// +// See the comment above the FileMetadata type for sstable terminology. +type FileBacking struct { + // Reference count for the backing file on disk: incremented when a + // physical or virtual sstable which is backed by the FileBacking is + // added to a version and decremented when the version is unreferenced. + // We ref count in order to determine when it is safe to delete a + // backing sst file from disk. The backing file is obsolete when the + // reference count falls to zero. 
+ refs atomic.Int32 + // latestVersionRefs are the references to the FileBacking in the + // latest version. This reference can be through a single physical + // sstable in the latest version, or one or more virtual sstables in the + // latest version. + // + // INVARIANT: latestVersionRefs <= refs. + latestVersionRefs atomic.Int32 + // VirtualizedSize is set iff the backing sst is only referred to by + // virtual ssts in the latest version. VirtualizedSize is the sum of the + // virtual sstable sizes of all of the virtual sstables in the latest + // version which are backed by the physical sstable. When a virtual + // sstable is removed from the latest version, we will decrement the + // VirtualizedSize. During compaction picking, we'll compensate a + // virtual sstable file size by + // (FileBacking.Size - FileBacking.VirtualizedSize) / latestVersionRefs. + // The intuition is that if FileBacking.Size - FileBacking.VirtualizedSize + // is high, then the space amplification due to virtual sstables is + // high, and we should pick the virtual sstable with a higher priority. + // + // TODO(bananabrick): Compensate the virtual sstable file size using + // the VirtualizedSize during compaction picking and test. + VirtualizedSize atomic.Uint64 + DiskFileNum base.DiskFileNum + Size uint64 +} + +// InitPhysicalBacking allocates and sets the FileBacking which is required by a +// physical sstable FileMetadata. +// +// Ensure that the state required by FileBacking, such as the FileNum, is +// already set on the FileMetadata before InitPhysicalBacking is called. +// Calling InitPhysicalBacking only after the relevant state has been set in the +// FileMetadata is not necessary in tests which don't rely on FileBacking. 
+func (m *FileMetadata) InitPhysicalBacking() { + if m.Virtual { + panic("pebble: virtual sstables should use a pre-existing FileBacking") + } + if m.FileBacking == nil { + m.FileBacking = &FileBacking{Size: m.Size, DiskFileNum: m.FileNum.DiskFileNum()} + } +} + +// InitProviderBacking creates a new FileBacking for a file backed by +// an objstorage.Provider. +func (m *FileMetadata) InitProviderBacking(fileNum base.DiskFileNum) { + if !m.Virtual { + panic("pebble: provider-backed sstables must be virtual") + } + if m.FileBacking == nil { + m.FileBacking = &FileBacking{DiskFileNum: fileNum} + } +} + +// ValidateVirtual should be called once the FileMetadata for a virtual sstable +// is created to verify that the fields of the virtual sstable are sound. +func (m *FileMetadata) ValidateVirtual(createdFrom *FileMetadata) { + if !m.Virtual { + panic("pebble: invalid virtual sstable") + } + + if createdFrom.SmallestSeqNum != m.SmallestSeqNum { + panic("pebble: invalid smallest sequence number for virtual sstable") + } + + if createdFrom.LargestSeqNum != m.LargestSeqNum { + panic("pebble: invalid largest sequence number for virtual sstable") + } + + if createdFrom.FileBacking != nil && createdFrom.FileBacking != m.FileBacking { + panic("pebble: invalid physical sstable state for virtual sstable") + } + + if m.Size == 0 { + panic("pebble: virtual sstable size must be set upon creation") + } +} + +// Refs returns the refcount of backing sstable. +func (m *FileMetadata) Refs() int32 { + return m.FileBacking.refs.Load() +} + +// Ref increments the ref count associated with the backing sstable. +func (m *FileMetadata) Ref() { + m.FileBacking.refs.Add(1) +} + +// Unref decrements the ref count associated with the backing sstable. 
+func (m *FileMetadata) Unref() int32 { + v := m.FileBacking.refs.Add(-1) + if invariants.Enabled && v < 0 { + panic("pebble: invalid FileMetadata refcounting") + } + return v +} + +// LatestRef increments the latest ref count associated with the backing +// sstable. +func (m *FileMetadata) LatestRef() { + m.FileBacking.latestVersionRefs.Add(1) + + if m.Virtual { + m.FileBacking.VirtualizedSize.Add(m.Size) + } +} + +// LatestUnref decrements the latest ref count associated with the backing +// sstable. +func (m *FileMetadata) LatestUnref() int32 { + if m.Virtual { + m.FileBacking.VirtualizedSize.Add(-m.Size) + } + + v := m.FileBacking.latestVersionRefs.Add(-1) + if invariants.Enabled && v < 0 { + panic("pebble: invalid FileMetadata latest refcounting") + } + return v +} + +// LatestRefs returns the latest ref count associated with the backing sstable. +func (m *FileMetadata) LatestRefs() int32 { + return m.FileBacking.latestVersionRefs.Load() +} + +// SetCompactionState transitions this file's compaction state to the given +// state. Protected by DB.mu. +func (m *FileMetadata) SetCompactionState(to CompactionState) { + if invariants.Enabled { + transitionErr := func() error { + return errors.Newf("pebble: invalid compaction state transition: %s -> %s", m.CompactionState, to) + } + switch m.CompactionState { + case CompactionStateNotCompacting: + if to != CompactionStateCompacting { + panic(transitionErr()) + } + case CompactionStateCompacting: + if to != CompactionStateCompacted && to != CompactionStateNotCompacting { + panic(transitionErr()) + } + case CompactionStateCompacted: + panic(transitionErr()) + default: + panic(fmt.Sprintf("pebble: unknown compaction state: %d", m.CompactionState)) + } + } + m.CompactionState = to +} + +// IsCompacting returns true if this file's compaction state is +// CompactionStateCompacting. Protected by DB.mu. 
+func (m *FileMetadata) IsCompacting() bool { + return m.CompactionState == CompactionStateCompacting +} + +// StatsValid returns true if the table stats have been populated. If StatValid +// returns true, the Stats field may be read (with or without holding the +// database mutex). +func (m *FileMetadata) StatsValid() bool { + return m.statsValid.Load() +} + +// StatsMarkValid marks the TableStats as valid. The caller must hold DB.mu +// while populating TableStats and calling StatsMarkValud. Once stats are +// populated, they must not be mutated. +func (m *FileMetadata) StatsMarkValid() { + m.statsValid.Store(true) +} + +// ExtendPointKeyBounds attempts to extend the lower and upper point key bounds +// and overall table bounds with the given smallest and largest keys. The +// smallest and largest bounds may not be extended if the table already has a +// bound that is smaller or larger, respectively. The receiver is returned. +// NB: calling this method should be preferred to manually setting the bounds by +// manipulating the fields directly, to maintain certain invariants. +func (m *FileMetadata) ExtendPointKeyBounds( + cmp Compare, smallest, largest InternalKey, +) *FileMetadata { + // Update the point key bounds. + if !m.HasPointKeys { + m.SmallestPointKey, m.LargestPointKey = smallest, largest + m.HasPointKeys = true + } else { + if base.InternalCompare(cmp, smallest, m.SmallestPointKey) < 0 { + m.SmallestPointKey = smallest + } + if base.InternalCompare(cmp, largest, m.LargestPointKey) > 0 { + m.LargestPointKey = largest + } + } + // Update the overall bounds. + m.extendOverallBounds(cmp, m.SmallestPointKey, m.LargestPointKey, boundTypePointKey) + return m +} + +// ExtendRangeKeyBounds attempts to extend the lower and upper range key bounds +// and overall table bounds with the given smallest and largest keys. The +// smallest and largest bounds may not be extended if the table already has a +// bound that is smaller or larger, respectively. 
The receiver is returned. +// NB: calling this method should be preferred to manually setting the bounds by +// manipulating the fields directly, to maintain certain invariants. +func (m *FileMetadata) ExtendRangeKeyBounds( + cmp Compare, smallest, largest InternalKey, +) *FileMetadata { + // Update the range key bounds. + if !m.HasRangeKeys { + m.SmallestRangeKey, m.LargestRangeKey = smallest, largest + m.HasRangeKeys = true + } else { + if base.InternalCompare(cmp, smallest, m.SmallestRangeKey) < 0 { + m.SmallestRangeKey = smallest + } + if base.InternalCompare(cmp, largest, m.LargestRangeKey) > 0 { + m.LargestRangeKey = largest + } + } + // Update the overall bounds. + m.extendOverallBounds(cmp, m.SmallestRangeKey, m.LargestRangeKey, boundTypeRangeKey) + return m +} + +// extendOverallBounds attempts to extend the overall table lower and upper +// bounds. The given bounds may not be used if a lower or upper bound already +// exists that is smaller or larger than the given keys, respectively. The given +// boundType will be used if the bounds are updated. +func (m *FileMetadata) extendOverallBounds( + cmp Compare, smallest, largest InternalKey, bTyp boundType, +) { + if !m.boundsSet { + m.Smallest, m.Largest = smallest, largest + m.boundsSet = true + m.boundTypeSmallest, m.boundTypeLargest = bTyp, bTyp + } else { + if base.InternalCompare(cmp, smallest, m.Smallest) < 0 { + m.Smallest = smallest + m.boundTypeSmallest = bTyp + } + if base.InternalCompare(cmp, largest, m.Largest) > 0 { + m.Largest = largest + m.boundTypeLargest = bTyp + } + } +} + +// Overlaps returns true if the file key range overlaps with the given range. +func (m *FileMetadata) Overlaps(cmp Compare, start []byte, end []byte, exclusiveEnd bool) bool { + if c := cmp(m.Largest.UserKey, start); c < 0 || (c == 0 && m.Largest.IsExclusiveSentinel()) { + // f is completely before the specified range; no overlap. 
+ return false + } + if c := cmp(m.Smallest.UserKey, end); c > 0 || (c == 0 && exclusiveEnd) { + // f is completely after the specified range; no overlap. + return false + } + return true +} + +// ContainedWithinSpan returns true if the file key range completely overlaps with the +// given range ("end" is assumed to exclusive). +func (m *FileMetadata) ContainedWithinSpan(cmp Compare, start, end []byte) bool { + lowerCmp, upperCmp := cmp(m.Smallest.UserKey, start), cmp(m.Largest.UserKey, end) + return lowerCmp >= 0 && (upperCmp < 0 || (upperCmp == 0 && m.Largest.IsExclusiveSentinel())) +} + +// ContainsKeyType returns whether or not the file contains keys of the provided +// type. +func (m *FileMetadata) ContainsKeyType(kt KeyType) bool { + switch kt { + case KeyTypePointAndRange: + return true + case KeyTypePoint: + return m.HasPointKeys + case KeyTypeRange: + return m.HasRangeKeys + default: + panic("unrecognized key type") + } +} + +// SmallestBound returns the file's smallest bound of the key type. It returns a +// false second return value if the file does not contain any keys of the key +// type. +func (m *FileMetadata) SmallestBound(kt KeyType) (*InternalKey, bool) { + switch kt { + case KeyTypePointAndRange: + return &m.Smallest, true + case KeyTypePoint: + return &m.SmallestPointKey, m.HasPointKeys + case KeyTypeRange: + return &m.SmallestRangeKey, m.HasRangeKeys + default: + panic("unrecognized key type") + } +} + +// LargestBound returns the file's largest bound of the key type. It returns a +// false second return value if the file does not contain any keys of the key +// type. 
+func (m *FileMetadata) LargestBound(kt KeyType) (*InternalKey, bool) { + switch kt { + case KeyTypePointAndRange: + return &m.Largest, true + case KeyTypePoint: + return &m.LargestPointKey, m.HasPointKeys + case KeyTypeRange: + return &m.LargestRangeKey, m.HasRangeKeys + default: + panic("unrecognized key type") + } +} + +const ( + maskContainsPointKeys = 1 << 0 + maskSmallest = 1 << 1 + maskLargest = 1 << 2 +) + +// boundsMarker returns a marker byte whose bits encode the following +// information (in order from least significant bit): +// - if the table contains point keys +// - if the table's smallest key is a point key +// - if the table's largest key is a point key +func (m *FileMetadata) boundsMarker() (sentinel uint8, err error) { + if m.HasPointKeys { + sentinel |= maskContainsPointKeys + } + switch m.boundTypeSmallest { + case boundTypePointKey: + sentinel |= maskSmallest + case boundTypeRangeKey: + // No op - leave bit unset. + default: + return 0, base.CorruptionErrorf("file %s has neither point nor range key as smallest key", m.FileNum) + } + switch m.boundTypeLargest { + case boundTypePointKey: + sentinel |= maskLargest + case boundTypeRangeKey: + // No op - leave bit unset. + default: + return 0, base.CorruptionErrorf("file %s has neither point nor range key as largest key", m.FileNum) + } + return +} + +// String implements fmt.Stringer, printing the file number and the overall +// table bounds. +func (m *FileMetadata) String() string { + return fmt.Sprintf("%s:[%s-%s]", m.FileNum, m.Smallest, m.Largest) +} + +// DebugString returns a verbose representation of FileMetadata, typically for +// use in tests and debugging, returning the file number and the point, range +// and overall bounds for the table. 
+func (m *FileMetadata) DebugString(format base.FormatKey, verbose bool) string { + var b bytes.Buffer + fmt.Fprintf(&b, "%s:[%s-%s]", + m.FileNum, m.Smallest.Pretty(format), m.Largest.Pretty(format)) + if !verbose { + return b.String() + } + fmt.Fprintf(&b, " seqnums:[%d-%d]", m.SmallestSeqNum, m.LargestSeqNum) + if m.HasPointKeys { + fmt.Fprintf(&b, " points:[%s-%s]", + m.SmallestPointKey.Pretty(format), m.LargestPointKey.Pretty(format)) + } + if m.HasRangeKeys { + fmt.Fprintf(&b, " ranges:[%s-%s]", + m.SmallestRangeKey.Pretty(format), m.LargestRangeKey.Pretty(format)) + } + return b.String() +} + +// ParseFileMetadataDebug parses a FileMetadata from its DebugString +// representation. +func ParseFileMetadataDebug(s string) (*FileMetadata, error) { + // Split lines of the form: + // 000000:[a#0,SET-z#0,SET] seqnums:[5-5] points:[...] ranges:[...] + fields := strings.FieldsFunc(s, func(c rune) bool { + switch c { + case ':', '[', '-', ']': + return true + default: + return unicode.IsSpace(c) // NB: also trim whitespace padding. 
+ } + }) + if len(fields)%3 != 0 { + return nil, errors.Newf("malformed input: %s", s) + } + m := &FileMetadata{} + for len(fields) > 0 { + prefix := fields[0] + if prefix == "seqnums" { + smallestSeqNum, err := strconv.ParseUint(fields[1], 10, 64) + if err != nil { + return m, errors.Newf("malformed input: %s: %s", s, err) + } + largestSeqNum, err := strconv.ParseUint(fields[2], 10, 64) + if err != nil { + return m, errors.Newf("malformed input: %s: %s", s, err) + } + m.SmallestSeqNum, m.LargestSeqNum = smallestSeqNum, largestSeqNum + fields = fields[3:] + continue + } + smallest := base.ParsePrettyInternalKey(fields[1]) + largest := base.ParsePrettyInternalKey(fields[2]) + switch prefix { + case "points": + m.SmallestPointKey, m.LargestPointKey = smallest, largest + m.HasPointKeys = true + case "ranges": + m.SmallestRangeKey, m.LargestRangeKey = smallest, largest + m.HasRangeKeys = true + default: + fileNum, err := strconv.ParseUint(prefix, 10, 64) + if err != nil { + return m, errors.Newf("malformed input: %s: %s", s, err) + } + m.FileNum = base.FileNum(fileNum) + m.Smallest, m.Largest = smallest, largest + m.boundsSet = true + } + fields = fields[3:] + } + // By default, when the parser sees just the overall bounds, we set the point + // keys. This preserves backwards compatability with existing test cases that + // specify only the overall bounds. + if !m.HasPointKeys && !m.HasRangeKeys { + m.SmallestPointKey, m.LargestPointKey = m.Smallest, m.Largest + m.HasPointKeys = true + } + m.InitPhysicalBacking() + return m, nil +} + +// Validate validates the metadata for consistency with itself, returning an +// error if inconsistent. +func (m *FileMetadata) Validate(cmp Compare, formatKey base.FormatKey) error { + // Combined range and point key validation. 
+ + if !m.HasPointKeys && !m.HasRangeKeys { + return base.CorruptionErrorf("file %s has neither point nor range keys", + errors.Safe(m.FileNum)) + } + if base.InternalCompare(cmp, m.Smallest, m.Largest) > 0 { + return base.CorruptionErrorf("file %s has inconsistent bounds: %s vs %s", + errors.Safe(m.FileNum), m.Smallest.Pretty(formatKey), + m.Largest.Pretty(formatKey)) + } + if m.SmallestSeqNum > m.LargestSeqNum { + return base.CorruptionErrorf("file %s has inconsistent seqnum bounds: %d vs %d", + errors.Safe(m.FileNum), m.SmallestSeqNum, m.LargestSeqNum) + } + + // Point key validation. + + if m.HasPointKeys { + if base.InternalCompare(cmp, m.SmallestPointKey, m.LargestPointKey) > 0 { + return base.CorruptionErrorf("file %s has inconsistent point key bounds: %s vs %s", + errors.Safe(m.FileNum), m.SmallestPointKey.Pretty(formatKey), + m.LargestPointKey.Pretty(formatKey)) + } + if base.InternalCompare(cmp, m.SmallestPointKey, m.Smallest) < 0 || + base.InternalCompare(cmp, m.LargestPointKey, m.Largest) > 0 { + return base.CorruptionErrorf( + "file %s has inconsistent point key bounds relative to overall bounds: "+ + "overall = [%s-%s], point keys = [%s-%s]", + errors.Safe(m.FileNum), + m.Smallest.Pretty(formatKey), m.Largest.Pretty(formatKey), + m.SmallestPointKey.Pretty(formatKey), m.LargestPointKey.Pretty(formatKey), + ) + } + } + + // Range key validation. 
+ + if m.HasRangeKeys { + if base.InternalCompare(cmp, m.SmallestRangeKey, m.LargestRangeKey) > 0 { + return base.CorruptionErrorf("file %s has inconsistent range key bounds: %s vs %s", + errors.Safe(m.FileNum), m.SmallestRangeKey.Pretty(formatKey), + m.LargestRangeKey.Pretty(formatKey)) + } + if base.InternalCompare(cmp, m.SmallestRangeKey, m.Smallest) < 0 || + base.InternalCompare(cmp, m.LargestRangeKey, m.Largest) > 0 { + return base.CorruptionErrorf( + "file %s has inconsistent range key bounds relative to overall bounds: "+ + "overall = [%s-%s], range keys = [%s-%s]", + errors.Safe(m.FileNum), + m.Smallest.Pretty(formatKey), m.Largest.Pretty(formatKey), + m.SmallestRangeKey.Pretty(formatKey), m.LargestRangeKey.Pretty(formatKey), + ) + } + } + + // Ensure that FileMetadata.Init was called. + if m.FileBacking == nil { + return base.CorruptionErrorf("file metadata FileBacking not set") + } + + return nil +} + +// TableInfo returns a subset of the FileMetadata state formatted as a +// TableInfo. +func (m *FileMetadata) TableInfo() TableInfo { + return TableInfo{ + FileNum: m.FileNum, + Size: m.Size, + Smallest: m.Smallest, + Largest: m.Largest, + SmallestSeqNum: m.SmallestSeqNum, + LargestSeqNum: m.LargestSeqNum, + } +} + +func (m *FileMetadata) cmpSeqNum(b *FileMetadata) int { + // NB: This is the same ordering that RocksDB uses for L0 files. + + // Sort first by largest sequence number. + if v := stdcmp.Compare(m.LargestSeqNum, b.LargestSeqNum); v != 0 { + return v + } + // Then by smallest sequence number. + if v := stdcmp.Compare(m.SmallestSeqNum, b.SmallestSeqNum); v != 0 { + return v + } + // Break ties by file number. 
+ return stdcmp.Compare(m.FileNum, b.FileNum) +} + +func (m *FileMetadata) lessSeqNum(b *FileMetadata) bool { + return m.cmpSeqNum(b) < 0 +} + +func (m *FileMetadata) cmpSmallestKey(b *FileMetadata, cmp Compare) int { + return base.InternalCompare(cmp, m.Smallest, b.Smallest) +} + +// KeyRange returns the minimum smallest and maximum largest internalKey for +// all the FileMetadata in iters. +func KeyRange(ucmp Compare, iters ...LevelIterator) (smallest, largest InternalKey) { + first := true + for _, iter := range iters { + for meta := iter.First(); meta != nil; meta = iter.Next() { + if first { + first = false + smallest, largest = meta.Smallest, meta.Largest + continue + } + if base.InternalCompare(ucmp, smallest, meta.Smallest) >= 0 { + smallest = meta.Smallest + } + if base.InternalCompare(ucmp, largest, meta.Largest) <= 0 { + largest = meta.Largest + } + } + } + return smallest, largest +} + +type bySeqNum []*FileMetadata + +func (b bySeqNum) Len() int { return len(b) } +func (b bySeqNum) Less(i, j int) bool { + return b[i].lessSeqNum(b[j]) +} +func (b bySeqNum) Swap(i, j int) { b[i], b[j] = b[j], b[i] } + +// SortBySeqNum sorts the specified files by increasing sequence number. +func SortBySeqNum(files []*FileMetadata) { + sort.Sort(bySeqNum(files)) +} + +type bySmallest struct { + files []*FileMetadata + cmp Compare +} + +func (b bySmallest) Len() int { return len(b.files) } +func (b bySmallest) Less(i, j int) bool { + return b.files[i].cmpSmallestKey(b.files[j], b.cmp) < 0 +} +func (b bySmallest) Swap(i, j int) { b.files[i], b.files[j] = b.files[j], b.files[i] } + +// SortBySmallest sorts the specified files by smallest key using the supplied +// comparison function to order user keys. 
+func SortBySmallest(files []*FileMetadata, cmp Compare) { + sort.Sort(bySmallest{files, cmp}) +} + +func overlaps(iter LevelIterator, cmp Compare, start, end []byte, exclusiveEnd bool) LevelSlice { + startIter := iter.Clone() + { + startIterFile := startIter.SeekGE(cmp, start) + // SeekGE compares user keys. The user key `start` may be equal to the + // f.Largest because f.Largest is a range deletion sentinel, indicating + // that the user key `start` is NOT contained within the file f. If + // that's the case, we can narrow the overlapping bounds to exclude the + // file with the sentinel. + if startIterFile != nil && startIterFile.Largest.IsExclusiveSentinel() && + cmp(startIterFile.Largest.UserKey, start) == 0 { + startIterFile = startIter.Next() + } + _ = startIterFile // Ignore unused assignment. + } + + endIter := iter.Clone() + { + endIterFile := endIter.SeekGE(cmp, end) + + if !exclusiveEnd { + // endIter is now pointing at the *first* file with a largest key >= end. + // If there are multiple files including the user key `end`, we want all + // of them, so move forward. + for endIterFile != nil && cmp(endIterFile.Largest.UserKey, end) == 0 { + endIterFile = endIter.Next() + } + } + + // LevelSlice uses inclusive bounds, so if we seeked to the end sentinel + // or nexted too far because Largest.UserKey equaled `end`, go back. + // + // Consider !exclusiveEnd and end = 'f', with the following file bounds: + // + // [b,d] [e, f] [f, f] [g, h] + // + // the above for loop will Next until it arrives at [g, h]. We need to + // observe that g > f, and Prev to the file with bounds [f, f]. + if endIterFile == nil { + endIterFile = endIter.Prev() + } else if c := cmp(endIterFile.Smallest.UserKey, end); c > 0 || c == 0 && exclusiveEnd { + endIterFile = endIter.Prev() + } + _ = endIterFile // Ignore unused assignment. 
+ } + return newBoundedLevelSlice(startIter.Clone().iter, &startIter.iter, &endIter.iter) +} + +// NumLevels is the number of levels a Version contains. +const NumLevels = 7 + +// NewVersion constructs a new Version with the provided files. It requires +// the provided files are already well-ordered. It's intended for testing. +func NewVersion( + cmp Compare, formatKey base.FormatKey, flushSplitBytes int64, files [NumLevels][]*FileMetadata, +) *Version { + var v Version + for l := range files { + // NB: We specifically insert `files` into the B-Tree in the order + // they appear within `files`. Some tests depend on this behavior in + // order to test consistency checking, etc. Once we've constructed the + // initial B-Tree, we swap out the btreeCmp for the correct one. + // TODO(jackson): Adjust or remove the tests and remove this. + v.Levels[l].tree, _ = makeBTree(btreeCmpSpecificOrder(files[l]), files[l]) + v.Levels[l].level = l + if l == 0 { + v.Levels[l].tree.cmp = btreeCmpSeqNum + } else { + v.Levels[l].tree.cmp = btreeCmpSmallestKey(cmp) + } + for _, f := range files[l] { + v.Levels[l].totalSize += f.Size + } + } + if err := v.InitL0Sublevels(cmp, formatKey, flushSplitBytes); err != nil { + panic(err) + } + return &v +} + +// Version is a collection of file metadata for on-disk tables at various +// levels. In-memory DBs are written to level-0 tables, and compactions +// migrate data from level N to level N+1. The tables map internal keys (which +// are a user key, a delete or set bit, and a sequence number) to user values. +// +// The tables at level 0 are sorted by largest sequence number. Due to file +// ingestion, there may be overlap in the ranges of sequence numbers contain in +// level 0 sstables. In particular, it is valid for one level 0 sstable to have +// the seqnum range [1,100] while an adjacent sstable has the seqnum range +// [50,50]. This occurs when the [50,50] table was ingested and given a global +// seqnum. 
The ingestion code will have ensured that the [50,50] sstable will +// not have any keys that overlap with the [1,100] in the seqnum range +// [1,49]. The range of internal keys [fileMetadata.smallest, +// fileMetadata.largest] in each level 0 table may overlap. +// +// The tables at any non-0 level are sorted by their internal key range and any +// two tables at the same non-0 level do not overlap. +// +// The internal key ranges of two tables at different levels X and Y may +// overlap, for any X != Y. +// +// Finally, for every internal key in a table at level X, there is no internal +// key in a higher level table that has both the same user key and a higher +// sequence number. +type Version struct { + refs atomic.Int32 + + // The level 0 sstables are organized in a series of sublevels. Similar to + // the seqnum invariant in normal levels, there is no internal key in a + // higher level table that has both the same user key and a higher sequence + // number. Within a sublevel, tables are sorted by their internal key range + // and any two tables at the same sublevel do not overlap. Unlike the normal + // levels, sublevel n contains older tables (lower sequence numbers) than + // sublevel n+1. + // + // The L0Sublevels struct is mostly used for compaction picking. As most + // internal data structures in it are only necessary for compaction picking + // and not for iterator creation, the reference to L0Sublevels is nil'd + // after this version becomes the non-newest version, to reduce memory + // usage. + // + // L0Sublevels.Levels contains L0 files ordered by sublevels. All the files + // in Levels[0] are in L0Sublevels.Levels. L0SublevelFiles is also set to + // a reference to that slice, as that slice is necessary for iterator + // creation and needs to outlast L0Sublevels. 
+ L0Sublevels *L0Sublevels + L0SublevelFiles []LevelSlice + + Levels [NumLevels]LevelMetadata + + // RangeKeyLevels holds a subset of the same files as Levels that contain range + // keys (i.e. fileMeta.HasRangeKeys == true). The memory amplification of this + // duplication should be minimal, as range keys are expected to be rare. + RangeKeyLevels [NumLevels]LevelMetadata + + // The callback to invoke when the last reference to a version is + // removed. Will be called with list.mu held. + Deleted func(obsolete []*FileBacking) + + // Stats holds aggregated stats about the version maintained from + // version to version. + Stats struct { + // MarkedForCompaction records the count of files marked for + // compaction within the version. + MarkedForCompaction int + } + + // The list the version is linked into. + list *VersionList + + // The next/prev link for the versionList doubly-linked list of versions. + prev, next *Version +} + +// String implements fmt.Stringer, printing the FileMetadata for each level in +// the Version. +func (v *Version) String() string { + return v.string(base.DefaultFormatter, false) +} + +// DebugString returns an alternative format to String() which includes sequence +// number and kind information for the sstable boundaries. 
+func (v *Version) DebugString(format base.FormatKey) string { + return v.string(format, true) +} + +func describeSublevels(format base.FormatKey, verbose bool, sublevels []LevelSlice) string { + var buf bytes.Buffer + for sublevel := len(sublevels) - 1; sublevel >= 0; sublevel-- { + fmt.Fprintf(&buf, "0.%d:\n", sublevel) + sublevels[sublevel].Each(func(f *FileMetadata) { + fmt.Fprintf(&buf, " %s\n", f.DebugString(format, verbose)) + }) + } + return buf.String() +} + +func (v *Version) string(format base.FormatKey, verbose bool) string { + var buf bytes.Buffer + if len(v.L0SublevelFiles) > 0 { + fmt.Fprintf(&buf, "%s", describeSublevels(format, verbose, v.L0SublevelFiles)) + } + for level := 1; level < NumLevels; level++ { + if v.Levels[level].Empty() { + continue + } + fmt.Fprintf(&buf, "%d:\n", level) + iter := v.Levels[level].Iter() + for f := iter.First(); f != nil; f = iter.Next() { + fmt.Fprintf(&buf, " %s\n", f.DebugString(format, verbose)) + } + } + return buf.String() +} + +// ParseVersionDebug parses a Version from its DebugString output. +func ParseVersionDebug( + cmp Compare, formatKey base.FormatKey, flushSplitBytes int64, s string, +) (*Version, error) { + var level int + var files [NumLevels][]*FileMetadata + for _, l := range strings.Split(s, "\n") { + l = strings.TrimSpace(l) + + switch l[:2] { + case "0.", "0:", "1:", "2:", "3:", "4:", "5:", "6:": + var err error + level, err = strconv.Atoi(l[:1]) + if err != nil { + return nil, err + } + default: + m, err := ParseFileMetadataDebug(l) + if err != nil { + return nil, err + } + // If we only parsed overall bounds, default to setting the point bounds. + if !m.HasPointKeys && !m.HasRangeKeys { + m.SmallestPointKey, m.LargestPointKey = m.Smallest, m.Largest + m.HasPointKeys = true + } + files[level] = append(files[level], m) + } + } + // Reverse the order of L0 files. This ensures we construct the same + // sublevels. 
(They're printed from higher sublevel to lower, which means in + // a partial order that represents newest to oldest). + for i := 0; i < len(files[0])/2; i++ { + files[0][i], files[0][len(files[0])-i-1] = files[0][len(files[0])-i-1], files[0][i] + } + return NewVersion(cmp, formatKey, flushSplitBytes, files), nil +} + +// Refs returns the number of references to the version. +func (v *Version) Refs() int32 { + return v.refs.Load() +} + +// Ref increments the version refcount. +func (v *Version) Ref() { + v.refs.Add(1) +} + +// Unref decrements the version refcount. If the last reference to the version +// was removed, the version is removed from the list of versions and the +// Deleted callback is invoked. Requires that the VersionList mutex is NOT +// locked. +func (v *Version) Unref() { + if v.refs.Add(-1) == 0 { + l := v.list + l.mu.Lock() + l.Remove(v) + v.Deleted(v.unrefFiles()) + l.mu.Unlock() + } +} + +// UnrefLocked decrements the version refcount. If the last reference to the +// version was removed, the version is removed from the list of versions and +// the Deleted callback is invoked. Requires that the VersionList mutex is +// already locked. +func (v *Version) UnrefLocked() { + if v.refs.Add(-1) == 0 { + v.list.Remove(v) + v.Deleted(v.unrefFiles()) + } +} + +func (v *Version) unrefFiles() []*FileBacking { + var obsolete []*FileBacking + for _, lm := range v.Levels { + obsolete = append(obsolete, lm.release()...) + } + for _, lm := range v.RangeKeyLevels { + obsolete = append(obsolete, lm.release()...) + } + return obsolete +} + +// Next returns the next version in the list of versions. 
+func (v *Version) Next() *Version { + return v.next +} + +// InitL0Sublevels initializes the L0Sublevels +func (v *Version) InitL0Sublevels( + cmp Compare, formatKey base.FormatKey, flushSplitBytes int64, +) error { + var err error + v.L0Sublevels, err = NewL0Sublevels(&v.Levels[0], cmp, formatKey, flushSplitBytes) + if err == nil && v.L0Sublevels != nil { + v.L0SublevelFiles = v.L0Sublevels.Levels + } + return err +} + +// Contains returns a boolean indicating whether the provided file exists in +// the version at the given level. If level is non-zero then Contains binary +// searches among the files. If level is zero, Contains scans the entire +// level. +func (v *Version) Contains(level int, cmp Compare, m *FileMetadata) bool { + iter := v.Levels[level].Iter() + if level > 0 { + overlaps := v.Overlaps(level, cmp, m.Smallest.UserKey, m.Largest.UserKey, + m.Largest.IsExclusiveSentinel()) + iter = overlaps.Iter() + } + for f := iter.First(); f != nil; f = iter.Next() { + if f == m { + return true + } + } + return false +} + +// Overlaps returns all elements of v.files[level] whose user key range +// intersects the given range. If level is non-zero then the user key ranges of +// v.files[level] are assumed to not overlap (although they may touch). If level +// is zero then that assumption cannot be made, and the [start, end] range is +// expanded to the union of those matching ranges so far and the computation is +// repeated until [start, end] stabilizes. +// The returned files are a subsequence of the input files, i.e., the ordering +// is not changed. +func (v *Version) Overlaps( + level int, cmp Compare, start, end []byte, exclusiveEnd bool, +) LevelSlice { + if level == 0 { + // Indices that have been selected as overlapping. 
+ l0 := v.Levels[level] + l0Iter := l0.Iter() + selectedIndices := make([]bool, l0.Len()) + numSelected := 0 + var slice LevelSlice + for { + restart := false + for i, meta := 0, l0Iter.First(); meta != nil; i, meta = i+1, l0Iter.Next() { + selected := selectedIndices[i] + if selected { + continue + } + if !meta.Overlaps(cmp, start, end, exclusiveEnd) { + // meta is completely outside the specified range; skip it. + continue + } + // Overlaps. + selectedIndices[i] = true + numSelected++ + + smallest := meta.Smallest.UserKey + largest := meta.Largest.UserKey + // Since level == 0, check if the newly added fileMetadata has + // expanded the range. We expand the range immediately for files + // we have remaining to check in this loop. All already checked + // and unselected files will need to be rechecked via the + // restart below. + if cmp(smallest, start) < 0 { + start = smallest + restart = true + } + if v := cmp(largest, end); v > 0 { + end = largest + exclusiveEnd = meta.Largest.IsExclusiveSentinel() + restart = true + } else if v == 0 && exclusiveEnd && !meta.Largest.IsExclusiveSentinel() { + // Only update the exclusivity of our existing `end` + // bound. + exclusiveEnd = false + restart = true + } + } + + if !restart { + // Construct a B-Tree containing only the matching items. + var tr btree + tr.cmp = v.Levels[level].tree.cmp + for i, meta := 0, l0Iter.First(); meta != nil; i, meta = i+1, l0Iter.Next() { + if selectedIndices[i] { + err := tr.Insert(meta) + if err != nil { + panic(err) + } + } + } + slice = newLevelSlice(tr.Iter()) + // TODO(jackson): Avoid the oddity of constructing and + // immediately releasing a B-Tree. Make LevelSlice an + // interface? + tr.Release() + break + } + // Continue looping to retry the files that were not selected. 
+ } + return slice + } + + return overlaps(v.Levels[level].Iter(), cmp, start, end, exclusiveEnd) +} + +// CheckOrdering checks that the files are consistent with respect to +// increasing file numbers (for level 0 files) and increasing and non- +// overlapping internal key ranges (for level non-0 files). +func (v *Version) CheckOrdering( + cmp Compare, format base.FormatKey, order OrderingInvariants, +) error { + for sublevel := len(v.L0SublevelFiles) - 1; sublevel >= 0; sublevel-- { + sublevelIter := v.L0SublevelFiles[sublevel].Iter() + // Sublevels have NEVER allowed split user keys, so we can pass + // ProhibitSplitUserKeys. + if err := CheckOrdering(cmp, format, L0Sublevel(sublevel), sublevelIter, ProhibitSplitUserKeys); err != nil { + return base.CorruptionErrorf("%s\n%s", err, v.DebugString(format)) + } + } + + for level, lm := range v.Levels { + if err := CheckOrdering(cmp, format, Level(level), lm.Iter(), order); err != nil { + return base.CorruptionErrorf("%s\n%s", err, v.DebugString(format)) + } + } + return nil +} + +// VersionList holds a list of versions. The versions are ordered from oldest +// to newest. +type VersionList struct { + mu *sync.Mutex + root Version +} + +// Init initializes the version list. +func (l *VersionList) Init(mu *sync.Mutex) { + l.mu = mu + l.root.next = &l.root + l.root.prev = &l.root +} + +// Empty returns true if the list is empty, and false otherwise. +func (l *VersionList) Empty() bool { + return l.root.next == &l.root +} + +// Front returns the oldest version in the list. Note that this version is only +// valid if Empty() returns true. +func (l *VersionList) Front() *Version { + return l.root.next +} + +// Back returns the newest version in the list. Note that this version is only +// valid if Empty() returns true. +func (l *VersionList) Back() *Version { + return l.root.prev +} + +// PushBack adds a new version to the back of the list. This new version +// becomes the "newest" version in the list. 
+func (l *VersionList) PushBack(v *Version) { + if v.list != nil || v.prev != nil || v.next != nil { + panic("pebble: version list is inconsistent") + } + v.prev = l.root.prev + v.prev.next = v + v.next = &l.root + v.next.prev = v + v.list = l + // Let L0Sublevels on the second newest version get GC'd, as it is no longer + // necessary. See the comment in Version. + v.prev.L0Sublevels = nil +} + +// Remove removes the specified version from the list. +func (l *VersionList) Remove(v *Version) { + if v == &l.root { + panic("pebble: cannot remove version list root node") + } + if v.list != l { + panic("pebble: version list is inconsistent") + } + v.prev.next = v.next + v.next.prev = v.prev + v.next = nil // avoid memory leaks + v.prev = nil // avoid memory leaks + v.list = nil // avoid memory leaks +} + +// OrderingInvariants dictates the file ordering invariants active. +type OrderingInvariants int8 + +const ( + // ProhibitSplitUserKeys indicates that adjacent files within a level cannot + // contain the same user key. + ProhibitSplitUserKeys OrderingInvariants = iota + // AllowSplitUserKeys indicates that adjacent files within a level may + // contain the same user key. This is only allowed by historical format + // major versions. + // + // TODO(jackson): Remove. + AllowSplitUserKeys +) + +// CheckOrdering checks that the files are consistent with respect to +// seqnums (for level 0 files -- see detailed comment below) and increasing and non- +// overlapping internal key ranges (for non-level 0 files). +// +// The ordering field may be passed AllowSplitUserKeys to allow adjacent files that are both +// inclusive of the same user key. Pebble no longer creates version edits +// installing such files, and Pebble databases with sufficiently high format +// major version should no longer have any such files within their LSM. +// TODO(jackson): Remove AllowSplitUserKeys when we remove support for the +// earlier format major versions. 
+func CheckOrdering( + cmp Compare, format base.FormatKey, level Level, files LevelIterator, ordering OrderingInvariants, +) error { + // The invariants to check for L0 sublevels are the same as the ones to + // check for all other levels. However, if L0 is not organized into + // sublevels, or if all L0 files are being passed in, we do the legacy L0 + // checks, defined in the detailed comment below. + if level == Level(0) { + // We have 2 kinds of files: + // - Files with exactly one sequence number: these could be either ingested files + // or flushed files. We cannot tell the difference between them based on FileMetadata, + // so our consistency checking here uses the weaker checks assuming it is a narrow + // flushed file. We cannot error on ingested files having sequence numbers coincident + // with flushed files as the seemingly ingested file could just be a flushed file + // with just one key in it which is a truncated range tombstone sharing sequence numbers + // with other files in the same flush. + // - Files with multiple sequence numbers: these are necessarily flushed files. + // + // Three cases of overlapping sequence numbers: + // Case 1: + // An ingested file contained in the sequence numbers of the flushed file -- it must be + // fully contained (not coincident with either end of the flushed file) since the memtable + // must have been at [a, b-1] (where b > a) when the ingested file was assigned sequence + // num b, and the memtable got a subsequent update that was given sequence num b+1, before + // being flushed. + // + // So a sequence [1000, 1000] [1002, 1002] [1000, 2000] is invalid since the first and + // third file are inconsistent with each other. So comparing adjacent files is insufficient + // for consistency checking. 
+ // + // Visually we have something like + // x------y x-----------yx-------------y (flushed files where x, y are the endpoints) + // y y y y (y's represent ingested files) + // And these are ordered in increasing order of y. Note that y's must be unique. + // + // Case 2: + // A flushed file that did not overlap in keys with any file in any level, but does overlap + // in the file key intervals. This file is placed in L0 since it overlaps in the file + // key intervals but since it has no overlapping data, it is assigned a sequence number + // of 0 in RocksDB. We handle this case for compatibility with RocksDB. + // + // Case 3: + // A sequence of flushed files that overlap in sequence numbers with one another, + // but do not overlap in keys inside the sstables. These files correspond to + // partitioned flushes or the results of intra-L0 compactions of partitioned + // flushes. + // + // Since these types of SSTables violate most other sequence number + // overlap invariants, and handling this case is important for compatibility + // with future versions of pebble, this method relaxes most L0 invariant + // checks. + + var prev *FileMetadata + for f := files.First(); f != nil; f, prev = files.Next(), f { + if prev == nil { + continue + } + // Validate that the sorting is sane. + if prev.LargestSeqNum == 0 && f.LargestSeqNum == prev.LargestSeqNum { + // Multiple files satisfying case 2 mentioned above. 
+ } else if !prev.lessSeqNum(f) { + return base.CorruptionErrorf("L0 files %s and %s are not properly ordered: <#%d-#%d> vs <#%d-#%d>", + errors.Safe(prev.FileNum), errors.Safe(f.FileNum), + errors.Safe(prev.SmallestSeqNum), errors.Safe(prev.LargestSeqNum), + errors.Safe(f.SmallestSeqNum), errors.Safe(f.LargestSeqNum)) + } + } + } else { + var prev *FileMetadata + for f := files.First(); f != nil; f, prev = files.Next(), f { + if err := f.Validate(cmp, format); err != nil { + return errors.Wrapf(err, "%s ", level) + } + if prev != nil { + if prev.cmpSmallestKey(f, cmp) >= 0 { + return base.CorruptionErrorf("%s files %s and %s are not properly ordered: [%s-%s] vs [%s-%s]", + errors.Safe(level), errors.Safe(prev.FileNum), errors.Safe(f.FileNum), + prev.Smallest.Pretty(format), prev.Largest.Pretty(format), + f.Smallest.Pretty(format), f.Largest.Pretty(format)) + } + + // What's considered "overlapping" is dependent on the format + // major version. If ordering=ProhibitSplitUserKeys, then both + // files cannot contain keys with the same user keys. If the + // bounds have the same user key, the previous file's boundary + // must have a Trailer indicating that it's exclusive. 
+ switch ordering { + case AllowSplitUserKeys: + if base.InternalCompare(cmp, prev.Largest, f.Smallest) >= 0 { + return base.CorruptionErrorf("%s files %s and %s have overlapping ranges: [%s-%s] vs [%s-%s]", + errors.Safe(level), errors.Safe(prev.FileNum), errors.Safe(f.FileNum), + prev.Smallest.Pretty(format), prev.Largest.Pretty(format), + f.Smallest.Pretty(format), f.Largest.Pretty(format)) + } + case ProhibitSplitUserKeys: + if v := cmp(prev.Largest.UserKey, f.Smallest.UserKey); v > 0 || (v == 0 && !prev.Largest.IsExclusiveSentinel()) { + return base.CorruptionErrorf("%s files %s and %s have overlapping ranges: [%s-%s] vs [%s-%s]", + errors.Safe(level), errors.Safe(prev.FileNum), errors.Safe(f.FileNum), + prev.Smallest.Pretty(format), prev.Largest.Pretty(format), + f.Smallest.Pretty(format), f.Largest.Pretty(format)) + } + default: + panic("unreachable") + } + } + } + } + return nil +} diff --git a/pebble/internal/manifest/version_edit.go b/pebble/internal/manifest/version_edit.go new file mode 100644 index 0000000..ee3a919 --- /dev/null +++ b/pebble/internal/manifest/version_edit.go @@ -0,0 +1,1122 @@ +// Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package manifest + +import ( + "bufio" + "bytes" + "encoding/binary" + "fmt" + "io" + "time" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/invariants" + stdcmp "github.com/cockroachdb/pebble/shims/cmp" + "github.com/cockroachdb/pebble/shims/slices" +) + +// TODO(peter): describe the MANIFEST file format, independently of the C++ +// project. + +var errCorruptManifest = base.CorruptionErrorf("pebble: corrupt manifest") + +type byteReader interface { + io.ByteReader + io.Reader +} + +// Tags for the versionEdit disk format. +// Tag 8 is no longer used. +const ( + // LevelDB tags. 
+ tagComparator = 1 + tagLogNumber = 2 + tagNextFileNumber = 3 + tagLastSequence = 4 + tagCompactPointer = 5 + tagDeletedFile = 6 + tagNewFile = 7 + tagPrevLogNumber = 9 + + // RocksDB tags. + tagNewFile2 = 100 + tagNewFile3 = 102 + tagNewFile4 = 103 + tagColumnFamily = 200 + tagColumnFamilyAdd = 201 + tagColumnFamilyDrop = 202 + tagMaxColumnFamily = 203 + + // Pebble tags. + tagNewFile5 = 104 // Range keys. + tagCreatedBackingTable = 105 + tagRemovedBackingTable = 106 + + // The custom tags sub-format used by tagNewFile4 and above. + customTagTerminate = 1 + customTagNeedsCompaction = 2 + customTagCreationTime = 6 + customTagPathID = 65 + customTagNonSafeIgnoreMask = 1 << 6 + customTagVirtual = 66 +) + +// DeletedFileEntry holds the state for a file deletion from a level. The file +// itself might still be referenced by another level. +type DeletedFileEntry struct { + Level int + FileNum base.FileNum +} + +// NewFileEntry holds the state for a new file or one moved from a different +// level. +type NewFileEntry struct { + Level int + Meta *FileMetadata + // BackingFileNum is only set during manifest replay, and only for virtual + // sstables. + BackingFileNum base.DiskFileNum +} + +// VersionEdit holds the state for an edit to a Version along with other +// on-disk state (log numbers, next file number, and the last sequence number). +type VersionEdit struct { + // ComparerName is the value of Options.Comparer.Name. This is only set in + // the first VersionEdit in a manifest (either when the DB is created, or + // when a new manifest is created) and is used to verify that the comparer + // specified at Open matches the comparer that was previously used. + ComparerName string + + // MinUnflushedLogNum is the smallest WAL log file number corresponding to + // mutations that have not been flushed to an sstable. + // + // This is an optional field, and 0 represents it is not set. 
+ MinUnflushedLogNum base.DiskFileNum + + // ObsoletePrevLogNum is a historic artifact from LevelDB that is not used by + // Pebble, RocksDB, or even LevelDB. Its use in LevelDB was deprecated in + // 6/2011. We keep it around purely for informational purposes when + // displaying MANIFEST contents. + ObsoletePrevLogNum uint64 + + // The next file number. A single counter is used to assign file numbers + // for the WAL, MANIFEST, sstable, and OPTIONS files. + NextFileNum uint64 + + // LastSeqNum is an upper bound on the sequence numbers that have been + // assigned in flushed WALs. Unflushed WALs (that will be replayed during + // recovery) may contain sequence numbers greater than this value. + LastSeqNum uint64 + + // A file num may be present in both deleted files and new files when it + // is moved from a lower level to a higher level (when the compaction + // found that there was no overlapping file at the higher level). + DeletedFiles map[DeletedFileEntry]*FileMetadata + NewFiles []NewFileEntry + // CreatedBackingTables can be used to preserve the FileBacking associated + // with a physical sstable. This is useful when virtual sstables in the + // latest version are reconstructed during manifest replay, and we also need + // to reconstruct the FileBacking which is required by these virtual + // sstables. + // + // INVARIANT: The FileBacking associated with a physical sstable must only + // be added as a backing file in the same version edit where the physical + // sstable is first virtualized. This means that the physical sstable must + // be present in DeletedFiles and that there must be at least one virtual + // sstable with the same FileBacking as the physical sstable in NewFiles. A + // file must be present in CreatedBackingTables in exactly one version edit. + // The physical sstable associated with the FileBacking must also not be + // present in NewFiles. 
+ CreatedBackingTables []*FileBacking + // RemovedBackingTables is used to remove the FileBacking associated with a + // virtual sstable. Note that a backing sstable can be removed as soon as + // there are no virtual sstables in the latest version which are using the + // backing sstable, but the backing sstable doesn't necessarily have to be + // removed atomically with the version edit which removes the last virtual + // sstable associated with the backing sstable. The removal can happen in a + // future version edit. + // + // INVARIANT: A file must only be added to RemovedBackingTables if it was + // added to CreateBackingTables in a prior version edit. The same version + // edit also cannot have the same file present in both CreateBackingTables + // and RemovedBackingTables. A file must be present in RemovedBackingTables + // in exactly one version edit. + RemovedBackingTables []base.DiskFileNum +} + +// Decode decodes an edit from the specified reader. +// +// Note that the Decode step will not set the FileBacking for virtual sstables +// and the responsibility is left to the caller. However, the Decode step will +// populate the NewFileEntry.BackingFileNum in VersionEdit.NewFiles. 
+func (v *VersionEdit) Decode(r io.Reader) error { + br, ok := r.(byteReader) + if !ok { + br = bufio.NewReader(r) + } + d := versionEditDecoder{br} + for { + tag, err := binary.ReadUvarint(br) + if err == io.EOF { + break + } + if err != nil { + return err + } + switch tag { + case tagComparator: + s, err := d.readBytes() + if err != nil { + return err + } + v.ComparerName = string(s) + + case tagLogNumber: + n, err := d.readUvarint() + if err != nil { + return err + } + v.MinUnflushedLogNum = base.DiskFileNum(n) + + case tagNextFileNumber: + n, err := d.readUvarint() + if err != nil { + return err + } + v.NextFileNum = n + + case tagLastSequence: + n, err := d.readUvarint() + if err != nil { + return err + } + v.LastSeqNum = n + + case tagCompactPointer: + if _, err := d.readLevel(); err != nil { + return err + } + if _, err := d.readBytes(); err != nil { + return err + } + // NB: RocksDB does not use compaction pointers anymore. + + case tagRemovedBackingTable: + n, err := d.readUvarint() + if err != nil { + return err + } + v.RemovedBackingTables = append( + v.RemovedBackingTables, base.FileNum(n).DiskFileNum(), + ) + case tagCreatedBackingTable: + dfn, err := d.readUvarint() + if err != nil { + return err + } + size, err := d.readUvarint() + if err != nil { + return err + } + fileBacking := &FileBacking{ + DiskFileNum: base.FileNum(dfn).DiskFileNum(), + Size: size, + } + v.CreatedBackingTables = append(v.CreatedBackingTables, fileBacking) + case tagDeletedFile: + level, err := d.readLevel() + if err != nil { + return err + } + fileNum, err := d.readFileNum() + if err != nil { + return err + } + if v.DeletedFiles == nil { + v.DeletedFiles = make(map[DeletedFileEntry]*FileMetadata) + } + v.DeletedFiles[DeletedFileEntry{level, fileNum}] = nil + + case tagNewFile, tagNewFile2, tagNewFile3, tagNewFile4, tagNewFile5: + level, err := d.readLevel() + if err != nil { + return err + } + fileNum, err := d.readFileNum() + if err != nil { + return err + } + if tag == 
tagNewFile3 { + // The pathID field appears unused in RocksDB. + _ /* pathID */, err := d.readUvarint() + if err != nil { + return err + } + } + size, err := d.readUvarint() + if err != nil { + return err + } + // We read the smallest / largest key bounds differently depending on + // whether we have point, range or both types of keys present in the + // table. + var ( + smallestPointKey, largestPointKey []byte + smallestRangeKey, largestRangeKey []byte + parsedPointBounds bool + boundsMarker byte + ) + if tag != tagNewFile5 { + // Range keys not present in the table. Parse the point key bounds. + smallestPointKey, err = d.readBytes() + if err != nil { + return err + } + largestPointKey, err = d.readBytes() + if err != nil { + return err + } + } else { + // Range keys are present in the table. Determine whether we have point + // keys to parse, in addition to the bounds. + boundsMarker, err = d.ReadByte() + if err != nil { + return err + } + // Parse point key bounds, if present. + if boundsMarker&maskContainsPointKeys > 0 { + smallestPointKey, err = d.readBytes() + if err != nil { + return err + } + largestPointKey, err = d.readBytes() + if err != nil { + return err + } + parsedPointBounds = true + } else { + // The table does not have point keys. + // Sanity check: the bounds must be range keys. + if boundsMarker&maskSmallest != 0 || boundsMarker&maskLargest != 0 { + return base.CorruptionErrorf( + "new-file-4-range-keys: table without point keys has point key bounds: marker=%x", + boundsMarker, + ) + } + } + // Parse range key bounds. 
+ smallestRangeKey, err = d.readBytes() + if err != nil { + return err + } + largestRangeKey, err = d.readBytes() + if err != nil { + return err + } + } + var smallestSeqNum uint64 + var largestSeqNum uint64 + if tag != tagNewFile { + smallestSeqNum, err = d.readUvarint() + if err != nil { + return err + } + largestSeqNum, err = d.readUvarint() + if err != nil { + return err + } + } + var markedForCompaction bool + var creationTime uint64 + virtualState := struct { + virtual bool + backingFileNum uint64 + }{} + if tag == tagNewFile4 || tag == tagNewFile5 { + for { + customTag, err := d.readUvarint() + if err != nil { + return err + } + if customTag == customTagTerminate { + break + } else if customTag == customTagVirtual { + virtualState.virtual = true + n, err := d.readUvarint() + if err != nil { + return err + } + virtualState.backingFileNum = n + continue + } + + field, err := d.readBytes() + if err != nil { + return err + } + switch customTag { + case customTagNeedsCompaction: + if len(field) != 1 { + return base.CorruptionErrorf("new-file4: need-compaction field wrong size") + } + markedForCompaction = (field[0] == 1) + + case customTagCreationTime: + var n int + creationTime, n = binary.Uvarint(field) + if n != len(field) { + return base.CorruptionErrorf("new-file4: invalid file creation time") + } + + case customTagPathID: + return base.CorruptionErrorf("new-file4: path-id field not supported") + + default: + if (customTag & customTagNonSafeIgnoreMask) != 0 { + return base.CorruptionErrorf("new-file4: custom field not supported: %d", customTag) + } + } + } + } + m := &FileMetadata{ + FileNum: fileNum, + Size: size, + CreationTime: int64(creationTime), + SmallestSeqNum: smallestSeqNum, + LargestSeqNum: largestSeqNum, + MarkedForCompaction: markedForCompaction, + Virtual: virtualState.virtual, + } + if tag != tagNewFile5 { // no range keys present + m.SmallestPointKey = base.DecodeInternalKey(smallestPointKey) + m.LargestPointKey = 
base.DecodeInternalKey(largestPointKey) + m.HasPointKeys = true + m.Smallest, m.Largest = m.SmallestPointKey, m.LargestPointKey + m.boundTypeSmallest, m.boundTypeLargest = boundTypePointKey, boundTypePointKey + } else { // range keys present + // Set point key bounds, if parsed. + if parsedPointBounds { + m.SmallestPointKey = base.DecodeInternalKey(smallestPointKey) + m.LargestPointKey = base.DecodeInternalKey(largestPointKey) + m.HasPointKeys = true + } + // Set range key bounds. + m.SmallestRangeKey = base.DecodeInternalKey(smallestRangeKey) + m.LargestRangeKey = base.DecodeInternalKey(largestRangeKey) + m.HasRangeKeys = true + // Set overall bounds (by default assume range keys). + m.Smallest, m.Largest = m.SmallestRangeKey, m.LargestRangeKey + m.boundTypeSmallest, m.boundTypeLargest = boundTypeRangeKey, boundTypeRangeKey + if boundsMarker&maskSmallest == maskSmallest { + m.Smallest = m.SmallestPointKey + m.boundTypeSmallest = boundTypePointKey + } + if boundsMarker&maskLargest == maskLargest { + m.Largest = m.LargestPointKey + m.boundTypeLargest = boundTypePointKey + } + } + m.boundsSet = true + if !virtualState.virtual { + m.InitPhysicalBacking() + } + + nfe := NewFileEntry{ + Level: level, + Meta: m, + } + if virtualState.virtual { + nfe.BackingFileNum = base.FileNum(virtualState.backingFileNum).DiskFileNum() + } + v.NewFiles = append(v.NewFiles, nfe) + + case tagPrevLogNumber: + n, err := d.readUvarint() + if err != nil { + return err + } + v.ObsoletePrevLogNum = n + + case tagColumnFamily, tagColumnFamilyAdd, tagColumnFamilyDrop, tagMaxColumnFamily: + return base.CorruptionErrorf("column families are not supported") + + default: + return errCorruptManifest + } + } + return nil +} + +func (v *VersionEdit) string(verbose bool, fmtKey base.FormatKey) string { + var buf bytes.Buffer + if v.ComparerName != "" { + fmt.Fprintf(&buf, " comparer: %s", v.ComparerName) + } + if v.MinUnflushedLogNum != 0 { + fmt.Fprintf(&buf, " log-num: %d\n", v.MinUnflushedLogNum) + } 
+ if v.ObsoletePrevLogNum != 0 { + fmt.Fprintf(&buf, " prev-log-num: %d\n", v.ObsoletePrevLogNum) + } + if v.NextFileNum != 0 { + fmt.Fprintf(&buf, " next-file-num: %d\n", v.NextFileNum) + } + if v.LastSeqNum != 0 { + fmt.Fprintf(&buf, " last-seq-num: %d\n", v.LastSeqNum) + } + entries := make([]DeletedFileEntry, 0, len(v.DeletedFiles)) + for df := range v.DeletedFiles { + entries = append(entries, df) + } + slices.SortFunc(entries, func(a, b DeletedFileEntry) int { + if v := stdcmp.Compare(a.Level, b.Level); v != 0 { + return v + } + return stdcmp.Compare(a.FileNum, b.FileNum) + }) + for _, df := range entries { + fmt.Fprintf(&buf, " deleted: L%d %s\n", df.Level, df.FileNum) + } + for _, nf := range v.NewFiles { + fmt.Fprintf(&buf, " added: L%d", nf.Level) + if verbose { + fmt.Fprintf(&buf, " %s", nf.Meta.DebugString(fmtKey, true /* verbose */)) + } else { + fmt.Fprintf(&buf, " %s", nf.Meta.String()) + } + if nf.Meta.CreationTime != 0 { + fmt.Fprintf(&buf, " (%s)", + time.Unix(nf.Meta.CreationTime, 0).UTC().Format(time.RFC3339)) + } + fmt.Fprintln(&buf) + } + return buf.String() +} + +// DebugString is a more verbose version of String(). Use this in tests. +func (v *VersionEdit) DebugString(fmtKey base.FormatKey) string { + return v.string(true /* verbose */, fmtKey) +} + +// String implements fmt.Stringer for a VersionEdit. +func (v *VersionEdit) String() string { + return v.string(false /* verbose */, base.DefaultFormatter) +} + +// Encode encodes an edit to the specified writer. 
+func (v *VersionEdit) Encode(w io.Writer) error { + e := versionEditEncoder{new(bytes.Buffer)} + + if v.ComparerName != "" { + e.writeUvarint(tagComparator) + e.writeString(v.ComparerName) + } + if v.MinUnflushedLogNum != 0 { + e.writeUvarint(tagLogNumber) + e.writeUvarint(uint64(v.MinUnflushedLogNum)) + } + if v.ObsoletePrevLogNum != 0 { + e.writeUvarint(tagPrevLogNumber) + e.writeUvarint(v.ObsoletePrevLogNum) + } + if v.NextFileNum != 0 { + e.writeUvarint(tagNextFileNumber) + e.writeUvarint(uint64(v.NextFileNum)) + } + for _, dfn := range v.RemovedBackingTables { + e.writeUvarint(tagRemovedBackingTable) + e.writeUvarint(uint64(dfn.FileNum())) + } + for _, fileBacking := range v.CreatedBackingTables { + e.writeUvarint(tagCreatedBackingTable) + e.writeUvarint(uint64(fileBacking.DiskFileNum.FileNum())) + e.writeUvarint(fileBacking.Size) + } + // RocksDB requires LastSeqNum to be encoded for the first MANIFEST entry, + // even though its value is zero. We detect this by encoding LastSeqNum when + // ComparerName is set. + if v.LastSeqNum != 0 || v.ComparerName != "" { + e.writeUvarint(tagLastSequence) + e.writeUvarint(v.LastSeqNum) + } + for x := range v.DeletedFiles { + e.writeUvarint(tagDeletedFile) + e.writeUvarint(uint64(x.Level)) + e.writeUvarint(uint64(x.FileNum)) + } + for _, x := range v.NewFiles { + customFields := x.Meta.MarkedForCompaction || x.Meta.CreationTime != 0 || x.Meta.Virtual + var tag uint64 + switch { + case x.Meta.HasRangeKeys: + tag = tagNewFile5 + case customFields: + tag = tagNewFile4 + default: + tag = tagNewFile2 + } + e.writeUvarint(tag) + e.writeUvarint(uint64(x.Level)) + e.writeUvarint(uint64(x.Meta.FileNum)) + e.writeUvarint(x.Meta.Size) + if !x.Meta.HasRangeKeys { + // If we have no range keys, preserve the original format and write the + // smallest and largest point keys. 
+ e.writeKey(x.Meta.SmallestPointKey) + e.writeKey(x.Meta.LargestPointKey) + } else { + // When range keys are present, we first write a marker byte that + // indicates if the table also contains point keys, in addition to how the + // overall bounds for the table should be reconstructed. This byte is + // followed by the keys themselves. + b, err := x.Meta.boundsMarker() + if err != nil { + return err + } + if err = e.WriteByte(b); err != nil { + return err + } + // Write point key bounds (if present). + if x.Meta.HasPointKeys { + e.writeKey(x.Meta.SmallestPointKey) + e.writeKey(x.Meta.LargestPointKey) + } + // Write range key bounds. + e.writeKey(x.Meta.SmallestRangeKey) + e.writeKey(x.Meta.LargestRangeKey) + } + e.writeUvarint(x.Meta.SmallestSeqNum) + e.writeUvarint(x.Meta.LargestSeqNum) + if customFields { + if x.Meta.CreationTime != 0 { + e.writeUvarint(customTagCreationTime) + var buf [binary.MaxVarintLen64]byte + n := binary.PutUvarint(buf[:], uint64(x.Meta.CreationTime)) + e.writeBytes(buf[:n]) + } + if x.Meta.MarkedForCompaction { + e.writeUvarint(customTagNeedsCompaction) + e.writeBytes([]byte{1}) + } + if x.Meta.Virtual { + e.writeUvarint(customTagVirtual) + e.writeUvarint(uint64(x.Meta.FileBacking.DiskFileNum.FileNum())) + } + e.writeUvarint(customTagTerminate) + } + } + _, err := w.Write(e.Bytes()) + return err +} + +// versionEditDecoder should be used to decode version edits. 
+type versionEditDecoder struct { + byteReader +} + +func (d versionEditDecoder) readBytes() ([]byte, error) { + n, err := d.readUvarint() + if err != nil { + return nil, err + } + s := make([]byte, n) + _, err = io.ReadFull(d, s) + if err != nil { + if err == io.ErrUnexpectedEOF { + return nil, errCorruptManifest + } + return nil, err + } + return s, nil +} + +func (d versionEditDecoder) readLevel() (int, error) { + u, err := d.readUvarint() + if err != nil { + return 0, err + } + if u >= NumLevels { + return 0, errCorruptManifest + } + return int(u), nil +} + +func (d versionEditDecoder) readFileNum() (base.FileNum, error) { + u, err := d.readUvarint() + if err != nil { + return 0, err + } + return base.FileNum(u), nil +} + +func (d versionEditDecoder) readUvarint() (uint64, error) { + u, err := binary.ReadUvarint(d) + if err != nil { + if err == io.EOF { + return 0, errCorruptManifest + } + return 0, err + } + return u, nil +} + +type versionEditEncoder struct { + *bytes.Buffer +} + +func (e versionEditEncoder) writeBytes(p []byte) { + e.writeUvarint(uint64(len(p))) + e.Write(p) +} + +func (e versionEditEncoder) writeKey(k InternalKey) { + e.writeUvarint(uint64(k.Size())) + e.Write(k.UserKey) + buf := k.EncodeTrailer() + e.Write(buf[:]) +} + +func (e versionEditEncoder) writeString(s string) { + e.writeUvarint(uint64(len(s))) + e.WriteString(s) +} + +func (e versionEditEncoder) writeUvarint(u uint64) { + var buf [binary.MaxVarintLen64]byte + n := binary.PutUvarint(buf[:], u) + e.Write(buf[:n]) +} + +// BulkVersionEdit summarizes the files added and deleted from a set of version +// edits. +// +// INVARIANTS: +// No file can be added to a level more than once. This is true globally, and +// also true for all of the calls to Accumulate for a single bulk version edit. +// +// No file can be removed from a level more than once. This is true globally, +// and also true for all of the calls to Accumulate for a single bulk version +// edit. 
+// +// A file must not be added and removed from a given level in the same version +// edit. +// +// A file that is being removed from a level must have been added to that level +// before (in a prior version edit). Note that a given file can be deleted from +// a level and added to another level in a single version edit +type BulkVersionEdit struct { + Added [NumLevels]map[base.FileNum]*FileMetadata + Deleted [NumLevels]map[base.FileNum]*FileMetadata + + // AddedFileBacking is a map to support lookup so that we can populate the + // FileBacking of virtual sstables during manifest replay. + AddedFileBacking map[base.DiskFileNum]*FileBacking + RemovedFileBacking []base.DiskFileNum + + // AddedByFileNum maps file number to file metadata for all added files + // from accumulated version edits. AddedByFileNum is only populated if set + // to non-nil by a caller. It must be set to non-nil when replaying + // version edits read from a MANIFEST (as opposed to VersionEdits + // constructed in-memory). While replaying a MANIFEST file, + // VersionEdit.DeletedFiles map entries have nil values, because the + // on-disk deletion record encodes only the file number. Accumulate + // uses AddedByFileNum to correctly populate the BulkVersionEdit's Deleted + // field with non-nil *FileMetadata. + AddedByFileNum map[base.FileNum]*FileMetadata + + // MarkedForCompactionCountDiff holds the aggregated count of files + // marked for compaction added or removed. + MarkedForCompactionCountDiff int +} + +// Accumulate adds the file addition and deletions in the specified version +// edit to the bulk edit's internal state. +// +// INVARIANTS: +// If a file is added to a given level in a call to Accumulate and then removed +// from that level in a subsequent call, the file will not be present in the +// resulting BulkVersionEdit.Deleted for that level. 
+// +// After accumulation of version edits, the bulk version edit may have +// information about a file which has been deleted from a level, but it may +// not have information about the same file added to the same level. The add +// could've occurred as part of a previous bulk version edit. In this case, +// the deleted file must be present in BulkVersionEdit.Deleted, at the end +// of the accumulation, because we need to decrease the refcount of the +// deleted file in Apply. +func (b *BulkVersionEdit) Accumulate(ve *VersionEdit) error { + for df, m := range ve.DeletedFiles { + dmap := b.Deleted[df.Level] + if dmap == nil { + dmap = make(map[base.FileNum]*FileMetadata) + b.Deleted[df.Level] = dmap + } + + if m == nil { + // m is nil only when replaying a MANIFEST. + if b.AddedByFileNum == nil { + return errors.Errorf("deleted file L%d.%s's metadata is absent and bve.AddedByFileNum is nil", df.Level, df.FileNum) + } + m = b.AddedByFileNum[df.FileNum] + if m == nil { + return base.CorruptionErrorf("pebble: file deleted L%d.%s before it was inserted", df.Level, df.FileNum) + } + } + if m.MarkedForCompaction { + b.MarkedForCompactionCountDiff-- + } + if _, ok := b.Added[df.Level][df.FileNum]; !ok { + dmap[df.FileNum] = m + } else { + // Present in b.Added for the same level. + delete(b.Added[df.Level], df.FileNum) + } + } + + // Generate state for Added backing files. Note that these must be generated + // before we loop through the NewFiles, because we need to populate the + // FileBackings which might be used by the NewFiles loop. + if b.AddedFileBacking == nil { + b.AddedFileBacking = make(map[base.DiskFileNum]*FileBacking) + } + for _, fb := range ve.CreatedBackingTables { + if _, ok := b.AddedFileBacking[fb.DiskFileNum]; ok { + // There is already a FileBacking associated with fb.DiskFileNum. + // This should never happen. There must always be only one FileBacking + // associated with a backing sstable. 
+ panic(fmt.Sprintf("pebble: duplicate file backing %s", fb.DiskFileNum.String())) + } + b.AddedFileBacking[fb.DiskFileNum] = fb + } + + for _, nf := range ve.NewFiles { + // A new file should not have been deleted in this or a preceding + // VersionEdit at the same level (though files can move across levels). + if dmap := b.Deleted[nf.Level]; dmap != nil { + if _, ok := dmap[nf.Meta.FileNum]; ok { + return base.CorruptionErrorf("pebble: file deleted L%d.%s before it was inserted", nf.Level, nf.Meta.FileNum) + } + } + if nf.Meta.Virtual && nf.Meta.FileBacking == nil { + // FileBacking for a virtual sstable must only be nil if we're performing + // manifest replay. + nf.Meta.FileBacking = b.AddedFileBacking[nf.BackingFileNum] + if nf.Meta.FileBacking == nil { + return errors.Errorf("FileBacking for virtual sstable must not be nil") + } + } else if nf.Meta.FileBacking == nil { + return errors.Errorf("Added file L%d.%s's has no FileBacking", nf.Level, nf.Meta.FileNum) + } + + if b.Added[nf.Level] == nil { + b.Added[nf.Level] = make(map[base.FileNum]*FileMetadata) + } + b.Added[nf.Level][nf.Meta.FileNum] = nf.Meta + if b.AddedByFileNum != nil { + b.AddedByFileNum[nf.Meta.FileNum] = nf.Meta + } + if nf.Meta.MarkedForCompaction { + b.MarkedForCompactionCountDiff++ + } + } + + // Since a file can be removed from backing files in exactly one version + // edit it is safe to just append without any de-duplication. + b.RemovedFileBacking = append(b.RemovedFileBacking, ve.RemovedBackingTables...) + + return nil +} + +// AccumulateIncompleteAndApplySingleVE should be called if a single version edit +// is to be applied to the provided curr Version and if the caller needs to +// update the versionSet.zombieTables map. This function exists separately from +// BulkVersionEdit.Apply because it is easier to reason about properties +// regarding BulkVersionedit.Accumulate/Apply and zombie table generation, if we +// know that exactly one version edit is being accumulated. 
+// +// Note that the version edit passed into this function may be incomplete +// because compactions don't have the ref counting information necessary to +// populate VersionEdit.RemovedBackingTables. This function will complete such a +// version edit by populating RemovedBackingTables. +// +// Invariant: Any file being deleted through ve must belong to the curr Version. +// We can't have a delete for some arbitrary file which does not exist in curr. +func AccumulateIncompleteAndApplySingleVE( + ve *VersionEdit, + curr *Version, + cmp Compare, + formatKey base.FormatKey, + flushSplitBytes int64, + readCompactionRate int64, + backingStateMap map[base.DiskFileNum]*FileBacking, + addBackingFunc func(*FileBacking), + removeBackingFunc func(base.DiskFileNum), + orderingInvariants OrderingInvariants, +) (_ *Version, zombies map[base.DiskFileNum]uint64, _ error) { + if len(ve.RemovedBackingTables) != 0 { + panic("pebble: invalid incomplete version edit") + } + var b BulkVersionEdit + err := b.Accumulate(ve) + if err != nil { + return nil, nil, err + } + zombies = make(map[base.DiskFileNum]uint64) + v, err := b.Apply( + curr, cmp, formatKey, flushSplitBytes, readCompactionRate, zombies, orderingInvariants, + ) + if err != nil { + return nil, nil, err + } + + for _, s := range b.AddedFileBacking { + addBackingFunc(s) + } + + for fileNum := range zombies { + if _, ok := backingStateMap[fileNum]; ok { + // This table was backing some virtual sstable in the latest version, + // but is now a zombie. We add RemovedBackingTables entries for + // these, before the version edit is written to disk. + ve.RemovedBackingTables = append( + ve.RemovedBackingTables, fileNum, + ) + removeBackingFunc(fileNum) + } + } + return v, zombies, nil +} + +// Apply applies the delta b to the current version to produce a new +// version. The new version is consistent with respect to the comparer cmp. +// +// curr may be nil, which is equivalent to a pointer to a zero version. 
+// +// On success, if a non-nil zombies map is provided to Apply, the map is updated +// with file numbers and files sizes of deleted files. These files are +// considered zombies because they are no longer referenced by the returned +// Version, but cannot be deleted from disk as they are still in use by the +// incoming Version. +func (b *BulkVersionEdit) Apply( + curr *Version, + cmp Compare, + formatKey base.FormatKey, + flushSplitBytes int64, + readCompactionRate int64, + zombies map[base.DiskFileNum]uint64, + orderingInvariants OrderingInvariants, +) (*Version, error) { + addZombie := func(state *FileBacking) { + if zombies != nil { + zombies[state.DiskFileNum] = state.Size + } + } + removeZombie := func(state *FileBacking) { + if zombies != nil { + delete(zombies, state.DiskFileNum) + } + } + + v := new(Version) + + // Adjust the count of files marked for compaction. + if curr != nil { + v.Stats.MarkedForCompaction = curr.Stats.MarkedForCompaction + } + v.Stats.MarkedForCompaction += b.MarkedForCompactionCountDiff + if v.Stats.MarkedForCompaction < 0 { + return nil, base.CorruptionErrorf("pebble: version marked for compaction count negative") + } + + for level := range v.Levels { + if curr == nil || curr.Levels[level].tree.root == nil { + v.Levels[level] = makeLevelMetadata(cmp, level, nil /* files */) + } else { + v.Levels[level] = curr.Levels[level].clone() + } + if curr == nil || curr.RangeKeyLevels[level].tree.root == nil { + v.RangeKeyLevels[level] = makeLevelMetadata(cmp, level, nil /* files */) + } else { + v.RangeKeyLevels[level] = curr.RangeKeyLevels[level].clone() + } + + if len(b.Added[level]) == 0 && len(b.Deleted[level]) == 0 { + // There are no edits on this level. + if level == 0 { + // Initialize L0Sublevels. 
+ if curr == nil || curr.L0Sublevels == nil { + if err := v.InitL0Sublevels(cmp, formatKey, flushSplitBytes); err != nil { + return nil, errors.Wrap(err, "pebble: internal error") + } + } else { + v.L0Sublevels = curr.L0Sublevels + v.L0SublevelFiles = v.L0Sublevels.Levels + } + } + continue + } + + // Some edits on this level. + lm := &v.Levels[level] + lmRange := &v.RangeKeyLevels[level] + + addedFilesMap := b.Added[level] + deletedFilesMap := b.Deleted[level] + if n := v.Levels[level].Len() + len(addedFilesMap); n == 0 { + return nil, base.CorruptionErrorf( + "pebble: internal error: No current or added files but have deleted files: %d", + errors.Safe(len(deletedFilesMap))) + } + + // NB: addedFilesMap may be empty. If a file is present in addedFilesMap + // for a level, it won't be present in deletedFilesMap for the same + // level. + + for _, f := range deletedFilesMap { + if obsolete := v.Levels[level].remove(f); obsolete { + // Deleting a file from the B-Tree may decrement its + // reference count. However, because we cloned the + // previous level's B-Tree, this should never result in a + // file's reference count dropping to zero. + err := errors.Errorf("pebble: internal error: file L%d.%s obsolete during B-Tree removal", level, f.FileNum) + return nil, err + } + if f.HasRangeKeys { + if obsolete := v.RangeKeyLevels[level].remove(f); obsolete { + // Deleting a file from the B-Tree may decrement its + // reference count. However, because we cloned the + // previous level's B-Tree, this should never result in a + // file's reference count dropping to zero. + err := errors.Errorf("pebble: internal error: file L%d.%s obsolete during range-key B-Tree removal", level, f.FileNum) + return nil, err + } + } + + // Note that a backing sst will only become a zombie if the + // references to it in the latest version is 0. 
We will remove the + // backing sst from the zombie list in the next loop if one of the + // addedFiles in any of the levels is referencing the backing sst. + // This is possible if a physical sstable is virtualized, or if it + // is moved. + latestRefCount := f.LatestRefs() + if latestRefCount <= 0 { + // If a file is present in deletedFilesMap for a level, then it + // must have already been added to the level previously, which + // means that its latest ref count cannot be 0. + err := errors.Errorf("pebble: internal error: incorrect latestRefs reference counting for file", f.FileNum) + return nil, err + } else if f.LatestUnref() == 0 { + addZombie(f.FileBacking) + } + } + + addedFiles := make([]*FileMetadata, 0, len(addedFilesMap)) + for _, f := range addedFilesMap { + addedFiles = append(addedFiles, f) + } + // Sort addedFiles by file number. This isn't necessary, but tests which + // replay invalid manifests check the error output, and the error output + // depends on the order in which files are added to the btree. + slices.SortFunc(addedFiles, func(a, b *FileMetadata) int { + return stdcmp.Compare(a.FileNum, b.FileNum) + }) + + var sm, la *FileMetadata + for _, f := range addedFiles { + // NB: allowedSeeks is used for read triggered compactions. It is set using + // Options.Experimental.ReadCompactionRate which defaults to 32KB. + var allowedSeeks int64 + if readCompactionRate != 0 { + allowedSeeks = int64(f.Size) / readCompactionRate + } + if allowedSeeks < 100 { + allowedSeeks = 100 + } + f.AllowedSeeks.Store(allowedSeeks) + f.InitAllowedSeeks = allowedSeeks + + err := lm.insert(f) + // We're adding this file to the new version, so increment the + // latest refs count. 
+ f.LatestRef() + if err != nil { + return nil, errors.Wrap(err, "pebble") + } + if f.HasRangeKeys { + err = lmRange.insert(f) + if err != nil { + return nil, errors.Wrap(err, "pebble") + } + } + removeZombie(f.FileBacking) + // Track the keys with the smallest and largest keys, so that we can + // check consistency of the modified span. + if sm == nil || base.InternalCompare(cmp, sm.Smallest, f.Smallest) > 0 { + sm = f + } + if la == nil || base.InternalCompare(cmp, la.Largest, f.Largest) < 0 { + la = f + } + } + + if level == 0 { + if curr != nil && curr.L0Sublevels != nil && len(deletedFilesMap) == 0 { + // Flushes and ingestions that do not delete any L0 files do not require + // a regeneration of L0Sublevels from scratch. We can instead generate + // it incrementally. + var err error + // AddL0Files requires addedFiles to be sorted in seqnum order. + SortBySeqNum(addedFiles) + v.L0Sublevels, err = curr.L0Sublevels.AddL0Files(addedFiles, flushSplitBytes, &v.Levels[0]) + if errors.Is(err, errInvalidL0SublevelsOpt) { + err = v.InitL0Sublevels(cmp, formatKey, flushSplitBytes) + } else if invariants.Enabled && err == nil { + copyOfSublevels, err := NewL0Sublevels(&v.Levels[0], cmp, formatKey, flushSplitBytes) + if err != nil { + panic(fmt.Sprintf("error when regenerating sublevels: %s", err)) + } + s1 := describeSublevels(base.DefaultFormatter, false /* verbose */, copyOfSublevels.Levels) + s2 := describeSublevels(base.DefaultFormatter, false /* verbose */, v.L0Sublevels.Levels) + if s1 != s2 { + panic(fmt.Sprintf("incremental L0 sublevel generation produced different output than regeneration: %s != %s", s1, s2)) + } + } + if err != nil { + return nil, errors.Wrap(err, "pebble: internal error") + } + v.L0SublevelFiles = v.L0Sublevels.Levels + } else if err := v.InitL0Sublevels(cmp, formatKey, flushSplitBytes); err != nil { + return nil, errors.Wrap(err, "pebble: internal error") + } + if err := CheckOrdering(cmp, formatKey, Level(0), v.Levels[level].Iter(), 
orderingInvariants); err != nil { + return nil, errors.Wrap(err, "pebble: internal error") + } + continue + } + + // Check consistency of the level in the vicinity of our edits. + if sm != nil && la != nil { + overlap := overlaps(v.Levels[level].Iter(), cmp, sm.Smallest.UserKey, + la.Largest.UserKey, la.Largest.IsExclusiveSentinel()) + // overlap contains all of the added files. We want to ensure that + // the added files are consistent with neighboring existing files + // too, so reslice the overlap to pull in a neighbor on each side. + check := overlap.Reslice(func(start, end *LevelIterator) { + if m := start.Prev(); m == nil { + start.Next() + } + if m := end.Next(); m == nil { + end.Prev() + } + }) + if err := CheckOrdering(cmp, formatKey, Level(level), check.Iter(), orderingInvariants); err != nil { + return nil, errors.Wrap(err, "pebble: internal error") + } + } + } + return v, nil +} diff --git a/pebble/internal/manifest/version_edit_test.go b/pebble/internal/manifest/version_edit_test.go new file mode 100644 index 0000000..6d09153 --- /dev/null +++ b/pebble/internal/manifest/version_edit_test.go @@ -0,0 +1,545 @@ +// Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +package manifest + +import ( + "bytes" + "fmt" + "io" + "os" + "reflect" + "slices" + "strconv" + "strings" + "testing" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/record" + "github.com/kr/pretty" + "github.com/stretchr/testify/require" +) + +func checkRoundTrip(e0 VersionEdit) error { + var e1 VersionEdit + buf := new(bytes.Buffer) + if err := e0.Encode(buf); err != nil { + return errors.Wrap(err, "encode") + } + if err := e1.Decode(buf); err != nil { + return errors.Wrap(err, "decode") + } + if diff := pretty.Diff(e0, e1); diff != nil { + return errors.Errorf("%s", strings.Join(diff, "\n")) + } + return nil +} + +// Version edits with virtual sstables will not be the same after a round trip +// as the Decode function will not set the FileBacking for a virtual sstable. +// We test round trip + bve accumulation here, after which the virtual sstable +// FileBacking should be set. 
+func TestVERoundTripAndAccumulate(t *testing.T) { + cmp := base.DefaultComparer.Compare + m1 := (&FileMetadata{ + FileNum: 810, + Size: 8090, + CreationTime: 809060, + SmallestSeqNum: 9, + LargestSeqNum: 11, + }).ExtendPointKeyBounds( + cmp, + base.MakeInternalKey([]byte("a"), 0, base.InternalKeyKindSet), + base.MakeInternalKey([]byte("m"), 0, base.InternalKeyKindSet), + ).ExtendRangeKeyBounds( + cmp, + base.MakeInternalKey([]byte("l"), 0, base.InternalKeyKindRangeKeySet), + base.MakeExclusiveSentinelKey(base.InternalKeyKindRangeKeySet, []byte("z")), + ) + m1.InitPhysicalBacking() + + m2 := (&FileMetadata{ + FileNum: 812, + Size: 8090, + CreationTime: 809060, + SmallestSeqNum: 9, + LargestSeqNum: 11, + Virtual: true, + FileBacking: m1.FileBacking, + }).ExtendPointKeyBounds( + cmp, + base.MakeInternalKey([]byte("a"), 0, base.InternalKeyKindSet), + base.MakeInternalKey([]byte("c"), 0, base.InternalKeyKindSet), + ) + + ve1 := VersionEdit{ + ComparerName: "11", + MinUnflushedLogNum: 22, + ObsoletePrevLogNum: 33, + NextFileNum: 44, + LastSeqNum: 55, + CreatedBackingTables: []*FileBacking{m1.FileBacking}, + NewFiles: []NewFileEntry{ + { + Level: 4, + Meta: m2, + // Only set for the test. + BackingFileNum: m2.FileBacking.DiskFileNum, + }, + }, + } + var err error + buf := new(bytes.Buffer) + if err = ve1.Encode(buf); err != nil { + t.Error(err) + } + var ve2 VersionEdit + if err = ve2.Decode(buf); err != nil { + t.Error(err) + } + // Perform accumulation to set the FileBacking on the files in the Decoded + // version edit. 
+ var bve BulkVersionEdit + require.NoError(t, bve.Accumulate(&ve2)) + if diff := pretty.Diff(ve1, ve2); diff != nil { + t.Error(errors.Errorf("%s", strings.Join(diff, "\n"))) + } +} + +func TestVersionEditRoundTrip(t *testing.T) { + cmp := base.DefaultComparer.Compare + m1 := (&FileMetadata{ + FileNum: 805, + Size: 8050, + CreationTime: 805030, + }).ExtendPointKeyBounds( + cmp, + base.DecodeInternalKey([]byte("abc\x00\x01\x02\x03\x04\x05\x06\x07")), + base.DecodeInternalKey([]byte("xyz\x01\xff\xfe\xfd\xfc\xfb\xfa\xf9")), + ) + m1.InitPhysicalBacking() + + m2 := (&FileMetadata{ + FileNum: 806, + Size: 8060, + CreationTime: 806040, + SmallestSeqNum: 3, + LargestSeqNum: 5, + MarkedForCompaction: true, + }).ExtendPointKeyBounds( + cmp, + base.DecodeInternalKey([]byte("A\x00\x01\x02\x03\x04\x05\x06\x07")), + base.DecodeInternalKey([]byte("Z\x01\xff\xfe\xfd\xfc\xfb\xfa\xf9")), + ) + m2.InitPhysicalBacking() + + m3 := (&FileMetadata{ + FileNum: 807, + Size: 8070, + CreationTime: 807050, + }).ExtendRangeKeyBounds( + cmp, + base.MakeInternalKey([]byte("aaa"), 0, base.InternalKeyKindRangeKeySet), + base.MakeExclusiveSentinelKey(base.InternalKeyKindRangeKeySet, []byte("zzz")), + ) + m3.InitPhysicalBacking() + + m4 := (&FileMetadata{ + FileNum: 809, + Size: 8090, + CreationTime: 809060, + SmallestSeqNum: 9, + LargestSeqNum: 11, + }).ExtendPointKeyBounds( + cmp, + base.MakeInternalKey([]byte("a"), 0, base.InternalKeyKindSet), + base.MakeInternalKey([]byte("m"), 0, base.InternalKeyKindSet), + ).ExtendRangeKeyBounds( + cmp, + base.MakeInternalKey([]byte("l"), 0, base.InternalKeyKindRangeKeySet), + base.MakeExclusiveSentinelKey(base.InternalKeyKindRangeKeySet, []byte("z")), + ) + m4.InitPhysicalBacking() + + m5 := (&FileMetadata{ + FileNum: 810, + Size: 8090, + CreationTime: 809060, + SmallestSeqNum: 9, + LargestSeqNum: 11, + }).ExtendPointKeyBounds( + cmp, + base.MakeInternalKey([]byte("a"), 0, base.InternalKeyKindSet), + base.MakeInternalKey([]byte("m"), 0, 
base.InternalKeyKindSet), + ).ExtendRangeKeyBounds( + cmp, + base.MakeInternalKey([]byte("l"), 0, base.InternalKeyKindRangeKeySet), + base.MakeExclusiveSentinelKey(base.InternalKeyKindRangeKeySet, []byte("z")), + ) + m5.InitPhysicalBacking() + + m6 := (&FileMetadata{ + FileNum: 811, + Size: 8090, + CreationTime: 809060, + SmallestSeqNum: 9, + LargestSeqNum: 11, + }).ExtendPointKeyBounds( + cmp, + base.MakeInternalKey([]byte("a"), 0, base.InternalKeyKindSet), + base.MakeInternalKey([]byte("m"), 0, base.InternalKeyKindSet), + ).ExtendRangeKeyBounds( + cmp, + base.MakeInternalKey([]byte("l"), 0, base.InternalKeyKindRangeKeySet), + base.MakeExclusiveSentinelKey(base.InternalKeyKindRangeKeySet, []byte("z")), + ) + m6.InitPhysicalBacking() + + testCases := []VersionEdit{ + // An empty version edit. + {}, + // A complete version edit. + { + ComparerName: "11", + MinUnflushedLogNum: 22, + ObsoletePrevLogNum: 33, + NextFileNum: 44, + LastSeqNum: 55, + RemovedBackingTables: []base.DiskFileNum{ + base.FileNum(10).DiskFileNum(), base.FileNum(11).DiskFileNum(), + }, + CreatedBackingTables: []*FileBacking{m5.FileBacking, m6.FileBacking}, + DeletedFiles: map[DeletedFileEntry]*FileMetadata{ + { + Level: 3, + FileNum: 703, + }: nil, + { + Level: 4, + FileNum: 704, + }: nil, + }, + NewFiles: []NewFileEntry{ + { + Level: 4, + Meta: m1, + }, + { + Level: 5, + Meta: m2, + }, + { + Level: 6, + Meta: m3, + }, + { + Level: 6, + Meta: m4, + }, + }, + }, + } + for _, tc := range testCases { + if err := checkRoundTrip(tc); err != nil { + t.Error(err) + } + } +} + +func TestVersionEditDecode(t *testing.T) { + // TODO(radu): these should be datadriven tests that output the encoded and + // decoded edits. 
+ cmp := base.DefaultComparer.Compare + m := (&FileMetadata{ + FileNum: 4, + Size: 709, + SmallestSeqNum: 12, + LargestSeqNum: 14, + CreationTime: 1701712644, + }).ExtendPointKeyBounds( + cmp, + base.MakeInternalKey([]byte("bar"), 14, base.InternalKeyKindDelete), + base.MakeInternalKey([]byte("foo"), 13, base.InternalKeyKindSet), + ) + m.InitPhysicalBacking() + + testCases := []struct { + filename string + encodedEdits []string + edits []VersionEdit + }{ + // db-stage-1 and db-stage-2 have the same manifest. + { + filename: "db-stage-1/MANIFEST-000001", + encodedEdits: []string{ + "\x01\x1aleveldb.BytewiseComparator\x03\x02\x04\x00", + "\x02\x02\x03\x03\x04\t", + }, + edits: []VersionEdit{ + { + ComparerName: "leveldb.BytewiseComparator", + NextFileNum: 2, + }, + { + MinUnflushedLogNum: 0x2, + NextFileNum: 0x3, + LastSeqNum: 0x9, + }, + }, + }, + // db-stage-3 and db-stage-4 have the same manifest. + { + filename: "db-stage-3/MANIFEST-000006", + encodedEdits: []string{ + "\x01\x1aleveldb.BytewiseComparator\x02\x02\x03\a\x04\x00", + "\x02\x05\x03\x06\x04\x0eg\x00\x04\xc5\x05\vbar\x00\x0e\x00\x00\x00\x00\x00\x00\vfoo\x01\r\x00\x00\x00\x00\x00\x00\f\x0e\x06\x05\x84\xa6\xb8\xab\x06\x01", + }, + edits: []VersionEdit{ + { + ComparerName: "leveldb.BytewiseComparator", + MinUnflushedLogNum: 0x2, + NextFileNum: 0x7, + }, + { + MinUnflushedLogNum: 0x5, + NextFileNum: 0x6, + LastSeqNum: 0xe, + NewFiles: []NewFileEntry{ + { + Level: 0, + Meta: m, + }, + }, + }, + }, + }, + } + + for _, tc := range testCases { + t.Run("", func(t *testing.T) { + f, err := os.Open("../../testdata/" + tc.filename) + if err != nil { + t.Fatalf("filename=%q: open error: %v", tc.filename, err) + } + defer f.Close() + i, r := 0, record.NewReader(f, 0 /* logNum */) + for { + rr, err := r.Next() + if err == io.EOF { + break + } + if err != nil { + t.Fatalf("filename=%q i=%d: record reader error: %v", tc.filename, i, err) + } + if i >= len(tc.edits) { + t.Fatalf("filename=%q i=%d: too many version 
edits", tc.filename, i+1) + } + + encodedEdit, err := io.ReadAll(rr) + if err != nil { + t.Fatalf("filename=%q i=%d: read error: %v", tc.filename, i, err) + } + if s := string(encodedEdit); s != tc.encodedEdits[i] { + t.Fatalf("filename=%q i=%d: got encoded %q, want %q", tc.filename, i, s, tc.encodedEdits[i]) + } + + var edit VersionEdit + err = edit.Decode(bytes.NewReader(encodedEdit)) + if err != nil { + t.Fatalf("filename=%q i=%d: decode error: %v", tc.filename, i, err) + } + if !reflect.DeepEqual(edit, tc.edits[i]) { + t.Fatalf("filename=%q i=%d: decode\n\tgot %#v\n\twant %#v\n%s", tc.filename, i, edit, tc.edits[i], + strings.Join(pretty.Diff(edit, tc.edits[i]), "\n")) + } + if err := checkRoundTrip(edit); err != nil { + t.Fatalf("filename=%q i=%d: round trip: %v", tc.filename, i, err) + } + + i++ + } + if i != len(tc.edits) { + t.Fatalf("filename=%q: got %d edits, want %d", tc.filename, i, len(tc.edits)) + } + }) + } +} + +func TestVersionEditEncodeLastSeqNum(t *testing.T) { + testCases := []struct { + edit VersionEdit + encoded string + }{ + // If ComparerName is unset, LastSeqNum is only encoded if non-zero. + {VersionEdit{LastSeqNum: 0}, ""}, + {VersionEdit{LastSeqNum: 1}, "\x04\x01"}, + // For compatibility with RocksDB, if ComparerName is set we always encode + // LastSeqNum. + {VersionEdit{ComparerName: "foo", LastSeqNum: 0}, "\x01\x03\x66\x6f\x6f\x04\x00"}, + {VersionEdit{ComparerName: "foo", LastSeqNum: 1}, "\x01\x03\x66\x6f\x6f\x04\x01"}, + } + for _, c := range testCases { + t.Run("", func(t *testing.T) { + var buf bytes.Buffer + require.NoError(t, c.edit.Encode(&buf)) + if result := buf.String(); c.encoded != result { + t.Fatalf("expected %x, but found %x", c.encoded, result) + } + + if c.edit.ComparerName != "" { + // Manually decode the version edit so that we can verify the contents + // even if the LastSeqNum decodes to 0. + d := versionEditDecoder{strings.NewReader(c.encoded)} + + // Decode ComparerName. 
+ tag, err := d.readUvarint() + require.NoError(t, err) + if tag != tagComparator { + t.Fatalf("expected %d, but found %d", tagComparator, tag) + } + s, err := d.readBytes() + require.NoError(t, err) + if c.edit.ComparerName != string(s) { + t.Fatalf("expected %q, but found %q", c.edit.ComparerName, s) + } + + // Decode LastSeqNum. + tag, err = d.readUvarint() + require.NoError(t, err) + if tag != tagLastSequence { + t.Fatalf("expected %d, but found %d", tagLastSequence, tag) + } + val, err := d.readUvarint() + require.NoError(t, err) + if c.edit.LastSeqNum != val { + t.Fatalf("expected %d, but found %d", c.edit.LastSeqNum, val) + } + } + }) + } +} + +func TestVersionEditApply(t *testing.T) { + parseMeta := func(s string) (*FileMetadata, error) { + m, err := ParseFileMetadataDebug(s) + if err != nil { + return nil, err + } + m.SmallestSeqNum = m.Smallest.SeqNum() + m.LargestSeqNum = m.Largest.SeqNum() + if m.SmallestSeqNum > m.LargestSeqNum { + m.SmallestSeqNum, m.LargestSeqNum = m.LargestSeqNum, m.SmallestSeqNum + } + m.InitPhysicalBacking() + return m, nil + } + + // TODO(bananabrick): Improve the parsing logic in this test. + datadriven.RunTest(t, "testdata/version_edit_apply", + func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "apply": + // TODO(sumeer): move this Version parsing code to utils, to + // avoid repeating it, and make it the inverse of + // Version.DebugString(). 
+ var v *Version + var veList []*VersionEdit + isVersion := true + isDelete := true + var level int + var err error + versionFiles := map[base.FileNum]*FileMetadata{} + for _, data := range strings.Split(d.Input, "\n") { + data = strings.TrimSpace(data) + switch data { + case "edit": + isVersion = false + veList = append(veList, &VersionEdit{}) + case "delete": + isVersion = false + isDelete = true + case "add": + isVersion = false + isDelete = false + case "L0", "L1", "L2", "L3", "L4", "L5", "L6": + level, err = strconv.Atoi(data[1:]) + if err != nil { + return err.Error() + } + default: + var ve *VersionEdit + if len(veList) > 0 { + ve = veList[len(veList)-1] + } + if isVersion || !isDelete { + meta, err := parseMeta(data) + if err != nil { + return err.Error() + } + if isVersion { + if v == nil { + v = new(Version) + for l := 0; l < NumLevels; l++ { + v.Levels[l] = makeLevelMetadata(base.DefaultComparer.Compare, l, nil /* files */) + } + } + versionFiles[meta.FileNum] = meta + v.Levels[level].insert(meta) + meta.LatestRef() + } else { + ve.NewFiles = + append(ve.NewFiles, NewFileEntry{Level: level, Meta: meta}) + } + } else { + fileNum, err := strconv.Atoi(data) + if err != nil { + return err.Error() + } + dfe := DeletedFileEntry{Level: level, FileNum: base.FileNum(fileNum)} + if ve.DeletedFiles == nil { + ve.DeletedFiles = make(map[DeletedFileEntry]*FileMetadata) + } + ve.DeletedFiles[dfe] = versionFiles[dfe.FileNum] + } + } + } + + if v != nil { + if err := v.InitL0Sublevels(base.DefaultComparer.Compare, base.DefaultFormatter, 10<<20); err != nil { + return err.Error() + } + } + + bve := BulkVersionEdit{} + bve.AddedByFileNum = make(map[base.FileNum]*FileMetadata) + for _, ve := range veList { + if err := bve.Accumulate(ve); err != nil { + return err.Error() + } + } + zombies := make(map[base.DiskFileNum]uint64) + newv, err := bve.Apply(v, base.DefaultComparer.Compare, base.DefaultFormatter, 10<<20, 32000, zombies, ProhibitSplitUserKeys) + if err != nil { + 
return err.Error() + } + + zombieFileNums := make([]base.DiskFileNum, 0, len(zombies)) + if len(veList) == 1 { + // Only care about zombies if a single version edit was + // being applied. + for fileNum := range zombies { + zombieFileNums = append(zombieFileNums, fileNum) + } + slices.Sort(zombieFileNums) + } + + return fmt.Sprintf("%szombies %d\n", newv, zombieFileNums) + + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} diff --git a/pebble/internal/manifest/version_test.go b/pebble/internal/manifest/version_test.go new file mode 100644 index 0000000..abde613 --- /dev/null +++ b/pebble/internal/manifest/version_test.go @@ -0,0 +1,429 @@ +// Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package manifest + +import ( + "bytes" + "fmt" + "strings" + "sync" + "testing" + + "github.com/cockroachdb/datadriven" + "github.com/cockroachdb/pebble/internal/base" + "github.com/cockroachdb/pebble/internal/testkeys" + "github.com/stretchr/testify/require" +) + +func levelMetadata(level int, files ...*FileMetadata) LevelMetadata { + return makeLevelMetadata(base.DefaultComparer.Compare, level, files) +} + +func ikey(s string) InternalKey { + return base.MakeInternalKey([]byte(s), 0, base.InternalKeyKindSet) +} + +func TestIkeyRange(t *testing.T) { + cmp := base.DefaultComparer.Compare + testCases := []struct { + input, want string + }{ + { + "", + "-", + }, + { + "a-e", + "a-e", + }, + { + "a-e a-e", + "a-e", + }, + { + "c-g a-e", + "a-g", + }, + { + "a-e c-g a-e", + "a-g", + }, + { + "b-d f-g", + "b-g", + }, + { + "d-e b-d", + "b-e", + }, + { + "e-e", + "e-e", + }, + { + "f-g e-e d-e c-g b-d a-e", + "a-g", + }, + } + for _, tc := range testCases { + var f []*FileMetadata + if tc.input != "" { + for i, s := range strings.Split(tc.input, " ") { + m := (&FileMetadata{ + FileNum: base.FileNum(i), + }).ExtendPointKeyBounds(cmp, 
ikey(s[0:1]), ikey(s[2:3])) + m.InitPhysicalBacking() + f = append(f, m) + } + } + levelMetadata := makeLevelMetadata(base.DefaultComparer.Compare, 0, f) + + sm, la := KeyRange(base.DefaultComparer.Compare, levelMetadata.Iter()) + got := string(sm.UserKey) + "-" + string(la.UserKey) + if got != tc.want { + t.Errorf("KeyRange(%q) = %q, %q", tc.input, got, tc.want) + } + } +} + +func TestOverlaps(t *testing.T) { + var v *Version + cmp := testkeys.Comparer.Compare + fmtKey := testkeys.Comparer.FormatKey + datadriven.RunTest(t, "testdata/overlaps", func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "define": + var err error + v, err = ParseVersionDebug(cmp, fmtKey, 64>>10 /* flush split bytes */, d.Input) + if err != nil { + return err.Error() + } + return v.String() + case "overlaps": + var level int + var start, end string + var exclusiveEnd bool + d.ScanArgs(t, "level", &level) + d.ScanArgs(t, "start", &start) + d.ScanArgs(t, "end", &end) + d.ScanArgs(t, "exclusive-end", &exclusiveEnd) + overlaps := v.Overlaps(level, testkeys.Comparer.Compare, []byte(start), []byte(end), exclusiveEnd) + var buf bytes.Buffer + fmt.Fprintf(&buf, "%d files:\n", overlaps.Len()) + overlaps.Each(func(f *FileMetadata) { + fmt.Fprintf(&buf, "%s\n", f.DebugString(base.DefaultFormatter, false)) + }) + return buf.String() + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} + +func TestContains(t *testing.T) { + cmp := base.DefaultComparer.Compare + newFileMeta := func(fileNum base.FileNum, size uint64, smallest, largest base.InternalKey) *FileMetadata { + m := (&FileMetadata{ + FileNum: fileNum, + Size: size, + }).ExtendPointKeyBounds(cmp, smallest, largest) + m.InitPhysicalBacking() + return m + } + m00 := newFileMeta( + 700, + 1, + base.ParseInternalKey("b.SET.7008"), + base.ParseInternalKey("e.SET.7009"), + ) + m01 := newFileMeta( + 701, + 1, + base.ParseInternalKey("c.SET.7018"), + base.ParseInternalKey("f.SET.7019"), + ) + m02 := newFileMeta( 
+ 702, + 1, + base.ParseInternalKey("f.SET.7028"), + base.ParseInternalKey("g.SET.7029"), + ) + m03 := newFileMeta( + 703, + 1, + base.ParseInternalKey("x.SET.7038"), + base.ParseInternalKey("y.SET.7039"), + ) + m04 := newFileMeta( + 704, + 1, + base.ParseInternalKey("n.SET.7048"), + base.ParseInternalKey("p.SET.7049"), + ) + m05 := newFileMeta( + 705, + 1, + base.ParseInternalKey("p.SET.7058"), + base.ParseInternalKey("p.SET.7059"), + ) + m06 := newFileMeta( + 706, + 1, + base.ParseInternalKey("p.SET.7068"), + base.ParseInternalKey("u.SET.7069"), + ) + m07 := newFileMeta( + 707, + 1, + base.ParseInternalKey("r.SET.7078"), + base.ParseInternalKey("s.SET.7079"), + ) + + m10 := newFileMeta( + 710, + 1, + base.ParseInternalKey("d.SET.7108"), + base.ParseInternalKey("g.SET.7109"), + ) + m11 := newFileMeta( + 711, + 1, + base.ParseInternalKey("g.SET.7118"), + base.ParseInternalKey("j.SET.7119"), + ) + m12 := newFileMeta( + 712, + 1, + base.ParseInternalKey("n.SET.7128"), + base.ParseInternalKey("p.SET.7129"), + ) + m13 := newFileMeta( + 713, + 1, + base.ParseInternalKey("p.SET.7148"), + base.ParseInternalKey("p.SET.7149"), + ) + m14 := newFileMeta( + 714, + 1, + base.ParseInternalKey("p.SET.7138"), + base.ParseInternalKey("u.SET.7139"), + ) + + v := Version{ + Levels: [NumLevels]LevelMetadata{ + 0: levelMetadata(0, m00, m01, m02, m03, m04, m05, m06, m07), + 1: levelMetadata(1, m10, m11, m12, m13, m14), + }, + } + + testCases := []struct { + level int + file *FileMetadata + want bool + }{ + // Level 0: m00=b-e, m01=c-f, m02=f-g, m03=x-y, m04=n-p, m05=p-p, m06=p-u, m07=r-s. + // Note that: + // - the slice isn't sorted (e.g. m02=f-g, m03=x-y, m04=n-p), + // - m00 and m01 overlap (not just touch), + // - m06 contains m07, + // - m00, m01 and m02 transitively overlap/touch each other, and + // - m04, m05, m06 and m07 transitively overlap/touch each other. 
+ {0, m00, true}, + {0, m01, true}, + {0, m02, true}, + {0, m03, true}, + {0, m04, true}, + {0, m05, true}, + {0, m06, true}, + {0, m07, true}, + {0, m10, false}, + {0, m11, false}, + {0, m12, false}, + {0, m13, false}, + {0, m14, false}, + {1, m00, false}, + {1, m01, false}, + {1, m02, false}, + {1, m03, false}, + {1, m04, false}, + {1, m05, false}, + {1, m06, false}, + {1, m07, false}, + {1, m10, true}, + {1, m11, true}, + {1, m12, true}, + {1, m13, true}, + {1, m14, true}, + + // Level 2: empty. + {2, m00, false}, + {2, m14, false}, + } + + for _, tc := range testCases { + got := v.Contains(tc.level, cmp, tc.file) + if got != tc.want { + t.Errorf("level=%d, file=%s\ngot %t\nwant %t", tc.level, tc.file, got, tc.want) + } + } +} + +func TestVersionUnref(t *testing.T) { + list := &VersionList{} + list.Init(&sync.Mutex{}) + v := &Version{Deleted: func([]*FileBacking) {}} + v.Ref() + list.PushBack(v) + v.Unref() + if !list.Empty() { + t.Fatalf("expected version list to be empty") + } +} + +func TestCheckOrdering(t *testing.T) { + cmp := base.DefaultComparer.Compare + fmtKey := base.DefaultComparer.FormatKey + datadriven.RunTest(t, "testdata/version_check_ordering", + func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "check-ordering": + orderingInvariants := ProhibitSplitUserKeys + if d.HasArg("allow-split-user-keys") { + orderingInvariants = AllowSplitUserKeys + } + v, err := ParseVersionDebug(cmp, fmtKey, 10<<20, d.Input) + if err != nil { + return err.Error() + } + // L0 files compare on sequence numbers. Use the seqnums from the + // smallest / largest bounds for the table. 
+ v.Levels[0].Slice().Each(func(m *FileMetadata) { + m.SmallestSeqNum = m.Smallest.SeqNum() + m.LargestSeqNum = m.Largest.SeqNum() + }) + if err = v.CheckOrdering(cmp, base.DefaultFormatter, orderingInvariants); err != nil { + return err.Error() + } + return "OK" + + default: + return fmt.Sprintf("unknown command: %s", d.Cmd) + } + }) +} + +func TestExtendBounds(t *testing.T) { + cmp := base.DefaultComparer.Compare + parseBounds := func(line string) (lower, upper InternalKey) { + parts := strings.Split(line, "-") + if len(parts) == 1 { + parts = strings.Split(parts[0], ":") + start, end := strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1]) + lower = base.ParseInternalKey(start) + switch k := lower.Kind(); k { + case base.InternalKeyKindRangeDelete: + upper = base.MakeRangeDeleteSentinelKey([]byte(end)) + case base.InternalKeyKindRangeKeySet, base.InternalKeyKindRangeKeyUnset, base.InternalKeyKindRangeKeyDelete: + upper = base.MakeExclusiveSentinelKey(k, []byte(end)) + default: + panic(fmt.Sprintf("unknown kind %s with end key", k)) + } + } else { + l, u := strings.TrimSpace(parts[0]), strings.TrimSpace(parts[1]) + lower, upper = base.ParseInternalKey(l), base.ParseInternalKey(u) + } + return + } + format := func(m *FileMetadata) string { + var b bytes.Buffer + var smallest, largest string + switch m.boundTypeSmallest { + case boundTypePointKey: + smallest = "point" + case boundTypeRangeKey: + smallest = "range" + default: + return fmt.Sprintf("unknown bound type %d", m.boundTypeSmallest) + } + switch m.boundTypeLargest { + case boundTypePointKey: + largest = "point" + case boundTypeRangeKey: + largest = "range" + default: + return fmt.Sprintf("unknown bound type %d", m.boundTypeLargest) + } + bounds, err := m.boundsMarker() + if err != nil { + panic(err) + } + fmt.Fprintf(&b, "%s\n", m.DebugString(base.DefaultFormatter, true)) + fmt.Fprintf(&b, " bounds: (smallest=%s,largest=%s) (0x%08b)\n", smallest, largest, bounds) + return b.String() + } + m := 
&FileMetadata{} + datadriven.RunTest(t, "testdata/file_metadata_bounds", func(t *testing.T, d *datadriven.TestData) string { + switch d.Cmd { + case "reset": + m = &FileMetadata{} + return "" + case "extend-point-key-bounds": + u, l := parseBounds(d.Input) + m.ExtendPointKeyBounds(cmp, u, l) + return format(m) + case "extend-range-key-bounds": + u, l := parseBounds(d.Input) + m.ExtendRangeKeyBounds(cmp, u, l) + return format(m) + default: + return fmt.Sprintf("unknown command %s\n", d.Cmd) + } + }) +} + +func TestFileMetadata_ParseRoundTrip(t *testing.T) { + testCases := []struct { + name string + input string + output string + }{ + { + name: "point keys only", + input: "000001:[a#0,SET-z#0,DEL] seqnums:[0-0] points:[a#0,SET-z#0,DEL]", + }, + { + name: "range keys only", + input: "000001:[a#0,RANGEKEYSET-z#0,RANGEKEYDEL] seqnums:[0-0] ranges:[a#0,RANGEKEYSET-z#0,RANGEKEYDEL]", + }, + { + name: "point and range keys", + input: "000001:[a#0,RANGEKEYSET-d#0,DEL] seqnums:[0-0] points:[b#0,SET-d#0,DEL] ranges:[a#0,RANGEKEYSET-c#0,RANGEKEYDEL]", + }, + { + name: "point and range keys with nonzero senums", + input: "000001:[a#3,RANGEKEYSET-d#4,DEL] seqnums:[3-7] points:[b#3,SET-d#4,DEL] ranges:[a#3,RANGEKEYSET-c#5,RANGEKEYDEL]", + }, + { + name: "whitespace", + input: " 000001 : [ a#0,SET - z#0,DEL] points : [ a#0,SET - z#0,DEL] ", + output: "000001:[a#0,SET-z#0,DEL] seqnums:[0-0] points:[a#0,SET-z#0,DEL]", + }, + } + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + m, err := ParseFileMetadataDebug(tc.input) + require.NoError(t, err) + err = m.Validate(base.DefaultComparer.Compare, base.DefaultFormatter) + require.NoError(t, err) + got := m.DebugString(base.DefaultFormatter, true) + want := tc.input + if tc.output != "" { + want = tc.output + } + require.Equal(t, want, got) + }) + } +} diff --git a/pebble/internal/manual/manual.go b/pebble/internal/manual/manual.go new file mode 100644 index 0000000..640816a --- /dev/null +++ 
b/pebble/internal/manual/manual.go @@ -0,0 +1,60 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +package manual + +// #include +import "C" +import "unsafe" + +// The go:linkname directives provides backdoor access to private functions in +// the runtime. Below we're accessing the throw function. + +//go:linkname throw runtime.throw +func throw(s string) + +// TODO(peter): Rather than relying an C malloc/free, we could fork the Go +// runtime page allocator and allocate large chunks of memory using mmap or +// similar. + +// New allocates a slice of size n. The returned slice is from manually managed +// memory and MUST be released by calling Free. Failure to do so will result in +// a memory leak. +func New(n int) []byte { + if n == 0 { + return make([]byte, 0) + } + // We need to be conscious of the Cgo pointer passing rules: + // + // https://golang.org/cmd/cgo/#hdr-Passing_pointers + // + // ... + // Note: the current implementation has a bug. While Go code is permitted + // to write nil or a C pointer (but not a Go pointer) to C memory, the + // current implementation may sometimes cause a runtime error if the + // contents of the C memory appear to be a Go pointer. Therefore, avoid + // passing uninitialized C memory to Go code if the Go code is going to + // store pointer values in it. Zero out the memory in C before passing it + // to Go. + ptr := C.calloc(C.size_t(n), 1) + if ptr == nil { + // NB: throw is like panic, except it guarantees the process will be + // terminated. The call below is exactly what the Go runtime invokes when + // it cannot allocate memory. + throw("out of memory") + } + // Interpret the C pointer as a pointer to a Go array, then slice. + return (*[MaxArrayLen]byte)(unsafe.Pointer(ptr))[:n:n] +} + +// Free frees the specified slice. 
+func Free(b []byte) { + if cap(b) != 0 { + if len(b) == 0 { + b = b[:cap(b)] + } + ptr := unsafe.Pointer(&b[0]) + C.free(ptr) + } +} diff --git a/pebble/internal/manual/manual_32bit.go b/pebble/internal/manual/manual_32bit.go new file mode 100644 index 0000000..19369fa --- /dev/null +++ b/pebble/internal/manual/manual_32bit.go @@ -0,0 +1,13 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +//go:build 386 || amd64p32 || arm || armbe || ppc || sparc +// +build 386 amd64p32 arm armbe ppc sparc + +package manual + +const ( + // MaxArrayLen is a safe maximum length for slices on this architecture. + MaxArrayLen = 1<<31 - 1 +) diff --git a/pebble/internal/manual/manual_64bit.go b/pebble/internal/manual/manual_64bit.go new file mode 100644 index 0000000..8c08232 --- /dev/null +++ b/pebble/internal/manual/manual_64bit.go @@ -0,0 +1,13 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +//go:build amd64 || arm64 || arm64be || ppc64 || ppc64le || mips64 || mips64le || s390x || sparc64 || riscv64 +// +build amd64 arm64 arm64be ppc64 ppc64le mips64 mips64le s390x sparc64 riscv64 + +package manual + +const ( + // MaxArrayLen is a safe maximum length for slices on this architecture. + MaxArrayLen = 1<<50 - 1 +) diff --git a/pebble/internal/manual/manual_mips.go b/pebble/internal/manual/manual_mips.go new file mode 100644 index 0000000..08bb880 --- /dev/null +++ b/pebble/internal/manual/manual_mips.go @@ -0,0 +1,13 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. 
+ +//go:build mips || mipsle || mips64p32 || mips64p32le +// +build mips mipsle mips64p32 mips64p32le + +package manual + +const ( + // MaxArrayLen is a safe maximum length for slices on this architecture. + MaxArrayLen = 1 << 30 +) diff --git a/pebble/internal/manual/manual_nocgo.go b/pebble/internal/manual/manual_nocgo.go new file mode 100644 index 0000000..74befbd --- /dev/null +++ b/pebble/internal/manual/manual_nocgo.go @@ -0,0 +1,20 @@ +// Copyright 2020 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +//go:build !cgo +// +build !cgo + +package manual + +// Provides versions of New and Free when cgo is not available (e.g. cross +// compilation). + +// New allocates a slice of size n. +func New(n int) []byte { + return make([]byte, n) +} + +// Free frees the specified slice. +func Free(b []byte) { +} diff --git a/pebble/internal/metamorphic/.gitignore b/pebble/internal/metamorphic/.gitignore new file mode 100644 index 0000000..33a7810 --- /dev/null +++ b/pebble/internal/metamorphic/.gitignore @@ -0,0 +1,2 @@ +_meta/ +*.test diff --git a/pebble/internal/metamorphic/crossversion/crossversion_test.go b/pebble/internal/metamorphic/crossversion/crossversion_test.go new file mode 100644 index 0000000..192140e --- /dev/null +++ b/pebble/internal/metamorphic/crossversion/crossversion_test.go @@ -0,0 +1,409 @@ +// Copyright 2022 The LevelDB-Go and Pebble Authors. All rights reserved. Use +// of this source code is governed by a BSD-style license that can be found in +// the LICENSE file. + +// Package crossversion builds on the metamorphic testing implemented in +// internal/metamorphic, performing metamorphic testing across versions of +// Pebble. This improves test coverage of upgrade and migration code paths. 
+package crossversion + +import ( + "bytes" + "context" + "flag" + "fmt" + "io" + "math/rand" + "os" + "os/exec" + "path/filepath" + "strconv" + "strings" + "sync" + "testing" + "time" + "unicode" + + "github.com/cockroachdb/errors" + "github.com/cockroachdb/pebble/metamorphic" + "github.com/cockroachdb/pebble/vfs" + "github.com/stretchr/testify/require" +) + +var ( + factor int + seed int64 + versions pebbleVersions + artifactsDir string + streamOutput bool +) + +func init() { + // NB: If you add new command-line flags, you should update the + // reproductionCommand function. + flag.Int64Var(&seed, "seed", 0, + `a pseudorandom number generator seed`) + flag.IntVar(&factor, "factor", 10, + `the number of data directories to carry forward +from one version's run to the subsequent version's runs.`) + flag.Var(&versions, "version", + `a comma-separated 3-tuple defining a Pebble version to test. +The expected format is