// ceremonyclient/go-libp2p-kad-dht/netsize/netsize.go

package netsize

import (
	"fmt"
	"math"
	"math/big"
	"sort"
	"strings"
	"sync"
	"sync/atomic"
	"time"

	logging "github.com/ipfs/go-log/v2"
	kbucket "github.com/libp2p/go-libp2p-kbucket"
	"github.com/libp2p/go-libp2p/core/peer"
	ks "github.com/whyrusleeping/go-keyspace"
)

// invalidEstimate indicates that we currently have no valid estimate cached.
// Track stores it in Estimator.netSizeCache to invalidate the cache, and
// NetworkSize recomputes the estimate whenever it reads this value.
const invalidEstimate int32 = -1

var (
	// ErrNotEnoughData is returned by NetworkSize when too few measurements
	// have been tracked to compute an estimate.
	ErrNotEnoughData   = fmt.Errorf("not enough data")
	// ErrWrongNumOfPeers is returned by Track when the number of given peers
	// differs from the bucket size the Estimator was created with.
	ErrWrongNumOfPeers = fmt.Errorf("expected bucket size number of peers")
)

var (
	// logger is the package logger, registered under the name "dht/netsize".
	logger                   = logging.Logger("dht/netsize")
	// MaxMeasurementAge is the time window for measurements. Track and
	// garbageCollect discard measurements older than this.
	MaxMeasurementAge        = 2 * time.Hour
	// MinMeasurementsThreshold is the minimum number of measurements every
	// rank must hold before NetworkSize computes an estimate.
	MinMeasurementsThreshold = 5
	// MaxMeasurementsThreshold is the maximum number of measurements Track
	// keeps per rank; the oldest ones are dropped beyond that.
	MaxMeasurementsThreshold = 150
	// keyspaceMaxInt is the largest 256-bit value (2^256 - 1), parsed from a
	// string of 256 binary ones. The ok result of SetString is ignored.
	keyspaceMaxInt, _        = new(big.Int).SetString(strings.Repeat("1", 256), 2)
	// keyspaceMaxFloat is keyspaceMaxInt as a big.Float; NormedDistance
	// divides by it to normalise distances.
	keyspaceMaxFloat         = new(big.Float).SetInt(keyspaceMaxInt)
)

// Estimator collects distance measurements of the closest peers to tracked
// keys (see Track) and derives a network size estimate from them (see
// NetworkSize).
type Estimator struct {
	// localID is the local peer ID converted with kbucket.ConvertPeerID.
	localID    kbucket.ID
	// rt is the routing table queried by calcWeight for the number of peers
	// at a given common prefix length.
	rt         *kbucket.RoutingTable
	// bucketSize is the number of peers Track expects per call and the number
	// of ranks for which measurements are kept.
	bucketSize int

	// measurementsLk is held by Track and NetworkSize while they read or
	// modify measurements.
	measurementsLk sync.RWMutex
	// measurements maps a rank (the index of a peer in the list passed to
	// Track, 0 being the first entry) to the measurements recorded for it.
	measurements   map[int][]measurement

	// netSizeCache holds the last computed estimate, or invalidEstimate if
	// there is none. It is read lock-free via sync/atomic in NetworkSize.
	netSizeCache int32
}

// NewEstimator returns an Estimator for the peer identified by localID that
// consults rt when weighting measurements and expects bucketSize peers per
// Track call. The returned Estimator starts without a cached estimate.
func NewEstimator(localID peer.ID, rt *kbucket.RoutingTable, bucketSize int) *Estimator {
	est := &Estimator{
		localID:      kbucket.ConvertPeerID(localID),
		rt:           rt,
		bucketSize:   bucketSize,
		measurements: make(map[int][]measurement),
		netSizeCache: invalidEstimate,
	}

	// Give every rank an (initially empty) list of observations.
	for rank := 0; rank < bucketSize; rank++ {
		est.measurements[rank] = []measurement{}
	}

	return est
}

// NormedDistance returns the XOR distance between the key space
// representation of p and the key k, divided by the maximum 256-bit value so
// that the result lies between 0 and 1.
func NormedDistance(p peer.ID, k ks.Key) float64 {
	peerKey := ks.XORKeySpace.Key([]byte(p))
	dist := new(big.Float).SetInt(peerKey.Distance(k))

	// Normalise against the size of the key space. The accuracy reported by
	// Float64 is not needed here.
	ratio := new(big.Float).Quo(dist, keyspaceMaxFloat)
	normed, _ := ratio.Float64()
	return normed
}

// measurement is a single observation recorded by Track for one peer of a
// tracked key.
type measurement struct {
	// distance is the normed XOR distance (see NormedDistance) between the
	// peer and the tracked key.
	distance  float64
	// weight is the weight calcWeight assigned to the Track call this
	// measurement stems from.
	weight    float64
	// timestamp is the time at which Track recorded the measurement.
	timestamp time.Time
}

// Track records the given peers for key so that they are taken into account
// by the next network size estimate.
//
// key is expected **NOT** to be in the kademlia keyspace yet; it is mapped
// into it here. peers is expected to be sorted by closeness to key (closest
// first) and must contain exactly as many entries as the bucket size the
// Estimator was created with, otherwise ErrWrongNumOfPeers is returned and
// nothing is recorded.
//
// While recording, measurements older than MaxMeasurementAge are dropped and
// each rank is capped at MaxMeasurementsThreshold entries, favouring the most
// recent ones. Any cached estimate is invalidated.
func (e *Estimator) Track(key string, peers []peer.ID) error {
	e.measurementsLk.Lock()
	defer e.measurementsLk.Unlock()

	if len(peers) != e.bucketSize {
		return ErrWrongNumOfPeers
	}

	logger.Debugw("Tracking peers for key", "key", key)

	now := time.Now()

	// New data makes any previously computed estimate stale.
	atomic.StoreInt32(&e.netSizeCache, invalidEstimate)

	// All peers of this call share one weight and one target key.
	weight := e.calcWeight(key, peers)
	ksKey := ks.XORKeySpace.Key([]byte(key))

	// Measurements taken at or before this instant are too old to keep.
	cutoff := now.Add(-MaxMeasurementAge)

	for rank, p := range peers {
		series := append(e.measurements[rank], measurement{
			distance:  NormedDistance(p, ksKey),
			weight:    weight,
			timestamp: now,
		})

		// Entries are ordered by time, so binary search finds the first one
		// that is still inside the time window. Everything before it is stale.
		firstFresh := sort.Search(len(series), func(j int) bool {
			return series[j].timestamp.After(cutoff)
		})

		// firstFresh == 0 is the common case where nothing is stale. Otherwise
		// copy the surviving tail into a fresh slice so the stale prefix is
		// released (the copy is empty if nothing survived).
		if firstFresh > 0 {
			fresh := make([]measurement, len(series)-firstFresh)
			copy(fresh, series[firstFresh:])
			series = fresh
		}

		// Enforce the per-rank cap by cutting off the oldest entries.
		if excess := len(series) - MaxMeasurementsThreshold; excess > 0 {
			series = series[excess:]
		}

		e.measurements[rank] = series
	}

	return nil
}

// NetworkSize returns the current network size estimate. A cached estimate is
// returned if one is available; otherwise the estimate is computed from the
// tracked measurements and cached.
//
// For every rank i (the 0-based position of a peer in the lists passed to
// Track) the weighted mean and standard deviation of the recorded distances
// are calculated. A line through the origin is then fitted to the points
// (i+1, mean distance), with each point's contribution scaled by its standard
// deviation, and the estimate is 1/slope - 1.
//
// ErrNotEnoughData is returned if any rank holds fewer than
// MinMeasurementsThreshold measurements, or if the fit is degenerate and does
// not yield a finite number (for example when all standard deviations are
// zero). Finite results are clamped to the range [0, math.MaxInt32] before
// being converted, so the cached value can never collide with the
// invalidEstimate sentinel.
func (e *Estimator) NetworkSize() (int32, error) {

	// return cached calculation lock-free (fast path)
	if estimate := atomic.LoadInt32(&e.netSizeCache); estimate != invalidEstimate {
		logger.Debugw("Cached network size estimation", "estimate", estimate)
		return estimate, nil
	}

	e.measurementsLk.Lock()
	defer e.measurementsLk.Unlock()

	// Check a second time. This is needed because we maybe had to wait on another goroutine doing the computation.
	// Then the computation was just finished by the other goroutine, and we don't need to redo it.
	if estimate := atomic.LoadInt32(&e.netSizeCache); estimate != invalidEstimate {
		logger.Debugw("Cached network size estimation", "estimate", estimate)
		return estimate, nil
	}

	// remove obsolete data points
	e.garbageCollect()

	// initialize slices for linear fit
	xs := make([]float64, e.bucketSize)
	ys := make([]float64, e.bucketSize)
	yerrs := make([]float64, e.bucketSize)

	for i := 0; i < e.bucketSize; i++ {
		observationCount := len(e.measurements[i])

		// If we don't have enough data to reasonably calculate the network size, return early
		if observationCount < MinMeasurementsThreshold {
			return 0, ErrNotEnoughData
		}

		// Calculate the weighted average distance
		sumDistances := 0.0
		sumWeights := 0.0
		for _, m := range e.measurements[i] {
			sumDistances += m.weight * m.distance
			sumWeights += m.weight
		}
		distanceAvg := sumDistances / sumWeights

		// Calculate the weighted standard deviation. The weight sum is scaled
		// by (n-1)/n, with n being the number of observations.
		sumWeightedDiffs := 0.0
		for _, m := range e.measurements[i] {
			diff := m.distance - distanceAvg
			sumWeightedDiffs += m.weight * diff * diff
		}
		variance := sumWeightedDiffs / (float64(observationCount-1) / float64(observationCount) * sumWeights)
		distanceStd := math.Sqrt(variance)

		// Track calculations
		xs[i] = float64(i + 1)
		ys[i] = distanceAvg
		yerrs[i] = distanceStd
	}

	// Calculate linear regression (assumes the line goes through the origin)
	var x2Sum, xySum float64
	for i, xi := range xs {
		yi := ys[i]
		xySum += yerrs[i] * xi * yi
		x2Sum += yerrs[i] * xi * xi
	}
	slope := xySum / x2Sum

	// calculate final network size
	rawSize := 1/slope - 1

	// Converting NaN or an infinity to an integer yields an
	// implementation-dependent value, so a degenerate fit is reported as an
	// error instead of being cached as a bogus estimate.
	if math.IsNaN(rawSize) || math.IsInf(rawSize, 0) {
		return 0, ErrNotEnoughData
	}

	// Keep the conversion within the representable, non-negative int32 range.
	var netSize int32
	switch {
	case rawSize >= math.MaxInt32:
		netSize = math.MaxInt32
	case rawSize > 0:
		netSize = int32(rawSize)
	}

	// cache network size estimation
	atomic.StoreInt32(&e.netSizeCache, netSize)

	logger.Debugw("New network size estimation", "estimate", netSize)
	return netSize, nil
}

// calcWeight returns the weight for the measurements of one Track call. Data
// points are weighted exponentially less the emptier the routing table bucket
// is that key falls into, i.e. the weight is 2^(level - bucketSize):
//
//	Bucket Level: 20 -> 1/2^0  -> weight: 1
//	Bucket Level: 17 -> 1/2^3  -> weight: 1/8
//	Bucket Level: 10 -> 1/2^10 -> weight: 1/1024
//
// The routing table bucket may not be full even though the tracked peers
// would have been suitable for it. Imagine there are only 13 peers in bucket 3
// although there is space for 20, and Track receives a list of 20 peers that
// all fall into bucket 3: the weight should then be 1 instead of 1/2^7.
// One would expect such peers to have been added to the routing table before
// Track is called, but sometimes they apparently are not. Therefore, if the
// bucket is not full, the number of given peers sharing the key's common
// prefix length with the local ID is used as the level whenever it exceeds
// the bucket's actual fill level.
func (e *Estimator) calcWeight(key string, peers []peer.ID) float64 {
	cpl := kbucket.CommonPrefixLen(kbucket.ConvertKey(key), e.localID)
	level := e.rt.NPeersForCpl(uint(cpl))

	if level < e.bucketSize {
		// The bucket has room left: count how many of the given peers would
		// belong into it.
		matching := 0
		for _, p := range peers {
			if kbucket.CommonPrefixLen(kbucket.ConvertPeerID(p), e.localID) == cpl {
				matching++
			}
		}

		if matching > level {
			level = matching
		}
	}

	return math.Pow(2, float64(level-e.bucketSize))
}

// garbageCollect drops, for every rank, all measurements that have fallen out
// of the MaxMeasurementAge time window. It does not acquire measurementsLk
// itself; NetworkSize calls it while holding the lock.
func (e *Estimator) garbageCollect() {
	logger.Debug("Running garbage collection")

	// Measurements taken at or before this instant are too old to keep.
	cutoff := time.Now().Add(-MaxMeasurementAge)

	for rank := 0; rank < e.bucketSize; rank++ {
		series := e.measurements[rank]

		// Entries are ordered by time, so binary search finds the first one
		// that is still inside the time window. Everything before it is stale.
		firstFresh := sort.Search(len(series), func(j int) bool {
			return series[j].timestamp.After(cutoff)
		})

		switch firstFresh {
		case len(series):
			// No entry is inside the time window (or there are none at all):
			// reset the slice.
			e.measurements[rank] = []measurement{}
		case 0:
			// The normal case: every entry is still valid.
		default:
			// A mix of stale and valid entries: keep the valid tail.
			e.measurements[rank] = series[firstFresh:]
		}
	}
}
detangling merge of main node for v2 (#293) 2024-10-12 18:55:17 +00:00			`package netsize`

			`import (`
			`"fmt"`
			`"math"`
			`"math/big"`
			`"sort"`
			`"strings"`
			`"sync"`
			`"sync/atomic"`
			`"time"`

			`logging "github.com/ipfs/go-log/v2"`
			`kbucket "github.com/libp2p/go-libp2p-kbucket"`
			`"github.com/libp2p/go-libp2p/core/peer"`
			`ks "github.com/whyrusleeping/go-keyspace"`
			`)`

			`// invalidEstimate indicates that we currently have no valid estimate cached.`
			`const invalidEstimate int32 = -1`

			`var (`
			`ErrNotEnoughData = fmt.Errorf("not enough data")`
			`ErrWrongNumOfPeers = fmt.Errorf("expected bucket size number of peers")`
			`)`

			`var (`
			`logger = logging.Logger("dht/netsize")`
			`MaxMeasurementAge = 2 * time.Hour`
			`MinMeasurementsThreshold = 5`
			`MaxMeasurementsThreshold = 150`
			`keyspaceMaxInt, _ = new(big.Int).SetString(strings.Repeat("1", 256), 2)`
			`keyspaceMaxFloat = new(big.Float).SetInt(keyspaceMaxInt)`
			`)`

			`type Estimator struct {`
			`localID kbucket.ID`
			`rt *kbucket.RoutingTable`
			`bucketSize int`

			`measurementsLk sync.RWMutex`
			`measurements map[int][]measurement`

			`netSizeCache int32`
			`}`

			`func NewEstimator(localID peer.ID, rt kbucket.RoutingTable, bucketSize int) Estimator {`
			`// initialize map to hold measurement observations`
			`measurements := map[int][]measurement{}`
			`for i := 0; i < bucketSize; i++ {`
			`measurements[i] = []measurement{}`
			`}`

			`return &Estimator{`
			`localID: kbucket.ConvertPeerID(localID),`
			`rt: rt,`
			`bucketSize: bucketSize,`
			`measurements: measurements,`
			`netSizeCache: invalidEstimate,`
			`}`
			`}`

			`// NormedDistance calculates the normed XOR distance of the given keys (from 0 to 1).`
			`func NormedDistance(p peer.ID, k ks.Key) float64 {`
			`pKey := ks.XORKeySpace.Key([]byte(p))`
			`ksDistance := new(big.Float).SetInt(pKey.Distance(k))`
			`normedDist, _ := new(big.Float).Quo(ksDistance, keyspaceMaxFloat).Float64()`
			`return normedDist`
			`}`

			`type measurement struct {`
			`distance float64`
			`weight float64`
			`timestamp time.Time`
			`}`

			`// Track tracks the list of peers for the given key to incorporate in the next network size estimate.`
			`// key is expected NOT to be in the kademlia keyspace and peers is expected to be a sorted list of`
			`// the closest peers to the given key (the closest first).`
			`// This function expects peers to have the same length as the routing table bucket size. It also`
			`// strips old and limits the number of data points (favouring new).`
			`func (e *Estimator) Track(key string, peers []peer.ID) error {`
			`e.measurementsLk.Lock()`
			`defer e.measurementsLk.Unlock()`

			`// sanity check`
			`if len(peers) != e.bucketSize {`
			`return ErrWrongNumOfPeers`
			`}`

			`logger.Debugw("Tracking peers for key", "key", key)`

			`now := time.Now()`

			`// invalidate cache`
			`atomic.StoreInt32(&e.netSizeCache, invalidEstimate)`

			`// Calculate weight for the peer distances.`
			`weight := e.calcWeight(key, peers)`

			`// Map given key to the Kademlia key space (hash it)`
			`ksKey := ks.XORKeySpace.Key([]byte(key))`

			`// the maximum age timestamp of the measurement data points`
			`maxAgeTs := now.Add(-MaxMeasurementAge)`

			`for i, p := range peers {`
			`// Construct measurement struct`
			`m := measurement{`
			`distance: NormedDistance(p, ksKey),`
			`weight: weight,`
			`timestamp: now,`
			`}`

			`measurements := append(e.measurements[i], m)`

			`// find the smallest index of a measurement that is still in the allowed time window`
			`// all measurements with a lower index should be discarded as they are too old`
			`n := len(measurements)`
			`idx := sort.Search(n, func(j int) bool {`
			`return measurements[j].timestamp.After(maxAgeTs)`
			`})`

			`// if measurements are outside the allowed time window remove them.`
			`// idx == n - there is no measurement in the allowed time window -> reset slice`
			`// idx == 0 - the normal case where we only have valid entries`
			`// idx != 0 - there is a mix of valid and obsolete entries`
			`if idx != 0 {`
			`x := make([]measurement, n-idx)`
			`copy(x, measurements[idx:])`
			`measurements = x`
			`}`

			`// if the number of data points exceed the max threshold, strip oldest measurement data points.`
			`if len(measurements) > MaxMeasurementsThreshold {`
			`measurements = measurements[len(measurements)-MaxMeasurementsThreshold:]`
			`}`

			`e.measurements[i] = measurements`
			`}`

			`return nil`
			`}`

			`// NetworkSize instructs the Estimator to calculate the current network size estimate.`
			`func (e *Estimator) NetworkSize() (int32, error) {`

			`// return cached calculation lock-free (fast path)`
			`if estimate := atomic.LoadInt32(&e.netSizeCache); estimate != invalidEstimate {`
			`logger.Debugw("Cached network size estimation", "estimate", estimate)`
			`return estimate, nil`
			`}`

			`e.measurementsLk.Lock()`
			`defer e.measurementsLk.Unlock()`

			`// Check a second time. This is needed because we maybe had to wait on another goroutine doing the computation.`
			`// Then the computation was just finished by the other goroutine, and we don't need to redo it.`
			`if estimate := e.netSizeCache; estimate != invalidEstimate {`
			`logger.Debugw("Cached network size estimation", "estimate", estimate)`
			`return estimate, nil`
			`}`

			`// remove obsolete data points`
			`e.garbageCollect()`

			`// initialize slices for linear fit`
			`xs := make([]float64, e.bucketSize)`
			`ys := make([]float64, e.bucketSize)`
			`yerrs := make([]float64, e.bucketSize)`

			`for i := 0; i < e.bucketSize; i++ {`
			`observationCount := len(e.measurements[i])`

			`// If we don't have enough data to reasonably calculate the network size, return early`
			`if observationCount < MinMeasurementsThreshold {`
			`return 0, ErrNotEnoughData`
			`}`

			`// Calculate Average Distance`
			`sumDistances := 0.0`
			`sumWeights := 0.0`
			`for _, m := range e.measurements[i] {`
			`sumDistances += m.weight * m.distance`
			`sumWeights += m.weight`
			`}`
			`distanceAvg := sumDistances / sumWeights`

			`// Calculate standard deviation`
			`sumWeightedDiffs := 0.0`
			`for _, m := range e.measurements[i] {`
			`diff := m.distance - distanceAvg`
			`sumWeightedDiffs += m.weight * diff * diff`
			`}`
			`variance := sumWeightedDiffs / (float64(observationCount-1) / float64(observationCount) * sumWeights)`
			`distanceStd := math.Sqrt(variance)`

			`// Track calculations`
			`xs[i] = float64(i + 1)`
			`ys[i] = distanceAvg`
			`yerrs[i] = distanceStd`
			`}`

			`// Calculate linear regression (assumes the line goes through the origin)`
			`var x2Sum, xySum float64`
			`for i, xi := range xs {`
			`yi := ys[i]`
			`xySum += yerrs[i] * xi * yi`
			`x2Sum += yerrs[i] * xi * xi`
			`}`
			`slope := xySum / x2Sum`

			`// calculate final network size`
			`netSize := int32(1/slope - 1)`

			`// cache network size estimation`
			`atomic.StoreInt32(&e.netSizeCache, netSize)`

			`logger.Debugw("New network size estimation", "estimate", netSize)`
			`return netSize, nil`
			`}`

			`// calcWeight weighs data points exponentially less if they fall into a non-full bucket.`
			`// It weighs distance estimates based on their CPLs and bucket levels.`
			`// Bucket Level: 20 -> 1/2^0 -> weight: 1`
			`// Bucket Level: 17 -> 1/2^3 -> weight: 1/8`
			`// Bucket Level: 10 -> 1/2^10 -> weight: 1/1024`
			`//`
			`// It can happen that the routing table doesn't have a full bucket, but we are tracking here`
			`// a list of peers that would theoretically have been suitable for that bucket. Let's imagine`
			`// there are only 13 peers in bucket 3 although there is space for 20. Now, the Track function`
			`// gets a peers list (len 20) where all peers fall into bucket 3. The weight of this set of peers`
			`// should be 1 instead of 1/2^7.`
			`// I actually thought this cannot happen as peers would have been added to the routing table before`
			`// the Track function gets called. But they seem sometimes not to be added.`
			`func (e *Estimator) calcWeight(key string, peers []peer.ID) float64 {`

			`cpl := kbucket.CommonPrefixLen(kbucket.ConvertKey(key), e.localID)`
			`bucketLevel := e.rt.NPeersForCpl(uint(cpl))`

			`if bucketLevel < e.bucketSize {`
			`// routing table doesn't have a full bucket. Check how many peers would fit into that bucket`
			`peerLevel := 0`
			`for _, p := range peers {`
			`if cpl == kbucket.CommonPrefixLen(kbucket.ConvertPeerID(p), e.localID) {`
			`peerLevel += 1`
			`}`
			`}`

			`if peerLevel > bucketLevel {`
			`return math.Pow(2, float64(peerLevel-e.bucketSize))`
			`}`
			`}`

			`return math.Pow(2, float64(bucketLevel-e.bucketSize))`
			`}`

			`// garbageCollect removes all measurements from the list that fell out of the measurement time window.`
			`func (e *Estimator) garbageCollect() {`
			`logger.Debug("Running garbage collection")`

			`// the maximum age timestamp of the measurement data points`
			`maxAgeTs := time.Now().Add(-MaxMeasurementAge)`

			`for i := 0; i < e.bucketSize; i++ {`

			`// find the smallest index of a measurement that is still in the allowed time window`
			`// all measurements with a lower index should be discarded as they are too old`
			`n := len(e.measurements[i])`
			`idx := sort.Search(n, func(j int) bool {`
			`return e.measurements[i][j].timestamp.After(maxAgeTs)`
			`})`

			`// if measurements are outside the allowed time window remove them.`
			`// idx == n - there is no measurement in the allowed time window -> reset slice`
			`// idx == 0 - the normal case where we only have valid entries`
			`// idx != 0 - there is a mix of valid and obsolete entries`
			`if idx == n {`
			`e.measurements[i] = []measurement{}`
			`} else if idx != 0 {`
			`e.measurements[i] = e.measurements[i][idx:]`
			`}`
			`}`
			`}`