fix: nodes with long-tail syncs, or nodes not in the validator trie, hit OOM because state transition messages accumulate and are never cleared

This commit is contained in:
Cassandra Heart 2024-03-16 19:33:48 -05:00
parent 3f867d80f6
commit 7ccd9f9ab0
No known key found for this signature in database
GPG Key ID: 6352152859385958
3 changed files with 25 additions and 18 deletions

View File

@ -460,10 +460,6 @@ func (e *CeremonyDataClockConsensusEngine) runLoop() {
e.frameChan <- latestFrame
}()
if bytes.Equal(
e.frameProverTrie.FindNearest(e.provingKeyAddress).External.Key,
e.provingKeyAddress,
) {
var nextFrame *protobufs.ClockFrame
if nextFrame, err = e.prove(latestFrame); err != nil {
e.logger.Error("could not prove", zap.Error(err))
@ -471,6 +467,10 @@ func (e *CeremonyDataClockConsensusEngine) runLoop() {
continue
}
if bytes.Equal(
e.frameProverTrie.FindNearest(e.provingKeyAddress).External.Key,
e.provingKeyAddress,
) {
e.dataTimeReel.Insert(nextFrame)
if err = e.publishProof(nextFrame); err != nil {
@ -502,7 +502,6 @@ func (e *CeremonyDataClockConsensusEngine) runLoop() {
e.frameChan <- latestFrame
}()
if e.frameProverTrie.Contains(e.provingKeyAddress) {
var nextFrame *protobufs.ClockFrame
if nextFrame, err = e.prove(latestFrame); err != nil {
e.logger.Error("could not prove", zap.Error(err))
@ -510,6 +509,10 @@ func (e *CeremonyDataClockConsensusEngine) runLoop() {
continue
}
if bytes.Equal(
e.frameProverTrie.FindNearest(e.provingKeyAddress).External.Key,
e.provingKeyAddress,
) {
e.dataTimeReel.Insert(nextFrame)
if err = e.publishProof(nextFrame); err != nil {

View File

@ -454,9 +454,6 @@ func (e *CeremonyDataClockConsensusEngine) collect(
e.logger.Info("collecting vdf proofs")
latest := currentFramePublished
if e.syncingStatus == SyncStatusFailed {
e.syncingStatus = SyncStatusNotSyncing
}
// With the increase of network size, constrain down to top thirty
for i := 0; i < 30; i++ {
@ -468,6 +465,7 @@ func (e *CeremonyDataClockConsensusEngine) collect(
e.logger.Info("currently up to date, skipping sync")
break
} else if maxFrame-2 > latest.FrameNumber {
e.syncingStatus = SyncStatusSynchronizing
latest, err = e.sync(latest, maxFrame, peerId)
if err == nil {
break
@ -475,6 +473,8 @@ func (e *CeremonyDataClockConsensusEngine) collect(
}
}
e.syncingStatus = SyncStatusNotSyncing
if latest.FrameNumber < currentFramePublished.FrameNumber {
latest = currentFramePublished
}

View File

@ -30,7 +30,11 @@ func (e *CeremonyDataClockConsensusEngine) runMessageHandler() {
peer, ok := e.peerMap[string(message.From)]
e.peerMapMx.RUnlock()
if ok && bytes.Compare(peer.version, config.GetMinimumVersion()) >= 0 {
if ok && bytes.Compare(peer.version, config.GetMinimumVersion()) >= 0 &&
bytes.Equal(
e.frameProverTrie.FindNearest(e.provingKeyAddress).External.Key,
e.provingKeyAddress,
) && e.syncingStatus == SyncStatusNotSyncing {
for name := range e.executionEngines {
name := name
go func() error {