mirror of
https://github.com/0glabs/0g-storage-node.git
synced 2025-01-23 21:46:17 +00:00
Terminate file sync if failed (#121)
* terminate file sync if failed
* fix fmt
* always add log for file termination result
This commit is contained in:
parent
0d2caf9b76
commit
cc5f8c2da4
@ -22,7 +22,7 @@ pub trait Rpc {
|
||||
|
||||
/// Terminate file or chunks sync for specified tx_seq.
|
||||
#[method(name = "terminateSync")]
|
||||
async fn terminate_sync(&self, tx_seq: u64) -> RpcResult<()>;
|
||||
async fn terminate_sync(&self, tx_seq: u64) -> RpcResult<bool>;
|
||||
|
||||
#[method(name = "getSyncStatus")]
|
||||
async fn get_sync_status(&self, tx_seq: u64) -> RpcResult<String>;
|
||||
|
@ -78,7 +78,7 @@ impl RpcServer for RpcServerImpl {
|
||||
}
|
||||
|
||||
#[tracing::instrument(skip(self), err)]
|
||||
async fn terminate_sync(&self, tx_seq: u64) -> RpcResult<()> {
|
||||
async fn terminate_sync(&self, tx_seq: u64) -> RpcResult<bool> {
|
||||
info!("admin_terminateSync({tx_seq})");
|
||||
|
||||
let response = self
|
||||
@ -90,7 +90,7 @@ impl RpcServer for RpcServerImpl {
|
||||
.await?;
|
||||
|
||||
match response {
|
||||
SyncResponse::TerminateFileSync { .. } => Ok(()),
|
||||
SyncResponse::TerminateFileSync { count } => Ok(count > 0),
|
||||
_ => Err(error::internal_error("unexpected response type")),
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
use crate::{controllers::SyncState, Config, SyncRequest, SyncResponse, SyncSender};
|
||||
use anyhow::{bail, Ok, Result};
|
||||
use anyhow::{bail, Result};
|
||||
use std::fmt::Debug;
|
||||
use storage_async::Store;
|
||||
|
||||
@ -83,6 +83,10 @@ impl Batcher {
|
||||
async fn poll_tx(&self, tx_seq: u64) -> Result<Option<SyncResult>> {
|
||||
// file already exists
|
||||
if self.store.check_tx_completed(tx_seq).await? {
|
||||
// File may be finalized during file sync, e.g. user uploaded file via RPC.
|
||||
// In this case, just terminate the file sync.
|
||||
let num_terminated = self.terminate_file_sync(tx_seq, false).await;
|
||||
info!(%tx_seq, %num_terminated, "Terminate file sync due to file already finalized in db");
|
||||
return Ok(Some(SyncResult::Completed));
|
||||
}
|
||||
|
||||
@ -114,7 +118,11 @@ impl Batcher {
|
||||
|
||||
// file sync failed
|
||||
Some(SyncState::Failed { reason }) => {
|
||||
debug!(?reason, "Failed to sync file");
|
||||
debug!(
|
||||
?reason,
|
||||
"Failed to sync file and terminate the failed file sync"
|
||||
);
|
||||
self.terminate_file_sync(tx_seq, false).await;
|
||||
Ok(Some(SyncResult::Failed))
|
||||
}
|
||||
|
||||
@ -132,8 +140,8 @@ impl Batcher {
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn terminate_file_sync(&self, tx_seq: u64, is_reverted: bool) {
|
||||
if let Err(err) = self
|
||||
pub async fn terminate_file_sync(&self, tx_seq: u64, is_reverted: bool) -> usize {
|
||||
match self
|
||||
.sync_send
|
||||
.request(SyncRequest::TerminateFileSync {
|
||||
tx_seq,
|
||||
@ -141,8 +149,16 @@ impl Batcher {
|
||||
})
|
||||
.await
|
||||
{
|
||||
// just log and go ahead for any error, e.g. timeout
|
||||
error!(%err, %tx_seq, %is_reverted, "Failed to terminate file sync");
|
||||
Ok(SyncResponse::TerminateFileSync { count }) => count,
|
||||
Ok(resp) => {
|
||||
error!(?resp, %tx_seq, %is_reverted, "Invalid sync response type to terminate file sync");
|
||||
0
|
||||
}
|
||||
Err(err) => {
|
||||
// just log and go ahead for any error, e.g. timeout
|
||||
error!(%err, %tx_seq, %is_reverted, "Failed to terminate file sync");
|
||||
0
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -166,7 +166,7 @@ impl SerialBatcher {
|
||||
return true;
|
||||
}
|
||||
|
||||
info!(%reverted_tx_seq, ?self, "Handle reorg");
|
||||
info!(%reverted_tx_seq, ?self, "Handle reorg started");
|
||||
|
||||
// terminate all files in progress
|
||||
self.batcher
|
||||
@ -190,6 +190,8 @@ impl SerialBatcher {
|
||||
}
|
||||
}
|
||||
|
||||
info!(%reverted_tx_seq, ?self, "Handle reorg ended");
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
@ -233,6 +235,8 @@ impl SerialBatcher {
|
||||
|
||||
/// Update file sync index in db.
|
||||
async fn update_completed_txs_in_db(&mut self) -> Result<()> {
|
||||
let origin = self.next_tx_seq_in_db;
|
||||
|
||||
while let Some(sync_result) = self.pending_completed_txs.get(&self.next_tx_seq_in_db) {
|
||||
// downgrade to random sync if file sync failed or timeout
|
||||
if matches!(sync_result, SyncResult::Failed | SyncResult::Timeout) {
|
||||
@ -251,6 +255,10 @@ impl SerialBatcher {
|
||||
self.next_tx_seq_in_db += 1;
|
||||
}
|
||||
|
||||
if self.next_tx_seq_in_db > origin {
|
||||
info!(%origin, %self.next_tx_seq_in_db, "Move forward in db");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user