mirror of
https://github.com/0glabs/0g-storage-node.git
synced 2024-12-24 23:35:18 +00:00
Terminate file sync if failed (#121)
* terminate file sync if failed
* fix fmt
* always add log for file termination result
This commit is contained in:
parent
0d2caf9b76
commit
cc5f8c2da4
@@ -22,7 +22,7 @@ pub trait Rpc {
|
|||||||
|
|
||||||
/// Terminate file or chunks sync for specified tx_seq.
|
/// Terminate file or chunks sync for specified tx_seq.
|
||||||
#[method(name = "terminateSync")]
|
#[method(name = "terminateSync")]
|
||||||
async fn terminate_sync(&self, tx_seq: u64) -> RpcResult<()>;
|
async fn terminate_sync(&self, tx_seq: u64) -> RpcResult<bool>;
|
||||||
|
|
||||||
#[method(name = "getSyncStatus")]
|
#[method(name = "getSyncStatus")]
|
||||||
async fn get_sync_status(&self, tx_seq: u64) -> RpcResult<String>;
|
async fn get_sync_status(&self, tx_seq: u64) -> RpcResult<String>;
|
||||||
|
@@ -78,7 +78,7 @@ impl RpcServer for RpcServerImpl {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[tracing::instrument(skip(self), err)]
|
#[tracing::instrument(skip(self), err)]
|
||||||
async fn terminate_sync(&self, tx_seq: u64) -> RpcResult<()> {
|
async fn terminate_sync(&self, tx_seq: u64) -> RpcResult<bool> {
|
||||||
info!("admin_terminateSync({tx_seq})");
|
info!("admin_terminateSync({tx_seq})");
|
||||||
|
|
||||||
let response = self
|
let response = self
|
||||||
@@ -90,7 +90,7 @@ impl RpcServer for RpcServerImpl {
|
|||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
match response {
|
match response {
|
||||||
SyncResponse::TerminateFileSync { .. } => Ok(()),
|
SyncResponse::TerminateFileSync { count } => Ok(count > 0),
|
||||||
_ => Err(error::internal_error("unexpected response type")),
|
_ => Err(error::internal_error("unexpected response type")),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -1,5 +1,5 @@
|
|||||||
use crate::{controllers::SyncState, Config, SyncRequest, SyncResponse, SyncSender};
|
use crate::{controllers::SyncState, Config, SyncRequest, SyncResponse, SyncSender};
|
||||||
use anyhow::{bail, Ok, Result};
|
use anyhow::{bail, Result};
|
||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
use storage_async::Store;
|
use storage_async::Store;
|
||||||
|
|
||||||
@@ -83,6 +83,10 @@ impl Batcher {
|
|||||||
async fn poll_tx(&self, tx_seq: u64) -> Result<Option<SyncResult>> {
|
async fn poll_tx(&self, tx_seq: u64) -> Result<Option<SyncResult>> {
|
||||||
// file already exists
|
// file already exists
|
||||||
if self.store.check_tx_completed(tx_seq).await? {
|
if self.store.check_tx_completed(tx_seq).await? {
|
||||||
|
// File may be finalized during file sync, e.g. user uploaded file via RPC.
|
||||||
|
// In this case, just terminate the file sync.
|
||||||
|
let num_terminated = self.terminate_file_sync(tx_seq, false).await;
|
||||||
|
info!(%tx_seq, %num_terminated, "Terminate file sync due to file already finalized in db");
|
||||||
return Ok(Some(SyncResult::Completed));
|
return Ok(Some(SyncResult::Completed));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -114,7 +118,11 @@ impl Batcher {
|
|||||||
|
|
||||||
// file sync failed
|
// file sync failed
|
||||||
Some(SyncState::Failed { reason }) => {
|
Some(SyncState::Failed { reason }) => {
|
||||||
debug!(?reason, "Failed to sync file");
|
debug!(
|
||||||
|
?reason,
|
||||||
|
"Failed to sync file and terminate the failed file sync"
|
||||||
|
);
|
||||||
|
self.terminate_file_sync(tx_seq, false).await;
|
||||||
Ok(Some(SyncResult::Failed))
|
Ok(Some(SyncResult::Failed))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -132,8 +140,8 @@ impl Batcher {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn terminate_file_sync(&self, tx_seq: u64, is_reverted: bool) {
|
pub async fn terminate_file_sync(&self, tx_seq: u64, is_reverted: bool) -> usize {
|
||||||
if let Err(err) = self
|
match self
|
||||||
.sync_send
|
.sync_send
|
||||||
.request(SyncRequest::TerminateFileSync {
|
.request(SyncRequest::TerminateFileSync {
|
||||||
tx_seq,
|
tx_seq,
|
||||||
@@ -141,8 +149,16 @@ impl Batcher {
|
|||||||
})
|
})
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
// just log and go ahead for any error, e.g. timeout
|
Ok(SyncResponse::TerminateFileSync { count }) => count,
|
||||||
error!(%err, %tx_seq, %is_reverted, "Failed to terminate file sync");
|
Ok(resp) => {
|
||||||
|
error!(?resp, %tx_seq, %is_reverted, "Invalid sync response type to terminate file sync");
|
||||||
|
0
|
||||||
|
}
|
||||||
|
Err(err) => {
|
||||||
|
// just log and go ahead for any error, e.g. timeout
|
||||||
|
error!(%err, %tx_seq, %is_reverted, "Failed to terminate file sync");
|
||||||
|
0
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -166,7 +166,7 @@ impl SerialBatcher {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
info!(%reverted_tx_seq, ?self, "Handle reorg");
|
info!(%reverted_tx_seq, ?self, "Handle reorg started");
|
||||||
|
|
||||||
// terminate all files in progress
|
// terminate all files in progress
|
||||||
self.batcher
|
self.batcher
|
||||||
@@ -190,6 +190,8 @@ impl SerialBatcher {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
info!(%reverted_tx_seq, ?self, "Handle reorg ended");
|
||||||
|
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -233,6 +235,8 @@ impl SerialBatcher {
|
|||||||
|
|
||||||
/// Update file sync index in db.
|
/// Update file sync index in db.
|
||||||
async fn update_completed_txs_in_db(&mut self) -> Result<()> {
|
async fn update_completed_txs_in_db(&mut self) -> Result<()> {
|
||||||
|
let origin = self.next_tx_seq_in_db;
|
||||||
|
|
||||||
while let Some(sync_result) = self.pending_completed_txs.get(&self.next_tx_seq_in_db) {
|
while let Some(sync_result) = self.pending_completed_txs.get(&self.next_tx_seq_in_db) {
|
||||||
// downgrade to random sync if file sync failed or timeout
|
// downgrade to random sync if file sync failed or timeout
|
||||||
if matches!(sync_result, SyncResult::Failed | SyncResult::Timeout) {
|
if matches!(sync_result, SyncResult::Failed | SyncResult::Timeout) {
|
||||||
@@ -251,6 +255,10 @@ impl SerialBatcher {
|
|||||||
self.next_tx_seq_in_db += 1;
|
self.next_tx_seq_in_db += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if self.next_tx_seq_in_db > origin {
|
||||||
|
info!(%origin, %self.next_tx_seq_in_db, "Move forward in db");
|
||||||
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user