From 16a57fdae1072d01812bbc3e812e48612d9bf6a4 Mon Sep 17 00:00:00 2001
From: Diego Prats
Date: Tue, 12 Nov 2024 19:12:14 -0800
Subject: [PATCH] update error handling for connecting to websockets

---
 clients/cli/src/prover.rs | 57 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 54 insertions(+), 3 deletions(-)

diff --git a/clients/cli/src/prover.rs b/clients/cli/src/prover.rs
index 0c029b3..56d9e6f 100644
--- a/clients/cli/src/prover.rs
+++ b/clients/cli/src/prover.rs
@@ -135,16 +135,67 @@ async fn main() {
         json!({"prover_id": prover_id}),
     );
 
-    let (mut client, _) = tokio_tungstenite::connect_async(&ws_addr_string)
-        .await
-        .unwrap();
+    // Stream type produced by `tokio_tungstenite::connect_async`.
+    // NOTE(review): the generic arguments were stripped from this copy of the
+    // patch; the type below is reconstructed from the connect_async API -- confirm.
+    type OrchestratorStream = tokio_tungstenite::WebSocketStream<
+        tokio_tungstenite::MaybeTlsStream<tokio::net::TcpStream>,
+    >;
+
+    /// Opens a websocket connection to the Orchestrator at `ws_addr`.
+    ///
+    /// On failure the error is printed to stderr and returned to the caller.
+    async fn connect_to_orchestrator(
+        ws_addr: &str,
+    ) -> Result<OrchestratorStream, Box<dyn std::error::Error>> {
+        let (client, _) = tokio_tungstenite::connect_async(ws_addr)
+            .await
+            .map_err(|e| {
+                eprintln!("Failed to connect to orchestrator at {}: {}", ws_addr, e);
+                e
+            })?;
+
+        Ok(client)
+    }
+
+    /// Connects to the Orchestrator, retrying until a connection succeeds.
+    ///
+    /// The wait between attempts doubles each time (2, 4, 8, ... seconds) and
+    /// is capped at 64 seconds.
+    async fn connect_to_orchestrator_with_retry(ws_addr: &str) -> OrchestratorStream {
+        // Largest exponent used for the backoff: 2^6 = 64 seconds.
+        const MAX_BACKOFF_EXPONENT: u32 = 6;
+
+        let mut attempt: u32 = 1;
+
+        loop {
+            // The cause of a failure is already printed by connect_to_orchestrator.
+            if let Ok(client) = connect_to_orchestrator(ws_addr).await {
+                return client;
+            }
+
+            let delay_secs = 2u64.pow(attempt.min(MAX_BACKOFF_EXPONENT));
+            eprintln!(
+                "Could not connect to orchestrator (attempt {}). Retrying in {} seconds...",
+                attempt, delay_secs,
+            );
+            tokio::time::sleep(tokio::time::Duration::from_secs(delay_secs)).await;
+
+            // Saturating add: the counter cannot overflow during a long outage.
+            attempt = attempt.saturating_add(1);
+        }
+    }
+
+    // Connect to the Orchestrator, retrying with exponential backoff. The
+    // "connected" event below is tracked only once this has succeeded.
+    let mut client = connect_to_orchestrator_with_retry(&ws_addr_string).await;
 
     track(
         "connected".into(),
         "Connected.".into(),
         &ws_addr_string,
         json!({"prover_id": prover_id}),
     );
 
     let registration = ProverRequest {
         contents: Some(prover_request::Contents::Registration(