diff --git a/src/live/finalization.rs b/src/live/finalization.rs
index 1def50e..52d4cd0 100644
--- a/src/live/finalization.rs
+++ b/src/live/finalization.rs
@@ -5,13 +5,20 @@ use crate::beacon_client::types::FinalizedCheckpointEvent;
 use crate::config::EffectiveScanMode;
 use crate::db::Pool as PgPool;
 use crate::db::scanner as db_scanner;
-use crate::error::Result;
+use crate::error::{Error, Result};
 use crate::exits;
 use crate::scanner;
 
+/// Returns `true` when `e` is `Error::Http` or `Error::BeaconApi`, the only error
+/// kinds for which `process_finalized_rescan` retries against the backfill client.
+fn is_beacon_request_failure(e: &Error) -> bool {
+    matches!(e, Error::Http(_) | Error::BeaconApi { .. })
+}
+
 #[allow(clippy::too_many_arguments)]
 pub(super) async fn process_finalized_rescan(
     client: &BeaconClient,
+    backfill_client: Option<&BeaconClient>,
     pool: &PgPool,
     scan_validators: &HashSet,
     validator_exits: &HashMap,
@@ -47,12 +54,34 @@ pub(super) async fn process_finalized_rescan(
         if active.is_empty() {
             continue;
         }
-        scanner::scan_epoch(client, pool, epoch, &active, true, scan_mode)
-            .await
-            .map_err(|e| {
+        // Pair the outcome with the optional fallback so the retry arm binds the
+        // backfill client by pattern instead of re-checking and unwrapping it.
+        let primary = scanner::scan_epoch(client, pool, epoch, &active, true, scan_mode).await;
+        match (primary, backfill_client) {
+            (Ok(()), _) => {}
+            (Err(e), Some(bf)) if is_beacon_request_failure(&e) => {
+                tracing::warn!(
+                    epoch,
+                    error = %e,
+                    "Live client failed re-scan; retrying with backfill client"
+                );
+                scanner::scan_epoch(bf, pool, epoch, &active, true, scan_mode)
+                    .await
+                    .map_err(|e2| {
+                        tracing::error!(
+                            epoch,
+                            primary_error = %e,
+                            backfill_error = %e2,
+                            "Backfill client also failed re-scan; aborting"
+                        );
+                        e2
+                    })?;
+            }
+            (Err(e), _) => {
                 tracing::error!(epoch, error = %e, "Failed to re-scan finalized epoch; aborting");
-                e
-            })?;
+                return Err(e);
+            }
+        }
     }
     crate::metrics::LIVE_FINALIZED_RESCAN_DURATION
         .with_label_values(&["rescan_loop"])
diff --git a/src/live/mod.rs b/src/live/mod.rs
index 43af2bd..015eb68 100644
--- a/src/live/mod.rs
+++ b/src/live/mod.rs
@@ -25,6 +25,7 @@ use reorg::process_chain_reorg;
 #[allow(clippy::too_many_arguments)]
 pub async fn run_live_tracking(
     client: &BeaconClient,
+    backfill_client: Option<&BeaconClient>,
     pool: &PgPool,
     instance_id: Uuid,
     tracked: &HashSet,
@@ -114,6 +115,7 @@ pub async fn run_live_tracking(
                         .inc();
                     process_finalized_rescan(
                         client,
+                        backfill_client,
                         pool,
                         &scan_validators,
                         validator_exits,
diff --git a/src/main.rs b/src/main.rs
index cb278ef..dbf7d93 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -310,8 +310,16 @@ async fn main() -> anyhow::Result<()> {
     } else {
         // Concurrent mode: live in the foreground, backfill in the background.
         // SSE events are never blocked by the slower historical scan.
+        // Only pass a backfill client to the live loop when it's a *separate*
+        // beacon node — otherwise it's just an Arc clone of the live client
+        // and retrying against it on a finalized-rescan failure is wasted.
+        let live_backfill = config
+            .backfill_beacon_url
+            .as_ref()
+            .map(|_| backfill_client.as_ref());
         let live_result = live::run_live_tracking(
             live_client.as_ref(),
+            live_backfill,
             &pool,
             instance_id,
             &tracked_set,