diff --git a/polkadot/node/core/approval-voting/src/approval_checking.rs b/polkadot/node/core/approval-voting/src/approval_checking.rs index 8667d3639185..96eb25626de8 100644 --- a/polkadot/node/core/approval-voting/src/approval_checking.rs +++ b/polkadot/node/core/approval-voting/src/approval_checking.rs @@ -23,6 +23,7 @@ use polkadot_primitives::ValidatorIndex; use crate::{ persisted_entries::{ApprovalEntry, CandidateEntry, TrancheEntry}, time::Tick, + MAX_RECORDED_NO_SHOW_VALIDATORS_PER_CANDIDATE, }; /// Result of counting the necessary tranches needed for approving a block. @@ -32,6 +33,7 @@ pub struct TranchesToApproveResult { pub required_tranches: RequiredTranches, /// The total number of no_shows at the moment we are doing the counting. pub total_observed_no_shows: usize, + pub no_show_validators: Vec, } /// The required tranches of assignments needed to determine whether a candidate is approved. @@ -188,6 +190,8 @@ struct State { /// The last tick at which a considered assignment was received. last_assignment_tick: Option, total_observed_no_shows: usize, + // The validator's index that are no-shows. + no_show_validators: Vec, } impl State { @@ -203,6 +207,7 @@ impl State { return TranchesToApproveResult { required_tranches: RequiredTranches::All, total_observed_no_shows: self.total_observed_no_shows, + no_show_validators: self.no_show_validators.clone(), } } @@ -217,6 +222,7 @@ impl State { last_assignment_tick: self.last_assignment_tick, }, total_observed_no_shows: self.total_observed_no_shows, + no_show_validators: self.no_show_validators.clone(), } } @@ -234,6 +240,7 @@ impl State { clock_drift, }, total_observed_no_shows: self.total_observed_no_shows, + no_show_validators: self.no_show_validators.clone(), } } else { TranchesToApproveResult { @@ -244,6 +251,7 @@ impl State { clock_drift, }, total_observed_no_shows: self.total_observed_no_shows, + no_show_validators: self.no_show_validators.clone(), } } } @@ -253,11 +261,12 @@ impl State { } fn advance( - &self, + mut self, new_assignments: usize, new_no_shows: usize, next_no_show: Option, last_assignment_tick: Option, + no_show_validators: Vec, ) -> State { let new_covered = if self.depth == 0 { new_assignments @@ -290,6 +299,17 @@ impl State { (self.depth, covering, uncovered) }; + // Make sure we don't store too many no-show validators, since this metric + // is valuable if there are just a few of them to identify the problematic + // validators. + // If there are a lot then we've got bigger problems and no need to make this + // array unnecessarily large. + if self.no_show_validators.len() + no_show_validators.len() < + MAX_RECORDED_NO_SHOW_VALIDATORS_PER_CANDIDATE + { + self.no_show_validators.extend(no_show_validators); + } + State { assignments, depth, @@ -299,6 +319,7 @@ impl State { next_no_show, last_assignment_tick, total_observed_no_shows: self.total_observed_no_shows + new_no_shows, + no_show_validators: self.no_show_validators, } } } @@ -354,8 +375,9 @@ fn count_no_shows( block_tick: Tick, no_show_duration: Tick, drifted_tick_now: Tick, -) -> (usize, Option) { +) -> (usize, Option, Vec) { let mut next_no_show = None; + let mut no_show_validators = Vec::new(); let no_shows = assignments .iter() .map(|(v_index, tick)| { @@ -379,12 +401,14 @@ fn count_no_shows( // the clock drift will be removed again to do the comparison above. next_no_show = super::min_prefer_some(next_no_show, Some(no_show_at + clock_drift)); } - + if is_no_show { + no_show_validators.push(*v_index); + } is_no_show }) .count(); - (no_shows, next_no_show) + (no_shows, next_no_show, no_show_validators) } /// Determine the amount of tranches of assignments needed to determine approval of a candidate. @@ -408,6 +432,7 @@ pub fn tranches_to_approve( next_no_show: None, last_assignment_tick: None, total_observed_no_shows: 0, + no_show_validators: Vec::new(), }; // The `ApprovalEntry` doesn't have any data for empty tranches. We still want to iterate over @@ -446,7 +471,7 @@ pub fn tranches_to_approve( // // While we count the no-shows, we also determine the next possible no-show we might // see within this tranche. - let (no_shows, next_no_show) = count_no_shows( + let (no_shows, next_no_show, no_show_validators) = count_no_shows( assignments, approvals, clock_drift, @@ -455,7 +480,7 @@ pub fn tranches_to_approve( drifted_tick_now, ); - let s = s.advance(n_assignments, no_shows, next_no_show, last_assignment_tick); + let s = s.advance(n_assignments, no_shows, next_no_show, last_assignment_tick, no_show_validators); let output = s.output(tranche, needed_approvals, n_validators, no_show_duration); *state = match output.required_tranches { @@ -1186,7 +1211,7 @@ mod tests { approvals.set(v_index, true); } - let (no_shows, next_no_show) = count_no_shows( + let (no_shows, next_no_show, _) = count_no_shows( &test.assignments, &approvals, test.clock_drift, @@ -1390,6 +1415,7 @@ mod tests { next_no_show: None, last_assignment_tick: None, total_observed_no_shows: 0, + no_show_validators: Default::default(), }; assert_eq!( @@ -1414,6 +1440,7 @@ mod tests { next_no_show: None, last_assignment_tick: None, total_observed_no_shows: 0, + no_show_validators: Default::default(), }; assert_eq!( diff --git a/polkadot/node/core/approval-voting/src/import.rs b/polkadot/node/core/approval-voting/src/import.rs index 59b6f91c0a82..3ddef1e01c45 100644 --- a/polkadot/node/core/approval-voting/src/import.rs +++ b/polkadot/node/core/approval-voting/src/import.rs @@ -662,6 +662,7 @@ pub(crate) mod tests { per_block_assignments_gathering_times: LruMap::new(ByLength::new( MAX_BLOCKS_WITH_ASSIGNMENT_TIMESTAMPS, )), + no_show_stats: Default::default(), } } diff --git a/polkadot/node/core/approval-voting/src/lib.rs b/polkadot/node/core/approval-voting/src/lib.rs index eece6b15805c..d4b6855a44d0 100644 --- a/polkadot/node/core/approval-voting/src/lib.rs +++ b/polkadot/node/core/approval-voting/src/lib.rs @@ -691,6 +691,7 @@ struct ApprovalStatus { tranche_now: DelayTranche, block_tick: Tick, last_no_shows: usize, + no_show_validators: Vec, } #[derive(Copy, Clone)] @@ -832,6 +833,51 @@ struct State { // tranches we trigger and why. per_block_assignments_gathering_times: LruMap>, + no_show_stats: NoShowStats, +} + +// Regularly dump the no-show stats at this block number frequency. +const NO_SHOW_DUMP_FREQUENCY: BlockNumber = 50; +// The maximum number of validators we record no-shows for, per candidate. +pub(crate) const MAX_RECORDED_NO_SHOW_VALIDATORS_PER_CANDIDATE: usize = 20; + +// No show stats per validator and per parachain. +// This is valuable information when we have to debug live network issue, because +// it gives information if things are going wrong only for some validators or just +// for some parachains. +#[derive(Debug, Clone, PartialEq, Eq, Default)] +struct NoShowStats { + per_validator_no_show: HashMap>, + per_parachain_no_show: HashMap, + last_dumped_block_number: BlockNumber, +} + +impl NoShowStats { + // Print the no-show stats if NO_SHOW_DUMP_FREQUENCY blocks have passed since the last + // print. + fn maybe_print(&mut self, current_block_number: BlockNumber) { + if self.last_dumped_block_number > current_block_number || + current_block_number - self.last_dumped_block_number < NO_SHOW_DUMP_FREQUENCY + { + return + } + if self.per_parachain_no_show.is_empty() && self.per_validator_no_show.is_empty() { + return + } + + gum::debug!( + target: LOG_TARGET, + "Validators with no_show {:?} and parachains with no_shows {:?} since {:}", + self.per_validator_no_show, + self.per_parachain_no_show, + self.last_dumped_block_number + ); + + self.last_dumped_block_number = current_block_number; + + self.per_validator_no_show.clear(); + self.per_parachain_no_show.clear(); + } } #[derive(Debug, Clone, PartialEq, Eq)] @@ -887,21 +933,25 @@ impl State { ); if let Some(approval_entry) = candidate_entry.approval_entry(&block_hash) { - let TranchesToApproveResult { required_tranches, total_observed_no_shows } = - approval_checking::tranches_to_approve( - approval_entry, - candidate_entry.approvals(), - tranche_now, - block_tick, - no_show_duration, - session_info.needed_approvals as _, - ); + let TranchesToApproveResult { + required_tranches, + total_observed_no_shows, + no_show_validators, + } = approval_checking::tranches_to_approve( + approval_entry, + candidate_entry.approvals(), + tranche_now, + block_tick, + no_show_duration, + session_info.needed_approvals as _, + ); let status = ApprovalStatus { required_tranches, block_tick, tranche_now, last_no_shows: total_observed_no_shows, + no_show_validators, }; Some((approval_entry, status)) @@ -1044,6 +1094,26 @@ impl State { }, } } + + fn record_no_shows( + &mut self, + session_index: SessionIndex, + para_id: u32, + no_show_validators: &Vec, + ) { + if !no_show_validators.is_empty() { + *self.no_show_stats.per_parachain_no_show.entry(para_id.into()).or_default() += 1; + } + for validator_index in no_show_validators { + *self + .no_show_stats + .per_validator_no_show + .entry(session_index) + .or_default() + .entry(*validator_index) + .or_default() += 1; + } + } } #[derive(Debug, Clone)] @@ -1096,6 +1166,7 @@ where per_block_assignments_gathering_times: LruMap::new(ByLength::new( MAX_BLOCKS_WITH_ASSIGNMENT_TIMESTAMPS, )), + no_show_stats: NoShowStats::default(), }; // `None` on start-up. Gets initialized/updated on leaf update @@ -1740,6 +1811,8 @@ async fn handle_from_overseer( "Imported new block.", ); + state.no_show_stats.maybe_print(block_batch.block_number); + for (c_hash, c_entry) in block_batch.imported_candidates { metrics.on_candidate_imported(); @@ -2904,7 +2977,8 @@ where let mut actions = Vec::new(); let block_hash = block_entry.block_hash(); let block_number = block_entry.block_number(); - + let session_index = block_entry.session(); + let para_id = candidate_entry.candidate_receipt().descriptor().para_id; let tick_now = state.clock.tick_now(); let (is_approved, status) = if let Some((approval_entry, status)) = state @@ -3000,7 +3074,9 @@ where if is_approved { approval_entry.mark_approved(); } - + if newly_approved { + state.record_no_shows(session_index, para_id.into(), &status.no_show_validators); + } actions.extend(schedule_wakeup_action( &approval_entry, block_hash, diff --git a/polkadot/node/core/approval-voting/src/tests.rs b/polkadot/node/core/approval-voting/src/tests.rs index 5cbae7f908fc..64ae86bc013a 100644 --- a/polkadot/node/core/approval-voting/src/tests.rs +++ b/polkadot/node/core/approval-voting/src/tests.rs @@ -5068,6 +5068,7 @@ fn test_gathering_assignments_statements() { per_block_assignments_gathering_times: LruMap::new(ByLength::new( MAX_BLOCKS_WITH_ASSIGNMENT_TIMESTAMPS, )), + no_show_stats: NoShowStats::default(), }; for i in 0..200i32 { @@ -5162,6 +5163,7 @@ fn test_observe_assignment_gathering_status() { per_block_assignments_gathering_times: LruMap::new(ByLength::new( MAX_BLOCKS_WITH_ASSIGNMENT_TIMESTAMPS, )), + no_show_stats: NoShowStats::default(), }; let metrics_inner = MetricsInner {