-
Notifications
You must be signed in to change notification settings - Fork 2.6k
Ensure election offchain workers don't overlap #8828
Changes from 12 commits
5eaa2e0
0259721
3e464ee
dba8c2e
763419a
7f61dd4
3c1ac92
eeac4eb
1af1660
695b142
cf67be7
8db9e64
7c57f47
ca0c160
e11974f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -653,38 +653,24 @@ pub mod pallet { | |
} | ||
|
||
fn offchain_worker(now: T::BlockNumber) { | ||
match Self::current_phase() { | ||
Phase::Unsigned((true, opened)) if opened == now => { | ||
// mine a new solution, cache it, and attempt to submit it | ||
let initial_output = Self::try_acquire_offchain_lock(now) | ||
.and_then(|_| Self::mine_check_save_submit()); | ||
log!(info, "initial OCW output at {:?}: {:?}", now, initial_output); | ||
} | ||
Phase::Unsigned((true, opened)) if opened < now => { | ||
// keep trying to submit solutions. worst case, we note that the stored solution | ||
// is better than our cached/computed one, and decide not to submit after all. | ||
// | ||
// the offchain_lock prevents us from spamming submissions too often. | ||
let resubmit_output = Self::try_acquire_offchain_lock(now) | ||
.and_then(|_| Self::restore_or_compute_then_maybe_submit()); | ||
log!(info, "resubmit OCW output at {:?}: {:?}", now, resubmit_output); | ||
use sp_runtime::offchain::storage_lock::{StorageLock, BlockAndTime}; | ||
|
||
// create a lock with the maximum deadline of number of blocks in the unsigned phase. | ||
// This should only come useful in an **abrupt** termination of execution, otherwise the | ||
// guard will be dropped upon successful execution. | ||
let mut lock = StorageLock::<'_, BlockAndTime<Self>>::with_block_deadline( | ||
kianenigma marked this conversation as resolved.
Show resolved
Hide resolved
|
||
unsigned::OFFCHAIN_LOCK, | ||
T::UnsignedPhase::get().saturated_into(), | ||
); | ||
|
||
match lock.try_lock() { | ||
Ok(_guard) => { | ||
Self::do_synchronized_offchain_worker(now); | ||
}, | ||
Err(deadline) => { | ||
log!(debug, "offchain worker lock not released, deadline is {:?}", deadline); | ||
} | ||
_ => {} | ||
} | ||
// after election finalization, clear OCW solution storage | ||
if <frame_system::Pallet<T>>::events() | ||
.into_iter() | ||
.filter_map(|event_record| { | ||
let local_event = <T as Config>::Event::from(event_record.event); | ||
local_event.try_into().ok() | ||
}) | ||
.find(|event| { | ||
matches!(event, Event::ElectionFinalized(_)) | ||
}) | ||
.is_some() | ||
{ | ||
unsigned::kill_ocw_solution::<T>(); | ||
} | ||
}; | ||
} | ||
|
||
fn integrity_test() { | ||
|
@@ -928,7 +914,53 @@ pub mod pallet { | |
pub struct Pallet<T>(PhantomData<T>); | ||
} | ||
|
||
impl<T: Config> sp_runtime::offchain::storage_lock::BlockNumberProvider for Pallet<T> { | ||
type BlockNumber = T::BlockNumber; | ||
fn current_block_number() -> Self::BlockNumber { | ||
<frame_system::Pallet<T>>::block_number() | ||
} | ||
} | ||
|
||
kianenigma marked this conversation as resolved.
Show resolved
Hide resolved
|
||
impl<T: Config> Pallet<T> { | ||
/// Internal logic of the offchain worker, to be executed only when the offchain lock is | ||
/// acquired with success. | ||
fn do_synchronized_offchain_worker(now: T::BlockNumber) { | ||
log!(trace, "lock for offchain worker acquired."); | ||
match Self::current_phase() { | ||
Phase::Unsigned((true, opened)) if opened == now => { | ||
// mine a new solution, cache it, and attempt to submit it | ||
let initial_output = Self::ensure_offchain_repeat_frequency(now).and_then(|_| { | ||
Self::mine_check_save_submit() | ||
}); | ||
log!(debug, "initial offchain thread output: {:?}", initial_output); | ||
} | ||
Phase::Unsigned((true, opened)) if opened < now => { | ||
// try and resubmit the cached solution, and recompute ONLY if it is not | ||
// feasible. | ||
let resubmit_output = Self::ensure_offchain_repeat_frequency(now).and_then(|_| { | ||
Self::restore_or_compute_then_maybe_submit() | ||
}); | ||
log!(debug, "resubmit offchain thread output: {:?}", resubmit_output); | ||
} | ||
_ => {} | ||
} | ||
|
||
// after election finalization, clear OCW solution storage. | ||
if <frame_system::Pallet<T>>::events() | ||
.into_iter() | ||
.filter_map(|event_record| { | ||
let local_event = <T as Config>::Event::from(event_record.event); | ||
local_event.try_into().ok() | ||
}) | ||
.find(|event| { | ||
matches!(event, Event::ElectionFinalized(_)) | ||
}) | ||
.is_some() | ||
kianenigma marked this conversation as resolved.
Show resolved
Hide resolved
|
||
{ | ||
unsigned::kill_ocw_solution::<T>(); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What if this function is not run on this specific block due to the newly introduced lock ? It shouldn't harm too much, as it will be removed due to failing feasibility. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do you suggest anywhere else where we would kill the solution? Thinking about it again, if we simply don't kill this, it will be replaced with a fresh one on the next round. In the past it would have been tricky since we could race around it. But now, the initial OCW will attempt to mine a new solution and replace this, and no other OCW will work in parallel, so it should be safe. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't have very convincing suggestion, maybe we could match the phase and remove when the phase is close, but removing on every block when the phase is close is a bit useless too. anyway I agree with what you say, current implementation is fine, even if not removed, it is not an issue |
||
} | ||
|
||
/// Logic for [`<Pallet as Hooks>::on_initialize`] when signed phase is being opened. | ||
/// | ||
/// This is decoupled for easy weight calculation. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
offchain elections are only acceptable if the provide exactly enough validators. So since we have double the
initial_authorities
, offchain elections always fail on a dev chain and it produces some warn/erorr logs.can revert if it offends anything else.