From a563e259a21687b3fd339e2d5e38718342c5389c Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Tue, 17 Oct 2023 10:31:49 +0800 Subject: [PATCH 01/38] make pruning explicit --- polkadot/node/core/pvf/src/artifacts.rs | 14 +++++++------- polkadot/node/core/pvf/src/host.rs | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index d7b15ece7b29..91b7914adf76 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -180,11 +180,16 @@ pub struct Artifacts { } impl Artifacts { + #[cfg(test)] + pub(crate) fn new() -> Self { + Self { artifacts: HashMap::new() } + } + /// Initialize a blank cache at the given path. This will clear everything present at the /// given path, to be populated over time. /// /// The recognized artifacts will be filled in the table and unrecognized will be removed. - pub async fn new(cache_path: &Path) -> Self { + pub async fn new_and_prune(cache_path: &Path) -> Self { // First delete the entire cache. This includes artifacts and any leftover worker dirs (see // [`WorkerDir`]). Nodes are long-running so this should populate shortly. let _ = tokio::fs::remove_dir_all(cache_path).await; @@ -194,11 +199,6 @@ impl Artifacts { Self { artifacts: HashMap::new() } } - #[cfg(test)] - pub(crate) fn empty() -> Self { - Self { artifacts: HashMap::new() } - } - /// Returns the state of the given artifact by its ID. pub fn artifact_state_mut(&mut self, artifact_id: &ArtifactId) -> Option<&mut ArtifactState> { self.artifacts.get_mut(artifact_id) @@ -334,7 +334,7 @@ mod tests { // this should remove it and re-create. let p = &fake_cache_path; - Artifacts::new(p).await; + Artifacts::new_and_prune(p).await; assert_eq!(std::fs::read_dir(&fake_cache_path).unwrap().count(), 0); diff --git a/polkadot/node/core/pvf/src/host.rs b/polkadot/node/core/pvf/src/host.rs index 81695829122b..662c50401add 100644 --- a/polkadot/node/core/pvf/src/host.rs +++ b/polkadot/node/core/pvf/src/host.rs @@ -247,7 +247,7 @@ pub fn start(config: Config, metrics: Metrics) -> (ValidationHost, impl Future Date: Tue, 17 Oct 2023 22:09:30 +0800 Subject: [PATCH 02/38] preserve cache unless stale --- polkadot/node/core/pvf/src/artifacts.rs | 156 ++++++++++++++++++------ polkadot/node/core/pvf/src/lib.rs | 2 +- 2 files changed, 121 insertions(+), 37 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index 91b7914adf76..d98d281af312 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -16,7 +16,7 @@ //! PVF artifacts (final compiled code blobs). //! -//! # Lifecycle of an artifact +//! # Lifecycle of an artifact //! //! 1. During node start-up, the artifacts cache is cleaned up. This means that all local artifacts //! stored on-disk are cleared, and we start with an empty [`Artifacts`] table. @@ -55,7 +55,7 @@ //! older by a predefined parameter. This process is run very rarely (say, once a day). Once the //! artifact is expired it is removed from disk eagerly atomically. -use crate::host::PrepareResultSender; +use crate::{host::PrepareResultSender, LOG_TARGET}; use always_assert::always; use polkadot_node_core_pvf_common::{error::PrepareError, prepare::PrepareStats, pvf::PvfPrepData}; use polkadot_node_primitives::NODE_VERSION; @@ -67,6 +67,42 @@ use std::{ time::{Duration, SystemTime}, }; +macro_rules! concat_const { + ($x:expr, $y:expr) => {{ + // ensure inputs to be strings + const _: &str = $x; + const _: &str = $y; + + const X: &[u8] = $x.as_bytes(); + const Y: &[u8] = $y.as_bytes(); + const XL: usize = X.len(); + const YL: usize = Y.len(); + const L: usize = XL + YL; + + const fn concat() -> [u8; L] { + let mut cat = [0u8; L]; + let mut i = 0; + while i < XL { + cat[i] = X[i]; + i += 1; + } + while i < L { + cat[i] = Y[i - XL]; + i += 1; + } + cat + } + + // SAFETY: safe because x and y are ensured to be valid + unsafe { std::str::from_utf8_unchecked(&concat()) } + }}; +} + +const RUNTIME_PREFIX: &str = "wasmtime_"; +const NODE_PREFIX: &str = "polkadot_v"; +const ARTIFACT_PREFIX: &str = + concat_const!(concat_const!(RUNTIME_PREFIX, NODE_PREFIX), NODE_VERSION); + /// Identifier of an artifact. Encodes a code hash of the PVF and a hash of executor parameter set. #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct ArtifactId { @@ -75,9 +111,6 @@ pub struct ArtifactId { } impl ArtifactId { - const PREFIX: &'static str = "wasmtime_"; - const NODE_VERSION_PREFIX: &'static str = "polkadot_v"; - /// Creates a new artifact ID with the given hash. pub fn new(code_hash: ValidationCodeHash, executor_params_hash: ExecutorParamsHash) -> Self { Self { code_hash, executor_params_hash } @@ -94,8 +127,7 @@ impl ArtifactId { use polkadot_core_primitives::Hash; use std::str::FromStr as _; - let file_name = - file_name.strip_prefix(Self::PREFIX)?.strip_prefix(Self::NODE_VERSION_PREFIX)?; + let file_name = file_name.strip_prefix(RUNTIME_PREFIX)?.strip_prefix(NODE_PREFIX)?; // [ node version | code hash | param hash ] let parts: Vec<&str> = file_name.split('_').collect(); @@ -112,11 +144,7 @@ impl ArtifactId { pub fn path(&self, cache_path: &Path) -> PathBuf { let file_name = format!( "{}{}{}_{:#x}_{:#x}", - Self::PREFIX, - Self::NODE_VERSION_PREFIX, - NODE_VERSION, - self.code_hash, - self.executor_params_hash + RUNTIME_PREFIX, NODE_PREFIX, NODE_VERSION, self.code_hash, self.executor_params_hash ); cache_path.join(file_name) } @@ -185,20 +213,52 @@ impl Artifacts { Self { artifacts: HashMap::new() } } - /// Initialize a blank cache at the given path. This will clear everything present at the - /// given path, to be populated over time. - /// - /// The recognized artifacts will be filled in the table and unrecognized will be removed. + /// Create a table with valid artifacts and prune the invalid ones. pub async fn new_and_prune(cache_path: &Path) -> Self { - // First delete the entire cache. This includes artifacts and any leftover worker dirs (see - // [`WorkerDir`]). Nodes are long-running so this should populate shortly. - let _ = tokio::fs::remove_dir_all(cache_path).await; // Make sure that the cache path directory and all its parents are created. let _ = tokio::fs::create_dir_all(cache_path).await; + Self::prune_stale(cache_path).await; + Self { artifacts: HashMap::new() } } + // Prune stale and unknown artifacts, judging by the name. + async fn prune_stale(cache_path: impl AsRef) { + fn should_prune(file_name: impl AsRef) -> bool { + if let Some(file_name) = file_name.as_ref().to_str() { + !file_name.starts_with(ARTIFACT_PREFIX) + } else { + true + } + } + + let cache_path = cache_path.as_ref(); + + if let Ok(mut dir) = tokio::fs::read_dir(cache_path).await { + let mut prunes = vec![]; + + loop { + match dir.next_entry().await { + Ok(None) => break, + Ok(Some(entry)) => { + let file_name = entry.file_name(); + if should_prune(&file_name) { + prunes.push(tokio::fs::remove_file(cache_path.join(file_name))); + } + }, + Err(err) => gum::error!( + target: LOG_TARGET, + ?err, + "collecting stale artifacts", + ), + } + } + + futures::future::join_all(prunes).await; + } + } + /// Returns the state of the given artifact by its ID. pub fn artifact_state_mut(&mut self, artifact_id: &ArtifactId) -> Option<&mut ArtifactState> { self.artifacts.get_mut(artifact_id) @@ -266,15 +326,42 @@ impl Artifacts { #[cfg(test)] mod tests { - use super::{ArtifactId, Artifacts, NODE_VERSION}; + use super::{ArtifactId, Artifacts, ARTIFACT_PREFIX, NODE_VERSION}; use polkadot_primitives::ExecutorParamsHash; use sp_core::H256; - use std::{path::Path, str::FromStr}; + use std::{ + fs, + path::{Path, PathBuf}, + str::FromStr, + }; fn file_name(code_hash: &str, param_hash: &str) -> String { format!("wasmtime_polkadot_v{}_0x{}_0x{}", NODE_VERSION, code_hash, param_hash) } + fn fake_artifact_path>(dir: D, prefix: &str) -> PathBuf { + let code_hash = "1234567890123456789012345678901234567890123456789012345678901234"; + let params_hash = "4321098765432109876543210987654321098765432109876543210987654321"; + let file_name = format!("{}_0x{}_0x{}", prefix, code_hash, params_hash); + + let mut path = dir.as_ref().to_path_buf(); + path.push(file_name); + path + } + + fn create_fake_artifact>(dir: D, prefix: &str) { + let path = fake_artifact_path(dir, prefix); + fs::File::create(path).unwrap(); + } + + #[test] + fn artifact_prefix() { + assert_eq!( + ARTIFACT_PREFIX, + format!("wasmtime_polkadot_v{}", NODE_VERSION), + ) + } + #[test] fn from_file_name() { assert!(ArtifactId::from_file_name("").is_none()); @@ -318,26 +405,23 @@ mod tests { } #[tokio::test] - async fn artifacts_removes_cache_on_startup() { - let fake_cache_path = crate::worker_intf::tmppath("test-cache").await.unwrap(); - let fake_artifact_path = { - let mut p = fake_cache_path.clone(); - p.push("wasmtime_0x1234567890123456789012345678901234567890123456789012345678901234"); - p - }; + async fn remove_stale_cache_on_startup() { + let cache_dir = crate::worker_intf::tmppath("test-cache").await.unwrap(); - // create a tmp cache with 1 artifact. + fs::create_dir_all(&cache_dir).unwrap(); - std::fs::create_dir_all(&fake_cache_path).unwrap(); - std::fs::File::create(fake_artifact_path).unwrap(); + // 3 invalid, 1 valid + create_fake_artifact(&cache_dir, ""); + create_fake_artifact(&cache_dir, "wasmtime_polkadot_v"); + create_fake_artifact(&cache_dir, "wasmtime_polkadot_v1.0.0"); + create_fake_artifact(&cache_dir, ARTIFACT_PREFIX); - // this should remove it and re-create. + assert_eq!(fs::read_dir(&cache_dir).unwrap().count(), 4); - let p = &fake_cache_path; - Artifacts::new_and_prune(p).await; + Artifacts::new_and_prune(&cache_dir).await; - assert_eq!(std::fs::read_dir(&fake_cache_path).unwrap().count(), 0); + assert_eq!(fs::read_dir(&cache_dir).unwrap().count(), 1); - std::fs::remove_dir_all(fake_cache_path).unwrap(); + fs::remove_dir_all(cache_dir).unwrap(); } } diff --git a/polkadot/node/core/pvf/src/lib.rs b/polkadot/node/core/pvf/src/lib.rs index e3c6da9c4c6b..b883099c2b27 100644 --- a/polkadot/node/core/pvf/src/lib.rs +++ b/polkadot/node/core/pvf/src/lib.rs @@ -84,7 +84,7 @@ //! A pruning task will run at a fixed interval of time. This task will remove all artifacts that //! weren't used or received a heads up signal for a while. //! -//! ## Execution +//! ## Execution //! //! The execute workers will be fed by the requests from the execution queue, which is basically a //! combination of a path to the compiled artifact and the From 195bbce1ed726c177abc2abee44003f5e120e953 Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Wed, 18 Oct 2023 02:02:35 +0800 Subject: [PATCH 03/38] barely working --- polkadot/node/core/pvf/src/artifacts.rs | 89 +++++++++++++++++-------- 1 file changed, 61 insertions(+), 28 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index d98d281af312..08d13af2fb3b 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -57,6 +57,7 @@ use crate::{host::PrepareResultSender, LOG_TARGET}; use always_assert::always; +use polkadot_core_primitives::Hash; use polkadot_node_core_pvf_common::{error::PrepareError, prepare::PrepareStats, pvf::PvfPrepData}; use polkadot_node_primitives::NODE_VERSION; use polkadot_parachain_primitives::primitives::ValidationCodeHash; @@ -64,6 +65,7 @@ use polkadot_primitives::ExecutorParamsHash; use std::{ collections::HashMap, path::{Path, PathBuf}, + str::FromStr as _, time::{Duration, SystemTime}, }; @@ -124,9 +126,6 @@ impl ArtifactId { /// Tries to recover the artifact id from the given file name. #[cfg(test)] pub fn from_file_name(file_name: &str) -> Option { - use polkadot_core_primitives::Hash; - use std::str::FromStr as _; - let file_name = file_name.strip_prefix(RUNTIME_PREFIX)?.strip_prefix(NODE_PREFIX)?; // [ node version | code hash | param hash ] @@ -204,36 +203,66 @@ pub enum ArtifactState { /// A container of all known artifact ids and their states. pub struct Artifacts { - artifacts: HashMap, + inner: HashMap, } impl Artifacts { #[cfg(test)] pub(crate) fn new() -> Self { - Self { artifacts: HashMap::new() } + Self { inner: HashMap::new() } + } + + #[cfg(test)] + pub(crate) fn len(&self) -> usize { + self.inner.len() } /// Create a table with valid artifacts and prune the invalid ones. pub async fn new_and_prune(cache_path: &Path) -> Self { - // Make sure that the cache path directory and all its parents are created. - let _ = tokio::fs::create_dir_all(cache_path).await; + let mut artifacts = Self { inner: HashMap::new() }; + Self::prune_and_insert(cache_path, &mut artifacts).await; + artifacts + } - Self::prune_stale(cache_path).await; + // FIXME eagr: extremely janky, please comment on the appropriate way of setting + // * `last_time_needed` set as roughly around startup time + // * `prepare_stats` set as Nones, since the metadata was lost + async fn prune_and_insert(cache_path: impl AsRef, artifacts: &mut Artifacts) { + fn is_stale(file_name: &str) -> bool { + !file_name.starts_with(ARTIFACT_PREFIX) + } - Self { artifacts: HashMap::new() } - } + fn id_from_file_name(file_name: &str) -> Option { + let file_name = file_name.strip_prefix(ARTIFACT_PREFIX)?.strip_prefix('_')?; + + // [ code hash | param hash ] + let hashes: Vec<&str> = file_name.split('_').collect(); - // Prune stale and unknown artifacts, judging by the name. - async fn prune_stale(cache_path: impl AsRef) { - fn should_prune(file_name: impl AsRef) -> bool { - if let Some(file_name) = file_name.as_ref().to_str() { - !file_name.starts_with(ARTIFACT_PREFIX) - } else { - true + if hashes.len() != 2 { + return None } + + let (code_hash_str, executor_params_hash_str) = (hashes[0], hashes[1]); + + let code_hash = Hash::from_str(code_hash_str).ok()?.into(); + let executor_params_hash = + ExecutorParamsHash::from_hash(Hash::from_str(executor_params_hash_str).ok()?); + + Some(ArtifactId { code_hash, executor_params_hash }) + } + + fn insert_cache_as_prepared(artifacts: &mut Artifacts, id: ArtifactId) { + let last_time_needed = SystemTime::now(); + let prepare_stats = Default::default(); + always!(artifacts + .inner + .insert(id, ArtifactState::Prepared { last_time_needed, prepare_stats }) + .is_none()); } + // Make sure that the cache path directory and all its parents are created. let cache_path = cache_path.as_ref(); + let _ = tokio::fs::create_dir_all(cache_path).await; if let Ok(mut dir) = tokio::fs::read_dir(cache_path).await { let mut prunes = vec![]; @@ -243,8 +272,14 @@ impl Artifacts { Ok(None) => break, Ok(Some(entry)) => { let file_name = entry.file_name(); - if should_prune(&file_name) { + let file_name = file_name + .to_str() + .expect("existing file path should always be valid unicode"); + + if is_stale(file_name) { prunes.push(tokio::fs::remove_file(cache_path.join(file_name))); + } else if let Some(id) = id_from_file_name(file_name) { + insert_cache_as_prepared(artifacts, id); } }, Err(err) => gum::error!( @@ -261,7 +296,7 @@ impl Artifacts { /// Returns the state of the given artifact by its ID. pub fn artifact_state_mut(&mut self, artifact_id: &ArtifactId) -> Option<&mut ArtifactState> { - self.artifacts.get_mut(artifact_id) + self.inner.get_mut(artifact_id) } /// Inform the table about the artifact with the given ID. The state will be set to "preparing". @@ -275,7 +310,7 @@ impl Artifacts { ) { // See the precondition. always!(self - .artifacts + .inner .insert(artifact_id, ArtifactState::Preparing { waiting_for_response, num_failures: 0 }) .is_none()); } @@ -293,7 +328,7 @@ impl Artifacts { ) { // See the precondition. always!(self - .artifacts + .inner .insert(artifact_id, ArtifactState::Prepared { last_time_needed, prepare_stats }) .is_none()); } @@ -304,7 +339,7 @@ impl Artifacts { let now = SystemTime::now(); let mut to_remove = vec![]; - for (k, v) in self.artifacts.iter() { + for (k, v) in self.inner.iter() { if let ArtifactState::Prepared { last_time_needed, .. } = *v { if now .duration_since(last_time_needed) @@ -317,7 +352,7 @@ impl Artifacts { } for artifact in &to_remove { - self.artifacts.remove(artifact); + self.inner.remove(artifact); } to_remove @@ -356,10 +391,7 @@ mod tests { #[test] fn artifact_prefix() { - assert_eq!( - ARTIFACT_PREFIX, - format!("wasmtime_polkadot_v{}", NODE_VERSION), - ) + assert_eq!(ARTIFACT_PREFIX, format!("wasmtime_polkadot_v{}", NODE_VERSION),) } #[test] @@ -418,9 +450,10 @@ mod tests { assert_eq!(fs::read_dir(&cache_dir).unwrap().count(), 4); - Artifacts::new_and_prune(&cache_dir).await; + let artifacts = Artifacts::new_and_prune(&cache_dir).await; assert_eq!(fs::read_dir(&cache_dir).unwrap().count(), 1); + assert_eq!(artifacts.len(), 1); fs::remove_dir_all(cache_dir).unwrap(); } From 99aa012c12c7347f4145b29497e400b9a3b1290c Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Wed, 18 Oct 2023 02:29:06 +0800 Subject: [PATCH 04/38] use ArtifactId::from_file_name() --- polkadot/node/core/pvf/src/artifacts.rs | 61 ++++++++++--------------- 1 file changed, 23 insertions(+), 38 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index 08d13af2fb3b..722dcc32da53 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -103,7 +103,7 @@ macro_rules! concat_const { const RUNTIME_PREFIX: &str = "wasmtime_"; const NODE_PREFIX: &str = "polkadot_v"; const ARTIFACT_PREFIX: &str = - concat_const!(concat_const!(RUNTIME_PREFIX, NODE_PREFIX), NODE_VERSION); + concat_const!(RUNTIME_PREFIX, concat_const!(NODE_PREFIX, NODE_VERSION)); /// Identifier of an artifact. Encodes a code hash of the PVF and a hash of executor parameter set. #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] @@ -123,14 +123,27 @@ impl ArtifactId { Self::new(pvf.code_hash(), pvf.executor_params().hash()) } + /// Returns the expected path to this artifact given the root of the cache. + pub fn path(&self, cache_path: &Path) -> PathBuf { + let file_name = format!( + "{}{}{}_{:#x}_{:#x}", + RUNTIME_PREFIX, NODE_PREFIX, NODE_VERSION, self.code_hash, self.executor_params_hash + ); + cache_path.join(file_name) + } + /// Tries to recover the artifact id from the given file name. - #[cfg(test)] - pub fn from_file_name(file_name: &str) -> Option { - let file_name = file_name.strip_prefix(RUNTIME_PREFIX)?.strip_prefix(NODE_PREFIX)?; + pub(crate) fn from_file_name(file_name: &str) -> Option { + let file_name = file_name.strip_prefix(ARTIFACT_PREFIX)?.strip_prefix('_')?; - // [ node version | code hash | param hash ] - let parts: Vec<&str> = file_name.split('_').collect(); - let (_node_ver, code_hash_str, executor_params_hash_str) = (parts[0], parts[1], parts[2]); + // [ code hash | param hash ] + let hashes: Vec<&str> = file_name.split('_').collect(); + + if hashes.len() != 2 { + return None + } + + let (code_hash_str, executor_params_hash_str) = (hashes[0], hashes[1]); let code_hash = Hash::from_str(code_hash_str).ok()?.into(); let executor_params_hash = @@ -138,15 +151,6 @@ impl ArtifactId { Some(Self { code_hash, executor_params_hash }) } - - /// Returns the expected path to this artifact given the root of the cache. - pub fn path(&self, cache_path: &Path) -> PathBuf { - let file_name = format!( - "{}{}{}_{:#x}_{:#x}", - RUNTIME_PREFIX, NODE_PREFIX, NODE_VERSION, self.code_hash, self.executor_params_hash - ); - cache_path.join(file_name) - } } /// A bundle of the artifact ID and the path. @@ -232,26 +236,7 @@ impl Artifacts { !file_name.starts_with(ARTIFACT_PREFIX) } - fn id_from_file_name(file_name: &str) -> Option { - let file_name = file_name.strip_prefix(ARTIFACT_PREFIX)?.strip_prefix('_')?; - - // [ code hash | param hash ] - let hashes: Vec<&str> = file_name.split('_').collect(); - - if hashes.len() != 2 { - return None - } - - let (code_hash_str, executor_params_hash_str) = (hashes[0], hashes[1]); - - let code_hash = Hash::from_str(code_hash_str).ok()?.into(); - let executor_params_hash = - ExecutorParamsHash::from_hash(Hash::from_str(executor_params_hash_str).ok()?); - - Some(ArtifactId { code_hash, executor_params_hash }) - } - - fn insert_cache_as_prepared(artifacts: &mut Artifacts, id: ArtifactId) { + fn insert_cache(artifacts: &mut Artifacts, id: ArtifactId) { let last_time_needed = SystemTime::now(); let prepare_stats = Default::default(); always!(artifacts @@ -278,8 +263,8 @@ impl Artifacts { if is_stale(file_name) { prunes.push(tokio::fs::remove_file(cache_path.join(file_name))); - } else if let Some(id) = id_from_file_name(file_name) { - insert_cache_as_prepared(artifacts, id); + } else if let Some(id) = ArtifactId::from_file_name(file_name) { + insert_cache(artifacts, id); } }, Err(err) => gum::error!( From 17973ef5268fcd72404b396a76b8339746a4bafa Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Wed, 18 Oct 2023 02:56:30 +0800 Subject: [PATCH 05/38] ignore non-unicode file names --- polkadot/node/core/pvf/src/artifacts.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index 722dcc32da53..6229da938806 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -257,14 +257,12 @@ impl Artifacts { Ok(None) => break, Ok(Some(entry)) => { let file_name = entry.file_name(); - let file_name = file_name - .to_str() - .expect("existing file path should always be valid unicode"); - - if is_stale(file_name) { - prunes.push(tokio::fs::remove_file(cache_path.join(file_name))); - } else if let Some(id) = ArtifactId::from_file_name(file_name) { - insert_cache(artifacts, id); + if let Some(file_name) = file_name.to_str() { + if is_stale(file_name) { + prunes.push(tokio::fs::remove_file(cache_path.join(file_name))); + } else if let Some(id) = ArtifactId::from_file_name(file_name) { + insert_cache(artifacts, id); + } } }, Err(err) => gum::error!( From e81b45666f08035e9708460d27efa163b18362bb Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Wed, 18 Oct 2023 19:41:14 +0800 Subject: [PATCH 06/38] generalize concat_const!() --- polkadot/node/core/pvf/src/artifacts.rs | 60 ++++++++++++------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index 6229da938806..ff20b09673db 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -70,40 +70,40 @@ use std::{ }; macro_rules! concat_const { - ($x:expr, $y:expr) => {{ - // ensure inputs to be strings - const _: &str = $x; - const _: &str = $y; - - const X: &[u8] = $x.as_bytes(); - const Y: &[u8] = $y.as_bytes(); - const XL: usize = X.len(); - const YL: usize = Y.len(); - const L: usize = XL + YL; - - const fn concat() -> [u8; L] { - let mut cat = [0u8; L]; - let mut i = 0; - while i < XL { - cat[i] = X[i]; - i += 1; - } - while i < L { - cat[i] = Y[i - XL]; - i += 1; - } - cat - } - - // SAFETY: safe because x and y are ensured to be valid - unsafe { std::str::from_utf8_unchecked(&concat()) } - }}; + ($($arg:tt),*) => {{ + // ensure inputs to be strings + $(const _: &str = $arg;)* + + const LEN: usize = 0 $(+ $arg.len())*; + + const CAT: [u8; LEN] = { + let mut cat = [0u8; LEN]; + // for turning off unused warning + let mut _offset = 0; + + $({ + const BYTES: &[u8] = $arg.as_bytes(); + + let mut i = 0; + let len = BYTES.len(); + while i < len { + cat[_offset + i] = BYTES[i]; + i += 1; + } + _offset += len; + })* + + cat + }; + + // SAFETY: safe because x and y are guaranteed to be valid + unsafe { std::str::from_utf8_unchecked(&CAT) } + }} } const RUNTIME_PREFIX: &str = "wasmtime_"; const NODE_PREFIX: &str = "polkadot_v"; -const ARTIFACT_PREFIX: &str = - concat_const!(RUNTIME_PREFIX, concat_const!(NODE_PREFIX, NODE_VERSION)); +const ARTIFACT_PREFIX: &str = concat_const!(RUNTIME_PREFIX, NODE_PREFIX, NODE_VERSION); /// Identifier of an artifact. Encodes a code hash of the PVF and a hash of executor parameter set. #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] From 851a77ab46aea1f6a139cab60312368d0020197b Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Wed, 18 Oct 2023 20:13:46 +0800 Subject: [PATCH 07/38] per advices Co-authored-by: Marcin S --- polkadot/node/core/pvf/src/artifacts.rs | 53 +++++++++++++------------ polkadot/node/core/pvf/src/host.rs | 2 +- 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index ff20b09673db..a242022c9606 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -96,8 +96,10 @@ macro_rules! concat_const { cat }; - // SAFETY: safe because x and y are guaranteed to be valid - unsafe { std::str::from_utf8_unchecked(&CAT) } + match std::str::from_utf8(&CAT) { + Ok(s) => s, + Err(_) => panic!("Error converting bytes to str"), + } }} } @@ -125,15 +127,13 @@ impl ArtifactId { /// Returns the expected path to this artifact given the root of the cache. pub fn path(&self, cache_path: &Path) -> PathBuf { - let file_name = format!( - "{}{}{}_{:#x}_{:#x}", - RUNTIME_PREFIX, NODE_PREFIX, NODE_VERSION, self.code_hash, self.executor_params_hash - ); + let file_name = + format!("{}_{:#x}_{:#x}", ARTIFACT_PREFIX, self.code_hash, self.executor_params_hash); cache_path.join(file_name) } /// Tries to recover the artifact id from the given file name. - pub(crate) fn from_file_name(file_name: &str) -> Option { + fn from_file_name(file_name: &str) -> Option { let file_name = file_name.strip_prefix(ARTIFACT_PREFIX)?.strip_prefix('_')?; // [ code hash | param hash ] @@ -212,7 +212,7 @@ pub struct Artifacts { impl Artifacts { #[cfg(test)] - pub(crate) fn new() -> Self { + pub(crate) fn empty() -> Self { Self { inner: HashMap::new() } } @@ -221,30 +221,20 @@ impl Artifacts { self.inner.len() } - /// Create a table with valid artifacts and prune the invalid ones. + /// Create an empty table and populate it with valid artifacts as [`ArtifactState::Prepared`], + /// if any. The existing caches will be checked by their file name to determine whether they are + /// valid, e.g., matching the current node version. The ones deemed invalid will be pruned. pub async fn new_and_prune(cache_path: &Path) -> Self { let mut artifacts = Self { inner: HashMap::new() }; - Self::prune_and_insert(cache_path, &mut artifacts).await; + artifacts.insert_and_prune(cache_path).await; artifacts } - // FIXME eagr: extremely janky, please comment on the appropriate way of setting - // * `last_time_needed` set as roughly around startup time - // * `prepare_stats` set as Nones, since the metadata was lost - async fn prune_and_insert(cache_path: impl AsRef, artifacts: &mut Artifacts) { + async fn insert_and_prune(&mut self, cache_path: impl AsRef) { fn is_stale(file_name: &str) -> bool { !file_name.starts_with(ARTIFACT_PREFIX) } - fn insert_cache(artifacts: &mut Artifacts, id: ArtifactId) { - let last_time_needed = SystemTime::now(); - let prepare_stats = Default::default(); - always!(artifacts - .inner - .insert(id, ArtifactState::Prepared { last_time_needed, prepare_stats }) - .is_none()); - } - // Make sure that the cache path directory and all its parents are created. let cache_path = cache_path.as_ref(); let _ = tokio::fs::create_dir_all(cache_path).await; @@ -260,8 +250,20 @@ impl Artifacts { if let Some(file_name) = file_name.to_str() { if is_stale(file_name) { prunes.push(tokio::fs::remove_file(cache_path.join(file_name))); + + gum::debug!( + target: LOG_TARGET, + "discarding invalid artifact {}", + file_name, + ); } else if let Some(id) = ArtifactId::from_file_name(file_name) { - insert_cache(artifacts, id); + self.insert_prepared(id, SystemTime::now(), Default::default()); + + gum::debug!( + target: LOG_TARGET, + "reusing valid artifact found on disk for current node version v{}", + NODE_VERSION, + ); } } }, @@ -302,8 +304,7 @@ impl Artifacts { /// /// This function must be used only for brand-new artifacts and should never be used for /// replacing existing ones. - #[cfg(test)] - pub fn insert_prepared( + pub(crate) fn insert_prepared( &mut self, artifact_id: ArtifactId, last_time_needed: SystemTime, diff --git a/polkadot/node/core/pvf/src/host.rs b/polkadot/node/core/pvf/src/host.rs index 662c50401add..5f0671a90ba4 100644 --- a/polkadot/node/core/pvf/src/host.rs +++ b/polkadot/node/core/pvf/src/host.rs @@ -1028,7 +1028,7 @@ pub(crate) mod tests { cleanup_pulse_interval: Duration::from_secs(3600), artifact_ttl: Duration::from_secs(3600), - artifacts: Artifacts::new(), + artifacts: Artifacts::empty(), } } From a3b0fcf61b60284855a241b902f7a9dae7af954d Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Wed, 18 Oct 2023 23:00:46 +0800 Subject: [PATCH 08/38] break on IO error --- polkadot/node/core/pvf/src/artifacts.rs | 33 ++++++++++++++----------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index a242022c9606..3783cb3b3c4c 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -251,27 +251,30 @@ impl Artifacts { if is_stale(file_name) { prunes.push(tokio::fs::remove_file(cache_path.join(file_name))); - gum::debug!( - target: LOG_TARGET, - "discarding invalid artifact {}", - file_name, - ); + gum::debug!( + target: LOG_TARGET, + "discarding invalid artifact {}", + file_name, + ); } else if let Some(id) = ArtifactId::from_file_name(file_name) { self.insert_prepared(id, SystemTime::now(), Default::default()); - gum::debug!( - target: LOG_TARGET, - "reusing valid artifact found on disk for current node version v{}", - NODE_VERSION, - ); + gum::debug!( + target: LOG_TARGET, + "reusing valid artifact found on disk for current node version v{}", + NODE_VERSION, + ); } } }, - Err(err) => gum::error!( - target: LOG_TARGET, - ?err, - "collecting stale artifacts", - ), + Err(err) => { + gum::error!( + target: LOG_TARGET, + ?err, + "I/O error while collecting stale artifacts", + ); + break + }, } } From b323c2803e11be95f23ec82ea28e65db3692cfba Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Wed, 18 Oct 2023 23:09:53 +0800 Subject: [PATCH 09/38] make pruning sound --- polkadot/node/core/pvf/src/artifacts.rs | 83 ++++++++++++++++--------- 1 file changed, 52 insertions(+), 31 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index 3783cb3b3c4c..65078f69ece0 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -231,8 +231,8 @@ impl Artifacts { } async fn insert_and_prune(&mut self, cache_path: impl AsRef) { - fn is_stale(file_name: &str) -> bool { - !file_name.starts_with(ARTIFACT_PREFIX) + fn is_fresh(file_name: &str) -> bool { + file_name.starts_with(ARTIFACT_PREFIX) } // Make sure that the cache path directory and all its parents are created. @@ -248,23 +248,24 @@ impl Artifacts { Ok(Some(entry)) => { let file_name = entry.file_name(); if let Some(file_name) = file_name.to_str() { - if is_stale(file_name) { - prunes.push(tokio::fs::remove_file(cache_path.join(file_name))); - - gum::debug!( - target: LOG_TARGET, - "discarding invalid artifact {}", - file_name, - ); - } else if let Some(id) = ArtifactId::from_file_name(file_name) { - self.insert_prepared(id, SystemTime::now(), Default::default()); - - gum::debug!( - target: LOG_TARGET, - "reusing valid artifact found on disk for current node version v{}", - NODE_VERSION, - ); + if is_fresh(file_name) { + if let Some(id) = ArtifactId::from_file_name(file_name) { + self.insert_prepared(id, SystemTime::now(), Default::default()); + gum::debug!( + target: LOG_TARGET, + "reusing valid artifact found on disk for current node version v{}", + NODE_VERSION, + ); + continue + } } + + prunes.push(tokio::fs::remove_file(cache_path.join(file_name))); + gum::debug!( + target: LOG_TARGET, + "discarding invalid artifact {}", + file_name, + ); } }, Err(err) => { @@ -350,6 +351,7 @@ impl Artifacts { mod tests { use super::{ArtifactId, Artifacts, ARTIFACT_PREFIX, NODE_VERSION}; use polkadot_primitives::ExecutorParamsHash; + use rand::Rng; use sp_core::H256; use std::{ fs, @@ -357,25 +359,42 @@ mod tests { str::FromStr, }; + fn rand_hash() -> String { + let mut rng = rand::thread_rng(); + let hex: Vec<_> = "0123456789abcdef".chars().collect(); + (0..64).map(|_| hex[rng.gen_range(0..hex.len())]).collect() + } + fn file_name(code_hash: &str, param_hash: &str) -> String { format!("wasmtime_polkadot_v{}_0x{}_0x{}", NODE_VERSION, code_hash, param_hash) } - fn fake_artifact_path>(dir: D, prefix: &str) -> PathBuf { - let code_hash = "1234567890123456789012345678901234567890123456789012345678901234"; - let params_hash = "4321098765432109876543210987654321098765432109876543210987654321"; - let file_name = format!("{}_0x{}_0x{}", prefix, code_hash, params_hash); - + fn fake_artifact_path( + dir: impl AsRef, + prefix: &str, + code_hash: impl AsRef, + params_hash: impl AsRef, + ) -> PathBuf { let mut path = dir.as_ref().to_path_buf(); + let file_name = format!("{}_0x{}_0x{}", prefix, code_hash.as_ref(), params_hash.as_ref()); path.push(file_name); path } - fn create_fake_artifact>(dir: D, prefix: &str) { - let path = fake_artifact_path(dir, prefix); + fn create_artifact( + dir: impl AsRef, + prefix: &str, + code_hash: impl AsRef, + params_hash: impl AsRef, + ) { + let path = fake_artifact_path(dir, prefix, code_hash, params_hash); fs::File::create(path).unwrap(); } + fn create_rand_artifact(dir: impl AsRef, prefix: &str) { + create_artifact(dir, prefix, rand_hash(), rand_hash()); + } + #[test] fn artifact_prefix() { assert_eq!(ARTIFACT_PREFIX, format!("wasmtime_polkadot_v{}", NODE_VERSION),) @@ -429,13 +448,15 @@ mod tests { fs::create_dir_all(&cache_dir).unwrap(); - // 3 invalid, 1 valid - create_fake_artifact(&cache_dir, ""); - create_fake_artifact(&cache_dir, "wasmtime_polkadot_v"); - create_fake_artifact(&cache_dir, "wasmtime_polkadot_v1.0.0"); - create_fake_artifact(&cache_dir, ARTIFACT_PREFIX); + // 5 invalid, 1 valid + create_rand_artifact(&cache_dir, ""); + create_rand_artifact(&cache_dir, "wasmtime_polkadot_v"); + create_rand_artifact(&cache_dir, "wasmtime_polkadot_v1.0.0"); + create_rand_artifact(&cache_dir, ARTIFACT_PREFIX); + create_artifact(&cache_dir, ARTIFACT_PREFIX, "", ""); + create_artifact(&cache_dir, ARTIFACT_PREFIX, "000", "000000"); - assert_eq!(fs::read_dir(&cache_dir).unwrap().count(), 4); + assert_eq!(fs::read_dir(&cache_dir).unwrap().count(), 6); let artifacts = Artifacts::new_and_prune(&cache_dir).await; From a2808f8da5feea7ecf0daad9782f075341117144 Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Thu, 19 Oct 2023 02:05:35 +0800 Subject: [PATCH 10/38] log more events --- polkadot/node/core/pvf/src/artifacts.rs | 90 ++++++++++++++++--------- 1 file changed, 58 insertions(+), 32 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index 65078f69ece0..b7aaee0d026a 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -76,6 +76,7 @@ macro_rules! concat_const { const LEN: usize = 0 $(+ $arg.len())*; + // concatenate strings as byte slices const CAT: [u8; LEN] = { let mut cat = [0u8; LEN]; // for turning off unused warning @@ -96,6 +97,9 @@ macro_rules! concat_const { cat }; + // FIXME eagr: consider opting for `from_utf8_unchecked()` + // SAFETY: The concatenation of two string slices is guaranteed to be valid UTF8, + // so are the byte slices as they have the same memory layout. match std::str::from_utf8(&CAT) { Ok(s) => s, Err(_) => panic!("Error converting bytes to str"), @@ -231,55 +235,77 @@ impl Artifacts { } async fn insert_and_prune(&mut self, cache_path: impl AsRef) { - fn is_fresh(file_name: &str) -> bool { - file_name.starts_with(ARTIFACT_PREFIX) + fn is_stale(file_name: &str) -> bool { + !file_name.starts_with(ARTIFACT_PREFIX) } // Make sure that the cache path directory and all its parents are created. let cache_path = cache_path.as_ref(); let _ = tokio::fs::create_dir_all(cache_path).await; - if let Ok(mut dir) = tokio::fs::read_dir(cache_path).await { - let mut prunes = vec![]; - - loop { - match dir.next_entry().await { - Ok(None) => break, - Ok(Some(entry)) => { - let file_name = entry.file_name(); - if let Some(file_name) = file_name.to_str() { - if is_fresh(file_name) { - if let Some(id) = ArtifactId::from_file_name(file_name) { + match tokio::fs::read_dir(cache_path).await { + Ok(mut dir) => { + let mut prunes = vec![]; + + loop { + match dir.next_entry().await { + Ok(None) => break, + Ok(Some(entry)) => { + let file_name = entry.file_name(); + if let Some(file_name) = file_name.to_str() { + let id = ArtifactId::from_file_name(file_name); + let file_path = cache_path.join(file_name); + + if is_stale(file_name) || id.is_none() { + prunes.push(tokio::fs::remove_file(file_path.clone())); + gum::debug!( + target: LOG_TARGET, + "discarding invalid artifact {:?}", + &file_path, + ); + continue + } + + if let Some(id) = id { self.insert_prepared(id, SystemTime::now(), Default::default()); gum::debug!( target: LOG_TARGET, - "reusing valid artifact found on disk for current node version v{}", + "reusing {:?} for node version v{}", + &file_path, NODE_VERSION, ); - continue } + } else { + gum::warn!( + target: LOG_TARGET, + "non-Unicode file name found in {:?}", + cache_path, + ); } - - prunes.push(tokio::fs::remove_file(cache_path.join(file_name))); - gum::debug!( + }, + Err(err) => { + gum::warn!( target: LOG_TARGET, - "discarding invalid artifact {}", - file_name, + ?err, + "while collecting stale artifacts from {:?}", + cache_path, ); - } - }, - Err(err) => { - gum::error!( - target: LOG_TARGET, - ?err, - "I/O error while collecting stale artifacts", - ); - break - }, + break + }, + } } - } - futures::future::join_all(prunes).await; + futures::future::join_all(prunes).await; + }, + + Err(err) => { + gum::error!( + target: LOG_TARGET, + ?err, + "failed to read dir {:?}", + cache_path, + ) + }, } } From 0657d4158837a2a84de5867a4a76f029f6456ba7 Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Fri, 20 Oct 2023 21:05:29 +0800 Subject: [PATCH 11/38] refactor --- polkadot/node/core/pvf/src/artifacts.rs | 132 ++++++++++++++---------- 1 file changed, 77 insertions(+), 55 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index b7aaee0d026a..60286c17034d 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -243,70 +243,92 @@ impl Artifacts { let cache_path = cache_path.as_ref(); let _ = tokio::fs::create_dir_all(cache_path).await; - match tokio::fs::read_dir(cache_path).await { - Ok(mut dir) => { - let mut prunes = vec![]; - - loop { - match dir.next_entry().await { - Ok(None) => break, - Ok(Some(entry)) => { - let file_name = entry.file_name(); - if let Some(file_name) = file_name.to_str() { - let id = ArtifactId::from_file_name(file_name); - let file_path = cache_path.join(file_name); - - if is_stale(file_name) || id.is_none() { - prunes.push(tokio::fs::remove_file(file_path.clone())); - gum::debug!( - target: LOG_TARGET, - "discarding invalid artifact {:?}", - &file_path, - ); - continue - } - - if let Some(id) = id { - self.insert_prepared(id, SystemTime::now(), Default::default()); - gum::debug!( - target: LOG_TARGET, - "reusing {:?} for node version v{}", - &file_path, - NODE_VERSION, - ); - } - } else { - gum::warn!( - target: LOG_TARGET, - "non-Unicode file name found in {:?}", - cache_path, - ); - } - }, + let read_dir = tokio::fs::read_dir(cache_path).await; + + if let Err(err) = read_dir { + gum::error!( + target: LOG_TARGET, + ?err, + "failed to read dir {:?}", + cache_path, + ); + return + } + + // `read_dir` is guaranteed to be Ok(_) + let mut dir = read_dir.unwrap(); + let mut prunes = vec![]; + + loop { + match dir.next_entry().await { + Ok(Some(entry)) => { + let file_type = entry.file_type().await; + let file_name = entry.file_name(); + + match file_type { + Ok(file_type) => + if !file_type.is_file() { + continue + }, Err(err) => { gum::warn!( target: LOG_TARGET, ?err, - "while collecting stale artifacts from {:?}", - cache_path, + "unable to get file type for {:?}", + file_name, ); - break + continue }, } - } - futures::future::join_all(prunes).await; - }, - - Err(err) => { - gum::error!( - target: LOG_TARGET, - ?err, - "failed to read dir {:?}", - cache_path, - ) - }, + if let Some(file_name) = file_name.to_str() { + let id = ArtifactId::from_file_name(file_name); + let file_path = cache_path.join(file_name); + + if is_stale(file_name) || id.is_none() { + prunes.push(tokio::fs::remove_file(file_path.clone())); + gum::debug!( + target: LOG_TARGET, + "discarding invalid artifact {:?}", + &file_path, + ); + continue + } + + if let Some(id) = id { + self.insert_prepared(id, SystemTime::now(), Default::default()); + gum::debug!( + target: LOG_TARGET, + "reusing {:?} for node version v{}", + &file_path, + NODE_VERSION, + ); + } + } else { + gum::warn!( + target: LOG_TARGET, + "non-Unicode file name {:?} found in {:?}", + file_name, + cache_path, + ); + } + }, + + Ok(None) => break, + + Err(err) => { + gum::warn!( + target: LOG_TARGET, + ?err, + "while processing artifacts in {:?}", + cache_path, + ); + break + }, + } } + + futures::future::join_all(prunes).await; } /// Returns the state of the given artifact by its ID. From 0384ae709367bd16635f12babcb0ac7f3af1ae9d Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Fri, 20 Oct 2023 23:09:50 +0800 Subject: [PATCH 12/38] doc --- polkadot/node/core/pvf/src/artifacts.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index 60286c17034d..f9f291fdba43 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -18,8 +18,8 @@ //! //! # Lifecycle of an artifact //! -//! 1. During node start-up, the artifacts cache is cleaned up. This means that all local artifacts -//! stored on-disk are cleared, and we start with an empty [`Artifacts`] table. +//! 1. During node start-up, we will check the cached artifacts, if any. The stale and corrupted +//! ones are pruned. The valid ones are registered in the [`Artifacts`] table. //! //! 2. In order to be executed, a PVF should be prepared first. This means that artifacts should //! have an [`ArtifactState::Prepared`] entry for that artifact in the table. If not, the @@ -69,6 +69,7 @@ use std::{ time::{Duration, SystemTime}, }; +// A workaround for defining a `const` that is a concatenation of other constants. macro_rules! concat_const { ($($arg:tt),*) => {{ // ensure inputs to be strings @@ -97,9 +98,8 @@ macro_rules! concat_const { cat }; - // FIXME eagr: consider opting for `from_utf8_unchecked()` - // SAFETY: The concatenation of two string slices is guaranteed to be valid UTF8, - // so are the byte slices as they have the same memory layout. + // The concatenation of two string slices is guaranteed to be valid UTF8, + // so are the byte slices as they have the same memory layout. match std::str::from_utf8(&CAT) { Ok(s) => s, Err(_) => panic!("Error converting bytes to str"), From 4b1bb0ab6586d611da7201fb0ebb71c129feccb1 Mon Sep 17 00:00:00 2001 From: "Marcin S." Date: Sat, 21 Oct 2023 16:53:43 +0200 Subject: [PATCH 13/38] Refactor indentation --- polkadot/node/core/pvf/src/artifacts.rs | 161 +++++++++++++----------- 1 file changed, 85 insertions(+), 76 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index f9f291fdba43..8c52999e967e 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -64,6 +64,7 @@ use polkadot_parachain_primitives::primitives::ValidationCodeHash; use polkadot_primitives::ExecutorParamsHash; use std::{ collections::HashMap, + future::Future, path::{Path, PathBuf}, str::FromStr as _, time::{Duration, SystemTime}, @@ -230,89 +231,30 @@ impl Artifacts { /// valid, e.g., matching the current node version. The ones deemed invalid will be pruned. pub async fn new_and_prune(cache_path: &Path) -> Self { let mut artifacts = Self { inner: HashMap::new() }; - artifacts.insert_and_prune(cache_path).await; - artifacts - } - - async fn insert_and_prune(&mut self, cache_path: impl AsRef) { - fn is_stale(file_name: &str) -> bool { - !file_name.starts_with(ARTIFACT_PREFIX) - } // Make sure that the cache path directory and all its parents are created. - let cache_path = cache_path.as_ref(); let _ = tokio::fs::create_dir_all(cache_path).await; - let read_dir = tokio::fs::read_dir(cache_path).await; - - if let Err(err) = read_dir { - gum::error!( - target: LOG_TARGET, - ?err, - "failed to read dir {:?}", - cache_path, - ); - return - } - - // `read_dir` is guaranteed to be Ok(_) - let mut dir = read_dir.unwrap(); + let mut dir = match tokio::fs::read_dir(cache_path).await { + Ok(dir) => dir, + Err(err) => { + gum::error!( + target: LOG_TARGET, + ?err, + "failed to read dir {:?}", + cache_path, + ); + return artifacts + }, + }; let mut prunes = vec![]; loop { match dir.next_entry().await { - Ok(Some(entry)) => { - let file_type = entry.file_type().await; - let file_name = entry.file_name(); - - match file_type { - Ok(file_type) => - if !file_type.is_file() { - continue - }, - Err(err) => { - gum::warn!( - target: LOG_TARGET, - ?err, - "unable to get file type for {:?}", - file_name, - ); - continue - }, - } - - if let Some(file_name) = file_name.to_str() { - let id = ArtifactId::from_file_name(file_name); - let file_path = cache_path.join(file_name); - - if is_stale(file_name) || id.is_none() { - prunes.push(tokio::fs::remove_file(file_path.clone())); - gum::debug!( - target: LOG_TARGET, - "discarding invalid artifact {:?}", - &file_path, - ); - continue - } - - if let Some(id) = id { - self.insert_prepared(id, SystemTime::now(), Default::default()); - gum::debug!( - target: LOG_TARGET, - "reusing {:?} for node version v{}", - &file_path, - NODE_VERSION, - ); - } - } else { - gum::warn!( - target: LOG_TARGET, - "non-Unicode file name {:?} found in {:?}", - file_name, - cache_path, - ); - } - }, + Ok(Some(entry)) => + if let Some(prune) = artifacts.insert_or_prune_entry(entry, cache_path).await { + prunes.push(prune); + }, Ok(None) => break, @@ -320,7 +262,7 @@ impl Artifacts { gum::warn!( target: LOG_TARGET, ?err, - "while processing artifacts in {:?}", + "error processing artifacts in {:?}", cache_path, ); break @@ -328,7 +270,74 @@ impl Artifacts { } } + // Prune all the invalid artifact files that were found. futures::future::join_all(prunes).await; + + artifacts + } + + /// Inserts the entry into the artifacts table if it is a valid artifact file. If the file has + /// to be pruned, returns a future that does the pruning. + async fn insert_or_prune_entry( + &mut self, + entry: tokio::fs::DirEntry, + cache_path: &Path, + ) -> Option { + fn is_stale(file_name: &str) -> bool { + !file_name.starts_with(ARTIFACT_PREFIX) + } + + let file_type = entry.file_type().await; + let file_name = entry.file_name(); + + match file_type { + Ok(file_type) => + if !file_type.is_file() { + return None + }, + Err(err) => { + gum::warn!( + target: LOG_TARGET, + ?err, + "unable to get file type for {:?}", + file_name, + ); + return None + }, + } + + if let Some(file_name) = file_name.to_str() { + let id = ArtifactId::from_file_name(file_name); + let file_path = cache_path.join(file_name); + + if is_stale(file_name) || id.is_none() { + gum::debug!( + target: LOG_TARGET, + "discarding invalid artifact {:?}", + &file_path, + ); + return Some(tokio::fs::remove_file(file_path.clone())) + } + + if let Some(id) = id { + self.insert_prepared(id, SystemTime::now(), Default::default()); + gum::debug!( + target: LOG_TARGET, + "reusing {:?} for node version v{}", + &file_path, + NODE_VERSION, + ); + } + } else { + gum::warn!( + target: LOG_TARGET, + "non-Unicode file name {:?} found in {:?}", + file_name, + cache_path, + ); + } + + None } /// Returns the state of the given artifact by its ID. From a8bcce4eba245eac451ee476c3811ebcf701cfeb Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Sun, 22 Oct 2023 01:36:32 +0800 Subject: [PATCH 14/38] refactor --- polkadot/node/core/pvf/src/artifacts.rs | 145 +++++++++++------------- 1 file changed, 69 insertions(+), 76 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index 8c52999e967e..8aebf8e7750c 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -64,7 +64,6 @@ use polkadot_parachain_primitives::primitives::ValidationCodeHash; use polkadot_primitives::ExecutorParamsHash; use std::{ collections::HashMap, - future::Future, path::{Path, PathBuf}, str::FromStr as _, time::{Duration, SystemTime}, @@ -231,6 +230,73 @@ impl Artifacts { /// valid, e.g., matching the current node version. The ones deemed invalid will be pruned. pub async fn new_and_prune(cache_path: &Path) -> Self { let mut artifacts = Self { inner: HashMap::new() }; + artifacts.insert_and_prune(cache_path).await; + artifacts + } + + async fn insert_and_prune(&mut self, cache_path: &Path) { + fn is_stale(file_name: &str) -> bool { + !file_name.starts_with(ARTIFACT_PREFIX) + } + + // Inserts the entry into the artifacts table if it is a valid artifact file, or prune it + // otherwise. + async fn insert_or_prune( + artifacts: &mut Artifacts, + entry: &tokio::fs::DirEntry, + cache_path: &Path, + ) { + let file_type = entry.file_type().await; + let file_name = entry.file_name(); + + match file_type { + Ok(file_type) => + if !file_type.is_file() { + return + }, + Err(err) => { + gum::warn!( + target: LOG_TARGET, + ?err, + "unable to get file type for {:?}", + file_name, + ); + return + }, + } + + if let Some(file_name) = file_name.to_str() { + let id = ArtifactId::from_file_name(file_name); + let file_path = cache_path.join(file_name); + + if is_stale(file_name) || id.is_none() { + gum::debug!( + target: LOG_TARGET, + "discarding invalid artifact {:?}", + &file_path, + ); + let _ = tokio::fs::remove_file(&file_path).await; + return + } + + if let Some(id) = id { + artifacts.insert_prepared(id, SystemTime::now(), Default::default()); + gum::debug!( + target: LOG_TARGET, + "reusing {:?} for node version v{}", + &file_path, + NODE_VERSION, + ); + } + } else { + gum::warn!( + target: LOG_TARGET, + "non-Unicode file name {:?} found in {:?}", + file_name, + cache_path, + ); + } + } // Make sure that the cache path directory and all its parents are created. let _ = tokio::fs::create_dir_all(cache_path).await; @@ -244,17 +310,13 @@ impl Artifacts { "failed to read dir {:?}", cache_path, ); - return artifacts + return }, }; - let mut prunes = vec![]; loop { match dir.next_entry().await { - Ok(Some(entry)) => - if let Some(prune) = artifacts.insert_or_prune_entry(entry, cache_path).await { - prunes.push(prune); - }, + Ok(Some(entry)) => insert_or_prune(self, &entry, cache_path).await, Ok(None) => break, @@ -269,75 +331,6 @@ impl Artifacts { }, } } - - // Prune all the invalid artifact files that were found. - futures::future::join_all(prunes).await; - - artifacts - } - - /// Inserts the entry into the artifacts table if it is a valid artifact file. If the file has - /// to be pruned, returns a future that does the pruning. - async fn insert_or_prune_entry( - &mut self, - entry: tokio::fs::DirEntry, - cache_path: &Path, - ) -> Option { - fn is_stale(file_name: &str) -> bool { - !file_name.starts_with(ARTIFACT_PREFIX) - } - - let file_type = entry.file_type().await; - let file_name = entry.file_name(); - - match file_type { - Ok(file_type) => - if !file_type.is_file() { - return None - }, - Err(err) => { - gum::warn!( - target: LOG_TARGET, - ?err, - "unable to get file type for {:?}", - file_name, - ); - return None - }, - } - - if let Some(file_name) = file_name.to_str() { - let id = ArtifactId::from_file_name(file_name); - let file_path = cache_path.join(file_name); - - if is_stale(file_name) || id.is_none() { - gum::debug!( - target: LOG_TARGET, - "discarding invalid artifact {:?}", - &file_path, - ); - return Some(tokio::fs::remove_file(file_path.clone())) - } - - if let Some(id) = id { - self.insert_prepared(id, SystemTime::now(), Default::default()); - gum::debug!( - target: LOG_TARGET, - "reusing {:?} for node version v{}", - &file_path, - NODE_VERSION, - ); - } - } else { - gum::warn!( - target: LOG_TARGET, - "non-Unicode file name {:?} found in {:?}", - file_name, - cache_path, - ); - } - - None } /// Returns the state of the given artifact by its ID. From 50b7ccc4b20947188585ce67a716a7de6e79c42a Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Sun, 22 Oct 2023 18:10:28 +0800 Subject: [PATCH 15/38] checksum poc --- Cargo.lock | 6 +-- polkadot/node/core/pvf/common/src/prepare.rs | 2 + .../node/core/pvf/prepare-worker/Cargo.toml | 1 + .../node/core/pvf/prepare-worker/src/lib.rs | 5 +- polkadot/node/core/pvf/src/artifacts.rs | 46 +++++++++++++++++-- polkadot/node/core/pvf/src/host.rs | 15 ++++-- .../node/core/pvf/src/prepare/worker_intf.rs | 2 +- 7 files changed, 63 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f12e16276e78..eee57465cc1c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1366,16 +1366,15 @@ dependencies = [ [[package]] name = "blake3" -version = "1.4.1" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "199c42ab6972d92c9f8995f086273d25c42fc0f7b2a1fcefba465c1352d25ba5" +checksum = "0231f06152bf547e9c2b5194f247cd97aacf6dcd8b15d8e5ec0663f64580da87" dependencies = [ "arrayref", "arrayvec 0.7.4", "cc", "cfg-if", "constant_time_eq 0.3.0", - "digest 0.10.7", ] [[package]] @@ -12152,6 +12151,7 @@ dependencies = [ name = "polkadot-node-core-pvf-prepare-worker" version = "1.0.0" dependencies = [ + "blake3", "cfg-if", "futures", "libc", diff --git a/polkadot/node/core/pvf/common/src/prepare.rs b/polkadot/node/core/pvf/common/src/prepare.rs index c205eddfb8b1..03e3c1b87469 100644 --- a/polkadot/node/core/pvf/common/src/prepare.rs +++ b/polkadot/node/core/pvf/common/src/prepare.rs @@ -23,6 +23,8 @@ pub struct PrepareStats { pub cpu_time_elapsed: std::time::Duration, /// The observed memory statistics for the preparation job. pub memory_stats: MemoryStats, + /// checksum + pub checksum: [u8; 32], } /// Helper struct to contain all the memory stats, including `MemoryAllocationStats` and, if diff --git a/polkadot/node/core/pvf/prepare-worker/Cargo.toml b/polkadot/node/core/pvf/prepare-worker/Cargo.toml index 886209b78c32..1502b816a183 100644 --- a/polkadot/node/core/pvf/prepare-worker/Cargo.toml +++ b/polkadot/node/core/pvf/prepare-worker/Cargo.toml @@ -7,6 +7,7 @@ edition.workspace = true license.workspace = true [dependencies] +blake3 = "1.5" cfg-if = "1.0" futures = "0.3.21" gum = { package = "tracing-gum", path = "../../../gum" } diff --git a/polkadot/node/core/pvf/prepare-worker/src/lib.rs b/polkadot/node/core/pvf/prepare-worker/src/lib.rs index fa5d3656a35e..435743a8ab19 100644 --- a/polkadot/node/core/pvf/prepare-worker/src/lib.rs +++ b/polkadot/node/core/pvf/prepare-worker/src/lib.rs @@ -257,7 +257,10 @@ pub fn worker_entrypoint( ); tokio::fs::write(&temp_artifact_dest, &artifact).await?; - Ok(PrepareStats { cpu_time_elapsed, memory_stats }) + let mut gen_hash = blake3::hash(artifact.as_ref()); + let checksum = gen_hash.finalize(); + + Ok(PrepareStats { cpu_time_elapsed, memory_stats, checksum }) }, } }, diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index 8aebf8e7750c..4fa5994008ba 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -111,17 +111,42 @@ const RUNTIME_PREFIX: &str = "wasmtime_"; const NODE_PREFIX: &str = "polkadot_v"; const ARTIFACT_PREFIX: &str = concat_const!(RUNTIME_PREFIX, NODE_PREFIX, NODE_VERSION); +pub type ChecksumInner = [u8; 32]; + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Checksum(ChecksumInner); + +impl Checksum { + pub fn new() -> Self { + Self([0; 32]) + } + + pub fn update(&mut self, src: &ChecksumInner) { + self.0.copy_from_slice(src); + } +} + +impl std::fmt::LowerHex for Checksum { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for byte in self.0 { + write!(f, "{}", byte)?; + } + Ok(()) + } +} + /// Identifier of an artifact. Encodes a code hash of the PVF and a hash of executor parameter set. #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct ArtifactId { pub(crate) code_hash: ValidationCodeHash, pub(crate) executor_params_hash: ExecutorParamsHash, + pub(crate) checksum: Checksum, } impl ArtifactId { /// Creates a new artifact ID with the given hash. pub fn new(code_hash: ValidationCodeHash, executor_params_hash: ExecutorParamsHash) -> Self { - Self { code_hash, executor_params_hash } + Self { code_hash, executor_params_hash, checksum: Checksum::new() } } /// Returns an artifact ID that corresponds to the PVF with given executor params. @@ -129,10 +154,16 @@ impl ArtifactId { Self::new(pvf.code_hash(), pvf.executor_params().hash()) } + pub fn update_checksum(&mut self, src: &ChecksumInner) { + self.checksum.update(src); + } + /// Returns the expected path to this artifact given the root of the cache. pub fn path(&self, cache_path: &Path) -> PathBuf { - let file_name = - format!("{}_{:#x}_{:#x}", ARTIFACT_PREFIX, self.code_hash, self.executor_params_hash); + let file_name = format!( + "{}_{:#x}_{:#x}_{:#x}", + ARTIFACT_PREFIX, self.code_hash, self.executor_params_hash, self.checksum + ); cache_path.join(file_name) } @@ -153,7 +184,7 @@ impl ArtifactId { let executor_params_hash = ExecutorParamsHash::from_hash(Hash::from_str(executor_params_hash_str).ok()?); - Some(Self { code_hash, executor_params_hash }) + Some(Self::new(code_hash, executor_params_hash)) } } @@ -371,6 +402,13 @@ impl Artifacts { .is_none()); } + pub(crate) fn remove_artifact( + &mut self, + artifact_id: &ArtifactId, + ) -> Option<(ArtifactId, ArtifactState)> { + self.inner.remove_entry(&artifact_id) + } + /// Remove and retrieve the artifacts from the table that are older than the supplied /// Time-To-Live. pub fn prune(&mut self, artifact_ttl: Duration) -> Vec { diff --git a/polkadot/node/core/pvf/src/host.rs b/polkadot/node/core/pvf/src/host.rs index 5f0671a90ba4..df15bd5d59cb 100644 --- a/polkadot/node/core/pvf/src/host.rs +++ b/polkadot/node/core/pvf/src/host.rs @@ -757,9 +757,14 @@ async fn handle_prepare_done( .await?; } - *state = match result { - Ok(prepare_stats) => - ArtifactState::Prepared { last_time_needed: SystemTime::now(), prepare_stats }, + match result { + Ok(prepare_stats) => match artifacts.remove_artifact(&artifact_id) { + Some((mut id, _)) => { + id.update_checksum(&prepare_stats.checksum); + artifacts.insert_prepared(artifact_id, SystemTime::now(), prepare_stats); + }, + None => unreachable!(), + }, Err(error) => { let last_time_failed = SystemTime::now(); let num_failures = *num_failures + 1; @@ -772,9 +777,9 @@ async fn handle_prepare_done( "artifact preparation failed: {}", error ); - ArtifactState::FailedToProcess { last_time_failed, num_failures, error } + *state = ArtifactState::FailedToProcess { last_time_failed, num_failures, error }; }, - }; + } Ok(()) } diff --git a/polkadot/node/core/pvf/src/prepare/worker_intf.rs b/polkadot/node/core/pvf/src/prepare/worker_intf.rs index b66c36044343..6c3b32a9eb2c 100644 --- a/polkadot/node/core/pvf/src/prepare/worker_intf.rs +++ b/polkadot/node/core/pvf/src/prepare/worker_intf.rs @@ -199,7 +199,7 @@ async fn handle_response( artifact_path: PathBuf, preparation_timeout: Duration, ) -> Outcome { - let PrepareStats { cpu_time_elapsed, memory_stats } = match result.clone() { + let PrepareStats { cpu_time_elapsed, memory_stats, .. } = match result.clone() { Ok(result) => result, // Timed out on the child. This should already be logged by the child. Err(PrepareError::TimedOut) => return Outcome::TimedOut, From b6c1a0753a8f91e75e30618712afee04100efbee Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Mon, 30 Oct 2023 13:20:12 +0800 Subject: [PATCH 16/38] Revert "checksum poc" This reverts commit 50b7ccc4b20947188585ce67a716a7de6e79c42a. --- Cargo.lock | 6 +-- polkadot/node/core/pvf/common/src/prepare.rs | 2 - .../node/core/pvf/prepare-worker/Cargo.toml | 1 - .../node/core/pvf/prepare-worker/src/lib.rs | 5 +- polkadot/node/core/pvf/src/artifacts.rs | 46 ++----------------- polkadot/node/core/pvf/src/host.rs | 15 ++---- .../node/core/pvf/src/prepare/worker_intf.rs | 2 +- 7 files changed, 14 insertions(+), 63 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index eee57465cc1c..f12e16276e78 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1366,15 +1366,16 @@ dependencies = [ [[package]] name = "blake3" -version = "1.5.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0231f06152bf547e9c2b5194f247cd97aacf6dcd8b15d8e5ec0663f64580da87" +checksum = "199c42ab6972d92c9f8995f086273d25c42fc0f7b2a1fcefba465c1352d25ba5" dependencies = [ "arrayref", "arrayvec 0.7.4", "cc", "cfg-if", "constant_time_eq 0.3.0", + "digest 0.10.7", ] [[package]] @@ -12151,7 +12152,6 @@ dependencies = [ name = "polkadot-node-core-pvf-prepare-worker" version = "1.0.0" dependencies = [ - "blake3", "cfg-if", "futures", "libc", diff --git a/polkadot/node/core/pvf/common/src/prepare.rs b/polkadot/node/core/pvf/common/src/prepare.rs index 03e3c1b87469..c205eddfb8b1 100644 --- a/polkadot/node/core/pvf/common/src/prepare.rs +++ b/polkadot/node/core/pvf/common/src/prepare.rs @@ -23,8 +23,6 @@ pub struct PrepareStats { pub cpu_time_elapsed: std::time::Duration, /// The observed memory statistics for the preparation job. pub memory_stats: MemoryStats, - /// checksum - pub checksum: [u8; 32], } /// Helper struct to contain all the memory stats, including `MemoryAllocationStats` and, if diff --git a/polkadot/node/core/pvf/prepare-worker/Cargo.toml b/polkadot/node/core/pvf/prepare-worker/Cargo.toml index 1502b816a183..886209b78c32 100644 --- a/polkadot/node/core/pvf/prepare-worker/Cargo.toml +++ b/polkadot/node/core/pvf/prepare-worker/Cargo.toml @@ -7,7 +7,6 @@ edition.workspace = true license.workspace = true [dependencies] -blake3 = "1.5" cfg-if = "1.0" futures = "0.3.21" gum = { package = "tracing-gum", path = "../../../gum" } diff --git a/polkadot/node/core/pvf/prepare-worker/src/lib.rs b/polkadot/node/core/pvf/prepare-worker/src/lib.rs index 435743a8ab19..fa5d3656a35e 100644 --- a/polkadot/node/core/pvf/prepare-worker/src/lib.rs +++ b/polkadot/node/core/pvf/prepare-worker/src/lib.rs @@ -257,10 +257,7 @@ pub fn worker_entrypoint( ); tokio::fs::write(&temp_artifact_dest, &artifact).await?; - let mut gen_hash = blake3::hash(artifact.as_ref()); - let checksum = gen_hash.finalize(); - - Ok(PrepareStats { cpu_time_elapsed, memory_stats, checksum }) + Ok(PrepareStats { cpu_time_elapsed, memory_stats }) }, } }, diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index 4fa5994008ba..8aebf8e7750c 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -111,42 +111,17 @@ const RUNTIME_PREFIX: &str = "wasmtime_"; const NODE_PREFIX: &str = "polkadot_v"; const ARTIFACT_PREFIX: &str = concat_const!(RUNTIME_PREFIX, NODE_PREFIX, NODE_VERSION); -pub type ChecksumInner = [u8; 32]; - -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct Checksum(ChecksumInner); - -impl Checksum { - pub fn new() -> Self { - Self([0; 32]) - } - - pub fn update(&mut self, src: &ChecksumInner) { - self.0.copy_from_slice(src); - } -} - -impl std::fmt::LowerHex for Checksum { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - for byte in self.0 { - write!(f, "{}", byte)?; - } - Ok(()) - } -} - /// Identifier of an artifact. Encodes a code hash of the PVF and a hash of executor parameter set. #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct ArtifactId { pub(crate) code_hash: ValidationCodeHash, pub(crate) executor_params_hash: ExecutorParamsHash, - pub(crate) checksum: Checksum, } impl ArtifactId { /// Creates a new artifact ID with the given hash. pub fn new(code_hash: ValidationCodeHash, executor_params_hash: ExecutorParamsHash) -> Self { - Self { code_hash, executor_params_hash, checksum: Checksum::new() } + Self { code_hash, executor_params_hash } } /// Returns an artifact ID that corresponds to the PVF with given executor params. @@ -154,16 +129,10 @@ impl ArtifactId { Self::new(pvf.code_hash(), pvf.executor_params().hash()) } - pub fn update_checksum(&mut self, src: &ChecksumInner) { - self.checksum.update(src); - } - /// Returns the expected path to this artifact given the root of the cache. pub fn path(&self, cache_path: &Path) -> PathBuf { - let file_name = format!( - "{}_{:#x}_{:#x}_{:#x}", - ARTIFACT_PREFIX, self.code_hash, self.executor_params_hash, self.checksum - ); + let file_name = + format!("{}_{:#x}_{:#x}", ARTIFACT_PREFIX, self.code_hash, self.executor_params_hash); cache_path.join(file_name) } @@ -184,7 +153,7 @@ impl ArtifactId { let executor_params_hash = ExecutorParamsHash::from_hash(Hash::from_str(executor_params_hash_str).ok()?); - Some(Self::new(code_hash, executor_params_hash)) + Some(Self { code_hash, executor_params_hash }) } } @@ -402,13 +371,6 @@ impl Artifacts { .is_none()); } - pub(crate) fn remove_artifact( - &mut self, - artifact_id: &ArtifactId, - ) -> Option<(ArtifactId, ArtifactState)> { - self.inner.remove_entry(&artifact_id) - } - /// Remove and retrieve the artifacts from the table that are older than the supplied /// Time-To-Live. pub fn prune(&mut self, artifact_ttl: Duration) -> Vec { diff --git a/polkadot/node/core/pvf/src/host.rs b/polkadot/node/core/pvf/src/host.rs index df15bd5d59cb..5f0671a90ba4 100644 --- a/polkadot/node/core/pvf/src/host.rs +++ b/polkadot/node/core/pvf/src/host.rs @@ -757,14 +757,9 @@ async fn handle_prepare_done( .await?; } - match result { - Ok(prepare_stats) => match artifacts.remove_artifact(&artifact_id) { - Some((mut id, _)) => { - id.update_checksum(&prepare_stats.checksum); - artifacts.insert_prepared(artifact_id, SystemTime::now(), prepare_stats); - }, - None => unreachable!(), - }, + *state = match result { + Ok(prepare_stats) => + ArtifactState::Prepared { last_time_needed: SystemTime::now(), prepare_stats }, Err(error) => { let last_time_failed = SystemTime::now(); let num_failures = *num_failures + 1; @@ -777,9 +772,9 @@ async fn handle_prepare_done( "artifact preparation failed: {}", error ); - *state = ArtifactState::FailedToProcess { last_time_failed, num_failures, error }; + ArtifactState::FailedToProcess { last_time_failed, num_failures, error } }, - } + }; Ok(()) } diff --git a/polkadot/node/core/pvf/src/prepare/worker_intf.rs b/polkadot/node/core/pvf/src/prepare/worker_intf.rs index 6c3b32a9eb2c..b66c36044343 100644 --- a/polkadot/node/core/pvf/src/prepare/worker_intf.rs +++ b/polkadot/node/core/pvf/src/prepare/worker_intf.rs @@ -199,7 +199,7 @@ async fn handle_response( artifact_path: PathBuf, preparation_timeout: Duration, ) -> Outcome { - let PrepareStats { cpu_time_elapsed, memory_stats, .. } = match result.clone() { + let PrepareStats { cpu_time_elapsed, memory_stats } = match result.clone() { Ok(result) => result, // Timed out on the child. This should already be logged by the child. Err(PrepareError::TimedOut) => return Outcome::TimedOut, From 2ad526273ef2554763c4109c5ad32f9e715dcf6b Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Mon, 30 Oct 2023 19:29:03 +0800 Subject: [PATCH 17/38] redo checksum p1 --- Cargo.lock | 6 +++--- polkadot/node/core/pvf/Cargo.toml | 1 + polkadot/node/core/pvf/src/artifacts.rs | 12 +++++------- .../node/core/pvf/src/prepare/worker_intf.rs | 16 +++++++++++++++- 4 files changed, 24 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f12e16276e78..3a1a0530fdac 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1366,16 +1366,15 @@ dependencies = [ [[package]] name = "blake3" -version = "1.4.1" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "199c42ab6972d92c9f8995f086273d25c42fc0f7b2a1fcefba465c1352d25ba5" +checksum = "0231f06152bf547e9c2b5194f247cd97aacf6dcd8b15d8e5ec0663f64580da87" dependencies = [ "arrayref", "arrayvec 0.7.4", "cc", "cfg-if", "constant_time_eq 0.3.0", - "digest 0.10.7", ] [[package]] @@ -12053,6 +12052,7 @@ version = "1.0.0" dependencies = [ "always-assert", "assert_matches", + "blake3", "cfg-if", "futures", "futures-timer", diff --git a/polkadot/node/core/pvf/Cargo.toml b/polkadot/node/core/pvf/Cargo.toml index 27f4df117e57..7d7e5d97990d 100644 --- a/polkadot/node/core/pvf/Cargo.toml +++ b/polkadot/node/core/pvf/Cargo.toml @@ -8,6 +8,7 @@ license.workspace = true [dependencies] always-assert = "0.1" +blake3 = "1.5" cfg-if = "1.0" futures = "0.3.21" futures-timer = "3.0.2" diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index 8aebf8e7750c..8ae3334ba5c9 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -71,7 +71,7 @@ use std::{ // A workaround for defining a `const` that is a concatenation of other constants. macro_rules! concat_const { - ($($arg:tt),*) => {{ + ($($arg:expr),*) => {{ // ensure inputs to be strings $(const _: &str = $arg;)* @@ -80,8 +80,8 @@ macro_rules! concat_const { // concatenate strings as byte slices const CAT: [u8; LEN] = { let mut cat = [0u8; LEN]; - // for turning off unused warning - let mut _offset = 0; + #[allow(unused_variables)] + let mut offset = 0; $({ const BYTES: &[u8] = $arg.as_bytes(); @@ -89,10 +89,10 @@ macro_rules! concat_const { let mut i = 0; let len = BYTES.len(); while i < len { - cat[_offset + i] = BYTES[i]; + cat[offset + i] = BYTES[i]; i += 1; } - _offset += len; + offset += len; })* cat @@ -317,9 +317,7 @@ impl Artifacts { loop { match dir.next_entry().await { Ok(Some(entry)) => insert_or_prune(self, &entry, cache_path).await, - Ok(None) => break, - Err(err) => { gum::warn!( target: LOG_TARGET, diff --git a/polkadot/node/core/pvf/src/prepare/worker_intf.rs b/polkadot/node/core/pvf/src/prepare/worker_intf.rs index b66c36044343..897bf14d9dba 100644 --- a/polkadot/node/core/pvf/src/prepare/worker_intf.rs +++ b/polkadot/node/core/pvf/src/prepare/worker_intf.rs @@ -196,7 +196,7 @@ async fn handle_response( result: PrepareResult, worker_pid: u32, tmp_file: PathBuf, - artifact_path: PathBuf, + artifact_path_partial: PathBuf, preparation_timeout: Duration, ) -> Outcome { let PrepareStats { cpu_time_elapsed, memory_stats } = match result.clone() { @@ -219,6 +219,20 @@ async fn handle_response( return Outcome::TimedOut } + let checksum = match tokio::fs::read(&tmp_file).await { + Ok(bytes) => blake3::hash(&bytes), + // FIXME eagr: handle error properly + Err(_) => return Outcome::Unreachable, + }; + + let file_name_partial = artifact_path_partial + .file_name() + .expect("the path should never terminate in `..`"); + let mut file_name = file_name_partial.to_os_string(); + file_name.push("_0x"); + file_name.push(checksum.to_hex().as_str()); + let artifact_path = artifact_path_partial.with_file_name(&file_name); + gum::debug!( target: LOG_TARGET, %worker_pid, From 372380691f1fd9a30d0004156e37e0eadac62375 Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Mon, 30 Oct 2023 21:59:27 +0800 Subject: [PATCH 18/38] p2 --- polkadot/node/core/pvf/src/artifacts.rs | 65 ++++++++++--------- polkadot/node/core/pvf/src/host.rs | 53 ++++++++------- polkadot/node/core/pvf/src/prepare/pool.rs | 14 +++- polkadot/node/core/pvf/src/prepare/queue.rs | 17 +++-- .../node/core/pvf/src/prepare/worker_intf.rs | 6 +- 5 files changed, 85 insertions(+), 70 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index 8ae3334ba5c9..bc4849d89d24 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -130,30 +130,26 @@ impl ArtifactId { } /// Returns the expected path to this artifact given the root of the cache. - pub fn path(&self, cache_path: &Path) -> PathBuf { + pub fn path_prefix(&self, cache_path: &Path) -> PathBuf { let file_name = format!("{}_{:#x}_{:#x}", ARTIFACT_PREFIX, self.code_hash, self.executor_params_hash); cache_path.join(file_name) } /// Tries to recover the artifact id from the given file name. + /// Return `None` if the given file name is not valid. fn from_file_name(file_name: &str) -> Option { + // VALID_NAME := _ _ _ let file_name = file_name.strip_prefix(ARTIFACT_PREFIX)?.strip_prefix('_')?; + let parts: Vec<&str> = file_name.split('_').collect(); - // [ code hash | param hash ] - let hashes: Vec<&str> = file_name.split('_').collect(); - - if hashes.len() != 2 { - return None + if let [code_hash, param_hash, _checksum] = parts[..] { + let code_hash = Hash::from_str(code_hash_str).ok()?.into(); + let executor_params_hash = + ExecutorParamsHash::from_hash(Hash::from_str(param_hash).ok()?); + Some(Self { code_hash, executor_params_hash }) } - - let (code_hash_str, executor_params_hash_str) = (hashes[0], hashes[1]); - - let code_hash = Hash::from_str(code_hash_str).ok()?.into(); - let executor_params_hash = - ExecutorParamsHash::from_hash(Hash::from_str(executor_params_hash_str).ok()?); - - Some(Self { code_hash, executor_params_hash }) + None } } @@ -171,8 +167,8 @@ pub struct ArtifactPathId { } impl ArtifactPathId { - pub(crate) fn new(artifact_id: ArtifactId, cache_path: &Path) -> Self { - Self { path: artifact_id.path(cache_path), id: artifact_id } + pub(crate) fn new(artifact_id: ArtifactId, path: &Path) -> Self { + Self { id: artifact_id, path: path.to_owned() } } } @@ -182,6 +178,8 @@ pub enum ArtifactState { /// That means that the artifact should be accessible through the path obtained by the artifact /// id (unless, it was removed externally). Prepared { + /// The path of the compiled artifact. + path: PathBuf, /// The time when the artifact was last needed. /// /// This is updated when we get the heads up for this artifact or when we just discover @@ -235,10 +233,6 @@ impl Artifacts { } async fn insert_and_prune(&mut self, cache_path: &Path) { - fn is_stale(file_name: &str) -> bool { - !file_name.starts_with(ARTIFACT_PREFIX) - } - // Inserts the entry into the artifacts table if it is a valid artifact file, or prune it // otherwise. async fn insert_or_prune( @@ -267,26 +261,26 @@ impl Artifacts { if let Some(file_name) = file_name.to_str() { let id = ArtifactId::from_file_name(file_name); - let file_path = cache_path.join(file_name); + let path = cache_path.join(file_name); - if is_stale(file_name) || id.is_none() { + if id.is_none() { gum::debug!( target: LOG_TARGET, "discarding invalid artifact {:?}", - &file_path, + &path, ); - let _ = tokio::fs::remove_file(&file_path).await; + let _ = tokio::fs::remove_file(&path).await; return } if let Some(id) = id { - artifacts.insert_prepared(id, SystemTime::now(), Default::default()); gum::debug!( target: LOG_TARGET, "reusing {:?} for node version v{}", - &file_path, + &path, NODE_VERSION, ); + artifacts.insert_prepared(id, path, SystemTime::now(), Default::default()); } } else { gum::warn!( @@ -353,19 +347,17 @@ impl Artifacts { } /// Insert an artifact with the given ID as "prepared". - /// - /// This function must be used only for brand-new artifacts and should never be used for - /// replacing existing ones. pub(crate) fn insert_prepared( &mut self, artifact_id: ArtifactId, + path: PathBuf, last_time_needed: SystemTime, prepare_stats: PrepareStats, ) { // See the precondition. always!(self .inner - .insert(artifact_id, ArtifactState::Prepared { last_time_needed, prepare_stats }) + .insert(artifact_id, ArtifactState::Prepared { path, last_time_needed, prepare_stats }) .is_none()); } @@ -393,6 +385,15 @@ impl Artifacts { to_remove } + + pub fn get_path(&self, id: &ArtifactId) -> Option { + if let Some(state) = self.inner.get(id) { + if let ArtifactState::Prepared { path, .. } = state { + return Some(path.clone()) + } + } + None + } } #[cfg(test)] @@ -473,7 +474,7 @@ mod tests { } #[test] - fn path() { + fn path_prefix() { let dir = Path::new("/test"); let code_hash = "1234567890123456789012345678901234567890123456789012345678901234"; let params_hash = "4321098765432109876543210987654321098765432109876543210987654321"; @@ -484,7 +485,7 @@ mod tests { assert_eq!( ArtifactId::new(code_hash.into(), ExecutorParamsHash::from_hash(params_hash)) - .path(dir) + .path_prefix(dir) .to_str(), Some(format!("/test/{}", file_name).as_str()), ); diff --git a/polkadot/node/core/pvf/src/host.rs b/polkadot/node/core/pvf/src/host.rs index 5f0671a90ba4..18fb418822de 100644 --- a/polkadot/node/core/pvf/src/host.rs +++ b/polkadot/node/core/pvf/src/host.rs @@ -378,7 +378,6 @@ async fn run( // If the artifact failed before, it could be re-scheduled for preparation here if // the preparation failure cooldown has elapsed. break_if_fatal!(handle_to_host( - &cache_path, &mut artifacts, &mut to_prepare_queue_tx, &mut to_execute_queue_tx, @@ -400,7 +399,6 @@ async fn run( // We could be eager in terms of reporting and plumb the result from the preparation // worker but we don't for the sake of simplicity. break_if_fatal!(handle_prepare_done( - &cache_path, &mut artifacts, &mut to_execute_queue_tx, &mut awaiting_prepare, @@ -412,7 +410,6 @@ async fn run( } async fn handle_to_host( - cache_path: &Path, artifacts: &mut Artifacts, prepare_queue: &mut mpsc::Sender, execute_queue: &mut mpsc::Sender, @@ -424,15 +421,8 @@ async fn handle_to_host( handle_precheck_pvf(artifacts, prepare_queue, pvf, result_tx).await?; }, ToHost::ExecutePvf(inputs) => { - handle_execute_pvf( - cache_path, - artifacts, - prepare_queue, - execute_queue, - awaiting_prepare, - inputs, - ) - .await?; + handle_execute_pvf(artifacts, prepare_queue, execute_queue, awaiting_prepare, inputs) + .await?; }, ToHost::HeadsUp { active_pvfs } => handle_heads_up(artifacts, prepare_queue, active_pvfs).await?, @@ -457,7 +447,7 @@ async fn handle_precheck_pvf( if let Some(state) = artifacts.artifact_state_mut(&artifact_id) { match state { - ArtifactState::Prepared { last_time_needed, prepare_stats } => { + ArtifactState::Prepared { last_time_needed, prepare_stats, .. } => { *last_time_needed = SystemTime::now(); let _ = result_sender.send(Ok(prepare_stats.clone())); }, @@ -489,7 +479,6 @@ async fn handle_precheck_pvf( /// When preparing for execution, we use a more lenient timeout ([`LENIENT_PREPARATION_TIMEOUT`]) /// than when prechecking. async fn handle_execute_pvf( - cache_path: &Path, artifacts: &mut Artifacts, prepare_queue: &mut mpsc::Sender, execute_queue: &mut mpsc::Sender, @@ -502,8 +491,8 @@ async fn handle_execute_pvf( if let Some(state) = artifacts.artifact_state_mut(&artifact_id) { match state { - ArtifactState::Prepared { last_time_needed, .. } => { - let file_metadata = std::fs::metadata(artifact_id.path(cache_path)); + ArtifactState::Prepared { ref path, last_time_needed, .. } => { + let file_metadata = std::fs::metadata(path); if file_metadata.is_ok() { *last_time_needed = SystemTime::now(); @@ -512,7 +501,7 @@ async fn handle_execute_pvf( send_execute( execute_queue, execute::ToQueue::Enqueue { - artifact: ArtifactPathId::new(artifact_id, cache_path), + artifact: ArtifactPathId::new(artifact_id, path), pending_execution_request: PendingExecutionRequest { exec_timeout, params, @@ -675,13 +664,12 @@ async fn handle_heads_up( } async fn handle_prepare_done( - cache_path: &Path, artifacts: &mut Artifacts, execute_queue: &mut mpsc::Sender, awaiting_prepare: &mut AwaitingPrepare, from_queue: prepare::FromQueue, ) -> Result<(), Fatal> { - let prepare::FromQueue { artifact_id, result } = from_queue; + let prepare::FromQueue { artifact_id, result, path } = from_queue; // Make some sanity checks and extract the current state. let state = match artifacts.artifact_state_mut(&artifact_id) { @@ -742,10 +730,16 @@ async fn handle_prepare_done( continue } + let path = if let Some(ref path) = path { + path.as_path() + } else { + unreachable!("path must be available if result is ok"); + }; + send_execute( execute_queue, execute::ToQueue::Enqueue { - artifact: ArtifactPathId::new(artifact_id.clone(), cache_path), + artifact: ArtifactPathId::new(artifact_id.clone(), path), pending_execution_request: PendingExecutionRequest { exec_timeout, params, @@ -758,8 +752,14 @@ async fn handle_prepare_done( } *state = match result { - Ok(prepare_stats) => - ArtifactState::Prepared { last_time_needed: SystemTime::now(), prepare_stats }, + Ok(prepare_stats) => { + let path = if let Some(path) = path { + path + } else { + unreachable!("path must be available if result is ok") + }; + ArtifactState::Prepared { path, last_time_needed: SystemTime::now(), prepare_stats } + }, Err(error) => { let last_time_failed = SystemTime::now(); let num_failures = *num_failures + 1; @@ -829,8 +829,9 @@ async fn handle_cleanup_pulse( validation_code_hash = ?artifact_id.code_hash, "pruning artifact", ); - let artifact_path = artifact_id.path(cache_path); - sweeper_tx.send(artifact_path).await.map_err(|_| Fatal)?; + if let Some(path) = artifacts.get_path(&artifact_id) { + sweeper_tx.send(path).await.map_err(|_| Fatal)?; + } } Ok(()) @@ -1012,7 +1013,9 @@ pub(crate) mod tests { } fn artifact_path(descriminator: u32) -> PathBuf { - artifact_id(descriminator).path(&PathBuf::from(std::env::temp_dir())).to_owned() + artifact_id(descriminator) + .path_prefix(&PathBuf::from(std::env::temp_dir())) + .to_owned() } struct Builder { diff --git a/polkadot/node/core/pvf/src/prepare/pool.rs b/polkadot/node/core/pvf/src/prepare/pool.rs index 7933b0319a6f..9902d3ad364b 100644 --- a/polkadot/node/core/pvf/src/prepare/pool.rs +++ b/polkadot/node/core/pvf/src/prepare/pool.rs @@ -86,6 +86,8 @@ pub enum FromPool { /// [`Ok`] indicates that compiled artifact is successfully stored on disk. /// Otherwise, an [error](PrepareError) is supplied. result: PrepareResult, + /// The path of the artifact, only available if successfully compiled. + path: Option, }, /// The given worker ceased to exist. @@ -330,8 +332,8 @@ fn handle_mux( // If we receive an outcome that the worker is unreachable or that an error occurred on // the worker, we attempt to kill the worker process. match outcome { - Outcome::Concluded { worker: idle, result } => - handle_concluded_no_rip(from_pool, spawned, worker, idle, result), + Outcome::Concluded { worker: idle, result, path } => + handle_concluded_no_rip(from_pool, spawned, worker, idle, result, path), // Return `Concluded`, but do not kill the worker since the error was on the host // side. Outcome::CreateTmpFileErr { worker: idle, err } => handle_concluded_no_rip( @@ -340,6 +342,7 @@ fn handle_mux( worker, idle, Err(PrepareError::CreateTmpFileErr(err)), + None, ), // Return `Concluded`, but do not kill the worker since the error was on the host // side. @@ -350,6 +353,7 @@ fn handle_mux( worker, idle, Err(PrepareError::RenameTmpFileErr { err, src, dest }), + None, ), // Could not clear worker cache. Kill the worker so other jobs can't see the data. Outcome::ClearWorkerDir { err } => { @@ -360,6 +364,7 @@ fn handle_mux( worker, rip: true, result: Err(PrepareError::ClearWorkerDir(err)), + path: None, }, )?; } @@ -381,6 +386,7 @@ fn handle_mux( worker, rip: true, result: Err(PrepareError::IoErr(err)), + path: None, }, )?; } @@ -395,6 +401,7 @@ fn handle_mux( worker, rip: true, result: Err(PrepareError::TimedOut), + path: None, }, )?; } @@ -440,6 +447,7 @@ fn handle_concluded_no_rip( worker: Worker, idle: IdleWorker, result: PrepareResult, + path: Option, ) -> Result<(), Fatal> { let data = match spawned.get_mut(worker) { None => { @@ -458,7 +466,7 @@ fn handle_concluded_no_rip( "old idle worker was taken out when starting work; we only replace it here; qed" ); - reply(from_pool, FromPool::Concluded { worker, rip: false, result })?; + reply(from_pool, FromPool::Concluded { worker, rip: false, result, path })?; Ok(()) } diff --git a/polkadot/node/core/pvf/src/prepare/queue.rs b/polkadot/node/core/pvf/src/prepare/queue.rs index c38012d74548..8277c947eb49 100644 --- a/polkadot/node/core/pvf/src/prepare/queue.rs +++ b/polkadot/node/core/pvf/src/prepare/queue.rs @@ -48,6 +48,8 @@ pub struct FromQueue { /// is successfully stored on disk. Otherwise, an [error](crate::error::PrepareError) /// is supplied. pub(crate) result: PrepareResult, + /// Path of PVF artifact, only available if compiled successfully. + pub(crate) path: Option, } #[derive(Default)] @@ -268,12 +270,12 @@ fn find_idle_worker(queue: &mut Queue) -> Option { } async fn handle_from_pool(queue: &mut Queue, from_pool: pool::FromPool) -> Result<(), Fatal> { - use pool::FromPool::*; + use pool::FromPool; match from_pool { - Spawned(worker) => handle_worker_spawned(queue, worker).await?, - Concluded { worker, rip, result } => - handle_worker_concluded(queue, worker, rip, result).await?, - Rip(worker) => handle_worker_rip(queue, worker).await?, + FromPool::Spawned(worker) => handle_worker_spawned(queue, worker).await?, + FromPool::Concluded { worker, rip, result, path } => + handle_worker_concluded(queue, worker, rip, result, path).await?, + FromPool::Rip(worker) => handle_worker_rip(queue, worker).await?, } Ok(()) } @@ -294,6 +296,7 @@ async fn handle_worker_concluded( worker: Worker, rip: bool, result: PrepareResult, + path: Option, ) -> Result<(), Fatal> { queue.metrics.prepare_concluded(); @@ -351,7 +354,7 @@ async fn handle_worker_concluded( "prepare worker concluded", ); - reply(&mut queue.from_queue_tx, FromQueue { artifact_id, result })?; + reply(&mut queue.from_queue_tx, FromQueue { artifact_id, result, path })?; // Figure out what to do with the worker. if rip { @@ -426,7 +429,7 @@ async fn assign(queue: &mut Queue, worker: Worker, job: Job) -> Result<(), Fatal let job_data = &mut queue.jobs[job]; let artifact_id = ArtifactId::from_pvf_prep_data(&job_data.pvf); - let artifact_path = artifact_id.path(&queue.cache_path); + let artifact_path = artifact_id.path_prefix(&queue.cache_path); job_data.worker = Some(worker); diff --git a/polkadot/node/core/pvf/src/prepare/worker_intf.rs b/polkadot/node/core/pvf/src/prepare/worker_intf.rs index 897bf14d9dba..81f452604917 100644 --- a/polkadot/node/core/pvf/src/prepare/worker_intf.rs +++ b/polkadot/node/core/pvf/src/prepare/worker_intf.rs @@ -70,7 +70,7 @@ pub async fn spawn( /// If the idle worker token is not returned, it means the worker must be terminated. pub enum Outcome { /// The worker has finished the work assigned to it. - Concluded { worker: IdleWorker, result: PrepareResult }, + Concluded { worker: IdleWorker, result: PrepareResult, path: Option }, /// The host tried to reach the worker but failed. This is most likely because the worked was /// killed by the system. Unreachable, @@ -203,7 +203,7 @@ async fn handle_response( Ok(result) => result, // Timed out on the child. This should already be logged by the child. Err(PrepareError::TimedOut) => return Outcome::TimedOut, - Err(_) => return Outcome::Concluded { worker, result }, + Err(_) => return Outcome::Concluded { worker, result, path: None }, }; if cpu_time_elapsed > preparation_timeout { @@ -242,7 +242,7 @@ async fn handle_response( ); let outcome = match tokio::fs::rename(&tmp_file, &artifact_path).await { - Ok(()) => Outcome::Concluded { worker, result }, + Ok(()) => Outcome::Concluded { worker, result, path: Some(artifact_path) }, Err(err) => { gum::warn!( target: LOG_TARGET, From e44c451434a5361b73d55816e04ddbd1de66cf42 Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Tue, 31 Oct 2023 13:19:10 +0800 Subject: [PATCH 19/38] remove corrupted cache --- polkadot/node/core/pvf/src/artifacts.rs | 50 +++++++++++++++++-------- polkadot/node/core/pvf/src/host.rs | 2 - 2 files changed, 35 insertions(+), 17 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index bc4849d89d24..bdf81a568657 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -80,8 +80,7 @@ macro_rules! concat_const { // concatenate strings as byte slices const CAT: [u8; LEN] = { let mut cat = [0u8; LEN]; - #[allow(unused_variables)] - let mut offset = 0; + let mut _offset = 0; $({ const BYTES: &[u8] = $arg.as_bytes(); @@ -89,10 +88,10 @@ macro_rules! concat_const { let mut i = 0; let len = BYTES.len(); while i < len { - cat[offset + i] = BYTES[i]; + cat[_offset + i] = BYTES[i]; i += 1; } - offset += len; + _offset += len; })* cat @@ -137,18 +136,19 @@ impl ArtifactId { } /// Tries to recover the artifact id from the given file name. - /// Return `None` if the given file name is not valid. + /// Return `None` if the given file name is invalid. + /// VALID_NAME := _ _ _ fn from_file_name(file_name: &str) -> Option { - // VALID_NAME := _ _ _ let file_name = file_name.strip_prefix(ARTIFACT_PREFIX)?.strip_prefix('_')?; let parts: Vec<&str> = file_name.split('_').collect(); if let [code_hash, param_hash, _checksum] = parts[..] { - let code_hash = Hash::from_str(code_hash_str).ok()?.into(); + let code_hash = Hash::from_str(code_hash).ok()?.into(); let executor_params_hash = ExecutorParamsHash::from_hash(Hash::from_str(param_hash).ok()?); - Some(Self { code_hash, executor_params_hash }) + return Some(Self { code_hash, executor_params_hash }) } + None } } @@ -233,8 +233,30 @@ impl Artifacts { } async fn insert_and_prune(&mut self, cache_path: &Path) { - // Inserts the entry into the artifacts table if it is a valid artifact file, or prune it - // otherwise. + async fn is_corrupted(path: &Path) -> bool { + let file_name = path + .file_stem() + .expect("path must contain a file name") + .to_str() + .expect("file name must be valid UTF-8"); + let checksum = match tokio::fs::read(path).await { + Ok(bytes) => blake3::hash(&bytes), + Err(err) => { + // just remove the file if we cannot read it + gum::warn!( + target: LOG_TARGET, + ?err, + "unable to read file {:?} when checking integrity", + path, + ); + return true + }, + }; + !file_name.ends_with(checksum.to_hex().as_str()) + } + + // Insert the entry into the artifacts table if it is valid. + // Otherwise, prune it. async fn insert_or_prune( artifacts: &mut Artifacts, entry: &tokio::fs::DirEntry, @@ -263,7 +285,7 @@ impl Artifacts { let id = ArtifactId::from_file_name(file_name); let path = cache_path.join(file_name); - if id.is_none() { + if id.is_none() || is_corrupted(&path).await { gum::debug!( target: LOG_TARGET, "discarding invalid artifact {:?}", @@ -387,10 +409,8 @@ impl Artifacts { } pub fn get_path(&self, id: &ArtifactId) -> Option { - if let Some(state) = self.inner.get(id) { - if let ArtifactState::Prepared { path, .. } = state { - return Some(path.clone()) - } + if let Some(ArtifactState::Prepared { path, .. }) = self.inner.get(id) { + return Some(path.clone()) } None } diff --git a/polkadot/node/core/pvf/src/host.rs b/polkadot/node/core/pvf/src/host.rs index 18fb418822de..01f9205f69fa 100644 --- a/polkadot/node/core/pvf/src/host.rs +++ b/polkadot/node/core/pvf/src/host.rs @@ -359,7 +359,6 @@ async fn run( // will notice it. break_if_fatal!(handle_cleanup_pulse( - &cache_path, &mut to_sweeper_tx, &mut artifacts, artifact_ttl, @@ -812,7 +811,6 @@ async fn enqueue_prepare_for_execute( } async fn handle_cleanup_pulse( - cache_path: &Path, sweeper_tx: &mut mpsc::Sender, artifacts: &mut Artifacts, artifact_ttl: Duration, From 6c1916441084962b2fb0991172e177abd2366db1 Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Wed, 1 Nov 2023 11:22:48 +0800 Subject: [PATCH 20/38] diversify results --- Cargo.lock | 1 + polkadot/node/core/pvf/common/src/error.rs | 16 +++++-- polkadot/node/core/pvf/common/src/prepare.rs | 10 ++++ .../node/core/pvf/prepare-worker/Cargo.toml | 1 + .../node/core/pvf/prepare-worker/src/lib.rs | 11 +++-- polkadot/node/core/pvf/src/artifacts.rs | 6 +-- polkadot/node/core/pvf/src/host.rs | 46 ++++++++----------- polkadot/node/core/pvf/src/prepare/pool.rs | 14 ++---- polkadot/node/core/pvf/src/prepare/queue.rs | 9 ++-- .../node/core/pvf/src/prepare/worker_intf.rs | 40 ++++++++-------- 10 files changed, 78 insertions(+), 76 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3a1a0530fdac..0a03a72bb643 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12152,6 +12152,7 @@ dependencies = [ name = "polkadot-node-core-pvf-prepare-worker" version = "1.0.0" dependencies = [ + "blake3", "cfg-if", "futures", "libc", diff --git a/polkadot/node/core/pvf/common/src/error.rs b/polkadot/node/core/pvf/common/src/error.rs index 6fdd06057c8b..07e2a3580e1d 100644 --- a/polkadot/node/core/pvf/common/src/error.rs +++ b/polkadot/node/core/pvf/common/src/error.rs @@ -14,13 +14,21 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use crate::prepare::PrepareStats; +use crate::prepare::{PrepareStats, PrepareSuccess}; use parity_scale_codec::{Decode, Encode}; use std::fmt; -/// Result of PVF preparation performed by the validation host. Contains stats about the preparation -/// if successful -pub type PrepareResult = Result; +/// Result of PVF preparation from a worker, with checksum of the compiled PVF and stats of the +/// preparation if successful. +pub type PrepareWorkerResult = Result<(String, PrepareStats), PrepareError>; + +/// Result of PVF preparation propagated all the way back to the host, with path to the concluded +/// artifact and stats of the preparation if successful. +pub type PrepareResult = Result; + +/// Result of prechecking PVF performed by the validation host. Contains stats about the preparation +/// if successful. +pub type PrecheckResult = Result; /// An error that occurred during the prepare part of the PVF pipeline. #[derive(Debug, Clone, Encode, Decode)] diff --git a/polkadot/node/core/pvf/common/src/prepare.rs b/polkadot/node/core/pvf/common/src/prepare.rs index c205eddfb8b1..0ab19e486884 100644 --- a/polkadot/node/core/pvf/common/src/prepare.rs +++ b/polkadot/node/core/pvf/common/src/prepare.rs @@ -15,6 +15,16 @@ // along with Polkadot. If not, see . use parity_scale_codec::{Decode, Encode}; +use std::path::PathBuf; + +/// Result of PVF preparation if a success. +#[derive(Debug, Clone, Default)] +pub struct PrepareSuccess { + /// Canonical path to the compiled artifact. Using `String` for serialization purpose. + pub path: PathBuf, + /// Stats of the current preparation run. + pub stats: PrepareStats, +} /// Preparation statistics, including the CPU time and memory taken. #[derive(Debug, Clone, Default, Encode, Decode)] diff --git a/polkadot/node/core/pvf/prepare-worker/Cargo.toml b/polkadot/node/core/pvf/prepare-worker/Cargo.toml index 886209b78c32..1502b816a183 100644 --- a/polkadot/node/core/pvf/prepare-worker/Cargo.toml +++ b/polkadot/node/core/pvf/prepare-worker/Cargo.toml @@ -7,6 +7,7 @@ edition.workspace = true license.workspace = true [dependencies] +blake3 = "1.5" cfg-if = "1.0" futures = "0.3.21" gum = { package = "tracing-gum", path = "../../../gum" } diff --git a/polkadot/node/core/pvf/prepare-worker/src/lib.rs b/polkadot/node/core/pvf/prepare-worker/src/lib.rs index fa5d3656a35e..b0ef2ca6c7af 100644 --- a/polkadot/node/core/pvf/prepare-worker/src/lib.rs +++ b/polkadot/node/core/pvf/prepare-worker/src/lib.rs @@ -31,7 +31,7 @@ use crate::memory_stats::max_rss_stat::{extract_max_rss_stat, get_max_rss_thread use crate::memory_stats::memory_tracker::{get_memory_tracker_loop_stats, memory_tracker_loop}; use parity_scale_codec::{Decode, Encode}; use polkadot_node_core_pvf_common::{ - error::{PrepareError, PrepareResult}, + error::{PrepareError, PrepareWorkerResult}, executor_intf::create_runtime_from_artifact_bytes, framed_recv_blocking, framed_send_blocking, prepare::{MemoryStats, PrepareJobKind, PrepareStats}, @@ -79,7 +79,7 @@ fn recv_request(stream: &mut UnixStream) -> io::Result { Ok(pvf) } -fn send_response(stream: &mut UnixStream, result: PrepareResult) -> io::Result<()> { +fn send_response(stream: &mut UnixStream, result: PrepareWorkerResult) -> io::Result<()> { framed_send_blocking(stream, &result.encode()) } @@ -115,8 +115,8 @@ fn send_response(stream: &mut UnixStream, result: PrepareResult) -> io::Result<( /// /// 6. If compilation succeeded, write the compiled artifact into a temporary file. /// -/// 7. Send the result of preparation back to the host. If any error occurred in the above steps, we -/// send that in the `PrepareResult`. +/// 7. Send the result of preparation back to the host, including the checksum of the artifact. If +/// any error occurred in the above steps, we send that in the `PrepareWorkerResult`. pub fn worker_entrypoint( socket_path: PathBuf, worker_dir_path: PathBuf, @@ -257,7 +257,8 @@ pub fn worker_entrypoint( ); tokio::fs::write(&temp_artifact_dest, &artifact).await?; - Ok(PrepareStats { cpu_time_elapsed, memory_stats }) + let checksum = blake3::hash(&artifact).to_hex().to_string(); + Ok((checksum, PrepareStats { cpu_time_elapsed, memory_stats })) }, } }, diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index bdf81a568657..c4cedd88664b 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -55,7 +55,7 @@ //! older by a predefined parameter. This process is run very rarely (say, once a day). Once the //! artifact is expired it is removed from disk eagerly atomically. -use crate::{host::PrepareResultSender, LOG_TARGET}; +use crate::{host::PrecheckResultSender, LOG_TARGET}; use always_assert::always; use polkadot_core_primitives::Hash; use polkadot_node_core_pvf_common::{error::PrepareError, prepare::PrepareStats, pvf::PvfPrepData}; @@ -191,7 +191,7 @@ pub enum ArtifactState { /// A task to prepare this artifact is scheduled. Preparing { /// List of result senders that are waiting for a response. - waiting_for_response: Vec, + waiting_for_response: Vec, /// The number of times this artifact has failed to prepare. num_failures: u32, }, @@ -359,7 +359,7 @@ impl Artifacts { pub fn insert_preparing( &mut self, artifact_id: ArtifactId, - waiting_for_response: Vec, + waiting_for_response: Vec, ) { // See the precondition. always!(self diff --git a/polkadot/node/core/pvf/src/host.rs b/polkadot/node/core/pvf/src/host.rs index 01f9205f69fa..733a8e2631d0 100644 --- a/polkadot/node/core/pvf/src/host.rs +++ b/polkadot/node/core/pvf/src/host.rs @@ -32,7 +32,8 @@ use futures::{ Future, FutureExt, SinkExt, StreamExt, }; use polkadot_node_core_pvf_common::{ - error::{PrepareError, PrepareResult}, + error::{PrecheckResult, PrepareError, PrepareResult}, + prepare::PrepareSuccess, pvf::PvfPrepData, SecurityStatus, }; @@ -63,7 +64,7 @@ pub const EXECUTE_BINARY_NAME: &str = "polkadot-execute-worker"; pub(crate) type ResultSender = oneshot::Sender>; /// Transmission end used for sending the PVF preparation result. -pub(crate) type PrepareResultSender = oneshot::Sender; +pub(crate) type PrecheckResultSender = oneshot::Sender; /// A handle to the async process serving the validation host requests. #[derive(Clone)] @@ -83,7 +84,7 @@ impl ValidationHost { pub async fn precheck_pvf( &mut self, pvf: PvfPrepData, - result_tx: PrepareResultSender, + result_tx: PrecheckResultSender, ) -> Result<(), String> { self.to_host_tx .send(ToHost::PrecheckPvf { pvf, result_tx }) @@ -133,7 +134,7 @@ impl ValidationHost { } enum ToHost { - PrecheckPvf { pvf: PvfPrepData, result_tx: PrepareResultSender }, + PrecheckPvf { pvf: PvfPrepData, result_tx: PrecheckResultSender }, ExecutePvf(ExecutePvfInputs), HeadsUp { active_pvfs: Vec }, } @@ -440,7 +441,7 @@ async fn handle_precheck_pvf( artifacts: &mut Artifacts, prepare_queue: &mut mpsc::Sender, pvf: PvfPrepData, - result_sender: PrepareResultSender, + result_sender: PrecheckResultSender, ) -> Result<(), Fatal> { let artifact_id = ArtifactId::from_pvf_prep_data(&pvf); @@ -455,7 +456,7 @@ async fn handle_precheck_pvf( ArtifactState::FailedToProcess { error, .. } => { // Do not retry failed preparation if another pre-check request comes in. We do not // retry pre-checking, anyway. - let _ = result_sender.send(PrepareResult::Err(error.clone())); + let _ = result_sender.send(PrecheckResult::Err(error.clone())); }, } } else { @@ -668,7 +669,7 @@ async fn handle_prepare_done( awaiting_prepare: &mut AwaitingPrepare, from_queue: prepare::FromQueue, ) -> Result<(), Fatal> { - let prepare::FromQueue { artifact_id, result, path } = from_queue; + let prepare::FromQueue { artifact_id, result } = from_queue; // Make some sanity checks and extract the current state. let state = match artifacts.artifact_state_mut(&artifact_id) { @@ -703,7 +704,8 @@ async fn handle_prepare_done( state { for result_sender in waiting_for_response.drain(..) { - let _ = result_sender.send(result.clone()); + let result = result.clone().map(|success| success.stats); + let _ = result_sender.send(result); } num_failures } else { @@ -723,22 +725,18 @@ async fn handle_prepare_done( continue } - // Don't send failed artifacts to the execution's queue. - if let Err(ref error) = result { - let _ = result_tx.send(Err(ValidationError::from(error.clone()))); - continue - } - - let path = if let Some(ref path) = path { - path.as_path() - } else { - unreachable!("path must be available if result is ok"); + let path = match &result { + Ok(success) => success.path.clone(), + Err(error) => { + let _ = result_tx.send(Err(ValidationError::from(error.clone()))); + continue + }, }; send_execute( execute_queue, execute::ToQueue::Enqueue { - artifact: ArtifactPathId::new(artifact_id.clone(), path), + artifact: ArtifactPathId::new(artifact_id.clone(), &path), pending_execution_request: PendingExecutionRequest { exec_timeout, params, @@ -751,14 +749,8 @@ async fn handle_prepare_done( } *state = match result { - Ok(prepare_stats) => { - let path = if let Some(path) = path { - path - } else { - unreachable!("path must be available if result is ok") - }; - ArtifactState::Prepared { path, last_time_needed: SystemTime::now(), prepare_stats } - }, + Ok(PrepareSuccess { path, stats: prepare_stats }) => + ArtifactState::Prepared { path, last_time_needed: SystemTime::now(), prepare_stats }, Err(error) => { let last_time_failed = SystemTime::now(); let num_failures = *num_failures + 1; diff --git a/polkadot/node/core/pvf/src/prepare/pool.rs b/polkadot/node/core/pvf/src/prepare/pool.rs index 9902d3ad364b..7933b0319a6f 100644 --- a/polkadot/node/core/pvf/src/prepare/pool.rs +++ b/polkadot/node/core/pvf/src/prepare/pool.rs @@ -86,8 +86,6 @@ pub enum FromPool { /// [`Ok`] indicates that compiled artifact is successfully stored on disk. /// Otherwise, an [error](PrepareError) is supplied. result: PrepareResult, - /// The path of the artifact, only available if successfully compiled. - path: Option, }, /// The given worker ceased to exist. @@ -332,8 +330,8 @@ fn handle_mux( // If we receive an outcome that the worker is unreachable or that an error occurred on // the worker, we attempt to kill the worker process. match outcome { - Outcome::Concluded { worker: idle, result, path } => - handle_concluded_no_rip(from_pool, spawned, worker, idle, result, path), + Outcome::Concluded { worker: idle, result } => + handle_concluded_no_rip(from_pool, spawned, worker, idle, result), // Return `Concluded`, but do not kill the worker since the error was on the host // side. Outcome::CreateTmpFileErr { worker: idle, err } => handle_concluded_no_rip( @@ -342,7 +340,6 @@ fn handle_mux( worker, idle, Err(PrepareError::CreateTmpFileErr(err)), - None, ), // Return `Concluded`, but do not kill the worker since the error was on the host // side. @@ -353,7 +350,6 @@ fn handle_mux( worker, idle, Err(PrepareError::RenameTmpFileErr { err, src, dest }), - None, ), // Could not clear worker cache. Kill the worker so other jobs can't see the data. Outcome::ClearWorkerDir { err } => { @@ -364,7 +360,6 @@ fn handle_mux( worker, rip: true, result: Err(PrepareError::ClearWorkerDir(err)), - path: None, }, )?; } @@ -386,7 +381,6 @@ fn handle_mux( worker, rip: true, result: Err(PrepareError::IoErr(err)), - path: None, }, )?; } @@ -401,7 +395,6 @@ fn handle_mux( worker, rip: true, result: Err(PrepareError::TimedOut), - path: None, }, )?; } @@ -447,7 +440,6 @@ fn handle_concluded_no_rip( worker: Worker, idle: IdleWorker, result: PrepareResult, - path: Option, ) -> Result<(), Fatal> { let data = match spawned.get_mut(worker) { None => { @@ -466,7 +458,7 @@ fn handle_concluded_no_rip( "old idle worker was taken out when starting work; we only replace it here; qed" ); - reply(from_pool, FromPool::Concluded { worker, rip: false, result, path })?; + reply(from_pool, FromPool::Concluded { worker, rip: false, result })?; Ok(()) } diff --git a/polkadot/node/core/pvf/src/prepare/queue.rs b/polkadot/node/core/pvf/src/prepare/queue.rs index 8277c947eb49..5439ce4a499a 100644 --- a/polkadot/node/core/pvf/src/prepare/queue.rs +++ b/polkadot/node/core/pvf/src/prepare/queue.rs @@ -48,8 +48,6 @@ pub struct FromQueue { /// is successfully stored on disk. Otherwise, an [error](crate::error::PrepareError) /// is supplied. pub(crate) result: PrepareResult, - /// Path of PVF artifact, only available if compiled successfully. - pub(crate) path: Option, } #[derive(Default)] @@ -273,8 +271,8 @@ async fn handle_from_pool(queue: &mut Queue, from_pool: pool::FromPool) -> Resul use pool::FromPool; match from_pool { FromPool::Spawned(worker) => handle_worker_spawned(queue, worker).await?, - FromPool::Concluded { worker, rip, result, path } => - handle_worker_concluded(queue, worker, rip, result, path).await?, + FromPool::Concluded { worker, rip, result } => + handle_worker_concluded(queue, worker, rip, result).await?, FromPool::Rip(worker) => handle_worker_rip(queue, worker).await?, } Ok(()) @@ -296,7 +294,6 @@ async fn handle_worker_concluded( worker: Worker, rip: bool, result: PrepareResult, - path: Option, ) -> Result<(), Fatal> { queue.metrics.prepare_concluded(); @@ -354,7 +351,7 @@ async fn handle_worker_concluded( "prepare worker concluded", ); - reply(&mut queue.from_queue_tx, FromQueue { artifact_id, result, path })?; + reply(&mut queue.from_queue_tx, FromQueue { artifact_id, result })?; // Figure out what to do with the worker. if rip { diff --git a/polkadot/node/core/pvf/src/prepare/worker_intf.rs b/polkadot/node/core/pvf/src/prepare/worker_intf.rs index 81f452604917..8ba0a124106f 100644 --- a/polkadot/node/core/pvf/src/prepare/worker_intf.rs +++ b/polkadot/node/core/pvf/src/prepare/worker_intf.rs @@ -26,8 +26,8 @@ use crate::{ }; use parity_scale_codec::{Decode, Encode}; use polkadot_node_core_pvf_common::{ - error::{PrepareError, PrepareResult}, - prepare::PrepareStats, + error::{PrepareError, PrepareResult, PrepareWorkerResult}, + prepare::{PrepareStats, PrepareSuccess}, pvf::PvfPrepData, worker_dir, SecurityStatus, }; @@ -70,7 +70,7 @@ pub async fn spawn( /// If the idle worker token is not returned, it means the worker must be terminated. pub enum Outcome { /// The worker has finished the work assigned to it. - Concluded { worker: IdleWorker, result: PrepareResult, path: Option }, + Concluded { worker: IdleWorker, result: PrepareResult }, /// The host tried to reach the worker but failed. This is most likely because the worked was /// killed by the system. Unreachable, @@ -80,7 +80,7 @@ pub enum Outcome { /// final destination location. RenameTmpFileErr { worker: IdleWorker, - result: PrepareResult, + result: PrepareWorkerResult, err: String, // Unfortunately `PathBuf` doesn't implement `Encode`/`Decode`, so we do a fallible // conversion to `Option`. @@ -193,17 +193,17 @@ pub async fn start_work( async fn handle_response( metrics: &Metrics, worker: IdleWorker, - result: PrepareResult, + result: PrepareWorkerResult, worker_pid: u32, tmp_file: PathBuf, artifact_path_partial: PathBuf, preparation_timeout: Duration, ) -> Outcome { - let PrepareStats { cpu_time_elapsed, memory_stats } = match result.clone() { + let (checksum, PrepareStats { cpu_time_elapsed, memory_stats }) = match result.clone() { Ok(result) => result, // Timed out on the child. This should already be logged by the child. Err(PrepareError::TimedOut) => return Outcome::TimedOut, - Err(_) => return Outcome::Concluded { worker, result, path: None }, + Err(err) => return Outcome::Concluded { worker, result: Err(err) }, }; if cpu_time_elapsed > preparation_timeout { @@ -219,18 +219,12 @@ async fn handle_response( return Outcome::TimedOut } - let checksum = match tokio::fs::read(&tmp_file).await { - Ok(bytes) => blake3::hash(&bytes), - // FIXME eagr: handle error properly - Err(_) => return Outcome::Unreachable, - }; - - let file_name_partial = artifact_path_partial - .file_name() - .expect("the path should never terminate in `..`"); + // append checksum to prefix to form the path of concluded artifact + let file_name_partial = + artifact_path_partial.file_stem().expect("the path should contain file name"); let mut file_name = file_name_partial.to_os_string(); file_name.push("_0x"); - file_name.push(checksum.to_hex().as_str()); + file_name.push(checksum); let artifact_path = artifact_path_partial.with_file_name(&file_name); gum::debug!( @@ -242,7 +236,13 @@ async fn handle_response( ); let outcome = match tokio::fs::rename(&tmp_file, &artifact_path).await { - Ok(()) => Outcome::Concluded { worker, result, path: Some(artifact_path) }, + Ok(()) => Outcome::Concluded { + worker, + result: Ok(PrepareSuccess { + path: artifact_path, + stats: PrepareStats { cpu_time_elapsed, memory_stats: memory_stats.clone() }, + }), + }, Err(err) => { gum::warn!( target: LOG_TARGET, @@ -324,9 +324,9 @@ async fn send_request(stream: &mut UnixStream, pvf: PvfPrepData) -> io::Result<( Ok(()) } -async fn recv_response(stream: &mut UnixStream, pid: u32) -> io::Result { +async fn recv_response(stream: &mut UnixStream, pid: u32) -> io::Result { let result = framed_recv(stream).await?; - let result = PrepareResult::decode(&mut &result[..]).map_err(|e| { + let result = PrepareWorkerResult::decode(&mut &result[..]).map_err(|e| { // We received invalid bytes from the worker. let bound_bytes = &result[..result.len().min(4)]; gum::warn!( From dad5285b399e34c2419c90e2c1e6a96c0ad42184 Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Wed, 1 Nov 2023 21:48:59 +0800 Subject: [PATCH 21/38] fix tests --- .../node/core/pvf/prepare-worker/src/lib.rs | 3 +- polkadot/node/core/pvf/src/artifacts.rs | 79 +++++++++++++------ polkadot/node/core/pvf/src/host.rs | 43 +++++----- polkadot/node/core/pvf/src/prepare/queue.rs | 10 +-- 4 files changed, 83 insertions(+), 52 deletions(-) diff --git a/polkadot/node/core/pvf/prepare-worker/src/lib.rs b/polkadot/node/core/pvf/prepare-worker/src/lib.rs index b0ef2ca6c7af..9721b804c0aa 100644 --- a/polkadot/node/core/pvf/prepare-worker/src/lib.rs +++ b/polkadot/node/core/pvf/prepare-worker/src/lib.rs @@ -257,7 +257,8 @@ pub fn worker_entrypoint( ); tokio::fs::write(&temp_artifact_dest, &artifact).await?; - let checksum = blake3::hash(&artifact).to_hex().to_string(); + let checksum = + blake3::hash(&artifact.as_ref()).to_hex().to_string(); Ok((checksum, PrepareStats { cpu_time_elapsed, memory_stats })) }, } diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index c4cedd88664b..2ca176e65830 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -235,8 +235,8 @@ impl Artifacts { async fn insert_and_prune(&mut self, cache_path: &Path) { async fn is_corrupted(path: &Path) -> bool { let file_name = path - .file_stem() - .expect("path must contain a file name") + .file_name() + .expect("path should never terminate in '..'") .to_str() .expect("file name must be valid UTF-8"); let checksum = match tokio::fs::read(path).await { @@ -246,7 +246,7 @@ impl Artifacts { gum::warn!( target: LOG_TARGET, ?err, - "unable to read file {:?} when checking integrity", + "unable to read artifact {:?} when checking integrity, removing...", path, ); return true @@ -298,7 +298,7 @@ impl Artifacts { if let Some(id) = id { gum::debug!( target: LOG_TARGET, - "reusing {:?} for node version v{}", + "reusing existing {:?} for node version v{}", &path, NODE_VERSION, ); @@ -428,14 +428,17 @@ mod tests { str::FromStr, }; - fn rand_hash() -> String { + fn rand_hash(len: usize) -> String { let mut rng = rand::thread_rng(); let hex: Vec<_> = "0123456789abcdef".chars().collect(); - (0..64).map(|_| hex[rng.gen_range(0..hex.len())]).collect() + (0..len).map(|_| hex[rng.gen_range(0..hex.len())]).collect() } - fn file_name(code_hash: &str, param_hash: &str) -> String { - format!("wasmtime_polkadot_v{}_0x{}_0x{}", NODE_VERSION, code_hash, param_hash) + fn file_name(code_hash: &str, param_hash: &str, checksum: &str) -> String { + format!( + "wasmtime_polkadot_v{}_0x{}_0x{}_0x{}", + NODE_VERSION, code_hash, param_hash, checksum + ) } fn fake_artifact_path( @@ -445,7 +448,7 @@ mod tests { params_hash: impl AsRef, ) -> PathBuf { let mut path = dir.as_ref().to_path_buf(); - let file_name = format!("{}_0x{}_0x{}", prefix, code_hash.as_ref(), params_hash.as_ref()); + let file_name = format!("{}_0x{}_0x{}", prefix, code_hash.as_ref(), params_hash.as_ref(),); path.push(file_name); path } @@ -455,13 +458,24 @@ mod tests { prefix: &str, code_hash: impl AsRef, params_hash: impl AsRef, - ) { + ) -> (PathBuf, String) { let path = fake_artifact_path(dir, prefix, code_hash, params_hash); - fs::File::create(path).unwrap(); + fs::File::create(&path).unwrap(); + let bytes = fs::read(&path).unwrap(); + let checksum = blake3::hash(bytes.as_ref()).to_hex().to_string(); + (path, checksum) + } + + fn create_rand_artifact(dir: impl AsRef, prefix: &str) -> (PathBuf, String) { + create_artifact(dir, prefix, rand_hash(64), rand_hash(64)) } - fn create_rand_artifact(dir: impl AsRef, prefix: &str) { - create_artifact(dir, prefix, rand_hash(), rand_hash()); + fn concluded_path(path: impl AsRef, checksum: &str) -> PathBuf { + let path = path.as_ref(); + let mut file_name = path.file_name().unwrap().to_os_string(); + file_name.push("_0x"); + file_name.push(checksum); + path.with_file_name(file_name) } #[test] @@ -477,6 +491,7 @@ mod tests { let file_name = file_name( "0022800000000000000000000000000000000000000000000000000000000000", "0033900000000000000000000000000000000000000000000000000000000000", + "00000000000000000000000000000000", ); assert_eq!( @@ -498,34 +513,46 @@ mod tests { let dir = Path::new("/test"); let code_hash = "1234567890123456789012345678901234567890123456789012345678901234"; let params_hash = "4321098765432109876543210987654321098765432109876543210987654321"; - let file_name = file_name(code_hash, params_hash); + let checksum = "34567890123456789012345678901234"; + let file_name = file_name(code_hash, params_hash, checksum); let code_hash = H256::from_str(code_hash).unwrap(); let params_hash = H256::from_str(params_hash).unwrap(); - assert_eq!( - ArtifactId::new(code_hash.into(), ExecutorParamsHash::from_hash(params_hash)) - .path_prefix(dir) - .to_str(), - Some(format!("/test/{}", file_name).as_str()), - ); + let prefix = ArtifactId::new(code_hash.into(), ExecutorParamsHash::from_hash(params_hash)) + .path_prefix(dir); + assert!(format!("/test/{}", file_name).starts_with(prefix.to_str().unwrap())); } #[tokio::test] async fn remove_stale_cache_on_startup() { let cache_dir = crate::worker_intf::tmppath("test-cache").await.unwrap(); - fs::create_dir_all(&cache_dir).unwrap(); - // 5 invalid, 1 valid + // 6 invalid + + // invalid prefix create_rand_artifact(&cache_dir, ""); create_rand_artifact(&cache_dir, "wasmtime_polkadot_v"); create_rand_artifact(&cache_dir, "wasmtime_polkadot_v1.0.0"); + // no checksum create_rand_artifact(&cache_dir, ARTIFACT_PREFIX); - create_artifact(&cache_dir, ARTIFACT_PREFIX, "", ""); - create_artifact(&cache_dir, ARTIFACT_PREFIX, "000", "000000"); - - assert_eq!(fs::read_dir(&cache_dir).unwrap().count(), 6); + // invalid hashes + let (path, checksum) = create_artifact(&cache_dir, ARTIFACT_PREFIX, "000", "000001"); + let new_path = concluded_path(&path, &checksum); + fs::rename(&path, &new_path).unwrap(); + // invalid checksum + let (path, checksum) = create_rand_artifact(&cache_dir, ARTIFACT_PREFIX); + let new_path = concluded_path(&path, &checksum[1..]); + fs::rename(&path, &new_path).unwrap(); + + // 1 valid + + let (path, checksum) = create_rand_artifact(&cache_dir, ARTIFACT_PREFIX); + let new_path = concluded_path(&path, &checksum); + fs::rename(&path, &new_path).unwrap(); + + assert_eq!(fs::read_dir(&cache_dir).unwrap().count(), 7); let artifacts = Artifacts::new_and_prune(&cache_dir).await; diff --git a/polkadot/node/core/pvf/src/host.rs b/polkadot/node/core/pvf/src/host.rs index 733a8e2631d0..111eb0c287fe 100644 --- a/polkadot/node/core/pvf/src/host.rs +++ b/polkadot/node/core/pvf/src/host.rs @@ -32,7 +32,7 @@ use futures::{ Future, FutureExt, SinkExt, StreamExt, }; use polkadot_node_core_pvf_common::{ - error::{PrecheckResult, PrepareError, PrepareResult}, + error::{PrecheckResult, PrepareError}, prepare::PrepareSuccess, pvf::PvfPrepData, SecurityStatus, @@ -251,7 +251,6 @@ pub fn start(config: Config, metrics: Metrics) -> (ValidationHost, impl Future Self { - let cache_path = PathBuf::from(std::env::temp_dir()); - let (to_host_tx, to_host_rx) = mpsc::channel(10); let (to_prepare_queue_tx, to_prepare_queue_rx) = mpsc::channel(10); let (from_prepare_queue_tx, from_prepare_queue_rx) = mpsc::unbounded(); @@ -1052,7 +1050,6 @@ pub(crate) mod tests { let (to_sweeper_tx, to_sweeper_rx) = mpsc::channel(10); let run = run(Inner { - cache_path, cleanup_pulse_interval, artifact_ttl, artifacts, @@ -1201,12 +1198,18 @@ pub(crate) mod tests { let mut builder = Builder::default(); builder.cleanup_pulse_interval = Duration::from_millis(100); builder.artifact_ttl = Duration::from_millis(500); - builder - .artifacts - .insert_prepared(artifact_id(1), mock_now, PrepareStats::default()); - builder - .artifacts - .insert_prepared(artifact_id(2), mock_now, PrepareStats::default()); + builder.artifacts.insert_prepared( + artifact_id(1), + Default::default(), + mock_now, + PrepareStats::default(), + ); + builder.artifacts.insert_prepared( + artifact_id(2), + Default::default(), + mock_now, + PrepareStats::default(), + ); let mut test = builder.build(); let mut host = test.host_handle(); @@ -1278,7 +1281,7 @@ pub(crate) mod tests { test.from_prepare_queue_tx .send(prepare::FromQueue { artifact_id: artifact_id(1), - result: Ok(PrepareStats::default()), + result: Ok(PrepareSuccess::default()), }) .await .unwrap(); @@ -1294,7 +1297,7 @@ pub(crate) mod tests { test.from_prepare_queue_tx .send(prepare::FromQueue { artifact_id: artifact_id(2), - result: Ok(PrepareStats::default()), + result: Ok(PrepareSuccess::default()), }) .await .unwrap(); @@ -1348,7 +1351,7 @@ pub(crate) mod tests { test.from_prepare_queue_tx .send(prepare::FromQueue { artifact_id: artifact_id(1), - result: Ok(PrepareStats::default()), + result: Ok(PrepareSuccess::default()), }) .await .unwrap(); @@ -1461,7 +1464,7 @@ pub(crate) mod tests { test.from_prepare_queue_tx .send(prepare::FromQueue { artifact_id: artifact_id(2), - result: Ok(PrepareStats::default()), + result: Ok(PrepareSuccess::default()), }) .await .unwrap(); @@ -1617,7 +1620,7 @@ pub(crate) mod tests { test.from_prepare_queue_tx .send(prepare::FromQueue { artifact_id: artifact_id(1), - result: Ok(PrepareStats::default()), + result: Ok(PrepareSuccess::default()), }) .await .unwrap(); @@ -1793,7 +1796,7 @@ pub(crate) mod tests { test.from_prepare_queue_tx .send(prepare::FromQueue { artifact_id: artifact_id(1), - result: Ok(PrepareStats::default()), + result: Ok(PrepareSuccess::default()), }) .await .unwrap(); diff --git a/polkadot/node/core/pvf/src/prepare/queue.rs b/polkadot/node/core/pvf/src/prepare/queue.rs index 5439ce4a499a..f7c8c200548b 100644 --- a/polkadot/node/core/pvf/src/prepare/queue.rs +++ b/polkadot/node/core/pvf/src/prepare/queue.rs @@ -491,7 +491,7 @@ mod tests { use crate::host::tests::TEST_PREPARATION_TIMEOUT; use assert_matches::assert_matches; use futures::{future::BoxFuture, FutureExt}; - use polkadot_node_core_pvf_common::{error::PrepareError, prepare::PrepareStats}; + use polkadot_node_core_pvf_common::{error::PrepareError, prepare::PrepareSuccess}; use slotmap::SlotMap; use std::task::Poll; @@ -612,7 +612,7 @@ mod tests { test.send_from_pool(pool::FromPool::Concluded { worker: w, rip: false, - result: Ok(PrepareStats::default()), + result: Ok(PrepareSuccess::default()), }); assert_eq!( @@ -651,7 +651,7 @@ mod tests { test.send_from_pool(pool::FromPool::Concluded { worker: w1, rip: false, - result: Ok(PrepareStats::default()), + result: Ok(PrepareSuccess::default()), }); assert_matches!(test.poll_and_recv_to_pool().await, pool::ToPool::StartWork { .. }); @@ -697,7 +697,7 @@ mod tests { test.send_from_pool(pool::FromPool::Concluded { worker: w1, rip: false, - result: Ok(PrepareStats::default()), + result: Ok(PrepareSuccess::default()), }); assert_eq!(test.poll_and_recv_to_pool().await, pool::ToPool::Kill(w1)); } @@ -731,7 +731,7 @@ mod tests { test.send_from_pool(pool::FromPool::Concluded { worker: w1, rip: true, - result: Ok(PrepareStats::default()), + result: Ok(PrepareSuccess::default()), }); // Since there is still work, the queue requested one extra worker to spawn to handle the From 1ede2016de9bfe6056ba576e39aa8a783930990d Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Thu, 2 Nov 2023 00:25:50 +0800 Subject: [PATCH 22/38] fix pruning --- polkadot/node/core/pvf/src/artifacts.rs | 11 +++++------ polkadot/node/core/pvf/src/host.rs | 10 ++++------ polkadot/node/core/pvf/src/worker_intf.rs | 4 ++-- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index 2ca176e65830..5551bf6e0cb4 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -383,26 +383,25 @@ impl Artifacts { .is_none()); } - /// Remove and retrieve the artifacts from the table that are older than the supplied - /// Time-To-Live. - pub fn prune(&mut self, artifact_ttl: Duration) -> Vec { + /// Remove artifacts older than the given TTL and return id and path of the removed ones. + pub fn prune(&mut self, artifact_ttl: Duration) -> Vec<(ArtifactId, PathBuf)> { let now = SystemTime::now(); let mut to_remove = vec![]; for (k, v) in self.inner.iter() { - if let ArtifactState::Prepared { last_time_needed, .. } = *v { + if let ArtifactState::Prepared { last_time_needed, ref path, .. } = *v { if now .duration_since(last_time_needed) .map(|age| age > artifact_ttl) .unwrap_or(false) { - to_remove.push(k.clone()); + to_remove.push((k.clone(), path.clone())); } } } for artifact in &to_remove { - self.inner.remove(artifact); + self.inner.remove(&artifact.0); } to_remove diff --git a/polkadot/node/core/pvf/src/host.rs b/polkadot/node/core/pvf/src/host.rs index 111eb0c287fe..32b3c36ffa86 100644 --- a/polkadot/node/core/pvf/src/host.rs +++ b/polkadot/node/core/pvf/src/host.rs @@ -810,15 +810,13 @@ async fn handle_cleanup_pulse( "PVF pruning: {} artifacts reached their end of life", to_remove.len(), ); - for artifact_id in to_remove { + for (artifact_id, path) in to_remove { gum::debug!( target: LOG_TARGET, validation_code_hash = ?artifact_id.code_hash, "pruning artifact", ); - if let Some(path) = artifacts.get_path(&artifact_id) { - sweeper_tx.send(path).await.map_err(|_| Fatal)?; - } + sweeper_tx.send(path).await.map_err(|_| Fatal)?; } Ok(()) @@ -1200,13 +1198,13 @@ pub(crate) mod tests { builder.artifact_ttl = Duration::from_millis(500); builder.artifacts.insert_prepared( artifact_id(1), - Default::default(), + artifact_path(1), mock_now, PrepareStats::default(), ); builder.artifacts.insert_prepared( artifact_id(2), - Default::default(), + artifact_path(2), mock_now, PrepareStats::default(), ); diff --git a/polkadot/node/core/pvf/src/worker_intf.rs b/polkadot/node/core/pvf/src/worker_intf.rs index bd85d84055ce..c8ba316bd665 100644 --- a/polkadot/node/core/pvf/src/worker_intf.rs +++ b/polkadot/node/core/pvf/src/worker_intf.rs @@ -198,7 +198,7 @@ pub async fn tmppath_in(prefix: &str, dir: &Path) -> io::Result { /// The same as [`tmppath_in`], but uses [`std::env::temp_dir`] as the directory. pub async fn tmppath(prefix: &str) -> io::Result { - let temp_dir = PathBuf::from(std::env::temp_dir()); + let temp_dir = std::env::temp_dir(); tmppath_in(prefix, &temp_dir).await } @@ -449,7 +449,7 @@ impl Drop for WorkerDir { /// artifacts from previous jobs. pub fn clear_worker_dir_path(worker_dir_path: &Path) -> io::Result<()> { fn remove_dir_contents(path: &Path) -> io::Result<()> { - for entry in std::fs::read_dir(&path)? { + for entry in std::fs::read_dir(path)? { let entry = entry?; let path = entry.path(); From 2d51b522871dd3d301522edeb5b7225ecb61cdf6 Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Thu, 2 Nov 2023 02:18:48 +0800 Subject: [PATCH 23/38] fix message serialization --- polkadot/node/core/pvf/common/src/error.rs | 4 ++-- polkadot/node/core/pvf/common/src/prepare.rs | 13 +++++++++++-- polkadot/node/core/pvf/prepare-worker/src/lib.rs | 7 +++++-- polkadot/node/core/pvf/src/prepare/worker_intf.rs | 15 ++++++++------- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/polkadot/node/core/pvf/common/src/error.rs b/polkadot/node/core/pvf/common/src/error.rs index 07e2a3580e1d..51ec24107705 100644 --- a/polkadot/node/core/pvf/common/src/error.rs +++ b/polkadot/node/core/pvf/common/src/error.rs @@ -14,13 +14,13 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use crate::prepare::{PrepareStats, PrepareSuccess}; +use crate::prepare::{PrepareStats, PrepareSuccess, PrepareWorkerSuccess}; use parity_scale_codec::{Decode, Encode}; use std::fmt; /// Result of PVF preparation from a worker, with checksum of the compiled PVF and stats of the /// preparation if successful. -pub type PrepareWorkerResult = Result<(String, PrepareStats), PrepareError>; +pub type PrepareWorkerResult = Result; /// Result of PVF preparation propagated all the way back to the host, with path to the concluded /// artifact and stats of the preparation if successful. diff --git a/polkadot/node/core/pvf/common/src/prepare.rs b/polkadot/node/core/pvf/common/src/prepare.rs index 0ab19e486884..ded13ebeeef5 100644 --- a/polkadot/node/core/pvf/common/src/prepare.rs +++ b/polkadot/node/core/pvf/common/src/prepare.rs @@ -17,10 +17,19 @@ use parity_scale_codec::{Decode, Encode}; use std::path::PathBuf; -/// Result of PVF preparation if a success. +/// Result from prepare worker if successful. +#[derive(Debug, Clone, Default, Encode, Decode)] +pub struct PrepareWorkerSuccess { + /// Checksum of the compiled PVF. + pub checksum: String, + /// Stats of the current preparation run. + pub stats: PrepareStats, +} + +/// Result of PVF preparation if successful. #[derive(Debug, Clone, Default)] pub struct PrepareSuccess { - /// Canonical path to the compiled artifact. Using `String` for serialization purpose. + /// Canonical path to the compiled artifact. pub path: PathBuf, /// Stats of the current preparation run. pub stats: PrepareStats, diff --git a/polkadot/node/core/pvf/prepare-worker/src/lib.rs b/polkadot/node/core/pvf/prepare-worker/src/lib.rs index 9721b804c0aa..9f78174f5a0e 100644 --- a/polkadot/node/core/pvf/prepare-worker/src/lib.rs +++ b/polkadot/node/core/pvf/prepare-worker/src/lib.rs @@ -34,7 +34,7 @@ use polkadot_node_core_pvf_common::{ error::{PrepareError, PrepareWorkerResult}, executor_intf::create_runtime_from_artifact_bytes, framed_recv_blocking, framed_send_blocking, - prepare::{MemoryStats, PrepareJobKind, PrepareStats}, + prepare::{MemoryStats, PrepareJobKind, PrepareStats, PrepareWorkerSuccess}, pvf::PvfPrepData, worker::{ cpu_time_monitor_loop, stringify_panic_payload, @@ -259,7 +259,10 @@ pub fn worker_entrypoint( let checksum = blake3::hash(&artifact.as_ref()).to_hex().to_string(); - Ok((checksum, PrepareStats { cpu_time_elapsed, memory_stats })) + Ok(PrepareWorkerSuccess { + checksum, + stats: PrepareStats { cpu_time_elapsed, memory_stats }, + }) }, } }, diff --git a/polkadot/node/core/pvf/src/prepare/worker_intf.rs b/polkadot/node/core/pvf/src/prepare/worker_intf.rs index 8ba0a124106f..1670a472a9e7 100644 --- a/polkadot/node/core/pvf/src/prepare/worker_intf.rs +++ b/polkadot/node/core/pvf/src/prepare/worker_intf.rs @@ -27,7 +27,7 @@ use crate::{ use parity_scale_codec::{Decode, Encode}; use polkadot_node_core_pvf_common::{ error::{PrepareError, PrepareResult, PrepareWorkerResult}, - prepare::{PrepareStats, PrepareSuccess}, + prepare::{PrepareStats, PrepareSuccess, PrepareWorkerSuccess}, pvf::PvfPrepData, worker_dir, SecurityStatus, }; @@ -199,12 +199,13 @@ async fn handle_response( artifact_path_partial: PathBuf, preparation_timeout: Duration, ) -> Outcome { - let (checksum, PrepareStats { cpu_time_elapsed, memory_stats }) = match result.clone() { - Ok(result) => result, - // Timed out on the child. This should already be logged by the child. - Err(PrepareError::TimedOut) => return Outcome::TimedOut, - Err(err) => return Outcome::Concluded { worker, result: Err(err) }, - }; + let PrepareWorkerSuccess { checksum, stats: PrepareStats { cpu_time_elapsed, memory_stats } } = + match result.clone() { + Ok(result) => result, + // Timed out on the child. This should already be logged by the child. + Err(PrepareError::TimedOut) => return Outcome::TimedOut, + Err(err) => return Outcome::Concluded { worker, result: Err(err) }, + }; if cpu_time_elapsed > preparation_timeout { // The job didn't complete within the timeout. From 36a33e8e9b7df1b86d5cac493ebdca34399971bc Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Thu, 2 Nov 2023 03:25:42 +0800 Subject: [PATCH 24/38] clean up --- polkadot/node/core/pvf/src/artifacts.rs | 14 +++------ polkadot/node/core/pvf/src/prepare/pool.rs | 10 +++---- polkadot/node/core/pvf/src/prepare/queue.rs | 10 +++---- .../node/core/pvf/src/prepare/worker_intf.rs | 29 +++++++++---------- 4 files changed, 27 insertions(+), 36 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index 5551bf6e0cb4..00593dfb788c 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -128,8 +128,9 @@ impl ArtifactId { Self::new(pvf.code_hash(), pvf.executor_params().hash()) } - /// Returns the expected path to this artifact given the root of the cache. - pub fn path_prefix(&self, cache_path: &Path) -> PathBuf { + /// Returns the prefix of the expected path to the artifact, which combined with the checksum + /// forms the canonical path to the artifact. + pub(crate) fn path_prefix(&self, cache_path: &Path) -> PathBuf { let file_name = format!("{}_{:#x}_{:#x}", ARTIFACT_PREFIX, self.code_hash, self.executor_params_hash); cache_path.join(file_name) @@ -286,7 +287,7 @@ impl Artifacts { let path = cache_path.join(file_name); if id.is_none() || is_corrupted(&path).await { - gum::debug!( + gum::warn!( target: LOG_TARGET, "discarding invalid artifact {:?}", &path, @@ -406,13 +407,6 @@ impl Artifacts { to_remove } - - pub fn get_path(&self, id: &ArtifactId) -> Option { - if let Some(ArtifactState::Prepared { path, .. }) = self.inner.get(id) { - return Some(path.clone()) - } - None - } } #[cfg(test)] diff --git a/polkadot/node/core/pvf/src/prepare/pool.rs b/polkadot/node/core/pvf/src/prepare/pool.rs index 7933b0319a6f..c6dd105a2205 100644 --- a/polkadot/node/core/pvf/src/prepare/pool.rs +++ b/polkadot/node/core/pvf/src/prepare/pool.rs @@ -68,7 +68,7 @@ pub enum ToPool { /// /// In either case, the worker is considered busy and no further `StartWork` messages should be /// sent until either `Concluded` or `Rip` message is received. - StartWork { worker: Worker, pvf: PvfPrepData, artifact_path: PathBuf }, + StartWork { worker: Worker, pvf: PvfPrepData, cache_path: PathBuf }, } /// A message sent from pool to its client. @@ -232,7 +232,7 @@ fn handle_to_pool( .boxed(), ); }, - ToPool::StartWork { worker, pvf, artifact_path } => { + ToPool::StartWork { worker, pvf, cache_path } => { if let Some(data) = spawned.get_mut(worker) { if let Some(idle) = data.idle.take() { let preparation_timer = metrics.time_preparation(); @@ -242,7 +242,7 @@ fn handle_to_pool( worker, idle, pvf, - artifact_path, + cache_path, preparation_timer, ) .boxed(), @@ -303,10 +303,10 @@ async fn start_work_task( worker: Worker, idle: IdleWorker, pvf: PvfPrepData, - artifact_path: PathBuf, + cache_path: PathBuf, _preparation_timer: Option, ) -> PoolEvent { - let outcome = worker_intf::start_work(&metrics, idle, pvf, artifact_path).await; + let outcome = worker_intf::start_work(&metrics, idle, pvf, cache_path).await; PoolEvent::StartWork(worker, outcome) } diff --git a/polkadot/node/core/pvf/src/prepare/queue.rs b/polkadot/node/core/pvf/src/prepare/queue.rs index f7c8c200548b..c140a6cafda0 100644 --- a/polkadot/node/core/pvf/src/prepare/queue.rs +++ b/polkadot/node/core/pvf/src/prepare/queue.rs @@ -424,17 +424,17 @@ async fn spawn_extra_worker(queue: &mut Queue, critical: bool) -> Result<(), Fat /// Attaches the work to the given worker telling the poll about the job. async fn assign(queue: &mut Queue, worker: Worker, job: Job) -> Result<(), Fatal> { let job_data = &mut queue.jobs[job]; - - let artifact_id = ArtifactId::from_pvf_prep_data(&job_data.pvf); - let artifact_path = artifact_id.path_prefix(&queue.cache_path); - job_data.worker = Some(worker); queue.workers[worker].job = Some(job); send_pool( &mut queue.to_pool_tx, - pool::ToPool::StartWork { worker, pvf: job_data.pvf.clone(), artifact_path }, + pool::ToPool::StartWork { + worker, + pvf: job_data.pvf.clone(), + cache_path: queue.cache_path.clone(), + }, ) .await?; diff --git a/polkadot/node/core/pvf/src/prepare/worker_intf.rs b/polkadot/node/core/pvf/src/prepare/worker_intf.rs index 1670a472a9e7..fcade386f7e4 100644 --- a/polkadot/node/core/pvf/src/prepare/worker_intf.rs +++ b/polkadot/node/core/pvf/src/prepare/worker_intf.rs @@ -17,6 +17,7 @@ //! Host interface to the prepare worker. use crate::{ + artifacts::ArtifactId, metrics::Metrics, worker_intf::{ clear_worker_dir_path, framed_recv, framed_send, spawn_with_program_path, IdleWorker, @@ -108,25 +109,17 @@ pub async fn start_work( metrics: &Metrics, worker: IdleWorker, pvf: PvfPrepData, - artifact_path: PathBuf, + cache_path: PathBuf, ) -> Outcome { let IdleWorker { stream, pid, worker_dir } = worker; - gum::debug!( - target: LOG_TARGET, - worker_pid = %pid, - ?worker_dir, - "starting prepare for {}", - artifact_path.display(), - ); - with_worker_dir_setup( worker_dir, stream, pid, |tmp_artifact_file, mut stream, worker_dir| async move { let preparation_timeout = pvf.prep_timeout(); - if let Err(err) = send_request(&mut stream, pvf).await { + if let Err(err) = send_request(&mut stream, &pvf).await { gum::warn!( target: LOG_TARGET, worker_pid = %pid, @@ -148,6 +141,9 @@ pub async fn start_work( let timeout = preparation_timeout * JOB_TIMEOUT_WALL_CLOCK_FACTOR; let result = tokio::time::timeout(timeout, recv_response(&mut stream, pid)).await; + let artifact_id = ArtifactId::from_pvf_prep_data(&pvf); + let artifact_path_prefix = artifact_id.path_prefix(&cache_path); + match result { // Received bytes from worker within the time limit. Ok(Ok(prepare_result)) => @@ -157,7 +153,7 @@ pub async fn start_work( prepare_result, pid, tmp_artifact_file, - artifact_path, + artifact_path_prefix, preparation_timeout, ) .await, @@ -196,7 +192,7 @@ async fn handle_response( result: PrepareWorkerResult, worker_pid: u32, tmp_file: PathBuf, - artifact_path_partial: PathBuf, + artifact_path_prefix: PathBuf, preparation_timeout: Duration, ) -> Outcome { let PrepareWorkerSuccess { checksum, stats: PrepareStats { cpu_time_elapsed, memory_stats } } = @@ -221,12 +217,13 @@ async fn handle_response( } // append checksum to prefix to form the path of concluded artifact - let file_name_partial = - artifact_path_partial.file_stem().expect("the path should contain file name"); + let file_name_partial = artifact_path_prefix + .file_name() + .expect("the path should never terminate in '..'"); let mut file_name = file_name_partial.to_os_string(); file_name.push("_0x"); file_name.push(checksum); - let artifact_path = artifact_path_partial.with_file_name(&file_name); + let artifact_path = artifact_path_prefix.with_file_name(&file_name); gum::debug!( target: LOG_TARGET, @@ -320,7 +317,7 @@ where outcome } -async fn send_request(stream: &mut UnixStream, pvf: PvfPrepData) -> io::Result<()> { +async fn send_request(stream: &mut UnixStream, pvf: &PvfPrepData) -> io::Result<()> { framed_send(stream, &pvf.encode()).await?; Ok(()) } From 69f6a4413b248c6dd4e4369a393fadb2d94cf2bf Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Thu, 2 Nov 2023 11:35:55 +0800 Subject: [PATCH 25/38] retire path_prefix() --- polkadot/node/core/pvf/src/artifacts.rs | 19 ++++++++-------- polkadot/node/core/pvf/src/host.rs | 22 ++++++++++++++----- .../node/core/pvf/src/prepare/worker_intf.rs | 19 +++++----------- 3 files changed, 33 insertions(+), 27 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index 00593dfb788c..2d668b65550f 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -128,11 +128,12 @@ impl ArtifactId { Self::new(pvf.code_hash(), pvf.executor_params().hash()) } - /// Returns the prefix of the expected path to the artifact, which combined with the checksum - /// forms the canonical path to the artifact. - pub(crate) fn path_prefix(&self, cache_path: &Path) -> PathBuf { - let file_name = - format!("{}_{:#x}_{:#x}", ARTIFACT_PREFIX, self.code_hash, self.executor_params_hash); + /// Returns the canonical path to the concluded artifact. + pub(crate) fn path(&self, cache_path: &Path, checksum: &str) -> PathBuf { + let file_name = format!( + "{}_{:#x}_{:#x}_0x{}", + ARTIFACT_PREFIX, self.code_hash, self.executor_params_hash, checksum + ); cache_path.join(file_name) } @@ -502,7 +503,7 @@ mod tests { } #[test] - fn path_prefix() { + fn path() { let dir = Path::new("/test"); let code_hash = "1234567890123456789012345678901234567890123456789012345678901234"; let params_hash = "4321098765432109876543210987654321098765432109876543210987654321"; @@ -511,10 +512,10 @@ mod tests { let code_hash = H256::from_str(code_hash).unwrap(); let params_hash = H256::from_str(params_hash).unwrap(); + let path = ArtifactId::new(code_hash.into(), ExecutorParamsHash::from_hash(params_hash)) + .path(dir, checksum); - let prefix = ArtifactId::new(code_hash.into(), ExecutorParamsHash::from_hash(params_hash)) - .path_prefix(dir); - assert!(format!("/test/{}", file_name).starts_with(prefix.to_str().unwrap())); + assert_eq!(path.to_str().unwrap(), format!("/test/{}", file_name)); } #[tokio::test] diff --git a/polkadot/node/core/pvf/src/host.rs b/polkadot/node/core/pvf/src/host.rs index 32b3c36ffa86..dd209d6fe9b2 100644 --- a/polkadot/node/core/pvf/src/host.rs +++ b/polkadot/node/core/pvf/src/host.rs @@ -996,13 +996,25 @@ pub(crate) mod tests { } /// Creates a new PVF which artifact id can be uniquely identified by the given number. - fn artifact_id(descriminator: u32) -> ArtifactId { - ArtifactId::from_pvf_prep_data(&PvfPrepData::from_discriminator(descriminator)) + fn artifact_id(discriminator: u32) -> ArtifactId { + ArtifactId::from_pvf_prep_data(&PvfPrepData::from_discriminator(discriminator)) } - fn artifact_path(descriminator: u32) -> PathBuf { - artifact_id(descriminator) - .path_prefix(&PathBuf::from(std::env::temp_dir())) + fn artifact_path(discriminator: u32) -> PathBuf { + fn to_bytes(mut n: u32) -> Vec { + let mut bytes = vec![0; 4]; + let mut i = 0; + while n > 0 { + bytes[i] = (n & 0xff) as u8; + n >>= 8; + i += 1; + } + bytes + } + + let checksum = blake3::hash(&to_bytes(discriminator)); + artifact_id(discriminator) + .path(&PathBuf::from(std::env::temp_dir()), checksum.to_hex().as_str()) .to_owned() } diff --git a/polkadot/node/core/pvf/src/prepare/worker_intf.rs b/polkadot/node/core/pvf/src/prepare/worker_intf.rs index fcade386f7e4..afed3ff918b7 100644 --- a/polkadot/node/core/pvf/src/prepare/worker_intf.rs +++ b/polkadot/node/core/pvf/src/prepare/worker_intf.rs @@ -141,9 +141,6 @@ pub async fn start_work( let timeout = preparation_timeout * JOB_TIMEOUT_WALL_CLOCK_FACTOR; let result = tokio::time::timeout(timeout, recv_response(&mut stream, pid)).await; - let artifact_id = ArtifactId::from_pvf_prep_data(&pvf); - let artifact_path_prefix = artifact_id.path_prefix(&cache_path); - match result { // Received bytes from worker within the time limit. Ok(Ok(prepare_result)) => @@ -153,7 +150,8 @@ pub async fn start_work( prepare_result, pid, tmp_artifact_file, - artifact_path_prefix, + &pvf, + &cache_path, preparation_timeout, ) .await, @@ -192,7 +190,8 @@ async fn handle_response( result: PrepareWorkerResult, worker_pid: u32, tmp_file: PathBuf, - artifact_path_prefix: PathBuf, + pvf: &PvfPrepData, + cache_path: &PathBuf, preparation_timeout: Duration, ) -> Outcome { let PrepareWorkerSuccess { checksum, stats: PrepareStats { cpu_time_elapsed, memory_stats } } = @@ -216,14 +215,8 @@ async fn handle_response( return Outcome::TimedOut } - // append checksum to prefix to form the path of concluded artifact - let file_name_partial = artifact_path_prefix - .file_name() - .expect("the path should never terminate in '..'"); - let mut file_name = file_name_partial.to_os_string(); - file_name.push("_0x"); - file_name.push(checksum); - let artifact_path = artifact_path_prefix.with_file_name(&file_name); + let artifact_id = ArtifactId::from_pvf_prep_data(pvf); + let artifact_path = artifact_id.path(cache_path, &checksum); gum::debug!( target: LOG_TARGET, From d3254f500ee9d00ef5b6f9ac0b37ef5638854de2 Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Fri, 3 Nov 2023 17:57:15 +0800 Subject: [PATCH 26/38] improve test --- polkadot/node/core/pvf/src/artifacts.rs | 45 ++++++++++++++----------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index 2d668b65550f..4e7fe0738578 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -419,7 +419,7 @@ mod tests { use std::{ fs, path::{Path, PathBuf}, - str::FromStr, + str::FromStr, io::Write, }; fn rand_hash(len: usize) -> String { @@ -435,28 +435,32 @@ mod tests { ) } - fn fake_artifact_path( - dir: impl AsRef, - prefix: &str, - code_hash: impl AsRef, - params_hash: impl AsRef, - ) -> PathBuf { - let mut path = dir.as_ref().to_path_buf(); - let file_name = format!("{}_0x{}_0x{}", prefix, code_hash.as_ref(), params_hash.as_ref(),); - path.push(file_name); - path - } - fn create_artifact( dir: impl AsRef, prefix: &str, code_hash: impl AsRef, params_hash: impl AsRef, ) -> (PathBuf, String) { - let path = fake_artifact_path(dir, prefix, code_hash, params_hash); - fs::File::create(&path).unwrap(); - let bytes = fs::read(&path).unwrap(); - let checksum = blake3::hash(bytes.as_ref()).to_hex().to_string(); + fn artifact_path_without_checksum( + dir: impl AsRef, + prefix: &str, + code_hash: impl AsRef, + params_hash: impl AsRef, + ) -> PathBuf { + let mut path = dir.as_ref().to_path_buf(); + let file_name = format!("{}_0x{}_0x{}", prefix, code_hash.as_ref(), params_hash.as_ref(),); + path.push(file_name); + path + } + + let (code_hash, params_hash) = (code_hash.as_ref(), params_hash.as_ref()); + let path = artifact_path_without_checksum(dir, prefix, code_hash, params_hash); + let mut file = fs::File::create(&path).unwrap(); + + let content = format!("{}{}", code_hash, params_hash).into_bytes(); + file.write_all(&content).unwrap(); + let checksum = blake3::hash(&content).to_hex().to_string(); + (path, checksum) } @@ -529,15 +533,18 @@ mod tests { create_rand_artifact(&cache_dir, ""); create_rand_artifact(&cache_dir, "wasmtime_polkadot_v"); create_rand_artifact(&cache_dir, "wasmtime_polkadot_v1.0.0"); + // no checksum create_rand_artifact(&cache_dir, ARTIFACT_PREFIX); + // invalid hashes let (path, checksum) = create_artifact(&cache_dir, ARTIFACT_PREFIX, "000", "000001"); let new_path = concluded_path(&path, &checksum); fs::rename(&path, &new_path).unwrap(); - // invalid checksum + + // checksum reversed let (path, checksum) = create_rand_artifact(&cache_dir, ARTIFACT_PREFIX); - let new_path = concluded_path(&path, &checksum[1..]); + let new_path = concluded_path(&path, checksum.chars().rev().collect::().as_str()); fs::rename(&path, &new_path).unwrap(); // 1 valid From 39448ff5a80ed088b4e00b6d321b932e40d4d4e1 Mon Sep 17 00:00:00 2001 From: "Marcin S." Date: Sun, 12 Nov 2023 16:12:13 +0100 Subject: [PATCH 27/38] Fix test --- polkadot/node/core/pvf/common/src/error.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/polkadot/node/core/pvf/common/src/error.rs b/polkadot/node/core/pvf/common/src/error.rs index 0b9f3d5da7fa..6279a80f9130 100644 --- a/polkadot/node/core/pvf/common/src/error.rs +++ b/polkadot/node/core/pvf/common/src/error.rs @@ -78,7 +78,7 @@ pub enum PrepareError { ClearWorkerDir(String), } -/// Pre-encoded length-prefixed `PrepareResult::Err(PrepareError::OutOfMemory)` +/// Pre-encoded length-prefixed `PrepareWorkerResult::Err(PrepareError::OutOfMemory)` pub const OOM_PAYLOAD: &[u8] = b"\x02\x00\x00\x00\x00\x00\x00\x00\x01\x08"; impl PrepareError { @@ -175,7 +175,7 @@ impl fmt::Display for InternalValidationError { #[test] fn pre_encoded_payloads() { - let oom_enc = PrepareResult::Err(PrepareError::OutOfMemory).encode(); + let oom_enc = PrepareWorkerResult::Err(PrepareError::OutOfMemory).encode(); let mut oom_payload = oom_enc.len().to_le_bytes().to_vec(); oom_payload.extend(oom_enc); assert_eq!(oom_payload, OOM_PAYLOAD); From 3a2c1cda375a589bae3a82c40e48cbc2d5b60aac Mon Sep 17 00:00:00 2001 From: "Marcin S." Date: Sun, 12 Nov 2023 16:12:59 +0100 Subject: [PATCH 28/38] cargo fmt --- polkadot/node/core/pvf/src/artifacts.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index ae38146e0c67..2b08e94d7788 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -419,8 +419,9 @@ mod tests { use sp_core::H256; use std::{ fs, + io::Write, path::{Path, PathBuf}, - str::FromStr, io::Write, + str::FromStr, }; fn rand_hash(len: usize) -> String { @@ -449,7 +450,8 @@ mod tests { params_hash: impl AsRef, ) -> PathBuf { let mut path = dir.as_ref().to_path_buf(); - let file_name = format!("{}_0x{}_0x{}", prefix, code_hash.as_ref(), params_hash.as_ref(),); + let file_name = + format!("{}_0x{}_0x{}", prefix, code_hash.as_ref(), params_hash.as_ref(),); path.push(file_name); path } From a02cb0612a0d36e929e477ddd4f34ef3d5234c71 Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Mon, 13 Nov 2023 13:14:45 +0800 Subject: [PATCH 29/38] as per advices --- Cargo.lock | 18 ++++++++-------- .../node/core/candidate-validation/src/lib.rs | 6 +++--- polkadot/node/core/pvf/common/src/error.rs | 4 ++-- polkadot/node/core/pvf/common/src/pvf.rs | 2 +- polkadot/node/core/pvf/src/host.rs | 21 ++++++------------- .../node/core/pvf/src/prepare/worker_intf.rs | 12 +++++++++-- polkadot/node/core/pvf/tests/it/main.rs | 2 +- 7 files changed, 32 insertions(+), 33 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 216b5e21c58f..bd04bf62119f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17460,7 +17460,7 @@ dependencies = [ [[package]] name = "sp-crypto-ec-utils" version = "0.4.1" -source = "git+https://github.com/paritytech/polkadot-sdk#fe9435db2fda7c9e2f4e29521564c72cac38f59b" +source = "git+https://github.com/paritytech/polkadot-sdk#0c5dcca9e3cef6b2f456fccefd9f6c5e43444053" dependencies = [ "ark-bls12-377", "ark-bls12-377-ext", @@ -17498,7 +17498,7 @@ dependencies = [ [[package]] name = "sp-debug-derive" version = "8.0.0" -source = "git+https://github.com/paritytech/polkadot-sdk#fe9435db2fda7c9e2f4e29521564c72cac38f59b" +source = "git+https://github.com/paritytech/polkadot-sdk#0c5dcca9e3cef6b2f456fccefd9f6c5e43444053" dependencies = [ "proc-macro2", "quote", @@ -17518,7 +17518,7 @@ dependencies = [ [[package]] name = "sp-externalities" version = "0.19.0" -source = "git+https://github.com/paritytech/polkadot-sdk#fe9435db2fda7c9e2f4e29521564c72cac38f59b" +source = "git+https://github.com/paritytech/polkadot-sdk#0c5dcca9e3cef6b2f456fccefd9f6c5e43444053" dependencies = [ "environmental", "parity-scale-codec", @@ -17750,7 +17750,7 @@ dependencies = [ [[package]] name = "sp-runtime-interface" version = "17.0.0" -source = "git+https://github.com/paritytech/polkadot-sdk#fe9435db2fda7c9e2f4e29521564c72cac38f59b" +source = "git+https://github.com/paritytech/polkadot-sdk#0c5dcca9e3cef6b2f456fccefd9f6c5e43444053" dependencies = [ "bytes", "impl-trait-for-tuples", @@ -17779,7 +17779,7 @@ dependencies = [ [[package]] name = "sp-runtime-interface-proc-macro" version = "11.0.0" -source = "git+https://github.com/paritytech/polkadot-sdk#fe9435db2fda7c9e2f4e29521564c72cac38f59b" +source = "git+https://github.com/paritytech/polkadot-sdk#0c5dcca9e3cef6b2f456fccefd9f6c5e43444053" dependencies = [ "Inflector", "proc-macro-crate", @@ -17907,7 +17907,7 @@ version = "8.0.0" [[package]] name = "sp-std" version = "8.0.0" -source = "git+https://github.com/paritytech/polkadot-sdk#fe9435db2fda7c9e2f4e29521564c72cac38f59b" +source = "git+https://github.com/paritytech/polkadot-sdk#0c5dcca9e3cef6b2f456fccefd9f6c5e43444053" [[package]] name = "sp-storage" @@ -17924,7 +17924,7 @@ dependencies = [ [[package]] name = "sp-storage" version = "13.0.0" -source = "git+https://github.com/paritytech/polkadot-sdk#fe9435db2fda7c9e2f4e29521564c72cac38f59b" +source = "git+https://github.com/paritytech/polkadot-sdk#0c5dcca9e3cef6b2f456fccefd9f6c5e43444053" dependencies = [ "impl-serde", "parity-scale-codec", @@ -17973,7 +17973,7 @@ dependencies = [ [[package]] name = "sp-tracing" version = "10.0.0" -source = "git+https://github.com/paritytech/polkadot-sdk#fe9435db2fda7c9e2f4e29521564c72cac38f59b" +source = "git+https://github.com/paritytech/polkadot-sdk#0c5dcca9e3cef6b2f456fccefd9f6c5e43444053" dependencies = [ "parity-scale-codec", "sp-std 8.0.0 (git+https://github.com/paritytech/polkadot-sdk)", @@ -18074,7 +18074,7 @@ dependencies = [ [[package]] name = "sp-wasm-interface" version = "14.0.0" -source = "git+https://github.com/paritytech/polkadot-sdk#fe9435db2fda7c9e2f4e29521564c72cac38f59b" +source = "git+https://github.com/paritytech/polkadot-sdk#0c5dcca9e3cef6b2f456fccefd9f6c5e43444053" dependencies = [ "anyhow", "impl-trait-for-tuples", diff --git a/polkadot/node/core/candidate-validation/src/lib.rs b/polkadot/node/core/candidate-validation/src/lib.rs index 89ea02728840..b0fc7ca1d2a6 100644 --- a/polkadot/node/core/candidate-validation/src/lib.rs +++ b/polkadot/node/core/candidate-validation/src/lib.rs @@ -25,7 +25,7 @@ use polkadot_node_core_pvf::{ InternalValidationError, InvalidCandidate as WasmInvalidCandidate, PrepareError, - PrepareJobKind, PrepareStats, PvfPrepData, ValidationError, ValidationHost, + PrepareJobKind, PvfPrepData, ValidationError, ValidationHost, }; use polkadot_node_primitives::{ BlockData, InvalidCandidate, PoV, ValidationResult, POV_BOMB_LIMIT, VALIDATION_CODE_BOMB_LIMIT, @@ -788,7 +788,7 @@ trait ValidationBackend { validation_result } - async fn precheck_pvf(&mut self, pvf: PvfPrepData) -> Result; + async fn precheck_pvf(&mut self, pvf: PvfPrepData) -> Result<(), PrepareError>; } #[async_trait] @@ -818,7 +818,7 @@ impl ValidationBackend for ValidationHost { })? } - async fn precheck_pvf(&mut self, pvf: PvfPrepData) -> Result { + async fn precheck_pvf(&mut self, pvf: PvfPrepData) -> Result<(), PrepareError> { let (tx, rx) = oneshot::channel(); if let Err(err) = self.precheck_pvf(pvf, tx).await { // Return an IO error if there was an error communicating with the host. diff --git a/polkadot/node/core/pvf/common/src/error.rs b/polkadot/node/core/pvf/common/src/error.rs index 6279a80f9130..899bfda79a7e 100644 --- a/polkadot/node/core/pvf/common/src/error.rs +++ b/polkadot/node/core/pvf/common/src/error.rs @@ -14,7 +14,7 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use crate::prepare::{PrepareStats, PrepareSuccess, PrepareWorkerSuccess}; +use crate::prepare::{PrepareSuccess, PrepareWorkerSuccess}; use parity_scale_codec::{Decode, Encode}; use std::fmt; @@ -28,7 +28,7 @@ pub type PrepareResult = Result; /// Result of prechecking PVF performed by the validation host. Contains stats about the preparation /// if successful. -pub type PrecheckResult = Result; +pub type PrecheckResult = Result<(), PrepareError>; /// An error that occurred during the prepare part of the PVF pipeline. // Codec indexes are intended to stabilize pre-encoded payloads (see `OOM_PAYLOAD` below) diff --git a/polkadot/node/core/pvf/common/src/pvf.rs b/polkadot/node/core/pvf/common/src/pvf.rs index 0cc86434c195..af3328997829 100644 --- a/polkadot/node/core/pvf/common/src/pvf.rs +++ b/polkadot/node/core/pvf/common/src/pvf.rs @@ -115,7 +115,7 @@ impl fmt::Debug for PvfPrepData { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, - "Pvf {{ code, code_hash: {:?}, executor_params: {:?}, prep_timeout: {:?} }}", + "Pvf {{ code_hash: {:?}, executor_params: {:?}, prep_timeout: {:?} }}", self.code_hash, self.executor_params, self.prep_timeout ) } diff --git a/polkadot/node/core/pvf/src/host.rs b/polkadot/node/core/pvf/src/host.rs index 9dbcdd34d917..b031357ca5e0 100644 --- a/polkadot/node/core/pvf/src/host.rs +++ b/polkadot/node/core/pvf/src/host.rs @@ -458,9 +458,9 @@ async fn handle_precheck_pvf( if let Some(state) = artifacts.artifact_state_mut(&artifact_id) { match state { - ArtifactState::Prepared { last_time_needed, prepare_stats, .. } => { + ArtifactState::Prepared { last_time_needed, .. } => { *last_time_needed = SystemTime::now(); - let _ = result_sender.send(Ok(prepare_stats.clone())); + let _ = result_sender.send(Ok(())); }, ArtifactState::Preparing { waiting_for_response, num_failures: _ } => waiting_for_response.push(result_sender), @@ -714,7 +714,7 @@ async fn handle_prepare_done( state { for result_sender in waiting_for_response.drain(..) { - let result = result.clone().map(|success| success.stats); + let result = result.clone().map(|_| ()); let _ = result_sender.send(result); } num_failures @@ -891,6 +891,7 @@ pub(crate) mod tests { error::PrepareError, prepare::{PrepareStats, PrepareSuccess}, }; + use sp_core::hexdisplay::AsBytesRef; const TEST_EXECUTION_TIMEOUT: Duration = Duration::from_secs(3); pub(crate) const TEST_PREPARATION_TIMEOUT: Duration = Duration::from_secs(30); @@ -915,18 +916,8 @@ pub(crate) mod tests { } fn artifact_path(discriminator: u32) -> PathBuf { - fn to_bytes(mut n: u32) -> Vec { - let mut bytes = vec![0; 4]; - let mut i = 0; - while n > 0 { - bytes[i] = (n & 0xff) as u8; - n >>= 8; - i += 1; - } - bytes - } - - let checksum = blake3::hash(&to_bytes(discriminator)); + let pvf = PvfPrepData::from_discriminator(discriminator); + let checksum = blake3::hash(pvf.code().as_bytes_ref()); artifact_id(discriminator) .path(&PathBuf::from(std::env::temp_dir()), checksum.to_hex().as_str()) .to_owned() diff --git a/polkadot/node/core/pvf/src/prepare/worker_intf.rs b/polkadot/node/core/pvf/src/prepare/worker_intf.rs index 7c45072afbb4..d0aa3332939d 100644 --- a/polkadot/node/core/pvf/src/prepare/worker_intf.rs +++ b/polkadot/node/core/pvf/src/prepare/worker_intf.rs @@ -116,6 +116,14 @@ pub async fn start_work( ) -> Outcome { let IdleWorker { stream, pid, worker_dir } = worker; + gum::debug!( + target: LOG_TARGET, + worker_pid = %pid, + ?worker_dir, + "starting prepare for {:?}", + pvf, + ); + with_worker_dir_setup( worker_dir, stream, @@ -148,7 +156,7 @@ pub async fn start_work( match result { // Received bytes from worker within the time limit. - Ok(Ok(prepare_result)) => { + Ok(Ok(prepare_worker_result)) => { // Check if any syscall violations occurred during the job. For now this is only // informative, as we are not enforcing the seccomp policy yet. for syscall in security::check_seccomp_violations_for_worker(audit_log_file, pid).await { @@ -164,7 +172,7 @@ pub async fn start_work( handle_response( metrics, IdleWorker { stream, pid, worker_dir }, - prepare_result, + prepare_worker_result, pid, tmp_artifact_file, &pvf, diff --git a/polkadot/node/core/pvf/tests/it/main.rs b/polkadot/node/core/pvf/tests/it/main.rs index f4fd7f802f5e..080ee0729c71 100644 --- a/polkadot/node/core/pvf/tests/it/main.rs +++ b/polkadot/node/core/pvf/tests/it/main.rs @@ -70,7 +70,7 @@ impl TestHost { &self, code: &[u8], executor_params: ExecutorParams, - ) -> Result { + ) -> Result<(), PrepareError> { let (result_tx, result_rx) = futures::channel::oneshot::channel(); let code = sp_maybe_compressed_blob::decompress(code, 16 * 1024 * 1024) From 53e4557ef7d95cf116fe8da4b06f79830b7b17af Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Mon, 13 Nov 2023 20:11:30 +0800 Subject: [PATCH 30/38] tag artifact with runtime version --- .../benches/host_prepare_rococo_runtime.rs | 2 +- polkadot/node/core/pvf/common/Cargo.toml | 1 + polkadot/node/core/pvf/common/build.rs | 28 +++++++++++++++++++ polkadot/node/core/pvf/common/src/lib.rs | 2 ++ polkadot/node/core/pvf/src/artifacts.rs | 21 ++++++++------ polkadot/node/core/pvf/tests/it/main.rs | 3 +- polkadot/xcm/procedural/tests/ui.rs | 2 +- substrate/client/chain-spec/src/chain_spec.rs | 4 +-- 8 files changed, 47 insertions(+), 16 deletions(-) create mode 100644 polkadot/node/core/pvf/common/build.rs diff --git a/polkadot/node/core/pvf/benches/host_prepare_rococo_runtime.rs b/polkadot/node/core/pvf/benches/host_prepare_rococo_runtime.rs index acd80526262c..2255067408fa 100644 --- a/polkadot/node/core/pvf/benches/host_prepare_rococo_runtime.rs +++ b/polkadot/node/core/pvf/benches/host_prepare_rococo_runtime.rs @@ -56,7 +56,7 @@ impl TestHost { &self, code: &[u8], executor_params: ExecutorParams, - ) -> Result { + ) -> Result<(), PrepareError> { let (result_tx, result_rx) = futures::channel::oneshot::channel(); let code = sp_maybe_compressed_blob::decompress(code, 16 * 1024 * 1024) diff --git a/polkadot/node/core/pvf/common/Cargo.toml b/polkadot/node/core/pvf/common/Cargo.toml index 7dc8d307026e..4445def943c2 100644 --- a/polkadot/node/core/pvf/common/Cargo.toml +++ b/polkadot/node/core/pvf/common/Cargo.toml @@ -2,6 +2,7 @@ name = "polkadot-node-core-pvf-common" description = "Polkadot crate that contains functionality related to PVFs that is shared by the PVF host and the PVF workers." version = "1.0.0" +build = "build.rs" authors.workspace = true edition.workspace = true license.workspace = true diff --git a/polkadot/node/core/pvf/common/build.rs b/polkadot/node/core/pvf/common/build.rs new file mode 100644 index 000000000000..797987e1660e --- /dev/null +++ b/polkadot/node/core/pvf/common/build.rs @@ -0,0 +1,28 @@ +fn main() { + get_wasmtime_version(); +} + +pub fn get_wasmtime_version() { + // we only care about the root of the tree + match std::process::Command::new("cargo") + .args(&["tree", "--package=wasmtime", "--depth=0"]) + .output() + { + Ok(out) if out.status.success() => { + // wasmtime vX.X.X + let version = String::from_utf8_lossy(&out.stdout); + if let Some(version) = version.strip_prefix("wasmtime v") { + println!("cargo:rustc-env=SUBSTRATE_WASMTIME_VERSION={}", version); + } else { + println!("cargo:warning=build.rs: unexpected result {}", version); + } + }, + Ok(out) => println!( + "cargo:warning=build.rs: `cargo tree` {}", + String::from_utf8_lossy(&out.stderr), + ), + Err(err) => { + println!("cargo:warning=build.rs: Could not run `cargo tree`: {}", err); + }, + } +} diff --git a/polkadot/node/core/pvf/common/src/lib.rs b/polkadot/node/core/pvf/common/src/lib.rs index e2211b97d87b..282d2f7c41d0 100644 --- a/polkadot/node/core/pvf/common/src/lib.rs +++ b/polkadot/node/core/pvf/common/src/lib.rs @@ -31,6 +31,8 @@ pub use sp_tracing; const LOG_TARGET: &str = "parachain::pvf-common"; +pub const RUNTIME_VERSION: &str = env!("SUBSTRATE_WASMTIME_VERSION"); + use std::{ io::{self, Read, Write}, mem, diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index 2b08e94d7788..575c79c7b2b4 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -58,7 +58,9 @@ use crate::{host::PrecheckResultSender, LOG_TARGET}; use always_assert::always; use polkadot_core_primitives::Hash; -use polkadot_node_core_pvf_common::{error::PrepareError, prepare::PrepareStats, pvf::PvfPrepData}; +use polkadot_node_core_pvf_common::{ + error::PrepareError, prepare::PrepareStats, pvf::PvfPrepData, RUNTIME_VERSION, +}; use polkadot_node_primitives::NODE_VERSION; use polkadot_parachain_primitives::primitives::ValidationCodeHash; use polkadot_primitives::ExecutorParamsHash; @@ -106,9 +108,10 @@ macro_rules! concat_const { }} } -const RUNTIME_PREFIX: &str = "wasmtime_"; +const RUNTIME_PREFIX: &str = "wasmtime_v"; const NODE_PREFIX: &str = "polkadot_v"; -const ARTIFACT_PREFIX: &str = concat_const!(RUNTIME_PREFIX, NODE_PREFIX, NODE_VERSION); +const ARTIFACT_PREFIX: &str = + concat_const!(RUNTIME_PREFIX, RUNTIME_VERSION, "_", NODE_PREFIX, NODE_VERSION); /// Identifier of an artifact. Encodes a code hash of the PVF and a hash of executor parameter set. #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] @@ -413,7 +416,7 @@ impl Artifacts { #[cfg(test)] mod tests { - use super::{ArtifactId, Artifacts, ARTIFACT_PREFIX, NODE_VERSION}; + use super::{ArtifactId, Artifacts, ARTIFACT_PREFIX, NODE_VERSION, RUNTIME_VERSION}; use polkadot_primitives::ExecutorParamsHash; use rand::Rng; use sp_core::H256; @@ -431,10 +434,7 @@ mod tests { } fn file_name(code_hash: &str, param_hash: &str, checksum: &str) -> String { - format!( - "wasmtime_polkadot_v{}_0x{}_0x{}_0x{}", - NODE_VERSION, code_hash, param_hash, checksum - ) + format!("{}_0x{}_0x{}_0x{}", ARTIFACT_PREFIX, code_hash, param_hash, checksum) } fn create_artifact( @@ -481,7 +481,10 @@ mod tests { #[test] fn artifact_prefix() { - assert_eq!(ARTIFACT_PREFIX, format!("wasmtime_polkadot_v{}", NODE_VERSION),) + assert_eq!( + ARTIFACT_PREFIX, + format!("wasmtime_v{}_polkadot_v{}", RUNTIME_VERSION, NODE_VERSION) + ); } #[test] diff --git a/polkadot/node/core/pvf/tests/it/main.rs b/polkadot/node/core/pvf/tests/it/main.rs index 080ee0729c71..1543f32b6cd0 100644 --- a/polkadot/node/core/pvf/tests/it/main.rs +++ b/polkadot/node/core/pvf/tests/it/main.rs @@ -20,8 +20,7 @@ use assert_matches::assert_matches; use parity_scale_codec::Encode as _; use polkadot_node_core_pvf::{ start, testing::get_and_check_worker_paths, Config, InvalidCandidate, Metrics, PrepareError, - PrepareJobKind, PrepareStats, PvfPrepData, ValidationError, ValidationHost, - JOB_TIMEOUT_WALL_CLOCK_FACTOR, + PrepareJobKind, PvfPrepData, ValidationError, ValidationHost, JOB_TIMEOUT_WALL_CLOCK_FACTOR, }; use polkadot_parachain_primitives::primitives::{BlockData, ValidationParams, ValidationResult}; use polkadot_primitives::{ExecutorParam, ExecutorParams}; diff --git a/polkadot/xcm/procedural/tests/ui.rs b/polkadot/xcm/procedural/tests/ui.rs index a6ec35d0862a..fef7c95c6142 100644 --- a/polkadot/xcm/procedural/tests/ui.rs +++ b/polkadot/xcm/procedural/tests/ui.rs @@ -21,7 +21,7 @@ fn ui() { // Only run the ui tests when `RUN_UI_TESTS` is set. if std::env::var("RUN_UI_TESTS").is_err() { - return; + return } // As trybuild is using `cargo check`, we don't need the real WASM binaries. diff --git a/substrate/client/chain-spec/src/chain_spec.rs b/substrate/client/chain-spec/src/chain_spec.rs index 8d97d9410229..fe8fcfda216e 100644 --- a/substrate/client/chain-spec/src/chain_spec.rs +++ b/substrate/client/chain-spec/src/chain_spec.rs @@ -784,9 +784,7 @@ fn json_eval_value_at_key( path: &mut VecDeque<&str>, fun: &dyn Fn(&json::Value) -> bool, ) -> bool { - let Some(key) = path.pop_front() else { - return false; - }; + let Some(key) = path.pop_front() else { return false }; if path.is_empty() { doc.as_object().map_or(false, |o| o.get(key).map_or(false, |v| fun(v))) From d4f30837362d22e678a2a9c62798432632a394b5 Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Tue, 14 Nov 2023 00:27:22 +0800 Subject: [PATCH 31/38] fix tests --- polkadot/node/core/candidate-validation/src/tests.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/polkadot/node/core/candidate-validation/src/tests.rs b/polkadot/node/core/candidate-validation/src/tests.rs index af530a20c4e0..5f443fbc170c 100644 --- a/polkadot/node/core/candidate-validation/src/tests.rs +++ b/polkadot/node/core/candidate-validation/src/tests.rs @@ -377,7 +377,7 @@ impl ValidationBackend for MockValidateCandidateBackend { result } - async fn precheck_pvf(&mut self, _pvf: PvfPrepData) -> Result { + async fn precheck_pvf(&mut self, _pvf: PvfPrepData) -> Result<(), PrepareError> { unreachable!() } } @@ -1012,11 +1012,11 @@ fn pov_decompression_failure_is_invalid() { } struct MockPreCheckBackend { - result: Result, + result: Result<(), PrepareError>, } impl MockPreCheckBackend { - fn with_hardcoded_result(result: Result) -> Self { + fn with_hardcoded_result(result: Result<(), PrepareError>) -> Self { Self { result } } } @@ -1032,7 +1032,7 @@ impl ValidationBackend for MockPreCheckBackend { unreachable!() } - async fn precheck_pvf(&mut self, _pvf: PvfPrepData) -> Result { + async fn precheck_pvf(&mut self, _pvf: PvfPrepData) -> Result<(), PrepareError> { self.result.clone() } } @@ -1049,7 +1049,7 @@ fn precheck_works() { let (check_fut, check_result) = precheck_pvf( ctx.sender(), - MockPreCheckBackend::with_hardcoded_result(Ok(PrepareStats::default())), + MockPreCheckBackend::with_hardcoded_result(Ok(())), relay_parent, validation_code_hash, ) @@ -1111,7 +1111,7 @@ fn precheck_invalid_pvf_blob_compression() { let (check_fut, check_result) = precheck_pvf( ctx.sender(), - MockPreCheckBackend::with_hardcoded_result(Ok(PrepareStats::default())), + MockPreCheckBackend::with_hardcoded_result(Ok(())), relay_parent, validation_code_hash, ) From 931aae1e309acdf379ca553ca41f20a8072d710e Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Wed, 15 Nov 2023 11:38:35 +0800 Subject: [PATCH 32/38] upstream build fn to substrate --- Cargo.lock | 1 + polkadot/node/core/pvf/common/Cargo.toml | 4 +- polkadot/node/core/pvf/common/build.rs | 44 ++++++++----------- polkadot/node/core/pvf/common/src/pvf.rs | 2 +- polkadot/node/core/pvf/src/artifacts.rs | 2 +- .../utils/build-script-utils/src/version.rs | 31 +++++++++++++ 6 files changed, 55 insertions(+), 29 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c84afd36a92e..b98836bb1138 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12536,6 +12536,7 @@ dependencies = [ "sp-externalities 0.19.0", "sp-io", "sp-tracing 10.0.0", + "substrate-build-script-utils", "tempfile", "thiserror", "tracing-gum", diff --git a/polkadot/node/core/pvf/common/Cargo.toml b/polkadot/node/core/pvf/common/Cargo.toml index 720b4311aa89..bfe1be9156fc 100644 --- a/polkadot/node/core/pvf/common/Cargo.toml +++ b/polkadot/node/core/pvf/common/Cargo.toml @@ -2,7 +2,6 @@ name = "polkadot-node-core-pvf-common" description = "Polkadot crate that contains functionality related to PVFs that is shared by the PVF host and the PVF workers." version = "1.0.0" -build = "build.rs" authors.workspace = true edition.workspace = true license.workspace = true @@ -37,6 +36,9 @@ seccompiler = "0.4.0" assert_matches = "1.4.0" tempfile = "3.3.0" +[build-dependencies] +substrate-build-script-utils = { path = "../../../../../substrate/utils/build-script-utils" } + [features] # This feature is used to export test code to other crates without putting it in the production build. test-utils = [] diff --git a/polkadot/node/core/pvf/common/build.rs b/polkadot/node/core/pvf/common/build.rs index 797987e1660e..fc4ca398eec8 100644 --- a/polkadot/node/core/pvf/common/build.rs +++ b/polkadot/node/core/pvf/common/build.rs @@ -1,28 +1,20 @@ -fn main() { - get_wasmtime_version(); -} +// This file is part of Substrate. + +// Copyright (C) Parity Technologies (UK) Ltd. +// SPDX-License-Identifier: Apache-2.0 -pub fn get_wasmtime_version() { - // we only care about the root of the tree - match std::process::Command::new("cargo") - .args(&["tree", "--package=wasmtime", "--depth=0"]) - .output() - { - Ok(out) if out.status.success() => { - // wasmtime vX.X.X - let version = String::from_utf8_lossy(&out.stdout); - if let Some(version) = version.strip_prefix("wasmtime v") { - println!("cargo:rustc-env=SUBSTRATE_WASMTIME_VERSION={}", version); - } else { - println!("cargo:warning=build.rs: unexpected result {}", version); - } - }, - Ok(out) => println!( - "cargo:warning=build.rs: `cargo tree` {}", - String::from_utf8_lossy(&out.stderr), - ), - Err(err) => { - println!("cargo:warning=build.rs: Could not run `cargo tree`: {}", err); - }, - } +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +fn main() { + substrate_build_script_utils::generate_wasmtime_version(); } diff --git a/polkadot/node/core/pvf/common/src/pvf.rs b/polkadot/node/core/pvf/common/src/pvf.rs index af3328997829..2d8f6430187b 100644 --- a/polkadot/node/core/pvf/common/src/pvf.rs +++ b/polkadot/node/core/pvf/common/src/pvf.rs @@ -115,7 +115,7 @@ impl fmt::Debug for PvfPrepData { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, - "Pvf {{ code_hash: {:?}, executor_params: {:?}, prep_timeout: {:?} }}", + "Pvf {{ code: [...], code_hash: {:?}, executor_params: {:?}, prep_timeout: {:?} }}", self.code_hash, self.executor_params, self.prep_timeout ) } diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index 575c79c7b2b4..1de43055c382 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -538,7 +538,7 @@ mod tests { // invalid prefix create_rand_artifact(&cache_dir, ""); create_rand_artifact(&cache_dir, "wasmtime_polkadot_v"); - create_rand_artifact(&cache_dir, "wasmtime_polkadot_v1.0.0"); + create_rand_artifact(&cache_dir, "wasmtime_v8.0.0_polkadot_v1.0.0"); // no checksum create_rand_artifact(&cache_dir, ARTIFACT_PREFIX); diff --git a/substrate/utils/build-script-utils/src/version.rs b/substrate/utils/build-script-utils/src/version.rs index f6a9ff9554ab..6bc3204506ae 100644 --- a/substrate/utils/build-script-utils/src/version.rs +++ b/substrate/utils/build-script-utils/src/version.rs @@ -59,3 +59,34 @@ fn get_version(impl_commit: &str) -> String { impl_commit ) } + +/// Generate `SUBSTRATE_WASMTIME_VERSION` +pub fn generate_wasmtime_version() { + generate_dependency_version("wasmtime", "SUBSTRATE_WASMTIME_VERSION"); +} + +fn generate_dependency_version(dep: &str, env_var: &str) { + // we only care about the root + match std::process::Command::new("cargo") + .args(["tree", "--depth=0", "--package", dep]) + .output() + { + Ok(output) if output.status.success() => { + let version = String::from_utf8_lossy(&output.stdout); + + // vX.X.X + if let Some(ver) = version.strip_prefix(&format!("{} v", dep)) { + println!("cargo:rustc-env={}={}", env_var, ver); + } else { + println!("cargo:warning=Unexpected result {}", version); + } + }, + + // command errors out when could not find the given dependency + // or when having multiple versions of it + Ok(output) => + println!("cargo:warning=`cargo tree` {}", String::from_utf8_lossy(&output.stderr)), + + Err(err) => println!("cargo:warning=Could not run `cargo tree`: {}", err), + } +} From 5de4e8eb789236d193caaaa6e396ec0d467ee7e5 Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Wed, 15 Nov 2023 11:48:46 +0800 Subject: [PATCH 33/38] glitch --- polkadot/node/core/pvf/tests/it/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polkadot/node/core/pvf/tests/it/main.rs b/polkadot/node/core/pvf/tests/it/main.rs index 16e13e6e8664..d6bfdfb9a6a3 100644 --- a/polkadot/node/core/pvf/tests/it/main.rs +++ b/polkadot/node/core/pvf/tests/it/main.rs @@ -20,7 +20,7 @@ use assert_matches::assert_matches; use parity_scale_codec::Encode as _; use polkadot_node_core_pvf::{ start, testing::build_workers_and_get_paths, Config, InvalidCandidate, Metrics, PrepareError, - PrepareJobKind, PrepareStats, PvfPrepData, ValidationError, ValidationHost, + PrepareJobKind, PvfPrepData, ValidationError, ValidationHost, JOB_TIMEOUT_WALL_CLOCK_FACTOR, }; use polkadot_parachain_primitives::primitives::{BlockData, ValidationParams, ValidationResult}; From a0b71c5bef1dcd05c11e1d27face9aea98ad6bee Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Wed, 15 Nov 2023 13:55:29 +0800 Subject: [PATCH 34/38] wrong attribution --- polkadot/node/core/pvf/common/build.rs | 27 +++++++++++++------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/polkadot/node/core/pvf/common/build.rs b/polkadot/node/core/pvf/common/build.rs index fc4ca398eec8..5531ad411da8 100644 --- a/polkadot/node/core/pvf/common/build.rs +++ b/polkadot/node/core/pvf/common/build.rs @@ -1,19 +1,18 @@ -// This file is part of Substrate. - // Copyright (C) Parity Technologies (UK) Ltd. -// SPDX-License-Identifier: Apache-2.0 +// This file is part of Polkadot. + +// Polkadot is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Polkadot is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// You should have received a copy of the GNU General Public License +// along with Polkadot. If not, see . fn main() { substrate_build_script_utils::generate_wasmtime_version(); From c85adfef64211260ae83edf457d662836c2b0983 Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Fri, 17 Nov 2023 15:50:43 +0800 Subject: [PATCH 35/38] as per suggestions --- polkadot/node/core/pvf/src/artifacts.rs | 90 ++++++------------- polkadot/node/core/pvf/tests/it/main.rs | 3 +- polkadot/xcm/procedural/tests/ui.rs | 2 +- substrate/client/chain-spec/src/chain_spec.rs | 4 +- 4 files changed, 34 insertions(+), 65 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index 1de43055c382..1006f279c09f 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -71,47 +71,12 @@ use std::{ time::{Duration, SystemTime}, }; -// A workaround for defining a `const` that is a concatenation of other constants. -macro_rules! concat_const { - ($($arg:expr),*) => {{ - // ensure inputs to be strings - $(const _: &str = $arg;)* - - const LEN: usize = 0 $(+ $arg.len())*; - - // concatenate strings as byte slices - const CAT: [u8; LEN] = { - let mut cat = [0u8; LEN]; - let mut _offset = 0; - - $({ - const BYTES: &[u8] = $arg.as_bytes(); - - let mut i = 0; - let len = BYTES.len(); - while i < len { - cat[_offset + i] = BYTES[i]; - i += 1; - } - _offset += len; - })* - - cat - }; - - // The concatenation of two string slices is guaranteed to be valid UTF8, - // so are the byte slices as they have the same memory layout. - match std::str::from_utf8(&CAT) { - Ok(s) => s, - Err(_) => panic!("Error converting bytes to str"), - } - }} -} - const RUNTIME_PREFIX: &str = "wasmtime_v"; const NODE_PREFIX: &str = "polkadot_v"; -const ARTIFACT_PREFIX: &str = - concat_const!(RUNTIME_PREFIX, RUNTIME_VERSION, "_", NODE_PREFIX, NODE_VERSION); + +fn artifact_prefix() -> String { + format!("{}{}_{}{}", RUNTIME_PREFIX, RUNTIME_VERSION, NODE_PREFIX, NODE_VERSION) +} /// Identifier of an artifact. Encodes a code hash of the PVF and a hash of executor parameter set. #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] @@ -135,7 +100,10 @@ impl ArtifactId { pub(crate) fn path(&self, cache_path: &Path, checksum: &str) -> PathBuf { let file_name = format!( "{}_{:#x}_{:#x}_0x{}", - ARTIFACT_PREFIX, self.code_hash, self.executor_params_hash, checksum + artifact_prefix(), + self.code_hash, + self.executor_params_hash, + checksum ); cache_path.join(file_name) } @@ -144,7 +112,7 @@ impl ArtifactId { /// Return `None` if the given file name is invalid. /// VALID_NAME := _ _ _ fn from_file_name(file_name: &str) -> Option { - let file_name = file_name.strip_prefix(ARTIFACT_PREFIX)?.strip_prefix('_')?; + let file_name = file_name.strip_prefix(&artifact_prefix())?.strip_prefix('_')?; let parts: Vec<&str> = file_name.split('_').collect(); if let [code_hash, param_hash, _checksum] = parts[..] { @@ -240,11 +208,6 @@ impl Artifacts { async fn insert_and_prune(&mut self, cache_path: &Path) { async fn is_corrupted(path: &Path) -> bool { - let file_name = path - .file_name() - .expect("path should never terminate in '..'") - .to_str() - .expect("file name must be valid UTF-8"); let checksum = match tokio::fs::read(path).await { Ok(bytes) => blake3::hash(&bytes), Err(err) => { @@ -258,7 +221,13 @@ impl Artifacts { return true }, }; - !file_name.ends_with(checksum.to_hex().as_str()) + + if let Some(file_name) = path.file_name { + if let Some(file_name) = file_name.to_str() { + return !file_name.ends_with(checksum.to_hex().as_str()) + } + } + false } // Insert the entry into the artifacts table if it is valid. @@ -375,6 +344,9 @@ impl Artifacts { } /// Insert an artifact with the given ID as "prepared". + /// + /// This function should only be used to build the artifact table at startup with valid + /// artifact caches. pub(crate) fn insert_prepared( &mut self, artifact_id: ArtifactId, @@ -416,7 +388,7 @@ impl Artifacts { #[cfg(test)] mod tests { - use super::{ArtifactId, Artifacts, ARTIFACT_PREFIX, NODE_VERSION, RUNTIME_VERSION}; + use super::{artifact_prefix as prefix, ArtifactId, Artifacts, NODE_VERSION, RUNTIME_VERSION}; use polkadot_primitives::ExecutorParamsHash; use rand::Rng; use sp_core::H256; @@ -434,7 +406,7 @@ mod tests { } fn file_name(code_hash: &str, param_hash: &str, checksum: &str) -> String { - format!("{}_0x{}_0x{}_0x{}", ARTIFACT_PREFIX, code_hash, param_hash, checksum) + format!("{}_0x{}_0x{}_0x{}", prefix(), code_hash, param_hash, checksum) } fn create_artifact( @@ -481,10 +453,7 @@ mod tests { #[test] fn artifact_prefix() { - assert_eq!( - ARTIFACT_PREFIX, - format!("wasmtime_v{}_polkadot_v{}", RUNTIME_VERSION, NODE_VERSION) - ); + assert_eq!(prefix(), format!("wasmtime_v{}_polkadot_v{}", RUNTIME_VERSION, NODE_VERSION)); } #[test] @@ -533,29 +502,28 @@ mod tests { let cache_dir = crate::worker_intf::tmppath("test-cache").await.unwrap(); fs::create_dir_all(&cache_dir).unwrap(); - // 6 invalid - // invalid prefix create_rand_artifact(&cache_dir, ""); create_rand_artifact(&cache_dir, "wasmtime_polkadot_v"); create_rand_artifact(&cache_dir, "wasmtime_v8.0.0_polkadot_v1.0.0"); + let prefix = prefix(); + // no checksum - create_rand_artifact(&cache_dir, ARTIFACT_PREFIX); + create_rand_artifact(&cache_dir, &prefix); // invalid hashes - let (path, checksum) = create_artifact(&cache_dir, ARTIFACT_PREFIX, "000", "000001"); + let (path, checksum) = create_artifact(&cache_dir, &prefix, "000", "000001"); let new_path = concluded_path(&path, &checksum); fs::rename(&path, &new_path).unwrap(); // checksum reversed - let (path, checksum) = create_rand_artifact(&cache_dir, ARTIFACT_PREFIX); + let (path, checksum) = create_rand_artifact(&cache_dir, &prefix); let new_path = concluded_path(&path, checksum.chars().rev().collect::().as_str()); fs::rename(&path, &new_path).unwrap(); - // 1 valid - - let (path, checksum) = create_rand_artifact(&cache_dir, ARTIFACT_PREFIX); + // valid + let (path, checksum) = create_rand_artifact(&cache_dir, &prefix); let new_path = concluded_path(&path, &checksum); fs::rename(&path, &new_path).unwrap(); diff --git a/polkadot/node/core/pvf/tests/it/main.rs b/polkadot/node/core/pvf/tests/it/main.rs index d6bfdfb9a6a3..5bdf49cc719e 100644 --- a/polkadot/node/core/pvf/tests/it/main.rs +++ b/polkadot/node/core/pvf/tests/it/main.rs @@ -20,8 +20,7 @@ use assert_matches::assert_matches; use parity_scale_codec::Encode as _; use polkadot_node_core_pvf::{ start, testing::build_workers_and_get_paths, Config, InvalidCandidate, Metrics, PrepareError, - PrepareJobKind, PvfPrepData, ValidationError, ValidationHost, - JOB_TIMEOUT_WALL_CLOCK_FACTOR, + PrepareJobKind, PvfPrepData, ValidationError, ValidationHost, JOB_TIMEOUT_WALL_CLOCK_FACTOR, }; use polkadot_parachain_primitives::primitives::{BlockData, ValidationParams, ValidationResult}; use polkadot_primitives::{ExecutorParam, ExecutorParams}; diff --git a/polkadot/xcm/procedural/tests/ui.rs b/polkadot/xcm/procedural/tests/ui.rs index fef7c95c6142..a6ec35d0862a 100644 --- a/polkadot/xcm/procedural/tests/ui.rs +++ b/polkadot/xcm/procedural/tests/ui.rs @@ -21,7 +21,7 @@ fn ui() { // Only run the ui tests when `RUN_UI_TESTS` is set. if std::env::var("RUN_UI_TESTS").is_err() { - return + return; } // As trybuild is using `cargo check`, we don't need the real WASM binaries. diff --git a/substrate/client/chain-spec/src/chain_spec.rs b/substrate/client/chain-spec/src/chain_spec.rs index fe8fcfda216e..8d97d9410229 100644 --- a/substrate/client/chain-spec/src/chain_spec.rs +++ b/substrate/client/chain-spec/src/chain_spec.rs @@ -784,7 +784,9 @@ fn json_eval_value_at_key( path: &mut VecDeque<&str>, fun: &dyn Fn(&json::Value) -> bool, ) -> bool { - let Some(key) = path.pop_front() else { return false }; + let Some(key) = path.pop_front() else { + return false; + }; if path.is_empty() { doc.as_object().map_or(false, |o| o.get(key).map_or(false, |v| fun(v))) From b897f1cb036e1430b684397211d3385884523f8c Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Fri, 17 Nov 2023 16:09:41 +0800 Subject: [PATCH 36/38] glitch --- polkadot/node/core/pvf/src/artifacts.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index 1006f279c09f..b6c39127d566 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -222,7 +222,7 @@ impl Artifacts { }, }; - if let Some(file_name) = path.file_name { + if let Some(file_name) = path.file_name() { if let Some(file_name) = file_name.to_str() { return !file_name.ends_with(checksum.to_hex().as_str()) } From 89ada31d1b70028044aa23aa7a55289f1d429559 Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Fri, 17 Nov 2023 16:28:07 +0800 Subject: [PATCH 37/38] prevent `cargo tree` from accessing network --- substrate/utils/build-script-utils/src/version.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/substrate/utils/build-script-utils/src/version.rs b/substrate/utils/build-script-utils/src/version.rs index 6bc3204506ae..549e499b1102 100644 --- a/substrate/utils/build-script-utils/src/version.rs +++ b/substrate/utils/build-script-utils/src/version.rs @@ -68,7 +68,7 @@ pub fn generate_wasmtime_version() { fn generate_dependency_version(dep: &str, env_var: &str) { // we only care about the root match std::process::Command::new("cargo") - .args(["tree", "--depth=0", "--package", dep]) + .args(["tree", "--depth=0", "--locked", "--package", dep]) .output() { Ok(output) if output.status.success() => { @@ -82,7 +82,7 @@ fn generate_dependency_version(dep: &str, env_var: &str) { } }, - // command errors out when could not find the given dependency + // command errors out when it could not find the given dependency // or when having multiple versions of it Ok(output) => println!("cargo:warning=`cargo tree` {}", String::from_utf8_lossy(&output.stderr)), From 868426a8505705ea4b7391182b0aec51f4dc498f Mon Sep 17 00:00:00 2001 From: Julian Eager Date: Fri, 17 Nov 2023 18:35:27 +0800 Subject: [PATCH 38/38] glitch --- polkadot/node/core/pvf/src/artifacts.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/polkadot/node/core/pvf/src/artifacts.rs b/polkadot/node/core/pvf/src/artifacts.rs index b6c39127d566..53085eade3cb 100644 --- a/polkadot/node/core/pvf/src/artifacts.rs +++ b/polkadot/node/core/pvf/src/artifacts.rs @@ -227,7 +227,7 @@ impl Artifacts { return !file_name.ends_with(checksum.to_hex().as_str()) } } - false + true } // Insert the entry into the artifacts table if it is valid. @@ -517,7 +517,7 @@ mod tests { let new_path = concluded_path(&path, &checksum); fs::rename(&path, &new_path).unwrap(); - // checksum reversed + // checksum tampered let (path, checksum) = create_rand_artifact(&cache_dir, &prefix); let new_path = concluded_path(&path, checksum.chars().rev().collect::().as_str()); fs::rename(&path, &new_path).unwrap();