Skip to content
This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

PVF: Refactor workers into separate crates, remove host dependency #7253

Merged
merged 15 commits into from
May 25, 2023
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 55 additions & 13 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 5 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ tikv-jemallocator = "0.5.0"

# Crates in our workspace, defined as dependencies so we can pass them feature flags.
polkadot-cli = { path = "cli", features = [ "kusama-native", "westend-native", "rococo-native" ] }
polkadot-node-core-pvf-worker = { path = "node/core/pvf/worker" }
polkadot-node-core-pvf-prepare-worker = { path = "node/core/pvf/prepare-worker" }
polkadot-overseer = { path = "node/overseer" }

[dev-dependencies]
Expand Down Expand Up @@ -81,7 +81,9 @@ members = [
"node/core/parachains-inherent",
"node/core/provisioner",
"node/core/pvf",
"node/core/pvf/worker",
"node/core/pvf/common",
"node/core/pvf/execute-worker",
"node/core/pvf/prepare-worker",
"node/core/pvf-checker",
"node/core/runtime-api",
"node/network/approval-distribution",
Expand Down Expand Up @@ -208,7 +210,7 @@ try-runtime = [ "polkadot-cli/try-runtime" ]
fast-runtime = [ "polkadot-cli/fast-runtime" ]
runtime-metrics = [ "polkadot-cli/runtime-metrics" ]
pyroscope = ["polkadot-cli/pyroscope"]
jemalloc-allocator = ["polkadot-node-core-pvf-worker/jemalloc-allocator", "polkadot-overseer/jemalloc-allocator"]
jemalloc-allocator = ["polkadot-node-core-pvf-prepare-worker/jemalloc-allocator", "polkadot-overseer/jemalloc-allocator"]



Expand Down
6 changes: 4 additions & 2 deletions cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ pyroscope_pprofrs = { version = "0.2", optional = true }

service = { package = "polkadot-service", path = "../node/service", default-features = false, optional = true }
polkadot-client = { path = "../node/client", optional = true }
polkadot-node-core-pvf-worker = { path = "../node/core/pvf/worker", optional = true }
polkadot-node-core-pvf-execute-worker = { path = "../node/core/pvf/execute-worker", optional = true }
polkadot-node-core-pvf-prepare-worker = { path = "../node/core/pvf/prepare-worker", optional = true }
polkadot-performance-test = { path = "../node/test/performance-test", optional = true }

sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" }
Expand Down Expand Up @@ -54,7 +55,8 @@ cli = [
"frame-benchmarking-cli",
"try-runtime-cli",
"polkadot-client",
"polkadot-node-core-pvf-worker",
"polkadot-node-core-pvf-execute-worker",
"polkadot-node-core-pvf-prepare-worker",
]
runtime-benchmarks = [
"service/runtime-benchmarks",
Expand Down
4 changes: 2 additions & 2 deletions cli/src/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,7 @@ pub fn run() -> Result<()> {

#[cfg(not(target_os = "android"))]
{
polkadot_node_core_pvf_worker::prepare_worker_entrypoint(
polkadot_node_core_pvf_prepare_worker::worker_entrypoint(
&cmd.socket_path,
Some(&cmd.node_impl_version),
);
Expand All @@ -517,7 +517,7 @@ pub fn run() -> Result<()> {

#[cfg(not(target_os = "android"))]
{
polkadot_node_core_pvf_worker::execute_worker_entrypoint(
polkadot_node_core_pvf_execute_worker::worker_entrypoint(
&cmd.socket_path,
Some(&cmd.node_impl_version),
);
Expand Down
12 changes: 11 additions & 1 deletion node/core/pvf/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ version.workspace = true
authors.workspace = true
edition.workspace = true

[[bin]]
name = "puppet_worker"
path = "bin/puppet_worker.rs"

[dependencies]
always-assert = "0.1"
futures = "0.3.21"
Expand All @@ -13,12 +17,16 @@ libc = "0.2.139"
pin-project = "1.0.9"
rand = "0.8.5"
slotmap = "1.0"
tempfile = "3.3.0"
tokio = { version = "1.24.2", features = ["fs", "process"] }

parity-scale-codec = { version = "3.4.0", default-features = false, features = ["derive"] }

polkadot-parachain = { path = "../../../parachain" }
polkadot-core-primitives = { path = "../../../core-primitives" }
polkadot-node-core-pvf-common = { path = "common" }
polkadot-node-core-pvf-execute-worker = { path = "execute-worker" }
polkadot-node-core-pvf-prepare-worker = { path = "prepare-worker" }
polkadot-node-metrics = { path = "../../metrics" }
polkadot-node-primitives = { path = "../../primitives" }
polkadot-primitives = { path = "../../../primitives" }
Expand All @@ -34,4 +42,6 @@ substrate-build-script-utils = { git = "https://github.com/paritytech/substrate"
[dev-dependencies]
assert_matches = "1.4.0"
hex-literal = "0.3.4"
tempfile = "3.3.0"

adder = { package = "test-parachain-adder", path = "../../../parachain/test-parachains/adder" }
halt = { package = "test-parachain-halt", path = "../../../parachain/test-parachains/halt" }
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@
// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.

polkadot_node_core_pvf_worker::decl_puppet_worker_main!();
polkadot_node_core_pvf::decl_puppet_worker_main!();
26 changes: 26 additions & 0 deletions node/core/pvf/common/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
[package]
name = "polkadot-node-core-pvf-common"
version.workspace = true
authors.workspace = true
edition.workspace = true

[dependencies]
cpu-time = "1.0.0"
futures = "0.3.21"
gum = { package = "tracing-gum", path = "../../../gum" }
libc = "0.2.139"
tokio = { version = "1.24.2", features = ["fs", "process", "io-util"] }

parity-scale-codec = { version = "3.4.0", default-features = false, features = ["derive"] }

polkadot-parachain = { path = "../../../../parachain" }
polkadot-primitives = { path = "../../../../primitives" }

sc-executor-common = { git = "https://github.com/paritytech/substrate", branch = "master" }
sc-executor-wasmtime = { git = "https://github.com/paritytech/substrate", branch = "master" }

sp-core = { git = "https://github.com/paritytech/substrate", branch = "master" }
sp-tracing = { git = "https://github.com/paritytech/substrate", branch = "master" }

[build-dependencies]
substrate-build-script-utils = { git = "https://github.com/paritytech/substrate", branch = "master" }
106 changes: 106 additions & 0 deletions node/core/pvf/common/src/error.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
// Copyright (C) Parity Technologies (UK) Ltd.
// This file is part of Polkadot.

// Polkadot is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.

// Polkadot is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License
// along with Polkadot. If not, see <http://www.gnu.org/licenses/>.

use crate::prepare::PrepareStats;
use parity_scale_codec::{Decode, Encode};
use std::fmt;

/// Result of PVF preparation performed by the validation host. Contains stats about the preparation if
/// successful
pub type PrepareResult = Result<PrepareStats, PrepareError>;

/// An error that occurred during the prepare part of the PVF pipeline.
#[derive(Debug, Clone, Encode, Decode)]
pub enum PrepareError {
/// During the prevalidation stage of preparation an issue was found with the PVF.
Prevalidation(String),
/// Compilation failed for the given PVF.
Preparation(String),
/// An unexpected panic has occurred in the preparation worker.
Panic(String),
/// Failed to prepare the PVF due to the time limit.
TimedOut,
/// An IO error occurred. This state is reported by either the validation host or by the worker.
IoErr(String),
/// The temporary file for the artifact could not be created at the given cache path. This state is reported by the
/// validation host (not by the worker).
CreateTmpFileErr(String),
/// The response from the worker is received, but the file cannot be renamed (moved) to the final destination
/// location. This state is reported by the validation host (not by the worker).
RenameTmpFileErr(String),
}

impl PrepareError {
/// Returns whether this is a deterministic error, i.e. one that should trigger reliably. Those
/// errors depend on the PVF itself and the sc-executor/wasmtime logic.
///
/// Non-deterministic errors can happen spuriously. Typically, they occur due to resource
/// starvation, e.g. under heavy load or memory pressure. Those errors are typically transient
/// but may persist e.g. if the node is run by overwhelmingly underpowered machine.
pub fn is_deterministic(&self) -> bool {
use PrepareError::*;
match self {
Prevalidation(_) | Preparation(_) | Panic(_) => true,
TimedOut | IoErr(_) | CreateTmpFileErr(_) | RenameTmpFileErr(_) => false,
}
}
}

impl fmt::Display for PrepareError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use PrepareError::*;
match self {
Prevalidation(err) => write!(f, "prevalidation: {}", err),
Preparation(err) => write!(f, "preparation: {}", err),
Panic(err) => write!(f, "panic: {}", err),
TimedOut => write!(f, "prepare: timeout"),
IoErr(err) => write!(f, "prepare: io error while receiving response: {}", err),
CreateTmpFileErr(err) => write!(f, "prepare: error creating tmp file: {}", err),
RenameTmpFileErr(err) => write!(f, "prepare: error renaming tmp file: {}", err),
}
}
}

/// Some internal error occurred.
///
/// Should only ever be used for validation errors independent of the candidate and PVF, or for errors we ruled out
/// during pre-checking (so preparation errors are fine).
#[derive(Debug, Clone, Encode, Decode)]
pub enum InternalValidationError {
/// Some communication error occurred with the host.
HostCommunication(String),
/// Could not find or open compiled artifact file.
CouldNotOpenFile(String),
/// An error occurred in the CPU time monitor thread. Should be totally unrelated to validation.
CpuTimeMonitorThread(String),
/// Some non-deterministic preparation error occurred.
NonDeterministicPrepareError(PrepareError),
}

impl fmt::Display for InternalValidationError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use InternalValidationError::*;
match self {
HostCommunication(err) =>
write!(f, "validation: some communication error occurred with the host: {}", err),
CouldNotOpenFile(err) =>
write!(f, "validation: could not find or open compiled artifact file: {}", err),
CpuTimeMonitorThread(err) =>
write!(f, "validation: an error occurred in the CPU time monitor thread: {}", err),
NonDeterministicPrepareError(err) => write!(f, "validation: prepare: {}", err),
}
}
}
Loading