From cacf9d7944c35d1bf1ce262f4836b0fa3a219412 Mon Sep 17 00:00:00 2001 From: Pietro Date: Sat, 1 Jun 2024 09:45:45 +0200 Subject: [PATCH 01/37] Heal all subnets --- rs/cli/src/cli.rs | 8 ++++++++ rs/cli/src/main.rs | 6 ++++++ 2 files changed, 14 insertions(+) diff --git a/rs/cli/src/cli.rs b/rs/cli/src/cli.rs index ed323c2a9..4711fbf42 100644 --- a/rs/cli/src/cli.rs +++ b/rs/cli/src/cli.rs @@ -56,6 +56,14 @@ pub enum Commands { /// Path to the DER file path: String, }, + + Heal { + /// Amount of nodes to be replaced by decentralization optimization + /// algorithm + #[clap(short, long)] + optimize: Option, + }, + /// Manage an existing subnet Subnet(subnet::Cmd), /// Get a value using ic-admin CLI diff --git a/rs/cli/src/main.rs b/rs/cli/src/main.rs index 4c25dd71a..3a45ef48d 100644 --- a/rs/cli/src/main.rs +++ b/rs/cli/src/main.rs @@ -104,6 +104,12 @@ async fn async_main() -> Result<(), anyhow::Error> { Ok(()) } + cli::Commands::Heal{ optimize} => { + let principal = ic_base_types::PrincipalId::new_self_authenticating(&std::fs::read(path)?); + println!("{}", principal); + Ok(()) + } + cli::Commands::Subnet(subnet) => { match &subnet.subcommand { cli::subnet::Commands::Deploy { .. } | cli::subnet::Commands::Resize { .. } => { From 10d4e2a58030ff471c0c4fa6d3ad18b684034b1f Mon Sep 17 00:00:00 2001 From: Pietro Date: Tue, 4 Jun 2024 10:42:50 +0200 Subject: [PATCH 02/37] Fix bazel --- rs/cli/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rs/cli/src/main.rs b/rs/cli/src/main.rs index 98bbb26c7..8671eb169 100644 --- a/rs/cli/src/main.rs +++ b/rs/cli/src/main.rs @@ -104,7 +104,7 @@ async fn async_main() -> Result<(), anyhow::Error> { Ok(()) } - cli::Commands::Heal{ optimize} => { + cli::Commands::Heal { optimize } => { let principal = ic_base_types::PrincipalId::new_self_authenticating(&std::fs::read(path)?); println!("{}", principal); Ok(()) From a018d3817b4b4225b7d4b57ea876ddd72666516c Mon Sep 17 00:00:00 2001 From: Pietro Date: Fri, 7 Jun 2024 17:41:29 +0200 Subject: [PATCH 03/37] Add heal comand and endpoint --- rs/cli/src/cli.rs | 4 ++ rs/cli/src/clients.rs | 11 ++- rs/cli/src/main.rs | 19 +++-- rs/cli/src/runner.rs | 22 ++++++ rs/decentralization/src/lib.rs | 15 ++++ rs/ic-management-backend/src/endpoints/mod.rs | 2 + .../src/endpoints/network.rs | 69 +++++++++++++++++++ rs/ic-management-types/src/requests.rs | 6 ++ 8 files changed, 143 insertions(+), 5 deletions(-) create mode 100644 rs/ic-management-backend/src/endpoints/network.rs diff --git a/rs/cli/src/cli.rs b/rs/cli/src/cli.rs index 7fa80d537..142c9f27f 100644 --- a/rs/cli/src/cli.rs +++ b/rs/cli/src/cli.rs @@ -62,6 +62,10 @@ pub enum Commands { /// algorithm #[clap(short, long)] optimize: Option, + + /// Minimum Nakamoto coefficients after the replacement + #[clap(long, num_args(1..))] + min_nakamoto_coefficients: Vec, }, /// Manage an existing subnet diff --git a/rs/cli/src/clients.rs b/rs/cli/src/clients.rs index 55b0667c7..cc3019f97 100644 --- a/rs/cli/src/clients.rs +++ b/rs/cli/src/clients.rs @@ -1,8 +1,9 @@ use async_trait::async_trait; +use decentralization::HealResponse; use decentralization::SubnetChangeResponse; use ic_base_types::PrincipalId; use ic_management_types::{ - requests::{MembershipReplaceRequest, NodesRemoveRequest, NodesRemoveResponse, SubnetCreateRequest, SubnetResizeRequest}, + requests::{HealRequest, MembershipReplaceRequest, NodesRemoveRequest, NodesRemoveResponse, SubnetCreateRequest, SubnetResizeRequest}, Artifact, Network, NetworkError, Release, TopologyProposal, }; use log::error; @@ -97,6 +98,14 @@ impl DashboardBackendClient { .rest_send() .await } + + pub(crate) async fn network_heal(&self, request: HealRequest) -> anyhow::Result { + reqwest::Client::new() + .post(self.url.join("network/heal").map_err(|e| anyhow::anyhow!(e))?) + .json(&request) + .rest_send() + .await + } } #[async_trait] diff --git a/rs/cli/src/main.rs b/rs/cli/src/main.rs index 8671eb169..16213f794 100644 --- a/rs/cli/src/main.rs +++ b/rs/cli/src/main.rs @@ -104,10 +104,21 @@ async fn async_main() -> Result<(), anyhow::Error> { Ok(()) } - cli::Commands::Heal { optimize } => { - let principal = ic_base_types::PrincipalId::new_self_authenticating(&std::fs::read(path)?); - println!("{}", principal); - Ok(()) + cli::Commands::Heal { + optimize, + min_nakamoto_coefficients, + } => { + let min_nakamoto_coefficients = parse_min_nakamoto_coefficients(&mut cmd, min_nakamoto_coefficients); + runner_instance + .network_heal( + ic_management_types::requests::HealRequest { + optimize: *optimize, + min_nakamoto_coefficients, + }, + cli_opts.verbose, + simulate, + ) + .await } cli::Commands::Subnet(subnet) => { diff --git a/rs/cli/src/runner.rs b/rs/cli/src/runner.rs index 7fbd77e1a..8605e769c 100644 --- a/rs/cli/src/runner.rs +++ b/rs/cli/src/runner.rs @@ -436,4 +436,26 @@ impl Runner { .await?; Ok(()) } + + pub async fn network_heal( + &self, + request: ic_management_types::requests::HealRequest, + _verbose: bool, + simulate: bool, + ) -> Result<(), anyhow::Error> { + let change = self.dashboard_backend_client.network_heal(request).await?; + println!("{}", change); + + let _ = change.subnets_change_response.iter().map(|subnet_change_response| async move { + self.run_membership_change( + subnet_change_response.clone(), + ops_subnet_node_replace::replace_proposal_options(&subnet_change_response).unwrap(), + simulate, + ) + .await + .unwrap(); + }); + + Ok(()) + } } diff --git a/rs/decentralization/src/lib.rs b/rs/decentralization/src/lib.rs index 106b21f0d..bb878947e 100644 --- a/rs/decentralization/src/lib.rs +++ b/rs/decentralization/src/lib.rs @@ -183,3 +183,18 @@ impl Display for SubnetChangeResponse { Ok(()) } } + +#[derive(Clone, Debug, Deserialize, Serialize)] +pub struct HealResponse { + pub subnets_change_response: Vec, +} + +impl Display for HealResponse { + fn fmt(&self, _: &mut Formatter<'_>) -> std::fmt::Result { + self.subnets_change_response.iter().for_each(|subnet_change_response| { + println!("{}", subnet_change_response); + }); + + Ok(()) + } +} diff --git a/rs/ic-management-backend/src/endpoints/mod.rs b/rs/ic-management-backend/src/endpoints/mod.rs index 927873c72..90cf9c02f 100644 --- a/rs/ic-management-backend/src/endpoints/mod.rs +++ b/rs/ic-management-backend/src/endpoints/mod.rs @@ -1,4 +1,5 @@ pub mod governance_canister; +pub mod network; pub mod nodes_ops; pub mod query_decentralization; pub mod release; @@ -86,6 +87,7 @@ pub async fn run_backend( .service(self::subnet::create_subnet) .service(self::subnet::resize) .service(self::subnet::change_preview) + .service(self::network::heal) .service(self::nodes_ops::remove) .service(self::query_decentralization::decentralization_subnet_query) .service(self::query_decentralization::decentralization_whatif_query) diff --git a/rs/ic-management-backend/src/endpoints/network.rs b/rs/ic-management-backend/src/endpoints/network.rs new file mode 100644 index 000000000..b7bb55705 --- /dev/null +++ b/rs/ic-management-backend/src/endpoints/network.rs @@ -0,0 +1,69 @@ +use super::*; +use crate::health; +use crate::health::HealthStatusQuerier; +use decentralization::network::{SubnetQueryBy, TopologyManager}; +use ic_management_types::requests::HealRequest; +use log::warn; +use std::collections::{BTreeMap, HashSet}; + +#[post("/network/heal")] +async fn heal(request: web::Json, registry: web::Data>>) -> Result { + let registry = registry.read().await; + let all_nodes = registry.nodes(); + let health_client = health::HealthClient::new(registry.network()); + let healths = health_client + .nodes() + .await + .map_err(|_| actix_web::error::ErrorInternalServerError("failed to fetch subnet health".to_string()))?; + + let unhealthy_subnets = registry + .subnets() + .into_iter() + .filter_map(|(_, subnet)| { + let unhealthy = subnet + .nodes + .into_iter() + .filter_map(|n| match healths.get(&n.principal) { + Some(health) => { + if *health == ic_management_types::Status::Healthy { + None + } else { + info!("Node {} is {:?}", n.principal, health); + Some(n) + } + } + None => { + warn!("Node {} has no known health, assuming unhealthy", n.principal); + Some(n) + } + }) + .map(|n| decentralization::network::Node::from(&n)) + .collect::>(); + + if !unhealthy.is_empty() { + return Some((subnet.principal, unhealthy)); + } + None + }) + .collect::>(); + + let mut already_added = HashSet::new(); + let mut subnets_changed = Vec::new(); + + for (id, unhealthy_nodes) in unhealthy_subnets { + let change = registry + .modify_subnet_nodes(SubnetQueryBy::SubnetId(id)) + .await? + .with_exclude_nodes(already_added.iter().map(|n: &decentralization::network::Node| n.id.to_string()).collect()) + .optimize(request.optimize.unwrap_or(0), &unhealthy_nodes) + .unwrap(); + + already_added.extend(change.added()); + subnets_changed.push(decentralization::SubnetChangeResponse::from(&change)); + } + + let heal_response = decentralization::HealResponse { + subnets_change_response: subnets_changed, + }; + Ok(HttpResponse::Ok().json(heal_response)) +} diff --git a/rs/ic-management-types/src/requests.rs b/rs/ic-management-types/src/requests.rs index 2742718c7..6542699f4 100644 --- a/rs/ic-management-types/src/requests.rs +++ b/rs/ic-management-types/src/requests.rs @@ -114,3 +114,9 @@ impl NodeRemovalReason { } } } + +#[derive(Serialize, Deserialize)] +pub struct HealRequest { + pub optimize: Option, + pub min_nakamoto_coefficients: Option, +} From 0a36de1f31b30177c4f77456f1e58d101e5a1347 Mon Sep 17 00:00:00 2001 From: Pietro Date: Fri, 7 Jun 2024 17:46:58 +0200 Subject: [PATCH 04/37] Fix Clippy --- rs/decentralization/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rs/decentralization/src/lib.rs b/rs/decentralization/src/lib.rs index bb878947e..e25f3fcda 100644 --- a/rs/decentralization/src/lib.rs +++ b/rs/decentralization/src/lib.rs @@ -192,7 +192,7 @@ pub struct HealResponse { impl Display for HealResponse { fn fmt(&self, _: &mut Formatter<'_>) -> std::fmt::Result { self.subnets_change_response.iter().for_each(|subnet_change_response| { - println!("{}", subnet_change_response); + writeln!("{}", subnet_change_response); }); Ok(()) From a2411ac6779105d93e9d2733180c5c24021f31d6 Mon Sep 17 00:00:00 2001 From: Pietro Date: Fri, 7 Jun 2024 18:11:39 +0200 Subject: [PATCH 05/37] Fix fmt --- rs/decentralization/src/lib.rs | 9 ++++----- rs/ic-management-backend/src/endpoints/network.rs | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/rs/decentralization/src/lib.rs b/rs/decentralization/src/lib.rs index e25f3fcda..745c27f15 100644 --- a/rs/decentralization/src/lib.rs +++ b/rs/decentralization/src/lib.rs @@ -190,11 +190,10 @@ pub struct HealResponse { } impl Display for HealResponse { - fn fmt(&self, _: &mut Formatter<'_>) -> std::fmt::Result { - self.subnets_change_response.iter().for_each(|subnet_change_response| { - writeln!("{}", subnet_change_response); - }); - + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + for change in &self.subnets_change_response { + writeln!(f,"{}", change)?; + } Ok(()) } } diff --git a/rs/ic-management-backend/src/endpoints/network.rs b/rs/ic-management-backend/src/endpoints/network.rs index b7bb55705..4c6383910 100644 --- a/rs/ic-management-backend/src/endpoints/network.rs +++ b/rs/ic-management-backend/src/endpoints/network.rs @@ -9,7 +9,6 @@ use std::collections::{BTreeMap, HashSet}; #[post("/network/heal")] async fn heal(request: web::Json, registry: web::Data>>) -> Result { let registry = registry.read().await; - let all_nodes = registry.nodes(); let health_client = health::HealthClient::new(registry.network()); let healths = health_client .nodes() From 4964b09bacf5b5985441e7c2329db43413d6b1f4 Mon Sep 17 00:00:00 2001 From: Pietro Date: Sun, 9 Jun 2024 20:01:42 +0200 Subject: [PATCH 06/37] Compute local optimum based on max-replacable-nodes --- rs/cli/src/cli.rs | 10 +++--- rs/cli/src/main.rs | 4 +-- rs/decentralization/src/lib.rs | 2 +- rs/decentralization/src/network.rs | 17 ++++++++++ .../src/endpoints/network.rs | 31 +++++++++++++++---- rs/ic-management-types/src/requests.rs | 2 +- 6 files changed, 51 insertions(+), 15 deletions(-) diff --git a/rs/cli/src/cli.rs b/rs/cli/src/cli.rs index 19730aa04..da3fc522a 100644 --- a/rs/cli/src/cli.rs +++ b/rs/cli/src/cli.rs @@ -59,14 +59,14 @@ pub enum Commands { }, Heal { - /// Amount of nodes to be replaced by decentralization optimization - /// algorithm - #[clap(short, long)] - optimize: Option, - /// Minimum Nakamoto coefficients after the replacement #[clap(long, num_args(1..))] min_nakamoto_coefficients: Vec, + + /// Max number of nodes to be replaced per subnet + /// Optimization will be performed automatically at the best level if possible + #[clap(short, long)] + max_replacable_nodes_per_sub: Option, }, /// Manage an existing subnet diff --git a/rs/cli/src/main.rs b/rs/cli/src/main.rs index 22e10050d..e25aa7d2b 100644 --- a/rs/cli/src/main.rs +++ b/rs/cli/src/main.rs @@ -105,15 +105,15 @@ async fn async_main() -> Result<(), anyhow::Error> { } cli::Commands::Heal { - optimize, min_nakamoto_coefficients, + max_replacable_nodes_per_sub, } => { let min_nakamoto_coefficients = parse_min_nakamoto_coefficients(&mut cmd, min_nakamoto_coefficients); runner_instance .network_heal( ic_management_types::requests::HealRequest { - optimize: *optimize, min_nakamoto_coefficients, + max_replacable_nodes_per_sub: *max_replacable_nodes_per_sub, }, cli_opts.verbose, simulate, diff --git a/rs/decentralization/src/lib.rs b/rs/decentralization/src/lib.rs index 745c27f15..8a02b6113 100644 --- a/rs/decentralization/src/lib.rs +++ b/rs/decentralization/src/lib.rs @@ -192,7 +192,7 @@ pub struct HealResponse { impl Display for HealResponse { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { for change in &self.subnets_change_response { - writeln!(f,"{}", change)?; + writeln!(f, "{}", change)?; } Ok(()) } diff --git a/rs/decentralization/src/network.rs b/rs/decentralization/src/network.rs index 7e146b95f..a8570af66 100644 --- a/rs/decentralization/src/network.rs +++ b/rs/decentralization/src/network.rs @@ -861,6 +861,23 @@ impl SubnetChangeRequest { Ok(SubnetChange { old_nodes, ..result }) } + /// Optimize is implemented by removing a certain number of nodes and then + /// adding the same number back. + pub fn optimize_with_buffer(self, optimize_buffer: usize, replacements_unhealthy: &Vec) -> Result { + let mut optimize_num = 0; + let mut change_current = SubnetChangeResponse::from(&self.clone().optimize(optimize_num, replacements_unhealthy)?); + + loop { + let change_next = SubnetChangeResponse::from(&self.clone().optimize(optimize_num + 1, replacements_unhealthy)?); + if optimize_num >= optimize_buffer || change_current.score_after >= change_next.score_after { + return Ok(change_current); + } else { + change_current = change_next.clone(); + optimize_num += 1; + } + } + } + /// Add or remove nodes from the subnet. pub fn resize(&self, how_many_nodes_to_add: usize, how_many_nodes_to_remove: usize) -> Result { println!( diff --git a/rs/ic-management-backend/src/endpoints/network.rs b/rs/ic-management-backend/src/endpoints/network.rs index 4c6383910..40d85d655 100644 --- a/rs/ic-management-backend/src/endpoints/network.rs +++ b/rs/ic-management-backend/src/endpoints/network.rs @@ -40,9 +40,21 @@ async fn heal(request: web::Json, registry: web::Data>(); if !unhealthy.is_empty() { + if let Some(x) = request.max_replacable_nodes_per_sub { + if unhealthy.len() > x { + warn!( + "Subnet {} has {} unhealthy nodes\nMax replacable nodes is {} skipping...", + subnet.principal, + unhealthy.len(), + x + ); + return None; + } + } return Some((subnet.principal, unhealthy)); + } else { + None } - None }) .collect::>(); @@ -53,12 +65,19 @@ async fn heal(request: web::Json, registry: web::Data, pub min_nakamoto_coefficients: Option, + pub max_replacable_nodes_per_sub: Option, } From c3457c3372aa58a6bd3cec0d79ef97c0324232ce Mon Sep 17 00:00:00 2001 From: Pietro Date: Mon, 10 Jun 2024 11:13:19 +0200 Subject: [PATCH 07/37] Test multi concurrent proposals --- rs/cli/src/runner.rs | 25 +++++++++++++------ .../src/endpoints/network.rs | 3 ++- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/rs/cli/src/runner.rs b/rs/cli/src/runner.rs index 8605e769c..f1ac113e8 100644 --- a/rs/cli/src/runner.rs +++ b/rs/cli/src/runner.rs @@ -4,6 +4,7 @@ use crate::operations::hostos_rollout::{HostosRollout, HostosRolloutResponse, No use crate::ops_subnet_node_replace; use crate::{ic_admin, local_unused_port}; use decentralization::SubnetChangeResponse; +use futures::future::join_all; use ic_base_types::PrincipalId; use ic_management_backend::proposal::ProposalAgent; use ic_management_backend::public_dashboard::query_ic_dashboard_list; @@ -446,16 +447,24 @@ impl Runner { let change = self.dashboard_backend_client.network_heal(request).await?; println!("{}", change); - let _ = change.subnets_change_response.iter().map(|subnet_change_response| async move { - self.run_membership_change( - subnet_change_response.clone(), - ops_subnet_node_replace::replace_proposal_options(&subnet_change_response).unwrap(), + // let changes = change.subnets_change_response.iter().map(|subnet_change_response| async move { + // self.run_membership_change( + // subnet_change_response.clone(), + // ops_subnet_node_replace::replace_proposal_options(&subnet_change_response)?, + // simulate, + // ) + // .await + // }); + + // join_all(changes).await; + + let first_change = change.subnets_change_response.get(0).unwrap(); + + self.run_membership_change( + first_change.clone(), + ops_subnet_node_replace::replace_proposal_options(&first_change)?, simulate, ) .await - .unwrap(); - }); - - Ok(()) } } diff --git a/rs/ic-management-backend/src/endpoints/network.rs b/rs/ic-management-backend/src/endpoints/network.rs index 40d85d655..891fcb06d 100644 --- a/rs/ic-management-backend/src/endpoints/network.rs +++ b/rs/ic-management-backend/src/endpoints/network.rs @@ -67,7 +67,8 @@ async fn heal(request: web::Json, registry: web::Data Date: Mon, 10 Jun 2024 17:51:25 +0200 Subject: [PATCH 08/37] Run rustftm --- rs/cli/src/clients.rs | 2 +- rs/cli/src/runner.rs | 28 ++++++++----------- .../src/endpoints/network.rs | 3 +- 3 files changed, 14 insertions(+), 19 deletions(-) diff --git a/rs/cli/src/clients.rs b/rs/cli/src/clients.rs index 1ef515221..44bc8c477 100644 --- a/rs/cli/src/clients.rs +++ b/rs/cli/src/clients.rs @@ -3,7 +3,7 @@ use decentralization::HealResponse; use decentralization::SubnetChangeResponse; use ic_base_types::PrincipalId; use ic_management_types::{ - requests::{MembershipReplaceRequest, NodesRemoveRequest, NodesRemoveResponse, SubnetCreateRequest, SubnetResizeRequest}, + requests::{HealRequest, MembershipReplaceRequest, NodesRemoveRequest, NodesRemoveResponse, SubnetCreateRequest, SubnetResizeRequest}, Artifact, Network, NetworkError, Release, TopologyChangeProposal, }; use log::error; diff --git a/rs/cli/src/runner.rs b/rs/cli/src/runner.rs index f1ac113e8..9151bd10c 100644 --- a/rs/cli/src/runner.rs +++ b/rs/cli/src/runner.rs @@ -447,24 +447,20 @@ impl Runner { let change = self.dashboard_backend_client.network_heal(request).await?; println!("{}", change); - // let changes = change.subnets_change_response.iter().map(|subnet_change_response| async move { - // self.run_membership_change( - // subnet_change_response.clone(), - // ops_subnet_node_replace::replace_proposal_options(&subnet_change_response)?, - // simulate, - // ) - // .await - // }); - - // join_all(changes).await; - - let first_change = change.subnets_change_response.get(0).unwrap(); - - self.run_membership_change( - first_change.clone(), - ops_subnet_node_replace::replace_proposal_options(&first_change)?, + join_all(change.subnets_change_response.iter().map(|subnet_change_response| async move { + self.run_membership_change( + subnet_change_response.clone(), + ops_subnet_node_replace::replace_proposal_options(&subnet_change_response)?, simulate, ) .await + .map_err(|e| { + println!("{}", e); + e + }) + })) + .await; + + Ok(()) } } diff --git a/rs/ic-management-backend/src/endpoints/network.rs b/rs/ic-management-backend/src/endpoints/network.rs index 891fcb06d..40d85d655 100644 --- a/rs/ic-management-backend/src/endpoints/network.rs +++ b/rs/ic-management-backend/src/endpoints/network.rs @@ -67,8 +67,7 @@ async fn heal(request: web::Json, registry: web::Data Date: Mon, 10 Jun 2024 18:09:26 +0200 Subject: [PATCH 09/37] Add subnet name --- rs/decentralization/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rs/decentralization/src/lib.rs b/rs/decentralization/src/lib.rs index 8a02b6113..357484d97 100644 --- a/rs/decentralization/src/lib.rs +++ b/rs/decentralization/src/lib.rs @@ -73,7 +73,7 @@ impl From<&network::SubnetChange> for SubnetChangeResponse { impl Display for SubnetChangeResponse { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - writeln!(f, "Decentralization score changes:\n")?; + writeln!(f, "Decentralization score changes for subnet {}:\n", self.subnet_id.unwrap_or_default())?; let before_individual = self.score_before.scores_individual(); let after_individual = self.score_after.scores_individual(); self.score_before From 05e94ea6f2b7d6a1709b315dfc85e6c612832ac9 Mon Sep 17 00:00:00 2001 From: Pietro Date: Mon, 10 Jun 2024 21:20:31 +0200 Subject: [PATCH 10/37] Move unhealthy in separate function --- rs/cli/src/cli.rs | 9 +-- rs/cli/src/main.rs | 3 - rs/decentralization/src/network.rs | 7 +- .../src/endpoints/network.rs | 67 +++++-------------- rs/ic-management-backend/src/subnets.rs | 48 +++++++++++++ rs/ic-management-types/src/requests.rs | 1 - 6 files changed, 72 insertions(+), 63 deletions(-) diff --git a/rs/cli/src/cli.rs b/rs/cli/src/cli.rs index 47c2607ef..12dc717a8 100644 --- a/rs/cli/src/cli.rs +++ b/rs/cli/src/cli.rs @@ -60,12 +60,9 @@ pub enum Commands { }, Heal { - /// Minimum Nakamoto coefficients after the replacement - #[clap(long, num_args(1..))] - min_nakamoto_coefficients: Vec, - - /// Max number of nodes to be replaced per subnet - /// Optimization will be performed automatically at the best level if possible + /// Max number of nodes to be replaced per subnet. + /// Optimization will be performed automatically maximizing the decentralization + /// and minimizing the number of replaced nodes per subnet #[clap(short, long)] max_replacable_nodes_per_sub: Option, }, diff --git a/rs/cli/src/main.rs b/rs/cli/src/main.rs index bbe96b051..1509e7083 100644 --- a/rs/cli/src/main.rs +++ b/rs/cli/src/main.rs @@ -105,14 +105,11 @@ async fn async_main() -> Result<(), anyhow::Error> { } cli::Commands::Heal { - min_nakamoto_coefficients, max_replacable_nodes_per_sub, } => { - let min_nakamoto_coefficients = parse_min_nakamoto_coefficients(&mut cmd, min_nakamoto_coefficients); runner_instance .network_heal( ic_management_types::requests::HealRequest { - min_nakamoto_coefficients, max_replacable_nodes_per_sub: *max_replacable_nodes_per_sub, }, cli_opts.verbose, diff --git a/rs/decentralization/src/network.rs b/rs/decentralization/src/network.rs index a8570af66..fe6bb301c 100644 --- a/rs/decentralization/src/network.rs +++ b/rs/decentralization/src/network.rs @@ -861,15 +861,14 @@ impl SubnetChangeRequest { Ok(SubnetChange { old_nodes, ..result }) } - /// Optimize is implemented by removing a certain number of nodes and then - /// adding the same number back. - pub fn optimize_with_buffer(self, optimize_buffer: usize, replacements_unhealthy: &Vec) -> Result { + /// Optimize the number of nodes which maximize decentralization up to limit + pub fn optimize_with_limit(self, optimize_limit: usize, replacements_unhealthy: &Vec) -> Result { let mut optimize_num = 0; let mut change_current = SubnetChangeResponse::from(&self.clone().optimize(optimize_num, replacements_unhealthy)?); loop { let change_next = SubnetChangeResponse::from(&self.clone().optimize(optimize_num + 1, replacements_unhealthy)?); - if optimize_num >= optimize_buffer || change_current.score_after >= change_next.score_after { + if optimize_num >= optimize_limit || change_current.score_after >= change_next.score_after { return Ok(change_current); } else { change_current = change_next.clone(); diff --git a/rs/ic-management-backend/src/endpoints/network.rs b/rs/ic-management-backend/src/endpoints/network.rs index 40d85d655..61c2d727f 100644 --- a/rs/ic-management-backend/src/endpoints/network.rs +++ b/rs/ic-management-backend/src/endpoints/network.rs @@ -1,60 +1,30 @@ use super::*; -use crate::health; -use crate::health::HealthStatusQuerier; +use crate::subnets; use decentralization::network::{SubnetQueryBy, TopologyManager}; use ic_management_types::requests::HealRequest; use log::warn; -use std::collections::{BTreeMap, HashSet}; +use std::collections::HashSet; #[post("/network/heal")] async fn heal(request: web::Json, registry: web::Data>>) -> Result { let registry = registry.read().await; - let health_client = health::HealthClient::new(registry.network()); - let healths = health_client - .nodes() - .await - .map_err(|_| actix_web::error::ErrorInternalServerError("failed to fetch subnet health".to_string()))?; - - let unhealthy_subnets = registry - .subnets() + let unhealthy_subnets = subnets::get_unhealthy(®istry) + .await + .map_err(|_| actix_web::error::ErrorInternalServerError("failed to fetch subnet health".to_string()))? .into_iter() - .filter_map(|(_, subnet)| { - let unhealthy = subnet - .nodes - .into_iter() - .filter_map(|n| match healths.get(&n.principal) { - Some(health) => { - if *health == ic_management_types::Status::Healthy { - None - } else { - info!("Node {} is {:?}", n.principal, health); - Some(n) - } - } - None => { - warn!("Node {} has no known health, assuming unhealthy", n.principal); - Some(n) - } - }) - .map(|n| decentralization::network::Node::from(&n)) - .collect::>(); - - if !unhealthy.is_empty() { - if let Some(x) = request.max_replacable_nodes_per_sub { - if unhealthy.len() > x { - warn!( - "Subnet {} has {} unhealthy nodes\nMax replacable nodes is {} skipping...", - subnet.principal, - unhealthy.len(), - x - ); - return None; - } + .filter_map(|(subnet_id, n)| { + if let Some(x) = request.max_replacable_nodes_per_sub { + if n.len() > x { + warn!( + "Subnet {} has {} unhealthy nodes\nMax replacable nodes is {} skipping...", + subnet_id, + n.len(), + x + ); + return None; } - return Some((subnet.principal, unhealthy)); - } else { - None } + return Some((subnet_id, n)); }) .collect::>(); @@ -68,9 +38,8 @@ async fn heal(request: web::Json, registry: web::Data +) -> anyhow::Result>> { + let health_client = health::HealthClient::new(registry.network()); + let healths = health_client + .nodes() + .await?; + + let unhealthy_subnets = registry + .subnets() + .into_iter() + .filter_map(|(_, subnet)| { + let unhealthy = subnet + .nodes + .into_iter() + .filter_map(|n| match healths.get(&n.principal) { + Some(health) => { + if *health == ic_management_types::Status::Healthy { + None + } else { + info!("Node {} is {:?}", n.principal, health); + Some(n) + } + } + None => { + warn!("Node {} has no known health, assuming unhealthy", n.principal); + Some(n) + } + }) + .map(|n| decentralization::network::Node::from(&n)) + .collect::>(); + + if !unhealthy.is_empty() { + return Some((subnet.principal, unhealthy)); + } else { + None + } + }) + .collect::>(); + + Ok(unhealthy_subnets) +} pub fn get_proposed_subnet_changes( all_nodes: &BTreeMap, diff --git a/rs/ic-management-types/src/requests.rs b/rs/ic-management-types/src/requests.rs index 54490f77b..9dd314a71 100644 --- a/rs/ic-management-types/src/requests.rs +++ b/rs/ic-management-types/src/requests.rs @@ -117,6 +117,5 @@ impl NodeRemovalReason { #[derive(Serialize, Deserialize)] pub struct HealRequest { - pub min_nakamoto_coefficients: Option, pub max_replacable_nodes_per_sub: Option, } From a172508b9fc29e990844045ceb36d212d0a1f75a Mon Sep 17 00:00:00 2001 From: Pietro Date: Tue, 11 Jun 2024 11:45:06 +0200 Subject: [PATCH 11/37] Remove optimize_with_limit --- rs/decentralization/src/network.rs | 16 ---------------- .../src/endpoints/network.rs | 15 +++++---------- 2 files changed, 5 insertions(+), 26 deletions(-) diff --git a/rs/decentralization/src/network.rs b/rs/decentralization/src/network.rs index fe6bb301c..7e146b95f 100644 --- a/rs/decentralization/src/network.rs +++ b/rs/decentralization/src/network.rs @@ -861,22 +861,6 @@ impl SubnetChangeRequest { Ok(SubnetChange { old_nodes, ..result }) } - /// Optimize the number of nodes which maximize decentralization up to limit - pub fn optimize_with_limit(self, optimize_limit: usize, replacements_unhealthy: &Vec) -> Result { - let mut optimize_num = 0; - let mut change_current = SubnetChangeResponse::from(&self.clone().optimize(optimize_num, replacements_unhealthy)?); - - loop { - let change_next = SubnetChangeResponse::from(&self.clone().optimize(optimize_num + 1, replacements_unhealthy)?); - if optimize_num >= optimize_limit || change_current.score_after >= change_next.score_after { - return Ok(change_current); - } else { - change_current = change_next.clone(); - optimize_num += 1; - } - } - } - /// Add or remove nodes from the subnet. pub fn resize(&self, how_many_nodes_to_add: usize, how_many_nodes_to_remove: usize) -> Result { println!( diff --git a/rs/ic-management-backend/src/endpoints/network.rs b/rs/ic-management-backend/src/endpoints/network.rs index 61c2d727f..cd7a4566c 100644 --- a/rs/ic-management-backend/src/endpoints/network.rs +++ b/rs/ic-management-backend/src/endpoints/network.rs @@ -35,18 +35,13 @@ async fn heal(request: web::Json, registry: web::Data>()); - let subnet_change = if let Some(max_replacable_nodes_per_sub) = request.max_replacable_nodes_per_sub { - let optimize_limit = max_replacable_nodes_per_sub - unhealthy_nodes.len(); - change.optimize_with_limit(optimize_limit, &unhealthy_nodes)? - } else { - let not_optimized = change.optimize(0, &unhealthy_nodes)?; - decentralization::SubnetChangeResponse::from(¬_optimized) - }; + let optimize_limit = request.max_replacable_nodes_per_sub.unwrap_or(unhealthy_nodes.len()) - unhealthy_nodes.len(); + let optimized = change.optimize(optimize_limit, &unhealthy_nodes)?; - already_added.extend(subnet_change.added.clone()); - subnets_changed.push(subnet_change); + already_added.extend(optimized.added().iter().map(|n| n.id.to_string()).collect::>()); + subnets_changed.push(decentralization::SubnetChangeResponse::from(&optimized)); } let heal_response = decentralization::HealResponse { From 6b79b8a78871f8ba787a2942d1c8b4bc2cf144b4 Mon Sep 17 00:00:00 2001 From: Pietro Date: Tue, 11 Jun 2024 11:47:05 +0200 Subject: [PATCH 12/37] Run rustfmt --- .../src/endpoints/network.rs | 2 +- rs/ic-management-backend/src/subnets.rs | 16 +++++++--------- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/rs/ic-management-backend/src/endpoints/network.rs b/rs/ic-management-backend/src/endpoints/network.rs index cd7a4566c..9ceae351e 100644 --- a/rs/ic-management-backend/src/endpoints/network.rs +++ b/rs/ic-management-backend/src/endpoints/network.rs @@ -9,7 +9,7 @@ use std::collections::HashSet; async fn heal(request: web::Json, registry: web::Data>>) -> Result { let registry = registry.read().await; let unhealthy_subnets = subnets::get_unhealthy(®istry) - .await + .await .map_err(|_| actix_web::error::ErrorInternalServerError("failed to fetch subnet health".to_string()))? .into_iter() .filter_map(|(subnet_id, n)| { diff --git a/rs/ic-management-backend/src/subnets.rs b/rs/ic-management-backend/src/subnets.rs index e7db900f8..2109c2fbf 100644 --- a/rs/ic-management-backend/src/subnets.rs +++ b/rs/ic-management-backend/src/subnets.rs @@ -1,21 +1,19 @@ use std::collections::BTreeMap; +use crate::health; +use crate::health::HealthStatusQuerier; +use crate::registry::RegistryState; use decentralization::{network::SubnetChange, SubnetChangeResponse}; use ic_base_types::PrincipalId; use ic_management_types::{Node, TopologyChangeProposal}; -use crate::health; -use crate::health::HealthStatusQuerier; +use log::{info, warn}; use tokio::sync::RwLockReadGuard; -use log::{warn, info}; -use crate::registry::RegistryState; pub async fn get_unhealthy( - registry: &RwLockReadGuard<'_, RegistryState> + registry: &RwLockReadGuard<'_, RegistryState>, ) -> anyhow::Result>> { let health_client = health::HealthClient::new(registry.network()); - let healths = health_client - .nodes() - .await?; + let healths = health_client.nodes().await?; let unhealthy_subnets = registry .subnets() @@ -48,7 +46,7 @@ pub async fn get_unhealthy( } }) .collect::>(); - + Ok(unhealthy_subnets) } From 8c8f6d04ceba0ac2d7355eb6d159cd195f5f7cee Mon Sep 17 00:00:00 2001 From: Pietro Date: Tue, 11 Jun 2024 11:50:16 +0200 Subject: [PATCH 13/37] Fixed clippy --- rs/ic-management-backend/src/endpoints/network.rs | 2 +- rs/ic-management-backend/src/subnets.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rs/ic-management-backend/src/endpoints/network.rs b/rs/ic-management-backend/src/endpoints/network.rs index 9ceae351e..b5fdfe103 100644 --- a/rs/ic-management-backend/src/endpoints/network.rs +++ b/rs/ic-management-backend/src/endpoints/network.rs @@ -24,7 +24,7 @@ async fn heal(request: web::Json, registry: web::Data>(); diff --git a/rs/ic-management-backend/src/subnets.rs b/rs/ic-management-backend/src/subnets.rs index 2109c2fbf..e49870c57 100644 --- a/rs/ic-management-backend/src/subnets.rs +++ b/rs/ic-management-backend/src/subnets.rs @@ -40,7 +40,7 @@ pub async fn get_unhealthy( .collect::>(); if !unhealthy.is_empty() { - return Some((subnet.principal, unhealthy)); + Some((subnet.principal, unhealthy)) } else { None } From 173e4b828059da6c1eed49400a2256f5b940b352 Mon Sep 17 00:00:00 2001 From: Pietro Date: Tue, 11 Jun 2024 11:54:43 +0200 Subject: [PATCH 14/37] Fixing clippy --- rs/cli/src/runner.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rs/cli/src/runner.rs b/rs/cli/src/runner.rs index 9151bd10c..5b141b5c7 100644 --- a/rs/cli/src/runner.rs +++ b/rs/cli/src/runner.rs @@ -450,7 +450,7 @@ impl Runner { join_all(change.subnets_change_response.iter().map(|subnet_change_response| async move { self.run_membership_change( subnet_change_response.clone(), - ops_subnet_node_replace::replace_proposal_options(&subnet_change_response)?, + ops_subnet_node_replace::replace_proposal_options(subnet_change_response)?, simulate, ) .await From 923e24b2ea3c42a06ff6c6831f3ee020c1d2e1c3 Mon Sep 17 00:00:00 2001 From: Pietro Date: Tue, 11 Jun 2024 18:14:25 +0200 Subject: [PATCH 15/37] Refactor heal method --- .../src/endpoints/network.rs | 39 +++++++++++-------- rs/ic-management-backend/src/subnets.rs | 31 ++++++--------- 2 files changed, 34 insertions(+), 36 deletions(-) diff --git a/rs/ic-management-backend/src/endpoints/network.rs b/rs/ic-management-backend/src/endpoints/network.rs index b5fdfe103..b63ad587d 100644 --- a/rs/ic-management-backend/src/endpoints/network.rs +++ b/rs/ic-management-backend/src/endpoints/network.rs @@ -1,25 +1,33 @@ use super::*; use crate::subnets; -use decentralization::network::{SubnetQueryBy, TopologyManager}; +use decentralization::network::{Node, SubnetQueryBy, TopologyManager}; use ic_management_types::requests::HealRequest; use log::warn; use std::collections::HashSet; #[post("/network/heal")] async fn heal(request: web::Json, registry: web::Data>>) -> Result { + let mut already_added = HashSet::new(); + let mut subnets_changed = Vec::new(); + let registry = registry.read().await; - let unhealthy_subnets = subnets::get_unhealthy(®istry) + let health_client = health::HealthClient::new(registry.network()); + let nodes_health = health_client + .nodes() + .await + .map_err(|_| actix_web::error::ErrorInternalServerError("failed to fetch subnet health".to_string()))?; + + let unhealthy_subnets = subnets::unhealthy_with_nodes(®istry.subnets(), nodes_health) .await - .map_err(|_| actix_web::error::ErrorInternalServerError("failed to fetch subnet health".to_string()))? .into_iter() .filter_map(|(subnet_id, n)| { - if let Some(x) = request.max_replacable_nodes_per_sub { - if n.len() > x { + if let Some(max_replacable_nodes) = request.max_replacable_nodes_per_sub { + if n.len() > max_replacable_nodes { warn!( "Subnet {} has {} unhealthy nodes\nMax replacable nodes is {} skipping...", subnet_id, n.len(), - x + max_replacable_nodes ); return None; } @@ -28,24 +36,23 @@ async fn heal(request: web::Json, registry: web::Data>(); - let mut already_added = HashSet::new(); - let mut subnets_changed = Vec::new(); - for (id, unhealthy_nodes) in unhealthy_subnets { - let change = registry + let decentralized_unhealthy_nodes: Vec = unhealthy_nodes.iter().map(decentralization::network::Node::from).collect::>(); + let unhealthy_nodes_len = unhealthy_nodes.len(); + let optimize_limit = request.max_replacable_nodes_per_sub.unwrap_or(unhealthy_nodes_len) - unhealthy_nodes_len; + + let optimized = registry .modify_subnet_nodes(SubnetQueryBy::SubnetId(id)) .await? - .with_exclude_nodes(already_added.iter().cloned().collect::>()); - - let optimize_limit = request.max_replacable_nodes_per_sub.unwrap_or(unhealthy_nodes.len()) - unhealthy_nodes.len(); - let optimized = change.optimize(optimize_limit, &unhealthy_nodes)?; + .with_exclude_nodes(already_added.iter().cloned().collect::>()) + .optimize(optimize_limit, &decentralized_unhealthy_nodes)?; already_added.extend(optimized.added().iter().map(|n| n.id.to_string()).collect::>()); subnets_changed.push(decentralization::SubnetChangeResponse::from(&optimized)); } - let heal_response = decentralization::HealResponse { + let response = decentralization::HealResponse { subnets_change_response: subnets_changed, }; - Ok(HttpResponse::Ok().json(heal_response)) + Ok(HttpResponse::Ok().json(response)) } diff --git a/rs/ic-management-backend/src/subnets.rs b/rs/ic-management-backend/src/subnets.rs index e49870c57..144cae5e6 100644 --- a/rs/ic-management-backend/src/subnets.rs +++ b/rs/ic-management-backend/src/subnets.rs @@ -1,28 +1,22 @@ use std::collections::BTreeMap; -use crate::health; -use crate::health::HealthStatusQuerier; -use crate::registry::RegistryState; use decentralization::{network::SubnetChange, SubnetChangeResponse}; use ic_base_types::PrincipalId; -use ic_management_types::{Node, TopologyChangeProposal}; +use ic_management_types::{Node, Status, Subnet, TopologyChangeProposal}; use log::{info, warn}; -use tokio::sync::RwLockReadGuard; -pub async fn get_unhealthy( - registry: &RwLockReadGuard<'_, RegistryState>, -) -> anyhow::Result>> { - let health_client = health::HealthClient::new(registry.network()); - let healths = health_client.nodes().await?; - - let unhealthy_subnets = registry - .subnets() +pub async fn unhealthy_with_nodes( + subnets: &BTreeMap, + nodes_health: BTreeMap, +) -> BTreeMap> { + subnets + .clone() .into_iter() - .filter_map(|(_, subnet)| { + .filter_map(|(id, subnet)| { let unhealthy = subnet .nodes .into_iter() - .filter_map(|n| match healths.get(&n.principal) { + .filter_map(|n| match nodes_health.get(&n.principal) { Some(health) => { if *health == ic_management_types::Status::Healthy { None @@ -36,18 +30,15 @@ pub async fn get_unhealthy( Some(n) } }) - .map(|n| decentralization::network::Node::from(&n)) .collect::>(); if !unhealthy.is_empty() { - Some((subnet.principal, unhealthy)) + Some((id, unhealthy)) } else { None } }) - .collect::>(); - - Ok(unhealthy_subnets) + .collect::>() } pub fn get_proposed_subnet_changes( From b29684e90cbe1bf738772c08446a53264a77a986 Mon Sep 17 00:00:00 2001 From: Pietro Date: Tue, 11 Jun 2024 18:21:10 +0200 Subject: [PATCH 16/37] Fix rustfmt --- rs/ic-management-backend/src/endpoints/network.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rs/ic-management-backend/src/endpoints/network.rs b/rs/ic-management-backend/src/endpoints/network.rs index b63ad587d..a69ed3799 100644 --- a/rs/ic-management-backend/src/endpoints/network.rs +++ b/rs/ic-management-backend/src/endpoints/network.rs @@ -9,7 +9,7 @@ use std::collections::HashSet; async fn heal(request: web::Json, registry: web::Data>>) -> Result { let mut already_added = HashSet::new(); let mut subnets_changed = Vec::new(); - + let registry = registry.read().await; let health_client = health::HealthClient::new(registry.network()); let nodes_health = health_client From 21a517b7cbe3833d6cebcbd1a90dd00ab1f8995b Mon Sep 17 00:00:00 2001 From: Pietro Date: Tue, 11 Jun 2024 22:06:59 +0200 Subject: [PATCH 17/37] Add NetworkHealRequest --- rs/decentralization/src/network.rs | 62 ++++++++++++++++++- .../src/endpoints/network.rs | 49 ++------------- 2 files changed, 66 insertions(+), 45 deletions(-) diff --git a/rs/decentralization/src/network.rs b/rs/decentralization/src/network.rs index 7e146b95f..144ce874d 100644 --- a/rs/decentralization/src/network.rs +++ b/rs/decentralization/src/network.rs @@ -2,15 +2,15 @@ use crate::nakamoto::{self, NakamotoScore}; use crate::SubnetChangeResponse; use actix_web::http::StatusCode; use actix_web::{HttpResponse, ResponseError}; -use anyhow::anyhow; use async_trait::async_trait; use ic_base_types::PrincipalId; use ic_management_types::{MinNakamotoCoefficients, NetworkError, NodeFeature}; use itertools::Itertools; -use log::{debug, info}; +use log::{debug, info, warn}; use rand::{seq::SliceRandom, SeedableRng}; use serde::{Deserialize, Serialize}; use std::cmp::Ordering; +use std::collections::{BTreeMap, HashSet}; use std::fmt::{Debug, Display, Formatter}; use std::hash::Hash; @@ -985,3 +985,61 @@ impl Display for SubnetChange { write!(f, "{}", SubnetChangeResponse::from(self)) } } + +#[derive(Default, Clone, Debug)] +pub struct NetworkHealRequest { + pub subnets_with_unhealthy_nodes: Vec<(DecentralizedSubnet, Vec)>, +} + +impl NetworkHealRequest { + pub fn new( + subnets: BTreeMap, + unhealthy_subnets: BTreeMap>, + ) -> Self { + Self { + subnets_with_unhealthy_nodes: unhealthy_subnets + .into_iter() + .map(|(id, unhealthy_nodes)| { + let subnet = subnets.get(&id).unwrap().clone(); + + ( + DecentralizedSubnet::from(subnet), + unhealthy_nodes.iter().map(Node::from).collect::>(), + ) + }) + .collect::>(), + } + } + + pub fn heal(&self, available_nodes: Vec, max_replacable_nodes: Option) -> Result, NetworkError> { + let mut already_added = HashSet::new(); + let mut subnets_changed = Vec::new(); + + for (subnet, unhealthy_nodes) in &self.subnets_with_unhealthy_nodes { + let unhealthy_nodes_len = unhealthy_nodes.len(); + if let Some(max_replacable_nodes) = max_replacable_nodes { + if unhealthy_nodes_len > max_replacable_nodes { + warn!( + "Subnet {} has {} unhealthy nodes\nMax replacable nodes is {} skipping...", + subnet.id, unhealthy_nodes_len, max_replacable_nodes + ); + continue; + } + } + let optimize_limit = max_replacable_nodes.unwrap_or(unhealthy_nodes_len) - unhealthy_nodes_len; + + let optimized = SubnetChangeRequest { + subnet: subnet.clone(), + available_nodes: available_nodes.clone(), + ..Default::default() + } + .with_exclude_nodes(already_added.iter().cloned().collect::>()) + .optimize(optimize_limit, &unhealthy_nodes)?; + + already_added.extend(optimized.added().iter().map(|n| n.id.to_string()).collect::>()); + subnets_changed.push(SubnetChangeResponse::from(&optimized)); + } + + Ok(subnets_changed) + } +} diff --git a/rs/ic-management-backend/src/endpoints/network.rs b/rs/ic-management-backend/src/endpoints/network.rs index a69ed3799..4f6b03ffd 100644 --- a/rs/ic-management-backend/src/endpoints/network.rs +++ b/rs/ic-management-backend/src/endpoints/network.rs @@ -1,58 +1,21 @@ use super::*; use crate::subnets; -use decentralization::network::{Node, SubnetQueryBy, TopologyManager}; +use decentralization::network::NetworkHealRequest; use ic_management_types::requests::HealRequest; -use log::warn; -use std::collections::HashSet; #[post("/network/heal")] async fn heal(request: web::Json, registry: web::Data>>) -> Result { - let mut already_added = HashSet::new(); - let mut subnets_changed = Vec::new(); - let registry = registry.read().await; let health_client = health::HealthClient::new(registry.network()); let nodes_health = health_client .nodes() .await .map_err(|_| actix_web::error::ErrorInternalServerError("failed to fetch subnet health".to_string()))?; + let subnets: BTreeMap = registry.subnets(); + let unhealthy_subnets: BTreeMap> = subnets::unhealthy_with_nodes(&subnets, nodes_health).await; - let unhealthy_subnets = subnets::unhealthy_with_nodes(®istry.subnets(), nodes_health) - .await - .into_iter() - .filter_map(|(subnet_id, n)| { - if let Some(max_replacable_nodes) = request.max_replacable_nodes_per_sub { - if n.len() > max_replacable_nodes { - warn!( - "Subnet {} has {} unhealthy nodes\nMax replacable nodes is {} skipping...", - subnet_id, - n.len(), - max_replacable_nodes - ); - return None; - } - } - Some((subnet_id, n)) - }) - .collect::>(); - - for (id, unhealthy_nodes) in unhealthy_subnets { - let decentralized_unhealthy_nodes: Vec = unhealthy_nodes.iter().map(decentralization::network::Node::from).collect::>(); - let unhealthy_nodes_len = unhealthy_nodes.len(); - let optimize_limit = request.max_replacable_nodes_per_sub.unwrap_or(unhealthy_nodes_len) - unhealthy_nodes_len; - - let optimized = registry - .modify_subnet_nodes(SubnetQueryBy::SubnetId(id)) - .await? - .with_exclude_nodes(already_added.iter().cloned().collect::>()) - .optimize(optimize_limit, &decentralized_unhealthy_nodes)?; - - already_added.extend(optimized.added().iter().map(|n| n.id.to_string()).collect::>()); - subnets_changed.push(decentralization::SubnetChangeResponse::from(&optimized)); - } + let subnets_change_response = + NetworkHealRequest::new(subnets, unhealthy_subnets).heal(registry.available_nodes().await?, request.max_replacable_nodes_per_sub)?; - let response = decentralization::HealResponse { - subnets_change_response: subnets_changed, - }; - Ok(HttpResponse::Ok().json(response)) + Ok(HttpResponse::Ok().json(decentralization::HealResponse { subnets_change_response })) } From 0ee7cd5618cde3d54cb7a43fa45f14794a5810c6 Mon Sep 17 00:00:00 2001 From: Pietro Date: Wed, 12 Jun 2024 18:05:04 +0200 Subject: [PATCH 18/37] Add removed anyhow --- rs/decentralization/src/network.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/rs/decentralization/src/network.rs b/rs/decentralization/src/network.rs index 144ce874d..c94518882 100644 --- a/rs/decentralization/src/network.rs +++ b/rs/decentralization/src/network.rs @@ -3,6 +3,7 @@ use crate::SubnetChangeResponse; use actix_web::http::StatusCode; use actix_web::{HttpResponse, ResponseError}; use async_trait::async_trait; +use anyhow::anyhow; use ic_base_types::PrincipalId; use ic_management_types::{MinNakamotoCoefficients, NetworkError, NodeFeature}; use itertools::Itertools; From d56031b0ef9e72aa713767db7efbf7eb8883b491 Mon Sep 17 00:00:00 2001 From: Pietro Date: Mon, 17 Jun 2024 10:16:54 +0200 Subject: [PATCH 19/37] Add ordering of subnets --- rs/decentralization/src/network.rs | 106 ++++++++++++------ .../src/endpoints/network.rs | 2 +- rs/ic-management-types/src/lib.rs | 2 + 3 files changed, 76 insertions(+), 34 deletions(-) diff --git a/rs/decentralization/src/network.rs b/rs/decentralization/src/network.rs index c94518882..2529539be 100644 --- a/rs/decentralization/src/network.rs +++ b/rs/decentralization/src/network.rs @@ -11,7 +11,7 @@ use log::{debug, info, warn}; use rand::{seq::SliceRandom, SeedableRng}; use serde::{Deserialize, Serialize}; use std::cmp::Ordering; -use std::collections::{BTreeMap, HashSet}; +use std::collections::BTreeMap; use std::fmt::{Debug, Display, Formatter}; use std::hash::Hash; @@ -123,7 +123,7 @@ impl From<&ic_management_types::Node> for Node { } } -#[derive(Clone, Default, Debug, Serialize, Deserialize)] +#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)] pub struct DecentralizedSubnet { pub id: PrincipalId, pub nodes: Vec, @@ -987,58 +987,98 @@ impl Display for SubnetChange { } } -#[derive(Default, Clone, Debug)] -pub struct NetworkHealRequest { - pub subnets_with_unhealthy_nodes: Vec<(DecentralizedSubnet, Vec)>, +#[derive(PartialEq, Eq)] +pub struct NetworkHealSubnets { + pub name: String, + pub decentralized_subnet: DecentralizedSubnet, + pub unhealthy_nodes: Vec } -impl NetworkHealRequest { - pub fn new( - subnets: BTreeMap, - unhealthy_subnets: BTreeMap>, - ) -> Self { - Self { - subnets_with_unhealthy_nodes: unhealthy_subnets - .into_iter() - .map(|(id, unhealthy_nodes)| { - let subnet = subnets.get(&id).unwrap().clone(); +impl NetworkHealSubnets { + const IMPORTANT_SUBNETS: &'static [&'static str] = &[ + "NNS", + "SNS", + "Bitcoin", + "Internet Identity, tECDSA backup", + "tECDSA signing" + ]; +} - ( - DecentralizedSubnet::from(subnet), - unhealthy_nodes.iter().map(Node::from).collect::>(), - ) - }) - .collect::>(), +impl PartialOrd for NetworkHealSubnets { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for NetworkHealSubnets { + fn cmp(&self, other: &Self) -> Ordering { + + let self_is_important = NetworkHealSubnets::IMPORTANT_SUBNETS.contains(&self.name.as_str()); + let other_is_important = NetworkHealSubnets::IMPORTANT_SUBNETS.contains(&other.name.as_str()); + + match (self_is_important, other_is_important) { + (true, false) => Ordering::Greater, + (false, true) => Ordering::Less, + _ => match self.decentralized_subnet.nodes.len().cmp(&other.decentralized_subnet.nodes.len()) { + Ordering::Equal => self.unhealthy_nodes.len().cmp(&other.unhealthy_nodes.len()), + other => other, + } } + + } +} + +#[derive(Default, Clone)] +pub struct NetworkHealRequest { + pub subnets: BTreeMap, + pub unhealthy_subnets: BTreeMap>, +} + +impl NetworkHealRequest { + fn network_heal_subnets(&self) -> Result, NetworkError> { + Ok(self.unhealthy_subnets + .iter() + .flat_map(|(id, unhealthy_nodes)| { + let unhealthy_nodes = unhealthy_nodes.iter().map(Node::from).collect::>(); + let unhealthy_subnet = self.subnets.get(id).ok_or(NetworkError::SubnetNotFound(id.clone()))?; + + Ok::(NetworkHealSubnets{ + name: unhealthy_subnet.metadata.name.clone(), + decentralized_subnet: DecentralizedSubnet::from(unhealthy_subnet), + unhealthy_nodes} + ) + }) + .sorted_by(|a, b| a.cmp(b).reverse()) + .collect_vec()) } - pub fn heal(&self, available_nodes: Vec, max_replacable_nodes: Option) -> Result, NetworkError> { - let mut already_added = HashSet::new(); + pub fn heal(&self, mut available_nodes: Vec, max_replacable_nodes: Option) -> Result, NetworkError> { let mut subnets_changed = Vec::new(); + let subnets = self.network_heal_subnets()?; - for (subnet, unhealthy_nodes) in &self.subnets_with_unhealthy_nodes { - let unhealthy_nodes_len = unhealthy_nodes.len(); + for subnet in subnets { + let unhealthy_nodes_len = subnet.unhealthy_nodes.len(); if let Some(max_replacable_nodes) = max_replacable_nodes { if unhealthy_nodes_len > max_replacable_nodes { warn!( "Subnet {} has {} unhealthy nodes\nMax replacable nodes is {} skipping...", - subnet.id, unhealthy_nodes_len, max_replacable_nodes + subnet.decentralized_subnet.id, unhealthy_nodes_len, max_replacable_nodes ); continue; } } let optimize_limit = max_replacable_nodes.unwrap_or(unhealthy_nodes_len) - unhealthy_nodes_len; - let optimized = SubnetChangeRequest { - subnet: subnet.clone(), + let change = SubnetChangeRequest{ + subnet: subnet.decentralized_subnet.clone(), available_nodes: available_nodes.clone(), ..Default::default() - } - .with_exclude_nodes(already_added.iter().cloned().collect::>()) - .optimize(optimize_limit, &unhealthy_nodes)?; + }; + let change = change.optimize(optimize_limit, &subnet.unhealthy_nodes)?; - already_added.extend(optimized.added().iter().map(|n| n.id.to_string()).collect::>()); - subnets_changed.push(SubnetChangeResponse::from(&optimized)); + // Remove from the available nodes the nodes we added + available_nodes.retain(|node| !change.added().contains(node)); + subnets_changed.push(SubnetChangeResponse::from(&change)); } Ok(subnets_changed) diff --git a/rs/ic-management-backend/src/endpoints/network.rs b/rs/ic-management-backend/src/endpoints/network.rs index 4f6b03ffd..7c74e2021 100644 --- a/rs/ic-management-backend/src/endpoints/network.rs +++ b/rs/ic-management-backend/src/endpoints/network.rs @@ -15,7 +15,7 @@ async fn heal(request: web::Json, registry: web::Data> = subnets::unhealthy_with_nodes(&subnets, nodes_health).await; let subnets_change_response = - NetworkHealRequest::new(subnets, unhealthy_subnets).heal(registry.available_nodes().await?, request.max_replacable_nodes_per_sub)?; + NetworkHealRequest{ subnets, unhealthy_subnets}.heal(registry.available_nodes().await?, request.max_replacable_nodes_per_sub)?; Ok(HttpResponse::Ok().json(decentralization::HealResponse { subnets_change_response })) } diff --git a/rs/ic-management-types/src/lib.rs b/rs/ic-management-types/src/lib.rs index cab4de5c2..89aae7a2c 100644 --- a/rs/ic-management-types/src/lib.rs +++ b/rs/ic-management-types/src/lib.rs @@ -268,6 +268,8 @@ pub struct MinNakamotoCoefficients { pub average: f64, } +impl Eq for MinNakamotoCoefficients {} + #[derive(Clone, Serialize, Debug, Deserialize)] pub struct TopologyProposal { pub id: u64, From 5cb3e45879ef9caf6001221dd8208f25bf1d8ceb Mon Sep 17 00:00:00 2001 From: Pietro Date: Mon, 17 Jun 2024 13:24:00 +0200 Subject: [PATCH 20/37] Add tests --- rs/decentralization/src/nakamoto/mod.rs | 46 ++++++++++++++++++- rs/decentralization/src/network.rs | 40 +++++----------- .../src/endpoints/network.rs | 23 ++++++++-- 3 files changed, 76 insertions(+), 33 deletions(-) diff --git a/rs/decentralization/src/nakamoto/mod.rs b/rs/decentralization/src/nakamoto/mod.rs index 306d49870..a9f771121 100644 --- a/rs/decentralization/src/nakamoto/mod.rs +++ b/rs/decentralization/src/nakamoto/mod.rs @@ -425,7 +425,7 @@ impl Display for NakamotoScore { mod tests { use std::str::FromStr; - use crate::network::{DecentralizedSubnet, SubnetChangeRequest}; + use crate::network::{DecentralizedSubnet, NetworkHealRequest, NetworkHealSubnets, SubnetChangeRequest}; use ic_base_types::PrincipalId; use itertools::Itertools; use regex::Regex; @@ -871,4 +871,48 @@ mod tests { (5000, vec!["European subnet has 5 non-European node(s)".to_string()]) ); } + + #[test] + fn test_network_heal_subnets_ord() { + let not_important_small = new_test_subnet( + 0, + 13, + 0 + ) + .with_subnet_id(PrincipalId::from_str("k44fs-gm4pv-afozh-rs7zw-cg32n-u7xov-xqyx3-2pw5q-eucnu-cosd4-uqe").unwrap()); + let not_important_small = NetworkHealSubnets{ + name: String::from("App 20"), + decentralized_subnet: not_important_small, + unhealthy_nodes: vec![], + }; + + let not_important_large = new_test_subnet( + 0, + 28, + 0 + ) + .with_subnet_id(PrincipalId::from_str("bkfrj-6k62g-dycql-7h53p-atvkj-zg4to-gaogh-netha-ptybj-ntsgw-rqe").unwrap()); + let not_important_large = NetworkHealSubnets{ + name: String::from("European"), + decentralized_subnet: not_important_large, + unhealthy_nodes: vec![], + }; + + let important = + serde_json::from_str::(include_str!("../../test_data/subnet-uzr34.json")).expect("failed to read test data"); + let important = NetworkHealSubnets{ + name: important.metadata.name.clone(), + decentralized_subnet: DecentralizedSubnet::from(important), + unhealthy_nodes: vec![], + }; + + let unordered = vec![not_important_small.clone(), important.clone(), not_important_large.clone()]; + let healing_order = unordered.clone().into_iter().sorted_by(|a, b| a.cmp(b).reverse()).collect_vec(); + + assert_eq!( + vec![important, not_important_large, not_important_small], + healing_order + ); + } + } diff --git a/rs/decentralization/src/network.rs b/rs/decentralization/src/network.rs index 2529539be..ea8cf3d2e 100644 --- a/rs/decentralization/src/network.rs +++ b/rs/decentralization/src/network.rs @@ -11,7 +11,6 @@ use log::{debug, info, warn}; use rand::{seq::SliceRandom, SeedableRng}; use serde::{Deserialize, Serialize}; use std::cmp::Ordering; -use std::collections::BTreeMap; use std::fmt::{Debug, Display, Formatter}; use std::hash::Hash; @@ -987,7 +986,7 @@ impl Display for SubnetChange { } } -#[derive(PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub struct NetworkHealSubnets { pub name: String, pub decentralized_subnet: DecentralizedSubnet, @@ -999,7 +998,7 @@ impl NetworkHealSubnets { "NNS", "SNS", "Bitcoin", - "Internet Identity, tECDSA backup", + "Internet Identity", "tECDSA signing" ]; } @@ -1019,10 +1018,7 @@ impl Ord for NetworkHealSubnets { match (self_is_important, other_is_important) { (true, false) => Ordering::Greater, (false, true) => Ordering::Less, - _ => match self.decentralized_subnet.nodes.len().cmp(&other.decentralized_subnet.nodes.len()) { - Ordering::Equal => self.unhealthy_nodes.len().cmp(&other.unhealthy_nodes.len()), - other => other, - } + _ => self.decentralized_subnet.nodes.len().cmp(&other.decentralized_subnet.nodes.len()) } } @@ -1030,33 +1026,22 @@ impl Ord for NetworkHealSubnets { #[derive(Default, Clone)] pub struct NetworkHealRequest { - pub subnets: BTreeMap, - pub unhealthy_subnets: BTreeMap>, + pub subnets: Vec } impl NetworkHealRequest { - fn network_heal_subnets(&self) -> Result, NetworkError> { - Ok(self.unhealthy_subnets - .iter() - .flat_map(|(id, unhealthy_nodes)| { - let unhealthy_nodes = unhealthy_nodes.iter().map(Node::from).collect::>(); - let unhealthy_subnet = self.subnets.get(id).ok_or(NetworkError::SubnetNotFound(id.clone()))?; - - Ok::(NetworkHealSubnets{ - name: unhealthy_subnet.metadata.name.clone(), - decentralized_subnet: DecentralizedSubnet::from(unhealthy_subnet), - unhealthy_nodes} - ) - }) - .sorted_by(|a, b| a.cmp(b).reverse()) - .collect_vec()) + pub fn new(subnets: Vec) -> Self { + Self { subnets } } - pub fn heal(&self, mut available_nodes: Vec, max_replacable_nodes: Option) -> Result, NetworkError> { + pub fn heal_and_optimize(&self, mut available_nodes: Vec, max_replacable_nodes: Option) -> Result, NetworkError> { let mut subnets_changed = Vec::new(); - let subnets = self.network_heal_subnets()?; + let subnets_to_heal = self.subnets + .iter() + .sorted_by(|a, b| a.cmp(b).reverse()) + .collect_vec(); - for subnet in subnets { + for subnet in subnets_to_heal { let unhealthy_nodes_len = subnet.unhealthy_nodes.len(); if let Some(max_replacable_nodes) = max_replacable_nodes { if unhealthy_nodes_len > max_replacable_nodes { @@ -1076,7 +1061,6 @@ impl NetworkHealRequest { }; let change = change.optimize(optimize_limit, &subnet.unhealthy_nodes)?; - // Remove from the available nodes the nodes we added available_nodes.retain(|node| !change.added().contains(node)); subnets_changed.push(SubnetChangeResponse::from(&change)); } diff --git a/rs/ic-management-backend/src/endpoints/network.rs b/rs/ic-management-backend/src/endpoints/network.rs index 7c74e2021..d0df20b45 100644 --- a/rs/ic-management-backend/src/endpoints/network.rs +++ b/rs/ic-management-backend/src/endpoints/network.rs @@ -1,7 +1,8 @@ use super::*; use crate::subnets; -use decentralization::network::NetworkHealRequest; -use ic_management_types::requests::HealRequest; +use decentralization::network::{DecentralizedSubnet, NetworkHealRequest, NetworkHealSubnets, Node}; +use ic_management_types::{requests::HealRequest, NetworkError}; +use itertools::Itertools; #[post("/network/heal")] async fn heal(request: web::Json, registry: web::Data>>) -> Result { @@ -14,8 +15,22 @@ async fn heal(request: web::Json, registry: web::Data = registry.subnets(); let unhealthy_subnets: BTreeMap> = subnets::unhealthy_with_nodes(&subnets, nodes_health).await; - let subnets_change_response = - NetworkHealRequest{ subnets, unhealthy_subnets}.heal(registry.available_nodes().await?, request.max_replacable_nodes_per_sub)?; + let subnets_to_heal = unhealthy_subnets + .iter() + .flat_map(|(id, unhealthy_nodes)| { + let unhealthy_nodes = unhealthy_nodes.iter().map(Node::from).collect::>(); + let unhealthy_subnet = subnets.get(id).ok_or(NetworkError::SubnetNotFound(id.clone()))?; + + Ok::(NetworkHealSubnets{ + name: unhealthy_subnet.metadata.name.clone(), + decentralized_subnet: DecentralizedSubnet::from(unhealthy_subnet), + unhealthy_nodes} + ) + }) + .collect_vec(); + + let subnets_change_response = NetworkHealRequest::new(subnets_to_heal) + .heal_and_optimize(registry.available_nodes().await?, request.max_replacable_nodes_per_sub)?; Ok(HttpResponse::Ok().json(decentralization::HealResponse { subnets_change_response })) } From 9ccf3efbe958ce1e62c34642f562d1ae0c490214 Mon Sep 17 00:00:00 2001 From: Pietro Date: Mon, 17 Jun 2024 15:13:18 +0200 Subject: [PATCH 21/37] Run rustfmt --- rs/decentralization/src/nakamoto/mod.rs | 32 ++++++------------- rs/decentralization/src/network.rs | 31 +++++++----------- .../src/endpoints/network.rs | 10 +++--- 3 files changed, 27 insertions(+), 46 deletions(-) diff --git a/rs/decentralization/src/nakamoto/mod.rs b/rs/decentralization/src/nakamoto/mod.rs index a9f771121..eddee687b 100644 --- a/rs/decentralization/src/nakamoto/mod.rs +++ b/rs/decentralization/src/nakamoto/mod.rs @@ -873,34 +873,26 @@ mod tests { } #[test] - fn test_network_heal_subnets_ord() { - let not_important_small = new_test_subnet( - 0, - 13, - 0 - ) - .with_subnet_id(PrincipalId::from_str("k44fs-gm4pv-afozh-rs7zw-cg32n-u7xov-xqyx3-2pw5q-eucnu-cosd4-uqe").unwrap()); - let not_important_small = NetworkHealSubnets{ + fn test_network_heal_subnets_ord() { + let not_important_small = new_test_subnet(0, 13, 0) + .with_subnet_id(PrincipalId::from_str("k44fs-gm4pv-afozh-rs7zw-cg32n-u7xov-xqyx3-2pw5q-eucnu-cosd4-uqe").unwrap()); + let not_important_small = NetworkHealSubnets { name: String::from("App 20"), decentralized_subnet: not_important_small, unhealthy_nodes: vec![], }; - let not_important_large = new_test_subnet( - 0, - 28, - 0 - ) - .with_subnet_id(PrincipalId::from_str("bkfrj-6k62g-dycql-7h53p-atvkj-zg4to-gaogh-netha-ptybj-ntsgw-rqe").unwrap()); - let not_important_large = NetworkHealSubnets{ + let not_important_large = new_test_subnet(0, 28, 0) + .with_subnet_id(PrincipalId::from_str("bkfrj-6k62g-dycql-7h53p-atvkj-zg4to-gaogh-netha-ptybj-ntsgw-rqe").unwrap()); + let not_important_large = NetworkHealSubnets { name: String::from("European"), decentralized_subnet: not_important_large, unhealthy_nodes: vec![], }; let important = - serde_json::from_str::(include_str!("../../test_data/subnet-uzr34.json")).expect("failed to read test data"); - let important = NetworkHealSubnets{ + serde_json::from_str::(include_str!("../../test_data/subnet-uzr34.json")).expect("failed to read test data"); + let important = NetworkHealSubnets { name: important.metadata.name.clone(), decentralized_subnet: DecentralizedSubnet::from(important), unhealthy_nodes: vec![], @@ -909,10 +901,6 @@ mod tests { let unordered = vec![not_important_small.clone(), important.clone(), not_important_large.clone()]; let healing_order = unordered.clone().into_iter().sorted_by(|a, b| a.cmp(b).reverse()).collect_vec(); - assert_eq!( - vec![important, not_important_large, not_important_small], - healing_order - ); + assert_eq!(vec![important, not_important_large, not_important_small], healing_order); } - } diff --git a/rs/decentralization/src/network.rs b/rs/decentralization/src/network.rs index ea8cf3d2e..d42f7a97b 100644 --- a/rs/decentralization/src/network.rs +++ b/rs/decentralization/src/network.rs @@ -2,8 +2,8 @@ use crate::nakamoto::{self, NakamotoScore}; use crate::SubnetChangeResponse; use actix_web::http::StatusCode; use actix_web::{HttpResponse, ResponseError}; -use async_trait::async_trait; use anyhow::anyhow; +use async_trait::async_trait; use ic_base_types::PrincipalId; use ic_management_types::{MinNakamotoCoefficients, NetworkError, NodeFeature}; use itertools::Itertools; @@ -990,17 +990,11 @@ impl Display for SubnetChange { pub struct NetworkHealSubnets { pub name: String, pub decentralized_subnet: DecentralizedSubnet, - pub unhealthy_nodes: Vec + pub unhealthy_nodes: Vec, } impl NetworkHealSubnets { - const IMPORTANT_SUBNETS: &'static [&'static str] = &[ - "NNS", - "SNS", - "Bitcoin", - "Internet Identity", - "tECDSA signing" - ]; + const IMPORTANT_SUBNETS: &'static [&'static str] = &["NNS", "SNS", "Bitcoin", "Internet Identity", "tECDSA signing"]; } impl PartialOrd for NetworkHealSubnets { @@ -1011,22 +1005,20 @@ impl PartialOrd for NetworkHealSubnets { impl Ord for NetworkHealSubnets { fn cmp(&self, other: &Self) -> Ordering { - let self_is_important = NetworkHealSubnets::IMPORTANT_SUBNETS.contains(&self.name.as_str()); let other_is_important = NetworkHealSubnets::IMPORTANT_SUBNETS.contains(&other.name.as_str()); match (self_is_important, other_is_important) { (true, false) => Ordering::Greater, (false, true) => Ordering::Less, - _ => self.decentralized_subnet.nodes.len().cmp(&other.decentralized_subnet.nodes.len()) + _ => self.decentralized_subnet.nodes.len().cmp(&other.decentralized_subnet.nodes.len()), } - } } #[derive(Default, Clone)] pub struct NetworkHealRequest { - pub subnets: Vec + pub subnets: Vec, } impl NetworkHealRequest { @@ -1034,12 +1026,13 @@ impl NetworkHealRequest { Self { subnets } } - pub fn heal_and_optimize(&self, mut available_nodes: Vec, max_replacable_nodes: Option) -> Result, NetworkError> { + pub fn heal_and_optimize( + &self, + mut available_nodes: Vec, + max_replacable_nodes: Option, + ) -> Result, NetworkError> { let mut subnets_changed = Vec::new(); - let subnets_to_heal = self.subnets - .iter() - .sorted_by(|a, b| a.cmp(b).reverse()) - .collect_vec(); + let subnets_to_heal = self.subnets.iter().sorted_by(|a, b| a.cmp(b).reverse()).collect_vec(); for subnet in subnets_to_heal { let unhealthy_nodes_len = subnet.unhealthy_nodes.len(); @@ -1054,7 +1047,7 @@ impl NetworkHealRequest { } let optimize_limit = max_replacable_nodes.unwrap_or(unhealthy_nodes_len) - unhealthy_nodes_len; - let change = SubnetChangeRequest{ + let change = SubnetChangeRequest { subnet: subnet.decentralized_subnet.clone(), available_nodes: available_nodes.clone(), ..Default::default() diff --git a/rs/ic-management-backend/src/endpoints/network.rs b/rs/ic-management-backend/src/endpoints/network.rs index d0df20b45..b0a4669ae 100644 --- a/rs/ic-management-backend/src/endpoints/network.rs +++ b/rs/ic-management-backend/src/endpoints/network.rs @@ -21,16 +21,16 @@ async fn heal(request: web::Json, registry: web::Data>(); let unhealthy_subnet = subnets.get(id).ok_or(NetworkError::SubnetNotFound(id.clone()))?; - Ok::(NetworkHealSubnets{ + Ok::(NetworkHealSubnets { name: unhealthy_subnet.metadata.name.clone(), decentralized_subnet: DecentralizedSubnet::from(unhealthy_subnet), - unhealthy_nodes} - ) + unhealthy_nodes, + }) }) .collect_vec(); - let subnets_change_response = NetworkHealRequest::new(subnets_to_heal) - .heal_and_optimize(registry.available_nodes().await?, request.max_replacable_nodes_per_sub)?; + let subnets_change_response = + NetworkHealRequest::new(subnets_to_heal).heal_and_optimize(registry.available_nodes().await?, request.max_replacable_nodes_per_sub)?; Ok(HttpResponse::Ok().json(decentralization::HealResponse { subnets_change_response })) } From cae5322a37fffa317e7b1f410dc873983f8a92e4 Mon Sep 17 00:00:00 2001 From: Pietro Date: Mon, 17 Jun 2024 17:45:54 +0200 Subject: [PATCH 22/37] Add test --- rs/decentralization/src/nakamoto/mod.rs | 37 +++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/rs/decentralization/src/nakamoto/mod.rs b/rs/decentralization/src/nakamoto/mod.rs index eddee687b..249b09a8b 100644 --- a/rs/decentralization/src/nakamoto/mod.rs +++ b/rs/decentralization/src/nakamoto/mod.rs @@ -903,4 +903,41 @@ mod tests { assert_eq!(vec![important, not_important_large, not_important_small], healing_order); } + + #[test] + fn test_network_heal() { + let nodes_available = new_test_nodes("spare", 10, 2); + let nodes_available_principals = nodes_available.iter().map(|n| n.id).collect_vec(); + + let important = + serde_json::from_str::(include_str!("../../test_data/subnet-uzr34.json")).expect("failed to read test data"); + let important_decentralized = DecentralizedSubnet::from(important.clone()); + let important_unhealthy_principals = vec![ + PrincipalId::from_str("e4ysi-xp4fs-5ckcv-7e76q-edydw-ak6le-2acyt-k7udb-lj2vo-fqhhx-vqe").unwrap(), + PrincipalId::from_str("aefqq-d7ldg-ljk5s-cmnxk-qqu7c-tw52l-74g3m-xxl5d-ag4ia-dxubz-wae").unwrap(), + ]; + let unhealthy_nodes = important_decentralized + .nodes + .clone() + .into_iter() + .filter(|n| important_unhealthy_principals.contains(&n.id)) + .collect_vec(); + let important = NetworkHealSubnets { + name: important.metadata.name.clone(), + decentralized_subnet: important_decentralized, + unhealthy_nodes: unhealthy_nodes.clone(), + }; + + let network_heal_response = NetworkHealRequest::new(vec![important]) + .heal_and_optimize(nodes_available.clone(), None) + .unwrap(); + + let result = network_heal_response.get(0).unwrap().clone(); + + assert_eq!(important_unhealthy_principals, result.removed.clone()); + + assert_eq!(important_unhealthy_principals.len(), result.added.len()); + + result.added.iter().for_each(|n| assert!(nodes_available_principals.contains(n))); + } } From 7676b5b2151955298ef4cbd6b59d32598d40ba34 Mon Sep 17 00:00:00 2001 From: Pietro Date: Mon, 17 Jun 2024 17:52:21 +0200 Subject: [PATCH 23/37] Fix clippy --- rs/decentralization/src/nakamoto/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rs/decentralization/src/nakamoto/mod.rs b/rs/decentralization/src/nakamoto/mod.rs index 249b09a8b..70fa4e6c7 100644 --- a/rs/decentralization/src/nakamoto/mod.rs +++ b/rs/decentralization/src/nakamoto/mod.rs @@ -932,7 +932,7 @@ mod tests { .heal_and_optimize(nodes_available.clone(), None) .unwrap(); - let result = network_heal_response.get(0).unwrap().clone(); + let result = network_heal_response.first().unwrap().clone(); assert_eq!(important_unhealthy_principals, result.removed.clone()); From 28b4d57f600afe7dbd285f2d32ffe0a74fc9b374 Mon Sep 17 00:00:00 2001 From: Pietro Date: Mon, 17 Jun 2024 17:54:56 +0200 Subject: [PATCH 24/37] Fix clippy --- rs/ic-management-backend/src/endpoints/network.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rs/ic-management-backend/src/endpoints/network.rs b/rs/ic-management-backend/src/endpoints/network.rs index b0a4669ae..573cdbdf4 100644 --- a/rs/ic-management-backend/src/endpoints/network.rs +++ b/rs/ic-management-backend/src/endpoints/network.rs @@ -19,7 +19,7 @@ async fn heal(request: web::Json, registry: web::Data>(); - let unhealthy_subnet = subnets.get(id).ok_or(NetworkError::SubnetNotFound(id.clone()))?; + let unhealthy_subnet = subnets.get(id).ok_or(NetworkError::SubnetNotFound(*id))?; Ok::(NetworkHealSubnets { name: unhealthy_subnet.metadata.name.clone(), From 64b039fdb44bc9288fb2dd2442a460812a22d2b7 Mon Sep 17 00:00:00 2001 From: Pietro Date: Mon, 17 Jun 2024 18:18:09 +0200 Subject: [PATCH 25/37] Removed unecessary derive --- rs/decentralization/src/network.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/rs/decentralization/src/network.rs b/rs/decentralization/src/network.rs index d42f7a97b..3fac78b1a 100644 --- a/rs/decentralization/src/network.rs +++ b/rs/decentralization/src/network.rs @@ -1016,7 +1016,6 @@ impl Ord for NetworkHealSubnets { } } -#[derive(Default, Clone)] pub struct NetworkHealRequest { pub subnets: Vec, } From 3535c432b610a637c111af337fd1e1abefdfcaa6 Mon Sep 17 00:00:00 2001 From: pietrodimarco-dfinity <124565147+pietrodimarco-dfinity@users.noreply.github.com> Date: Mon, 17 Jun 2024 19:35:40 +0200 Subject: [PATCH 26/37] Update rs/cli/src/main.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Saša Tomić --- rs/cli/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rs/cli/src/main.rs b/rs/cli/src/main.rs index 5b88468a0..b2d5d45f9 100644 --- a/rs/cli/src/main.rs +++ b/rs/cli/src/main.rs @@ -111,7 +111,7 @@ async fn async_main() -> Result<(), anyhow::Error> { runner_instance .network_heal( ic_management_types::requests::HealRequest { - max_replacable_nodes_per_sub: *max_replacable_nodes_per_sub, + max_replaceable_nodes_per_sub: *max_replaceable_nodes_per_sub, }, cli_opts.verbose, simulate, From c145360859ed240795f6d1c01d959b98b5ec6245 Mon Sep 17 00:00:00 2001 From: pietrodimarco-dfinity <124565147+pietrodimarco-dfinity@users.noreply.github.com> Date: Mon, 17 Jun 2024 19:35:48 +0200 Subject: [PATCH 27/37] Update rs/cli/src/main.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Saša Tomić --- rs/cli/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rs/cli/src/main.rs b/rs/cli/src/main.rs index b2d5d45f9..363f5d25c 100644 --- a/rs/cli/src/main.rs +++ b/rs/cli/src/main.rs @@ -106,7 +106,7 @@ async fn async_main() -> Result<(), anyhow::Error> { } cli::Commands::Heal { - max_replacable_nodes_per_sub, + max_replaceable_nodes_per_sub, } => { runner_instance .network_heal( From 032693413d9b649ee68d9554485f85d91b72a867 Mon Sep 17 00:00:00 2001 From: pietrodimarco-dfinity <124565147+pietrodimarco-dfinity@users.noreply.github.com> Date: Mon, 17 Jun 2024 19:36:22 +0200 Subject: [PATCH 28/37] Update rs/decentralization/src/network.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Saša Tomić --- rs/decentralization/src/network.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rs/decentralization/src/network.rs b/rs/decentralization/src/network.rs index 3fac78b1a..787ce47d3 100644 --- a/rs/decentralization/src/network.rs +++ b/rs/decentralization/src/network.rs @@ -1028,7 +1028,7 @@ impl NetworkHealRequest { pub fn heal_and_optimize( &self, mut available_nodes: Vec, - max_replacable_nodes: Option, + max_replaceable_nodes: Option, ) -> Result, NetworkError> { let mut subnets_changed = Vec::new(); let subnets_to_heal = self.subnets.iter().sorted_by(|a, b| a.cmp(b).reverse()).collect_vec(); From 188e4f705a8441ff5d53580fa756ee8563318ffd Mon Sep 17 00:00:00 2001 From: pietrodimarco-dfinity <124565147+pietrodimarco-dfinity@users.noreply.github.com> Date: Mon, 17 Jun 2024 19:43:04 +0200 Subject: [PATCH 29/37] Update rs/cli/src/cli.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Saša Tomić --- rs/cli/src/cli.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rs/cli/src/cli.rs b/rs/cli/src/cli.rs index 718d8637f..8750747ce 100644 --- a/rs/cli/src/cli.rs +++ b/rs/cli/src/cli.rs @@ -63,7 +63,7 @@ pub enum Commands { /// Optimization will be performed automatically maximizing the decentralization /// and minimizing the number of replaced nodes per subnet #[clap(short, long)] - max_replacable_nodes_per_sub: Option, + max_replaceable_nodes_per_sub: Option, }, /// Manage an existing subnet From 60694363c63f3660fd62ca10b6d073a22983c398 Mon Sep 17 00:00:00 2001 From: pietrodimarco-dfinity <124565147+pietrodimarco-dfinity@users.noreply.github.com> Date: Mon, 17 Jun 2024 19:43:15 +0200 Subject: [PATCH 30/37] Update rs/decentralization/src/network.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Saša Tomić --- rs/decentralization/src/network.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rs/decentralization/src/network.rs b/rs/decentralization/src/network.rs index 787ce47d3..bdef45e60 100644 --- a/rs/decentralization/src/network.rs +++ b/rs/decentralization/src/network.rs @@ -1036,7 +1036,7 @@ impl NetworkHealRequest { for subnet in subnets_to_heal { let unhealthy_nodes_len = subnet.unhealthy_nodes.len(); if let Some(max_replacable_nodes) = max_replacable_nodes { - if unhealthy_nodes_len > max_replacable_nodes { + if unhealthy_nodes_len > max_replaceable_nodes { warn!( "Subnet {} has {} unhealthy nodes\nMax replacable nodes is {} skipping...", subnet.decentralized_subnet.id, unhealthy_nodes_len, max_replacable_nodes From b779c5c8c5666e6278849f19c7ba3ddbb55a46a2 Mon Sep 17 00:00:00 2001 From: pietrodimarco-dfinity <124565147+pietrodimarco-dfinity@users.noreply.github.com> Date: Mon, 17 Jun 2024 19:43:26 +0200 Subject: [PATCH 31/37] Update rs/ic-management-types/src/requests.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Saša Tomić --- rs/ic-management-types/src/requests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rs/ic-management-types/src/requests.rs b/rs/ic-management-types/src/requests.rs index 9dd314a71..4da5a8571 100644 --- a/rs/ic-management-types/src/requests.rs +++ b/rs/ic-management-types/src/requests.rs @@ -117,5 +117,5 @@ impl NodeRemovalReason { #[derive(Serialize, Deserialize)] pub struct HealRequest { - pub max_replacable_nodes_per_sub: Option, + pub max_replaceable_nodes_per_sub: Option, } From 96f5ad8d342177949da574233f02176c2c5523db Mon Sep 17 00:00:00 2001 From: pietrodimarco-dfinity <124565147+pietrodimarco-dfinity@users.noreply.github.com> Date: Mon, 17 Jun 2024 19:43:32 +0200 Subject: [PATCH 32/37] Update rs/ic-management-backend/src/endpoints/network.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Saša Tomić --- rs/ic-management-backend/src/endpoints/network.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rs/ic-management-backend/src/endpoints/network.rs b/rs/ic-management-backend/src/endpoints/network.rs index 573cdbdf4..fc0663276 100644 --- a/rs/ic-management-backend/src/endpoints/network.rs +++ b/rs/ic-management-backend/src/endpoints/network.rs @@ -30,7 +30,7 @@ async fn heal(request: web::Json, registry: web::Data Date: Mon, 17 Jun 2024 19:43:40 +0200 Subject: [PATCH 33/37] Update rs/decentralization/src/network.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Saša Tomić --- rs/decentralization/src/network.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rs/decentralization/src/network.rs b/rs/decentralization/src/network.rs index bdef45e60..73513a47a 100644 --- a/rs/decentralization/src/network.rs +++ b/rs/decentralization/src/network.rs @@ -1039,7 +1039,7 @@ impl NetworkHealRequest { if unhealthy_nodes_len > max_replaceable_nodes { warn!( "Subnet {} has {} unhealthy nodes\nMax replacable nodes is {} skipping...", - subnet.decentralized_subnet.id, unhealthy_nodes_len, max_replacable_nodes + subnet.decentralized_subnet.id, unhealthy_nodes_len, max_replaceable_nodes ); continue; } From 5a9a5d449ea7d005e6c503b112c3d0bb8ee82cf1 Mon Sep 17 00:00:00 2001 From: pietrodimarco-dfinity <124565147+pietrodimarco-dfinity@users.noreply.github.com> Date: Mon, 17 Jun 2024 19:43:47 +0200 Subject: [PATCH 34/37] Update rs/decentralization/src/network.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Saša Tomić --- rs/decentralization/src/network.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rs/decentralization/src/network.rs b/rs/decentralization/src/network.rs index 73513a47a..8d63019ed 100644 --- a/rs/decentralization/src/network.rs +++ b/rs/decentralization/src/network.rs @@ -1044,7 +1044,7 @@ impl NetworkHealRequest { continue; } } - let optimize_limit = max_replacable_nodes.unwrap_or(unhealthy_nodes_len) - unhealthy_nodes_len; + let optimize_limit = max_replaceable_nodes.unwrap_or(unhealthy_nodes_len) - unhealthy_nodes_len; let change = SubnetChangeRequest { subnet: subnet.decentralized_subnet.clone(), From 1aa10db1a4624bf7d623082df654275fee911995 Mon Sep 17 00:00:00 2001 From: Pietro Date: Mon, 17 Jun 2024 21:14:53 +0200 Subject: [PATCH 35/37] Add take max replacement unhealthy --- rs/decentralization/src/nakamoto/mod.rs | 16 +++++++++++++--- rs/decentralization/src/network.rs | 19 +++++++++++++------ 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/rs/decentralization/src/nakamoto/mod.rs b/rs/decentralization/src/nakamoto/mod.rs index 70fa4e6c7..1ffaa5758 100644 --- a/rs/decentralization/src/nakamoto/mod.rs +++ b/rs/decentralization/src/nakamoto/mod.rs @@ -928,16 +928,26 @@ mod tests { unhealthy_nodes: unhealthy_nodes.clone(), }; - let network_heal_response = NetworkHealRequest::new(vec![important]) - .heal_and_optimize(nodes_available.clone(), None) + let max_replaceable_nodes = None; + let network_heal_response = NetworkHealRequest::new(vec![important.clone()]) + .heal_and_optimize(nodes_available.clone(), max_replaceable_nodes) .unwrap(); - let result = network_heal_response.first().unwrap().clone(); assert_eq!(important_unhealthy_principals, result.removed.clone()); assert_eq!(important_unhealthy_principals.len(), result.added.len()); + let max_replaceable_nodes = Some(1); + let network_heal_response = NetworkHealRequest::new(vec![important.clone()]) + .heal_and_optimize(nodes_available.clone(), max_replaceable_nodes) + .unwrap(); + let result = network_heal_response.first().unwrap().clone(); + + assert_eq!(important_unhealthy_principals.into_iter().take(1).collect_vec(), result.removed.clone()); + + assert_eq!(1, result.added.len()); + result.added.iter().for_each(|n| assert!(nodes_available_principals.contains(n))); } } diff --git a/rs/decentralization/src/network.rs b/rs/decentralization/src/network.rs index 8d63019ed..7c089f121 100644 --- a/rs/decentralization/src/network.rs +++ b/rs/decentralization/src/network.rs @@ -1034,16 +1034,23 @@ impl NetworkHealRequest { let subnets_to_heal = self.subnets.iter().sorted_by(|a, b| a.cmp(b).reverse()).collect_vec(); for subnet in subnets_to_heal { - let unhealthy_nodes_len = subnet.unhealthy_nodes.len(); - if let Some(max_replacable_nodes) = max_replacable_nodes { + let mut unhealthy_nodes = subnet.unhealthy_nodes.clone(); + let unhealthy_nodes_len = unhealthy_nodes.len(); + + if let Some(max_replaceable_nodes) = max_replaceable_nodes { if unhealthy_nodes_len > max_replaceable_nodes { + unhealthy_nodes = subnet.unhealthy_nodes.clone().into_iter().take(max_replaceable_nodes).collect_vec(); + warn!( - "Subnet {} has {} unhealthy nodes\nMax replacable nodes is {} skipping...", - subnet.decentralized_subnet.id, unhealthy_nodes_len, max_replaceable_nodes + "Subnet {} has {} unhealthy nodes\nMax replacable nodes is {}\nNodes considered for replacement: {:?}", + subnet.decentralized_subnet.id, + unhealthy_nodes_len, + max_replaceable_nodes, + unhealthy_nodes.iter().map(|node| node.id).collect_vec() ); - continue; } } + let unhealthy_nodes_len = unhealthy_nodes.len(); let optimize_limit = max_replaceable_nodes.unwrap_or(unhealthy_nodes_len) - unhealthy_nodes_len; let change = SubnetChangeRequest { @@ -1051,7 +1058,7 @@ impl NetworkHealRequest { available_nodes: available_nodes.clone(), ..Default::default() }; - let change = change.optimize(optimize_limit, &subnet.unhealthy_nodes)?; + let change = change.optimize(optimize_limit, &unhealthy_nodes)?; available_nodes.retain(|node| !change.added().contains(node)); subnets_changed.push(SubnetChangeResponse::from(&change)); From ce209c9446d5bfc7ee935e002685af57f5320107 Mon Sep 17 00:00:00 2001 From: pietrodimarco-dfinity <124565147+pietrodimarco-dfinity@users.noreply.github.com> Date: Tue, 18 Jun 2024 13:19:31 +0200 Subject: [PATCH 36/37] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Nikola Milosavljevic <73236646+NikolaMilosa@users.noreply.github.com> Co-authored-by: Saša Tomić --- rs/cli/src/runner.rs | 11 +++++++++-- rs/decentralization/src/network.rs | 4 ++-- rs/ic-management-backend/src/subnets.rs | 14 ++------------ 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/rs/cli/src/runner.rs b/rs/cli/src/runner.rs index 2c0d72133..c52b68c4c 100644 --- a/rs/cli/src/runner.rs +++ b/rs/cli/src/runner.rs @@ -455,7 +455,7 @@ impl Runner { let change = self.dashboard_backend_client.network_heal(request).await?; println!("{}", change); - join_all(change.subnets_change_response.iter().map(|subnet_change_response| async move { + let errors = join_all(change.subnets_change_response.iter().map(|subnet_change_response| async move { self.run_membership_change( subnet_change_response.clone(), ops_subnet_node_replace::replace_proposal_options(subnet_change_response)?, @@ -467,7 +467,14 @@ impl Runner { e }) })) - .await; + .await + .into_iter() + .filter_map(|f| f.err()) + .collect::>(); + + if !errors.is_empty() { + anyhow::bail!("Errors: {:?}", errors); + } Ok(()) } diff --git a/rs/decentralization/src/network.rs b/rs/decentralization/src/network.rs index 7c089f121..2c78b180c 100644 --- a/rs/decentralization/src/network.rs +++ b/rs/decentralization/src/network.rs @@ -1042,10 +1042,10 @@ impl NetworkHealRequest { unhealthy_nodes = subnet.unhealthy_nodes.clone().into_iter().take(max_replaceable_nodes).collect_vec(); warn!( - "Subnet {} has {} unhealthy nodes\nMax replacable nodes is {}\nNodes considered for replacement: {:?}", + "Subnet {}: replacing {} of {} unhealthy nodes: {:?}", subnet.decentralized_subnet.id, - unhealthy_nodes_len, max_replaceable_nodes, + unhealthy_nodes_len, unhealthy_nodes.iter().map(|node| node.id).collect_vec() ); } diff --git a/rs/ic-management-backend/src/subnets.rs b/rs/ic-management-backend/src/subnets.rs index 144cae5e6..90b7847f0 100644 --- a/rs/ic-management-backend/src/subnets.rs +++ b/rs/ic-management-backend/src/subnets.rs @@ -17,18 +17,8 @@ pub async fn unhealthy_with_nodes( .nodes .into_iter() .filter_map(|n| match nodes_health.get(&n.principal) { - Some(health) => { - if *health == ic_management_types::Status::Healthy { - None - } else { - info!("Node {} is {:?}", n.principal, health); - Some(n) - } - } - None => { - warn!("Node {} has no known health, assuming unhealthy", n.principal); - Some(n) - } + Some(health) if *health == ic_management_types::Status::Healthy => None, + _ => Some(n) }) .collect::>(); From 96ce52e1d35999686629006d00c815a01277bd15 Mon Sep 17 00:00:00 2001 From: Pietro Date: Tue, 18 Jun 2024 13:38:14 +0200 Subject: [PATCH 37/37] Run rustfmt --- rs/cli/src/runner.rs | 2 +- rs/ic-management-backend/src/subnets.rs | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/rs/cli/src/runner.rs b/rs/cli/src/runner.rs index c52b68c4c..6ab395b52 100644 --- a/rs/cli/src/runner.rs +++ b/rs/cli/src/runner.rs @@ -455,7 +455,7 @@ impl Runner { let change = self.dashboard_backend_client.network_heal(request).await?; println!("{}", change); - let errors = join_all(change.subnets_change_response.iter().map(|subnet_change_response| async move { + let errors = join_all(change.subnets_change_response.iter().map(|subnet_change_response| async move { self.run_membership_change( subnet_change_response.clone(), ops_subnet_node_replace::replace_proposal_options(subnet_change_response)?, diff --git a/rs/ic-management-backend/src/subnets.rs b/rs/ic-management-backend/src/subnets.rs index 90b7847f0..cd5b1fff9 100644 --- a/rs/ic-management-backend/src/subnets.rs +++ b/rs/ic-management-backend/src/subnets.rs @@ -3,7 +3,6 @@ use std::collections::BTreeMap; use decentralization::{network::SubnetChange, SubnetChangeResponse}; use ic_base_types::PrincipalId; use ic_management_types::{Node, Status, Subnet, TopologyChangeProposal}; -use log::{info, warn}; pub async fn unhealthy_with_nodes( subnets: &BTreeMap, @@ -18,7 +17,7 @@ pub async fn unhealthy_with_nodes( .into_iter() .filter_map(|n| match nodes_health.get(&n.principal) { Some(health) if *health == ic_management_types::Status::Healthy => None, - _ => Some(n) + _ => Some(n), }) .collect::>();