Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
ee64ca4
feat: initial compiling draft
pierugo-dfinity Apr 1, 2026
1e60226
fix: clippy
pierugo-dfinity Apr 2, 2026
18ecb85
fix: clippy
pierugo-dfinity Apr 2, 2026
dcffb73
fix
pierugo-dfinity Apr 2, 2026
928e3d9
fix
pierugo-dfinity Apr 2, 2026
fe0f840
fix: given height == 0 downloads nothing
pierugo-dfinity Apr 2, 2026
868a927
fix: do not print height infos twice
pierugo-dfinity Apr 10, 2026
3156de5
refactor: consistent download <-> copy
pierugo-dfinity Apr 10, 2026
c474c28
docs
pierugo-dfinity Apr 10, 2026
666c924
style: introduce `MaybeRemote`
pierugo-dfinity Apr 10, 2026
29a3a46
refactor: cleanup recovery parameters
pierugo-dfinity Apr 10, 2026
5d2a2c3
style: named struct
pierugo-dfinity Apr 10, 2026
408b49a
style: match
pierugo-dfinity Apr 13, 2026
a308460
refactor: inline implementations
pierugo-dfinity Apr 13, 2026
72edf77
test: add unit tests
pierugo-dfinity Apr 13, 2026
e5a40ac
fix: do not `exit(1)` in unit tests
pierugo-dfinity Apr 13, 2026
e10e867
Merge branch 'master' into pierugo/recovery/download-state-chosen-by-…
pierugo-dfinity Apr 15, 2026
442ffe9
feat: max CUP first
pierugo-dfinity Apr 15, 2026
f5626d1
fix: check for nonzero state height download
pierugo-dfinity Apr 15, 2026
faa05c9
docs: fix missing docs
pierugo-dfinity Apr 16, 2026
df10c4c
style: pass `MaybeRemote` directly
pierugo-dfinity Apr 16, 2026
d21dbae
style: enum instead
pierugo-dfinity Apr 16, 2026
20f31fe
style: move MaybeRemote to util
pierugo-dfinity Apr 16, 2026
c168b4f
docs: remove self-explaining docs
pierugo-dfinity Apr 16, 2026
3d2cbe7
style: clippy
pierugo-dfinity Apr 16, 2026
6595691
docs: AI review
pierugo-dfinity Apr 16, 2026
96821d9
style: clippy
pierugo-dfinity Apr 16, 2026
8c9b674
refactor: use CheckpointHeight
pierugo-dfinity Apr 17, 2026
7b3cfc4
style: include arg in format string
pierugo-dfinity Apr 17, 2026
f2d53c9
style: rename variable
pierugo-dfinity Apr 17, 2026
b3a6a23
feat: log warning on no checkpoints found
pierugo-dfinity Apr 17, 2026
03813ec
style: rename `MaybeRemote` to `ExecutionMode`
pierugo-dfinity Apr 17, 2026
15b6516
refactor: run same commands locally and remotely
pierugo-dfinity Apr 17, 2026
64f31e9
style: do not unnecessarily deconstruct NodeHeights
pierugo-dfinity Apr 17, 2026
5a8a090
docs: adapt comment
pierugo-dfinity Apr 17, 2026
52f9c66
style: remove implicit assumptions
pierugo-dfinity Apr 17, 2026
f61eade
fix: clippy
pierugo-dfinity Apr 17, 2026
af89c16
fix: clippy
pierugo-dfinity Apr 17, 2026
ce0f431
fix: clippy
pierugo-dfinity Apr 17, 2026
34b75f4
Merge branch 'master' into pierugo/recovery/download-state-chosen-by-…
pierugo-dfinity Apr 17, 2026
8f6f1f7
style: factorize in closure
pierugo-dfinity Apr 23, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions rs/recovery/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ DEV_DEPENDENCIES = [
"//rs/test_utilities/consensus",
"//rs/test_utilities/tmpdir",
"//rs/test_utilities/types",
"@crate_index//:assert_matches",
"@crate_index//:tempfile",
]

Expand Down
1 change: 1 addition & 0 deletions rs/recovery/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ tokio = { workspace = true }
url = { workspace = true }

[dev-dependencies]
assert_matches = { workspace = true }
ic-crypto-tree-hash = { path = "../crypto/tree_hash" }
ic-test-utilities-consensus = { path = "../test_utilities/consensus" }
ic-test-utilities-tmpdir = { path = "../test_utilities/tmpdir" }
Expand Down
27 changes: 20 additions & 7 deletions rs/recovery/src/app_subnet_recovery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ pub enum StepType {
/// replica bug and not due to malicious actors, this step should not reveal any problems.
MergeCertificationPools,
/// In this step we will download all finalized consensus artifacts. For that we should use a
/// node, that is up to date with the highest finalization height because this node will contain
/// all required artifacts for the recovery.
/// node that is up to date with the highest finalization and CUP heights, because this node
/// will contain all required artifacts for the recovery.
DownloadConsensusPool,
/// In this step we will download the subnet state from a node that is sufficiently up to date
/// with the rest of the subnet, i.e. not behind by more than 1 DKG interval. To avoid
Expand Down Expand Up @@ -171,6 +171,10 @@ pub struct AppSubnetRecoveryArgs {
#[clap(long)]
pub keep_downloaded_state: Option<bool>,

/// Height of the checkpoint to download. If not provided, the latest checkpoint is used.
#[clap(long)]
pub download_state_height: Option<u64>,

/// The method of uploading state. Possible values are either `local` (for a
/// local recovery on the admin node) or the ipv6 address of the target node.
/// Local recoveries allow us to skip a potentially expensive data transfer.
Expand Down Expand Up @@ -306,8 +310,8 @@ impl RecoveryIterator<StepType, StepTypeIter> for AppSubnetRecovery {

StepType::DownloadConsensusPool => {
if self.params.download_pool_node.is_none() {
// We could pick a node with highest finalization height automatically, but we
// might have a preference between nodes of the same finalization height.
// We could pick a node with the highest finalization and CUP heights automatically,
// but we might have a preference among nodes with the same heights.
Comment thread
kpop-dfinity marked this conversation as resolved.
print_height_info(
&self.logger,
&self.recovery.registry_helper,
Expand Down Expand Up @@ -336,6 +340,13 @@ impl RecoveryIterator<StepType, StepTypeIter> for AppSubnetRecovery {
"Preserve original downloaded state locally?",
));
}

if self.params.download_state_height.is_none() {
self.params.download_state_height = read_optional(
&self.logger,
"Enter the height of the checkpoint to download (leave empty for latest checkpoint):",
);
}
}

StepType::ICReplay => {
Expand Down Expand Up @@ -453,9 +464,10 @@ impl RecoveryIterator<StepType, StepTypeIter> for AppSubnetRecovery {
}

StepType::DownloadState => match self.params.download_state_method {
Some(DataLocation::Local) => {
Ok(Box::new(self.recovery.get_copy_local_state_step()))
}
Some(DataLocation::Local) => Ok(Box::new(
self.recovery
.get_copy_local_state_step(self.params.download_state_height)?,
)),
Some(DataLocation::Remote(node_ip)) => {
let (ssh_user, key_file) = if self.params.readonly_pub_key.is_some() {
(SshUser::Readonly, self.params.readonly_key_file.clone())
Expand All @@ -468,6 +480,7 @@ impl RecoveryIterator<StepType, StepTypeIter> for AppSubnetRecovery {
ssh_user,
key_file,
self.params.keep_downloaded_state == Some(true),
self.params.download_state_height,
)?))
}
None => Err(RecoveryError::StepSkipped),
Expand Down
5 changes: 4 additions & 1 deletion rs/recovery/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,10 @@ fn print_summary(logger: &Logger, args: &RecoveryArgs, subnet_id: SubnetId) {

pub fn print_height_info(logger: &Logger, registry_helper: &RegistryHelper, subnet_id: SubnetId) {
info!(logger, "Collecting node heights from metrics...");
info!(logger, "Select a node with highest finalization height:");
info!(
logger,
"Select a node with highest finalization height and highest CUP height:"
);
match get_available_nodes_heights_from_metrics(logger, registry_helper, subnet_id) {
Ok(heights) => info!(logger, "{:#?}", heights),
Err(err) => warn!(logger, "Failed to query height info: {:?}", err),
Expand Down
Loading
Loading