diff --git a/src/Cargo.lock b/src/Cargo.lock index aee58d4..8590ed8 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -3310,6 +3310,7 @@ dependencies = [ "parking_lot", "pretty_assertions", "rand 0.8.5", + "rayon", "regex", "reqwest", "secrets-vault", @@ -3323,6 +3324,7 @@ dependencies = [ "storage", "stream-cancel", "string-builder", + "tempfile", "thiserror 1.0.69", "tokio", "tokio-util", @@ -5206,9 +5208,11 @@ dependencies = [ "serde", "serde_cbor", "serde_derive", + "serde_json", "smallvec", "strum 0.18.0", "strum_macros 0.18.0", + "tempfile", "thiserror 1.0.69", "tokio", "tokio-util", diff --git a/src/block/src/shard.rs b/src/block/src/shard.rs index 3e6ab62..9d528b1 100644 --- a/src/block/src/shard.rs +++ b/src/block/src/shard.rs @@ -31,6 +31,7 @@ use crate::{ use std::{ any::type_name, fmt::{self, Display, Formatter}, + str::FromStr, }; #[cfg(test)] @@ -644,6 +645,24 @@ impl fmt::Debug for ShardIdent { } } +impl FromStr for ShardIdent { + type Err = crate::Error; + + fn from_str(s: &str) -> Result { + let (workchain_part, shard_part_with_maybe_extra) = + s.split_once(':').ok_or_else(|| error!("Can't read shard ident from {}", s))?; + + let workchain_id: i32 = workchain_part + .trim() + .parse() + .map_err(|e| error!("Can't read workchain_id from {}: {}", s, e))?; + let prefix = u64::from_str_radix(shard_part_with_maybe_extra.trim(), 16) + .map_err(|e| error!("Can't read shard from {}: {}", s, e))?; + + Ok(Self { workchain_id, prefix }) + } +} + impl Deserializable for ShardIdent { fn read_from(&mut self, cell: &mut SliceData) -> Result<()> { let constructor_and_pfx = cell.get_next_byte()?; diff --git a/src/node/Cargo.toml b/src/node/Cargo.toml index afd08d6..8052da5 100644 --- a/src/node/Cargo.toml +++ b/src/node/Cargo.toml @@ -37,6 +37,10 @@ path = 'bin/print.rs' name = 'zerostate' path = 'bin/zerostate.rs' +[[bin]] +name = 'archive_import' +path = 'bin/archive_import.rs' + [[bin]] name = 'hardfork' path = 'bin/hardfork.rs' @@ -73,6 +77,7 @@ num_cpus = 
'1.13' openssl = '0.10' parking_lot = '0.12' rand = '0.8' +rayon = '1' regex = '1.10' serde = '1.0' serde_derive = '1.0' @@ -110,6 +115,7 @@ harness = false [dev-dependencies] criterion = { version = "0.5", features = ["html_reports", "async_tokio"] } +tempfile = '3' difference = '2.0' external-ip = '6.0' http-body-util = "0.1" diff --git a/src/node/bin/archive_import.rs b/src/node/bin/archive_import.rs new file mode 100644 index 0000000..0def3cd --- /dev/null +++ b/src/node/bin/archive_import.rs @@ -0,0 +1,102 @@ +/* + * Copyright (C) 2025-2026 RSquad Blockchain Lab. + * + * Licensed under the GNU General Public License v3.0. + * See the LICENSE file in the root of this repository. + * + * This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND. + */ +use clap::{Arg, ArgAction, Command}; +use node::archive_import::{run_import, ImportConfig}; +use std::path::PathBuf; + +fn main() { + env_logger::Builder::from_default_env().format_timestamp_millis().init(); + + let matches = Command::new("archive_import") + .about("Import raw .pack archive files into epoch-based storage") + .arg( + Arg::new("archives-path") + .long("archives-path") + .required(true) + .help("Path to directory with source .pack files"), + ) + .arg( + Arg::new("epochs-path") + .long("epochs-path") + .required(true) + .help("Path where epoch directories will be created"), + ) + .arg( + Arg::new("epoch-size") + .long("epoch-size") + .default_value("10000000") + .help("Number of MC blocks per epoch (must be multiple of 20000)"), + ) + .arg( + Arg::new("node-db-path") + .long("node-db-path") + .required(true) + .help("Path to node database directory"), + ) + .arg( + Arg::new("mc-zerostate") + .long("mc-zerostate") + .required(true) + .help("Path to masterchain zerostate .boc file"), + ) + .arg( + Arg::new("wc-zerostate") + .long("wc-zerostate") + .action(ArgAction::Append) + .required(true) + .help("Path to workchain zerostate .boc file (one per workchain)"), + ) + .arg( + 
Arg::new("global-config") + .long("global-config") + .required(true) + .help("Path to global config JSON file (describes zerostate and hard forks)"), + ) + .arg( + Arg::new("skip-validation") + .long("skip-validation") + .action(ArgAction::SetTrue) + .help("Skip block proof validation (for re-importing already validated archives)"), + ) + .arg(Arg::new("copy").long("copy").action(ArgAction::SetTrue).help( + "Copy source .pack files instead of moving them. Use for keeping original \ + files or when source and destination are on different filesystems.", + )) + .get_matches(); + + let config = ImportConfig { + archives_path: PathBuf::from(matches.get_one::("archives-path").unwrap()), + epochs_path: PathBuf::from(matches.get_one::("epochs-path").unwrap()), + epoch_size: matches + .get_one::("epoch-size") + .unwrap() + .parse() + .expect("epoch-size must be a number"), + node_db_path: PathBuf::from(matches.get_one::("node-db-path").unwrap()), + mc_zerostate_path: PathBuf::from(matches.get_one::("mc-zerostate").unwrap()), + wc_zerostate_paths: matches + .get_many::("wc-zerostate") + .unwrap() + .map(|s| PathBuf::from(s)) + .collect(), + global_config_path: PathBuf::from(matches.get_one::("global-config").unwrap()), + skip_validation: matches.get_flag("skip-validation"), + move_files: !matches.get_flag("copy"), + }; + + let rt = tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .expect("Failed to create tokio runtime"); + + if let Err(e) = rt.block_on(run_import(config)) { + log::error!("Import failed: {}", e); + std::process::exit(1); + } +} diff --git a/src/node/src/archive_import/ingester.rs b/src/node/src/archive_import/ingester.rs new file mode 100644 index 0000000..8eaccc2 --- /dev/null +++ b/src/node/src/archive_import/ingester.rs @@ -0,0 +1,830 @@ +/* + * Copyright (C) 2025-2026 RSquad Blockchain Lab. + * + * Licensed under the GNU General Public License v3.0. + * See the LICENSE file in the root of this repository. 
+ * + * This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND. + */ +use crate::{ + archive_import::{scanner::PackageGroup, validator::ValidatorState}, + block::BlockIdExtExtention, + block_proof::BlockProofStuff, + internal_db::{ + ARCHIVES_GC_BLOCK, LAST_APPLIED_MC_BLOCK, PSS_KEEPER_MC_BLOCK, SHARD_CLIENT_MC_BLOCK, + }, + shard_state::ShardHashesStuff, +}; +use futures::future::try_join_all; +use rayon::prelude::*; +use std::{ + collections::{HashMap, HashSet}, + path::Path, + sync::Arc, +}; +use storage::{ + archives::{ + archive_manager::{ArchiveManager, ImportBlockMeta, ImportEntry}, + package::read_package_from_file, + package_entry_id::PackageEntryId, + }, + block_handle_db::BlockHandleStorage, + block_info_db::BlockInfoDb, + traits::Serializable, + types::BlockMeta, +}; +use ton_block::{ + error, fail, Block, BlockIdExt, Cell, Deserializable, Result, ShardIdent, UInt256, +}; + +const TARGET: &str = "archive_import"; + +struct RawEntry { + block_data: Vec, + block_offset: u64, + proof_data: Vec, + proof_offset: u64, +} + +async fn read_raw_package(path: &Path) -> Result> { + let mut reader = read_package_from_file(path).await?; + let mut entries = HashMap::::new(); + let mut offset: u64 = 0; + while let Some(entry) = reader.next().await? 
{ + let entry_size = entry.serialized_size(); + let entry_id = PackageEntryId::::from_filename(entry.filename())?; + let (block_id, is_proof) = match entry_id { + PackageEntryId::Block(id) => (id, false), + PackageEntryId::Proof(id) if id.is_masterchain() => (id, true), + PackageEntryId::ProofLink(id) if !id.is_masterchain() => (id, true), + entry_id => { + log::warn!("Unexpected entry type {} in {}", entry_id, path.display()); + offset += entry_size; + continue; + } + }; + let mut data = entry.take_data(); + entries + .entry(block_id) + .and_modify(|e| { + if is_proof { + e.proof_data = std::mem::take(&mut data); + e.proof_offset = offset; + } else { + e.block_data = std::mem::take(&mut data); + e.block_offset = offset; + } + }) + .or_insert_with(|| { + if is_proof { + RawEntry { + block_data: vec![], + block_offset: 0, + proof_data: data, + proof_offset: offset, + } + } else { + RawEntry { + block_data: data, + block_offset: offset, + proof_data: vec![], + proof_offset: 0, + } + } + }); + offset += entry_size; + } + Ok(entries) +} + +struct McEntry { + block_id: BlockIdExt, + prev_block_id: BlockIdExt, + proof: BlockProofStuff, + is_key: bool, + gen_utime: u32, + end_lt: u64, + shard_tops: Vec, + state_update_new: Cell, + proof_data: Vec, + proof_offset: u64, + block_data: Vec, + block_offset: u64, +} + +struct ProcessedEntry { + block_id: BlockIdExt, + gen_utime: u32, + end_lt: u64, + mc_ref_seq_no: u32, + is_key_block: bool, + proof_offset: u64, + block_offset: u64, + prevs: Vec, + state_update_new: Cell, +} + +impl ProcessedEntry { + fn to_import_entries(&self) -> [ImportEntry; 2] { + let proof_entry_id = if self.block_id.is_masterchain() { + PackageEntryId::Proof(self.block_id.clone()) + } else { + PackageEntryId::ProofLink(self.block_id.clone()) + }; + [ + ImportEntry { entry_id: proof_entry_id, offset: self.proof_offset, block_meta: None }, + ImportEntry { + entry_id: PackageEntryId::Block(self.block_id.clone()), + offset: self.block_offset, + block_meta: 
Some(ImportBlockMeta { + seq_no: self.block_id.seq_no(), + shard: self.block_id.shard_id.clone(), + gen_utime: self.gen_utime, + end_lt: self.end_lt, + mc_ref_seq_no: self.mc_ref_seq_no, + }), + }, + ] + } +} + +struct KeyBlockData { + block_id: BlockIdExt, + proof_data: Vec, + block_data: Vec, +} + +pub struct LastGroupState { + pub mc_block_id: BlockIdExt, + pub shard_tops: Vec, +} + +fn parse_and_verify_block(data: &[u8], declared_id: &BlockIdExt) -> Result { + let file_hash = UInt256::calc_file_hash(data); + let root_cell = ton_block::read_single_root_boc(data)?; + let root_hash = root_cell.repr_hash(); + let block = Block::construct_from_cell(root_cell)?; + let info = block.read_info()?; + let actual_id = + BlockIdExt::with_params(info.shard().clone(), info.seq_no(), root_hash, file_hash); + if actual_id != *declared_id { + return Err(error!("Block declared as {} but data contains {}", declared_id, actual_id)); + } + Ok(block) +} + +fn deserialize_mc_entry(block_id: BlockIdExt, raw: RawEntry) -> Result { + if raw.proof_data.is_empty() { + return Err(error!("MC block {} has no proof in the package", block_id)); + } + if raw.block_data.is_empty() { + return Err(error!("MC block {} has no block data in the package", block_id)); + } + + let proof = BlockProofStuff::deserialize(&block_id, raw.proof_data.clone(), false)?; + let (virt_block, _) = proof.virtualize_block()?; + let is_key = virt_block.read_info()?.key_block(); + + let block = parse_and_verify_block(&raw.block_data, &block_id)?; + let block_info = block.read_info()?; + let gen_utime = block_info.gen_utime(); + let end_lt = block_info.end_lt(); + let mut prev_ids = block_info.read_prev_ids()?; + if prev_ids.len() != 1 { + return Err(error!("MC block {} has {} prev refs, expected 1", block_id, prev_ids.len())); + } + let prev_block_id = prev_ids.pop().unwrap(); + let extra = block + .read_extra()? + .read_custom()? 
+ .ok_or_else(|| error!("No McExtra in master block {}", block_id))?; + let shard_tops = ShardHashesStuff::from(extra.shards().clone()).top_blocks_all()?; + let state_update_new = block.read_state_update()?.new; + + Ok(McEntry { + block_id, + prev_block_id, + proof, + is_key, + gen_utime, + end_lt, + shard_tops, + state_update_new, + proof_data: raw.proof_data, + proof_offset: raw.proof_offset, + block_data: raw.block_data, + block_offset: raw.block_offset, + }) +} + +fn validate_mc_range( + entries: &[McEntry], + key_proof: &Option, + zerostate: &Arc, +) -> Result<()> { + entries.par_iter().try_for_each(|e| match key_proof { + None => e.proof.check_with_master_state(zerostate), + Some(kb) => e.proof.check_with_prev_key_block_proof(kb), + }) +} + +fn check_mc_chain(entries: &[McEntry], expected_first_prev: &BlockIdExt) -> Result<()> { + if let Some(first) = entries.first() { + if first.prev_block_id != *expected_first_prev { + fail!( + "MC chain gap between packages: block {} prev_ref = {} but expected {}", + first.block_id, + first.prev_block_id, + expected_first_prev, + ); + } + } + for w in entries.windows(2) { + if w[1].prev_block_id != w[0].block_id { + fail!( + "MC chain gap: block {} prev_ref = {} but expected {}", + w[1].block_id, + w[1].prev_block_id, + w[0].block_id, + ); + } + } + Ok(()) +} + +fn parse_mc_entries( + raw: HashMap, + validator: &mut ValidatorState, + skip: bool, + expected_first_prev: BlockIdExt, +) -> Result<(Vec, Option, Vec<(u32, Vec)>, LastGroupState)> +{ + let mut entries: Vec = + raw.into_par_iter().map(|(id, r)| deserialize_mc_entry(id, r)).collect::>()?; + entries.sort_by_key(|e| e.block_id.seq_no()); + check_mc_chain(&entries, &expected_first_prev)?; + + let rest_start = if entries.first().map(|e| e.is_key).unwrap_or(false) { + let block_id = &entries[0].block_id; + if !skip { + // Skip re-validation if this key block is already the current validation root (resume). 
+ let already_done = validator + .current_key_block_proof() + .map(|kp| kp.id().seq_no() >= block_id.seq_no()) + .unwrap_or(false); + if !already_done { + let is_hardfork = validator.is_hardfork(block_id); + if is_hardfork { + log::info!( + target: TARGET, + "Hard fork block {} accepted as new validation root", + block_id, + ); + } else { + let key_proof = validator.current_key_block_proof().cloned(); + let zerostate = Arc::clone(validator.zerostate()); + validate_mc_range(&entries[..1], &key_proof, &zerostate)?; + } + } + } + validator.set_key_block_proof(entries[0].proof.clone()); + 1 + } else { + 0 + }; + + if !skip { + let key_proof = validator.current_key_block_proof().cloned(); + let zerostate = Arc::clone(validator.zerostate()); + validate_mc_range(&entries[rest_start..], &key_proof, &zerostate)?; + } + + let mut processed = Vec::with_capacity(entries.len()); + let mut key_block: Option = None; + let mut mc_shard_tops: Vec<(u32, Vec)> = Vec::new(); + + for entry in entries { + if entry.is_key { + if Some(&entry.block_id) != validator.current_key_block_proof().map(|kp| kp.id()) { + fail!("Second key block {} in package", entry.block_id); + } + key_block = Some(KeyBlockData { + block_id: entry.block_id.clone(), + proof_data: entry.proof_data, + block_data: entry.block_data.clone(), + }); + } + mc_shard_tops.push((entry.block_id.seq_no(), entry.shard_tops)); + processed.push(ProcessedEntry { + mc_ref_seq_no: entry.block_id.seq_no(), + block_id: entry.block_id, + gen_utime: entry.gen_utime, + end_lt: entry.end_lt, + is_key_block: entry.is_key, + proof_offset: entry.proof_offset, + block_offset: entry.block_offset, + prevs: vec![entry.prev_block_id], + state_update_new: entry.state_update_new, + }); + } + + let last_group_state = + processed.last().ok_or_else(|| error!("MC package is empty")).map(|e| LastGroupState { + mc_block_id: e.block_id.clone(), + shard_tops: mc_shard_tops.last().map(|(_, tops)| tops.clone()).unwrap_or_default(), + })?; + Ok((processed, 
key_block, mc_shard_tops, last_group_state)) +} + +fn deserialize_shard_entry( + block_id: BlockIdExt, + raw: RawEntry, + skip: bool, +) -> Result { + if raw.proof_data.is_empty() { + return Err(error!("Shard block {} has no proof link in the package", block_id)); + } + if raw.block_data.is_empty() { + return Err(error!("Shard block {} has no block data in the package", block_id)); + } + + if !skip { + let proof = BlockProofStuff::deserialize(&block_id, raw.proof_data.clone(), true)?; + proof.check_proof_link()?; + } + + let block = parse_and_verify_block(&raw.block_data, &block_id)?; + let info = block.read_info()?; + let prevs = info.read_prev_ids()?; + let state_update_new = block.read_state_update()?.new; + + Ok(ProcessedEntry { + gen_utime: info.gen_utime(), + end_lt: info.end_lt(), + mc_ref_seq_no: 0, + is_key_block: false, + proof_offset: raw.proof_offset, + block_offset: raw.block_offset, + block_id, + prevs, + state_update_new, + }) +} + +fn parse_shard_entries( + raw: HashMap, + archive_id: u32, + shard: ShardIdent, + mc_shard_tops: Vec<(u32, Vec)>, + prev_shard_tops: Vec, + skip: bool, +) -> Result> { + let now = std::time::Instant::now(); + let results: HashMap = raw + .into_par_iter() + .map(|(id, r)| deserialize_shard_entry(id.clone(), r, skip).map(|res| (id, res))) + .collect::>()?; + log::debug!(target: TARGET, "Deserialized shard entries after {:#?}", now.elapsed()); + + let entries = if !skip { + let prev_committed: HashSet = + prev_shard_tops.into_iter().filter(|id| id.shard_id.intersect_with(&shard)).collect(); + validate_shard_and_assign_mc_refs(&shard, mc_shard_tops, results, prev_committed)? + } else { + // mc_ref_seq_no must be >= archive_id for choose_package() to find the right file. 
+ let mut entries: Vec = results + .into_iter() + .map(|(_, mut entry)| { + entry.mc_ref_seq_no = archive_id; + entry + }) + .collect(); + entries.sort_by_key(|e| e.block_id.seq_no()); + entries + }; + + Ok(entries) +} + +fn validate_shard_and_assign_mc_refs( + shard: &ShardIdent, + mut mc_shard_tops: Vec<(u32, Vec)>, + mut blocks: HashMap, + prev_committed: HashSet, +) -> Result> { + if blocks.len() == 0 { + return Ok(vec![]); + } + + let mut known: HashSet = prev_committed; + mc_shard_tops.sort_by_key(|(seqno, _)| *seqno); + + let mut entries = Vec::with_capacity(blocks.len()); + for (mc_seqno, tops) in mc_shard_tops { + for top in tops { + if !top.shard_id.intersect_with(shard) { + continue; + } + let mut current = top; + loop { + if known.contains(¤t) { + break; + } + if let Some(mut entry) = blocks.remove(¤t) { + entry.mc_ref_seq_no = mc_seqno; + let mut prevs = entry.prevs.clone(); + entries.push(entry); + // blocks before merge are always committed by MC block + if prevs.len() > 1 + && (blocks.contains_key(&prevs[0]) || blocks.contains_key(&prevs[1])) + { + fail!("Block {} parents are not committed by MC blocks", current); + } + let prev = + prevs.pop().ok_or_else(|| error!("Block {} has no parents", current))?; + known.insert(current); + current = prev; + } else { + fail!( + "Shard chain break: block {} is not in current package \ + and was not committed by previous archive group", + current, + ); + } + } + } + } + + if !blocks.is_empty() { + fail!("Some blocks in shard {} are not reachable from MC shard_hashes", shard); + } + + // Sort by seqno ascending: prev block handles must exist when setting next links. + // This also handles cross-shard deps (parent shard blocks have lower seqno than children after split). 
+ entries.sort_by_key(|e| e.block_id.seq_no()); + Ok(entries) +} + +pub struct Ingester { + archive_manager: Arc, + block_handle_storage: Arc, + archive_state_db: Arc, + prev1_block_db: BlockInfoDb, + prev2_block_db: BlockInfoDb, + next1_block_db: BlockInfoDb, + next2_block_db: BlockInfoDb, + move_files: bool, + skip_validation: bool, +} + +impl Ingester { + pub fn new( + archive_manager: Arc, + block_handle_storage: Arc, + archive_state_db: Arc, + prev1_block_db: BlockInfoDb, + prev2_block_db: BlockInfoDb, + next1_block_db: BlockInfoDb, + next2_block_db: BlockInfoDb, + move_files: bool, + skip_validation: bool, + ) -> Self { + Self { + archive_manager, + block_handle_storage, + archive_state_db, + prev1_block_db, + prev2_block_db, + next1_block_db, + next2_block_db, + move_files, + skip_validation, + } + } + + pub async fn run_groups( + &self, + groups: &[PackageGroup], + start_idx: usize, + total: usize, + mut validator: ValidatorState, + mut last_group_state: LastGroupState, + ) -> Result { + let mut prefetch: Option< + tokio::task::JoinHandle< + Result<(HashMap, Vec>)>, + >, + > = None; + let start = std::time::Instant::now(); + + for (local_idx, group) in groups.iter().enumerate() { + let global_idx = start_idx + local_idx; + let elapsed = start.elapsed(); + let eta = (elapsed * total as u32 / (global_idx + 1) as u32).saturating_sub(elapsed); + log::info!( + target: TARGET, + "Processing group {}/{}: archive_id={}, {} shard packages. 
ETA {:#?}", + global_idx + 1, + total, + group.archive_id, + group.shard_packages.len(), + eta, + ); + + let next_prefetch = groups.get(local_idx + 1).map(|next| { + let mc_path = next.mc_package.path.clone(); + let shard_paths: Vec<_> = + next.shard_packages.iter().map(|p| p.path.clone()).collect(); + tokio::spawn(async move { + let (mc_raw, shard_raws) = tokio::try_join!( + read_raw_package(&mc_path), + try_join_all(shard_paths.iter().map(|p| read_raw_package(p))), + )?; + Ok::<_, ton_block::Error>((mc_raw, shard_raws)) + }) + }); + + let (mc_raw, shard_raws) = match prefetch.take() { + Some(handle) => { + handle.await.map_err(|e| error!("Prefetch task panicked: {}", e))?? + } + None => tokio::try_join!( + read_raw_package(&group.mc_package.path), + try_join_all(group.shard_packages.iter().map(|p| read_raw_package(&p.path))), + )?, + }; + + let (new_validator, new_state) = self + .ingest_group_from_raw(group, mc_raw, shard_raws, validator, last_group_state) + .await?; + validator = new_validator; + last_group_state = new_state; + prefetch = next_prefetch; + } + + self.block_handle_storage.save_full_node_state( + LAST_APPLIED_MC_BLOCK.to_string(), + &last_group_state.mc_block_id, + )?; + self.block_handle_storage.save_full_node_state( + SHARD_CLIENT_MC_BLOCK.to_string(), + &last_group_state.mc_block_id, + )?; + self.block_handle_storage + .save_full_node_state(ARCHIVES_GC_BLOCK.to_string(), &last_group_state.mc_block_id)?; + self.block_handle_storage + .save_full_node_state(PSS_KEEPER_MC_BLOCK.to_string(), &last_group_state.mc_block_id)?; + + Ok(validator) + } + + async fn ingest_group_from_raw( + &self, + group: &PackageGroup, + mc_raw: HashMap, + shard_raws: Vec>, + validator: ValidatorState, + prev_group_state: LastGroupState, + ) -> Result<(ValidatorState, LastGroupState)> { + let skip = self.skip_validation; + let expected_first_mc_prev = prev_group_state.mc_block_id; + let prev_shard_tops = prev_group_state.shard_tops; + let mc_block_count = mc_raw.len(); + 
let group_start = std::time::Instant::now(); + + let t = std::time::Instant::now(); + let (mc_entries, key_block, mc_shard_tops, last_group_state, validator) = + tokio::task::spawn_blocking(move || -> Result<_> { + let mut v = validator; + let (entries, key_block, shard_tops, last_state) = + parse_mc_entries(mc_raw, &mut v, skip, expected_first_mc_prev)?; + Ok((entries, key_block, shard_tops, last_state, v)) + }) + .await + .map_err(|e| error!("MC parse task panicked: {}", e))??; + let parse_mc_ms = t.elapsed().as_millis(); + + let t = std::time::Instant::now(); + for entry in &mc_entries { + self.update_block_handles(entry)?; + } + let mc_handles_ms = t.elapsed().as_millis(); + + let mc_import_entries: Vec = + mc_entries.iter().flat_map(|e| e.to_import_entries()).collect(); + + let archive_id = group.archive_id; + let shard_parse_handles: Vec<_> = shard_raws + .into_iter() + .zip(group.shard_packages.iter()) + .map(|(raw, pkg)| { + let shard = pkg.shard.clone(); + let tops = mc_shard_tops.clone(); + let prev = prev_shard_tops.clone(); + tokio::task::spawn_blocking(move || { + parse_shard_entries(raw, archive_id, shard, tops, prev, skip) + }) + }) + .collect(); + let archive_state_db = Arc::clone(&self.archive_state_db); + let fill_mc_states_db = tokio::task::spawn_blocking(move || -> Result<()> { + for entry in &mc_entries { + archive_state_db.put_update(&entry.block_id, entry.state_update_new.clone())?; + } + Ok(()) + }); + + let mc_shard = ShardIdent::masterchain(); + + // Run mc_import, mc_states, and the full shard pipeline concurrently. 
+ let t = std::time::Instant::now(); + let mut shard_block_count = 0usize; + let (_, _, shard_pipeline_ms) = tokio::try_join!( + // Task 1: import MC package into archive + self.archive_manager.import_package( + &group.mc_package.path, + group.mc_package.archive_id, + &mc_shard, + &mc_import_entries, + false, + key_block.is_some(), + ), + // Task 2: save MC state cells + async { + fill_mc_states_db.await.map_err(|e| error!("MC states db task panicked: {}", e))? + }, + // Task 3: shard pipeline — parse → handles+states → import + async { + let t_pipeline = std::time::Instant::now(); + + // 3a: await shard parse (already spawned above) + let shard_parse_results: Vec> = + try_join_all(shard_parse_handles.into_iter().map(|h| async move { + h.await.map_err(|e| error!("Shard parse task panicked: {}", e))? + })) + .await?; + + // 3b: update block handles + save shard state cells + for shard_entries in &shard_parse_results { + shard_block_count += shard_entries.len(); + for entry in shard_entries { + self.update_block_handles(entry)?; + self.archive_state_db + .put_update(&entry.block_id, entry.state_update_new.clone())?; + } + } + + // 3c: import shard packages into archive + let shard_import_entries: Vec> = shard_parse_results + .iter() + .map(|entries| entries.iter().flat_map(|e| e.to_import_entries()).collect()) + .collect(); + + try_join_all( + group + .shard_packages + .iter() + .zip(shard_import_entries.iter()) + .filter(|(_, entries)| !entries.is_empty()) + .map(|(pkg, import_entries)| { + self.archive_manager.import_package( + &pkg.path, + pkg.archive_id, + &pkg.shard, + import_entries, + self.move_files, + false, + ) + }), + ) + .await?; + + Ok(t_pipeline.elapsed().as_millis()) + }, + )?; + let parallel_ms = t.elapsed().as_millis(); + + let t = std::time::Instant::now(); + if let Some(kb) = key_block { + self.archive_key_block(&kb.block_id, kb.proof_data, kb.block_data).await?; + } + let key_block_ms = t.elapsed().as_millis(); + + if self.move_files { + if let 
Err(e) = tokio::fs::remove_file(&group.mc_package.path).await { + log::warn!( + target: TARGET, + "Failed to remove MC pack {} after import: {}", + group.mc_package.path.display(), + e, + ); + } + } + + log::info!( + target: TARGET, + "Imported archive {} ({} MC, {} shard blocks, {} shard pkgs) total {:#?}: \ + parse_mc {}ms, mc_handles {}ms, \ + parallel {}ms (shard_pipeline {}ms), key_block {}ms", + group.archive_id, + mc_block_count, + shard_block_count, + group.shard_packages.len(), + group_start.elapsed(), + parse_mc_ms, + mc_handles_ms, + parallel_ms, + shard_pipeline_ms, + key_block_ms, + ); + + Ok((validator, last_group_state)) + } + + async fn archive_key_block( + &self, + block_id: &BlockIdExt, + proof_data: Vec, + block_data: Vec, + ) -> Result<()> { + let handle = self.block_handle_storage.load_handle_by_id(block_id)?.ok_or_else(|| { + error!("Block handle not found for key block {} during key archive creation", block_id) + })?; + self.archive_manager + .add_block_data_to_package( + proof_data, + &handle, + &PackageEntryId::Proof(block_id.clone()), + true, + ) + .await?; + self.archive_manager + .add_block_data_to_package( + block_data, + &handle, + &PackageEntryId::Block(block_id.clone()), + true, + ) + .await?; + Ok(()) + } + + fn update_block_handles(&self, entry: &ProcessedEntry) -> Result<()> { + let meta = BlockMeta::for_import( + entry.gen_utime, + entry.end_lt, + entry.mc_ref_seq_no, + entry.is_key_block, + entry.block_id.is_masterchain(), + entry.prevs.len() > 1, + ); + + if let Some(handle) = + self.block_handle_storage.create_handle(entry.block_id.clone(), meta, None)? 
+ { + log::trace!( + target: TARGET, + "Created block handle for {} (key={})", + entry.block_id, + entry.is_key_block, + ); + let _ = handle; + } + + let prev1 = entry + .prevs + .first() + .ok_or_else(|| error!("Block {} has no prev refs", entry.block_id))?; + + self.prev1_block_db.put(&entry.block_id, &prev1.serialize())?; + self.store_next_link(&entry.block_id, prev1)?; + + if let Some(prev2) = entry.prevs.get(1) { + self.prev2_block_db.put(&entry.block_id, &prev2.serialize())?; + self.store_next_link(&entry.block_id, prev2)?; + } + + Ok(()) + } + + fn store_next_link(&self, block_id: &BlockIdExt, prev_id: &BlockIdExt) -> Result<()> { + let prev_handle = + self.block_handle_storage.load_handle_by_id(prev_id)?.ok_or_else(|| { + error!("Block handle not found for prev block {} of {}", prev_id, block_id) + })?; + + let prev_shard = prev_id.shard(); + let shard = block_id.shard(); + if prev_shard != shard && prev_shard.split()?.1 == *shard { + // After split: right child → next2 + self.next2_block_db.put(prev_id, &block_id.serialize())?; + prev_handle.set_next2(); + } else { + // Simple chain or after merge or left child → next1 + self.next1_block_db.put(prev_id, &block_id.serialize())?; + prev_handle.set_next1(); + } + self.block_handle_storage.save_handle(&prev_handle, None)?; + Ok(()) + } +} diff --git a/src/node/src/archive_import/mod.rs b/src/node/src/archive_import/mod.rs new file mode 100644 index 0000000..722e52e --- /dev/null +++ b/src/node/src/archive_import/mod.rs @@ -0,0 +1,412 @@ +/* + * Copyright (C) 2025-2026 RSquad Blockchain Lab. + * + * Licensed under the GNU General Public License v3.0. + * See the LICENSE file in the root of this repository. + * + * This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND. 
+ */
+pub mod ingester;
+pub mod scanner;
+pub mod validator;
+
+use crate::{
+    block_proof::BlockProofStuff,
+    collator_test_bundle::create_engine_allocated,
+    config::TonNodeGlobalConfig,
+    engine_traits::EngineAlloc,
+    internal_db::{
+        ARCHIVE_CELLS_CF_NAME, ARCHIVE_SHARDSTATE_CF_NAME, CURRENT_DB_VERSION, DB_VERSION,
+    },
+    shard_state::ShardStateStuff,
+};
+#[cfg(feature = "telemetry")]
+use crate::{collator_test_bundle::create_engine_telemetry, engine_traits::EngineTelemetry};
+use ingester::{Ingester, LastGroupState};
+use std::{
+    collections::HashMap,
+    path::PathBuf,
+    sync::{atomic::AtomicU8, Arc},
+};
+use storage::{
+    archive_shardstate_db::ArchiveShardStateDb,
+    archives::{
+        archive_manager::ArchiveManager,
+        db_provider::EpochDbProvider,
+        epoch::{ArchivalModeConfig, EpochRouter},
+        ARCHIVE_PACKAGE_SIZE,
+    },
+    block_handle_db::{
+        BlockHandleDb, BlockHandleStorage, NodeStateDb, BLOCK_HANDLE_DB_NAME,
+        VALIDATOR_STATE_DB_NAME,
+    },
+    block_info_db::{
+        BlockInfoDb, NEXT1_BLOCK_DB_NAME, NEXT2_BLOCK_DB_NAME, PREV1_BLOCK_DB_NAME,
+        PREV2_BLOCK_DB_NAME,
+    },
+    db::rocksdb::{AccessType, RocksDb, NODE_DB_NAME},
+    shardstate_db_async::CellsDbConfig,
+    traits::Serializable,
+    types::BlockMeta,
+};
+use ton_block::{
+    error, AccountIdPrefixFull, Block, BlockIdExt, Deserializable, Result, ShardIdent, UInt256,
+    WorkchainDescr, MASTERCHAIN_ID, SHARD_FULL,
+};
+use validator::ValidatorState;
+
+const TARGET: &str = "archive_import";
+
+/// Settings for one [`run_import`] invocation.
+pub struct ImportConfig {
+    /// Directory scanned for source `.pack` files.
+    pub archives_path: PathBuf,
+    /// Directory handed to the epoch router as `new_epochs_path`.
+    pub epochs_path: PathBuf,
+    /// Epoch size handed to the epoch router (number of MC blocks per epoch).
+    pub epoch_size: u32,
+    /// Root directory of the node database; created if it does not exist.
+    pub node_db_path: PathBuf,
+    /// Masterchain zerostate `.boc` file.
+    pub mc_zerostate_path: PathBuf,
+    /// Workchain zerostate `.boc` files; each one must match a workchain listed in the
+    /// masterchain zerostate config, and every listed workchain must be covered.
+    pub wc_zerostate_paths: Vec<PathBuf>,
+    /// Global config JSON providing the expected zerostate id and the hard forks.
+    pub global_config_path: PathBuf,
+    /// When set, the last key block proof is not restored on resume; also forwarded to the
+    /// ingester.
+    pub skip_validation: bool,
+    /// Forwarded to the ingester (presumably moves source files instead of copying them).
+    pub move_files: bool,
+}
+
+/// Builds the block ids of all workchain zerostates described in the masterchain zerostate.
+///
+/// Every workchain from `ConfigParams::workchains()` yields an id with seqno 0, the full
+/// shard of that workchain and the root/file hashes stored in its descriptor.
+fn read_wc_zerostates_from_config(mc_zerostate: &ShardStateStuff) -> Result<Vec<BlockIdExt>> {
+    // shard_hashes is empty at genesis; workchain zerostates are in ConfigParams::workchains()
+    let mut shards = Vec::new();
+    mc_zerostate.config_params()?.workchains()?.iterate_with_keys(
+        |wc_id: i32, descr: WorkchainDescr| {
+            let shard = ShardIdent::with_tagged_prefix(wc_id, SHARD_FULL)?;
+            shards.push(BlockIdExt::with_params(
+                shard,
+                0,
+                descr.zerostate_root_hash,
+                descr.zerostate_file_hash,
+            ));
+            Ok(true)
+        },
+    )?;
+    Ok(shards)
+}
+
+/// Produces the group state the ingester starts from.
+///
+/// * `last_imported == 0`: start from the zerostate, with the workchain zerostates as shard
+///   tops.
+/// * otherwise: load the MC block with seqno `last_imported` from the archive and take the
+///   shard tops from its `McExtra`.
+///
+/// Fails if that MC block is not in the archive or carries no `McExtra`.
+async fn build_initial_group_state(
+    zerostate: &ShardStateStuff,
+    archive_manager: &ArchiveManager,
+    last_imported: u32,
+) -> Result<LastGroupState> {
+    if last_imported == 0 {
+        let shard_tops = read_wc_zerostates_from_config(zerostate)?;
+        log::info!(
+            target: TARGET,
+            "Initial state from zerostate {}, {} workchain shard tops",
+            zerostate.block_id(),
+            shard_tops.len(),
+        );
+        return Ok(LastGroupState { mc_block_id: zerostate.block_id().clone(), shard_tops });
+    }
+
+    let mc_prefix = AccountIdPrefixFull { workchain_id: MASTERCHAIN_ID, prefix: 0 };
+    let (block_id, block_data) = archive_manager
+        .lookup_block_by_seqno(&mc_prefix, last_imported)
+        .await?
+        .ok_or_else(|| error!("Cannot find MC block at seqno {}", last_imported))?;
+    let block = Block::construct_from_bytes(&block_data)?;
+    let extra = block
+        .read_extra()?
+        .read_custom()?
+        .ok_or_else(|| error!("No McExtra in MC block {}", block_id))?;
+    let shard_tops =
+        crate::shard_state::ShardHashesStuff::from(extra.shards().clone()).top_blocks_all()?;
+    log::info!(
+        target: TARGET,
+        "Resuming from MC block {} (seqno {}), {} shard tops",
+        block_id,
+        last_imported,
+        shard_tops.len(),
+    );
+    Ok(LastGroupState { mc_block_id: block_id, shard_tops })
+}
+
+/// Loads, checks and stores the masterchain and workchain zerostates.
+///
+/// The masterchain zerostate is deserialized against the id from the global config. Each
+/// workchain zerostate file is matched by file hash against the workchains listed in the
+/// masterchain zerostate. All states are written to `archive_state_db` and their block
+/// handles are flagged as having state, state saved and block applied.
+///
+/// Returns the masterchain zerostate.
+///
+/// # Errors
+/// Fails if a file cannot be read or deserialized, if a workchain file matches no expected
+/// hash, or if an expected workchain zerostate was not supplied.
+fn process_zerostates(
+    config: &ImportConfig,
+    global_config: &TonNodeGlobalConfig,
+    archive_state_db: &ArchiveShardStateDb,
+    block_handle_storage: &BlockHandleStorage,
+    #[cfg(feature = "telemetry")] engine_telemetry: Arc<EngineTelemetry>,
+    engine_allocated: Arc<EngineAlloc>,
+) -> Result<Arc<ShardStateStuff>> {
+    log::info!(target: TARGET, "Loading MC zerostate from {}", config.mc_zerostate_path.display());
+    let zerostate_bytes = std::fs::read(&config.mc_zerostate_path).map_err(|e| {
+        error!("Cannot read MC zerostate file {}: {}", config.mc_zerostate_path.display(), e)
+    })?;
+    let expected_mc_zerostate_id = global_config.zero_state()?;
+    let mc_zerostate = ShardStateStuff::deserialize_zerostate(
+        expected_mc_zerostate_id.clone(),
+        &zerostate_bytes,
+        #[cfg(feature = "telemetry")]
+        &engine_telemetry,
+        &engine_allocated,
+    )?;
+    log::info!(target: TARGET, "MC zerostate loaded successfully");
+
+    // Load and validate workchain zerostates. Entries are removed as files match them, so
+    // whatever is left afterwards was not supplied on the command line.
+    let mut expected_wc_zerostates: HashMap<UInt256, BlockIdExt> =
+        read_wc_zerostates_from_config(&mc_zerostate)?
+            .into_iter()
+            .map(|id| (id.file_hash.clone(), id))
+            .collect();
+
+    let mut wc_zerostates = Vec::new();
+    for path in &config.wc_zerostate_paths {
+        log::info!(target: TARGET, "Loading workchain zerostate from {}", path.display());
+        let zerostate_bytes = std::fs::read(path)
+            .map_err(|e| error!("Cannot read WC zerostate file {}: {}", path.display(), e))?;
+        let id = expected_wc_zerostates.remove(&UInt256::calc_file_hash(&zerostate_bytes)).ok_or_else(|| {
+            error!(
+                "Workchain zerostate file {} does not match any expected file hash from MC zerostate",
+                path.display(),
+            )
+        })?;
+        let state = ShardStateStuff::deserialize_zerostate(
+            id.clone(),
+            &zerostate_bytes,
+            #[cfg(feature = "telemetry")]
+            &engine_telemetry,
+            &engine_allocated,
+        )?;
+        wc_zerostates.push((id, state.root_cell().clone()));
+    }
+
+    if !expected_wc_zerostates.is_empty() {
+        let missing: Vec<_> = expected_wc_zerostates.into_iter().collect();
+        return Err(error!("Missing workchain zerostates: {:?}", missing,));
+    }
+
+    // Creates the handle (or loads it when it already exists) and marks the zerostate as
+    // stored and applied. Used for the masterchain and for every workchain zerostate.
+    let save_handle = |id: &BlockIdExt| -> Result<()> {
+        let handle = if let Some(handle) =
+            block_handle_storage.create_handle(id.clone(), BlockMeta::default(), None)?
+        {
+            handle
+        } else {
+            block_handle_storage
+                .load_handle_by_id(id)?
+                .ok_or_else(|| error!("Failed to create or load block handle for zerostate {}", id))?
+        };
+        // Non-short-circuit `|` on purpose: all three flags must be set even if the first
+        // setter already reports a change.
+        if handle.set_state() | handle.set_state_saved() | handle.set_block_applied() {
+            block_handle_storage.save_handle(&handle, None)?;
+        }
+        Ok(())
+    };
+
+    archive_state_db.put(&expected_mc_zerostate_id, mc_zerostate.root_cell().clone())?;
+    save_handle(&expected_mc_zerostate_id)?;
+    log::info!(target: TARGET, "MC zerostate saved to archive state DB");
+
+    for (wc_id, wc_root) in wc_zerostates {
+        archive_state_db.put(&wc_id, wc_root)?;
+        save_handle(&wc_id)?;
+        log::info!(target: TARGET, "Workchain zerostate {} saved to archive state DB", wc_id);
+    }
+
+    Ok(mc_zerostate)
+}
+
+/// Imports raw `.pack` archives from `config.archives_path` into epoch-based storage.
+///
+/// Steps: load the global config, open the node / archive-state databases, store the
+/// zerostates, scan and group the packages, detect a previous import (resuming after the
+/// groups that are already present) and run the ingester over the remaining groups.
+///
+/// Returns the node_db Arc so the caller can wait for all background tasks to release it.
+pub async fn run_import(config: ImportConfig) -> Result<Arc<RocksDb>> {
+    log::info!(
+        target: TARGET,
+        "Loading global config from {}",
+        config.global_config_path.display()
+    );
+    let global_config = TonNodeGlobalConfig::from_json_file(&config.global_config_path)
+        .map_err(|e| error!("Cannot load global config: {}", e))?;
+    let expected_zerostate_id = global_config.zero_state()?;
+    let mut hardforks = global_config.hardforks()?;
+    hardforks.sort_by_key(|hf| hf.seq_no());
+    log::info!(
+        target: TARGET,
+        "Global config: zerostate={}, {} hard fork(s)",
+        expected_zerostate_id,
+        hardforks.len(),
+    );
+
+    #[cfg(feature = "telemetry")]
+    let engine_telemetry = create_engine_telemetry();
+    let engine_allocated = create_engine_allocated();
+
+    let epoch_config = ArchivalModeConfig {
+        epoch_size: config.epoch_size,
+        new_epochs_path: config.epochs_path.clone(),
+        existing_epochs: vec![],
+    };
+    let router = Arc::new(EpochRouter::new(&epoch_config).await?);
+    let db_provider = Arc::new(EpochDbProvider::new(router));
+
+    std::fs::create_dir_all(&config.node_db_path).map_err(|e| {
+        error!("Cannot create node_db_path {}: {}", config.node_db_path.display(), e)
+    })?;
+    let node_db = RocksDb::new(&config.node_db_path, NODE_DB_NAME, None, AccessType::ReadWrite)?;
+
+    let handle_db = Arc::new(BlockHandleDb::with_db(node_db.clone(), BLOCK_HANDLE_DB_NAME, true)?);
+    let full_node_state_db = Arc::new(NodeStateDb::with_db(
+        node_db.clone(),
+        storage::db::rocksdb::NODE_STATE_DB_NAME,
+        true,
+    )?);
+    full_node_state_db.put(&DB_VERSION, &CURRENT_DB_VERSION.serialize())?;
+    let validator_state_db =
+        Arc::new(NodeStateDb::with_db(node_db.clone(), VALIDATOR_STATE_DB_NAME, true)?);
+
+    let prev1_block_db = BlockInfoDb::with_db(node_db.clone(), PREV1_BLOCK_DB_NAME, true)?;
+    let prev2_block_db = BlockInfoDb::with_db(node_db.clone(), PREV2_BLOCK_DB_NAME, true)?;
+    let next1_block_db = BlockInfoDb::with_db(node_db.clone(), NEXT1_BLOCK_DB_NAME, true)?;
+    let next2_block_db = BlockInfoDb::with_db(node_db.clone(), NEXT2_BLOCK_DB_NAME, true)?;
+
+    #[cfg(feature = "telemetry")]
+    let storage_telemetry = engine_telemetry.storage.clone();
+    let storage_alloc = engine_allocated.storage.clone();
+
+    let mut block_handle_storage = BlockHandleStorage::with_dbs(
+        handle_db,
+        full_node_state_db,
+        validator_state_db,
+        #[cfg(feature = "telemetry")]
+        storage_telemetry.clone(),
+        storage_alloc.clone(),
+    );
+    block_handle_storage.set_no_cache();
+    let block_handle_storage = Arc::new(block_handle_storage);
+
+    let db_root_path = Arc::new(config.node_db_path.clone());
+    let shard_split_depth = Arc::new(AtomicU8::new(0));
+
+    let archive_manager = Arc::new(
+        ArchiveManager::with_data(
+            node_db.clone(),
+            db_root_path,
+            db_provider,
+            0, // last_unneeded_key_block
+            shard_split_depth,
+            #[cfg(feature = "telemetry")]
+            storage_telemetry,
+            storage_alloc,
+        )
+        .await?,
+    );
+
+    let cells_db_config = CellsDbConfig::default();
+    let archive_states_db = RocksDb::new(
+        &config.node_db_path,
+        crate::internal_db::ARCHIVE_STATES_DB_NAME,
+        HashMap::from([(
+            ARCHIVE_CELLS_CF_NAME.to_string(),
+            storage::cell_db::CellDb::build_cf_options(cells_db_config.cells_cache_size_bytes),
+        )]),
+        AccessType::ReadWrite,
+    )?;
+    let archive_state_db = Arc::new(ArchiveShardStateDb::new(
+        archive_states_db,
+        ARCHIVE_SHARDSTATE_CF_NAME,
+        ARCHIVE_CELLS_CF_NAME,
+        &config.node_db_path,
+        &cells_db_config,
+        #[cfg(feature = "telemetry")]
+        engine_telemetry.storage.clone(),
+        engine_allocated.storage.clone(),
+    )?);
+
+    let mc_zerostate = process_zerostates(
+        &config,
+        &global_config,
+        &archive_state_db,
+        &block_handle_storage,
+        #[cfg(feature = "telemetry")]
+        engine_telemetry.clone(),
+        engine_allocated.clone(),
+    )?;
+
+    log::info!(target: TARGET, "Scanning packages in {}", config.archives_path.display());
+    let packages = scanner::scan_packages(&config.archives_path)?;
+    log::info!(target: TARGET, "Found {} package files", packages.len());
+
+    if packages.is_empty() {
+        log::warn!(target: TARGET, "No packages found, nothing to import");
+        return Ok(node_db);
+    }
+
+    let groups = scanner::group_by_archive_id(packages)?;
+    log::info!(target: TARGET, "Grouped into {} archive groups", groups.len());
+
+    // Grouping a non-empty package list yields at least one group, but report an error
+    // instead of panicking should that ever stop being true.
+    let last_archive_id = groups
+        .last()
+        .map(|g| g.archive_id)
+        .ok_or_else(|| error!("No archive groups to import"))?;
+
+    let mut validator_state = ValidatorState::new(mc_zerostate.clone(), hardforks);
+    let mut skip_count = 0;
+
+    let last_imported = if let Some(max_seqno) = archive_manager.get_max_mc_seqno().await {
+        // saturating_add: an archive id close to u32::MAX must not overflow the comparison.
+        if max_seqno > last_archive_id.saturating_add(ARCHIVE_PACKAGE_SIZE as u32) {
+            log::warn!(target: TARGET,
+                "Existing import detected with max MC seqno {}, which is beyond the last archive group ({}), skipping all groups",
+                max_seqno, last_archive_id);
+            return Ok(node_db);
+        }
+        // Resume from the last group that starts below max_seqno: it may have been imported
+        // only partially, so it is processed again.
+        skip_count =
+            groups.iter().take_while(|g| g.archive_id < max_seqno).count().saturating_sub(1);
+
+        log::info!(
+            target: TARGET,
+            "Detected existing import (max MC seqno = {}), skipping {} groups",
+            max_seqno,
+            skip_count,
+        );
+
+        // Restore key block proof regardless of skip_count: files may have been moved
+        // and the scanned list may start mid-chain.
+        if !config.skip_validation {
+            if let Some(key_seqno) = archive_manager.get_max_key_block_seqno().await {
+                let mc_prefix = AccountIdPrefixFull { workchain_id: MASTERCHAIN_ID, prefix: 0 };
+                let (block_id, proof_data) = archive_manager
+                    .lookup_proof_by_seqno(&mc_prefix, key_seqno)
+                    .await?
+                    .ok_or_else(|| {
+                        error!(
+                            "Key block seqno {} found in index but proof not readable",
+                            key_seqno,
+                        )
+                    })?;
+                let proof = BlockProofStuff::deserialize(&block_id, proof_data, false)?;
+                log::info!(
+                    target: TARGET,
+                    "Restored key block proof: {}",
+                    block_id,
+                );
+                validator_state.set_key_block_proof(proof);
+            }
+        }
+        groups[skip_count].archive_id.saturating_sub(1)
+    } else {
+        0
+    };
+
+    let initial_group_state =
+        build_initial_group_state(&mc_zerostate, &archive_manager, last_imported).await?;
+
+    let ingester = Ingester::new(
+        archive_manager,
+        block_handle_storage,
+        archive_state_db,
+        prev1_block_db,
+        prev2_block_db,
+        next1_block_db,
+        next2_block_db,
+        config.move_files,
+        config.skip_validation,
+    );
+
+    let total = groups.len();
+    ingester
+        .run_groups(&groups[skip_count..], skip_count, total, validator_state, initial_group_state)
+        .await?;
+
+    // skip_count < total, so the subtraction cannot underflow.
+    log::info!(
+        target: TARGET,
+        "Import complete! Processed {} archive groups ({} skipped as already imported)",
+        total - skip_count,
+        skip_count,
+    );
+    Ok(node_db)
+}
+
+#[cfg(test)]
+#[path = "../tests/test_archive_import.rs"]
+mod tests;
diff --git a/src/node/src/archive_import/scanner.rs b/src/node/src/archive_import/scanner.rs
new file mode 100644
index 0000000..077e793
--- /dev/null
+++ b/src/node/src/archive_import/scanner.rs
@@ -0,0 +1,155 @@
+/*
+ * Copyright (C) 2025-2026 RSquad Blockchain Lab.
+ *
+ * Licensed under the GNU General Public License v3.0.
+ * See the LICENSE file in the root of this repository.
+ *
+ * This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND.
+ */
+use std::{
+    collections::BTreeMap,
+    path::{Path, PathBuf},
+    str::FromStr,
+};
+use ton_block::{error, fail, Result, ShardIdent};
+
+/// One `.pack` file found in the archives directory.
+pub struct PackageFile {
+    /// Full path of the file.
+    pub path: PathBuf,
+    /// Archive id parsed from the file name (`archive.<id>...`).
+    pub archive_id: u32,
+    /// Shard parsed from the file name; masterchain when the name has no shard part.
+    pub shard: ShardIdent,
+}
+
+/// All packages sharing one archive id.
+pub struct PackageGroup {
+    pub archive_id: u32,
+    /// The masterchain package; every group has exactly one.
+    pub mc_package: PackageFile,
+    /// Shard packages of the same archive id (possibly none).
+    pub shard_packages: Vec<PackageFile>,
+}
+
+/// Parse a .pack filename into (archive_id, shard).
+///
+/// Accepted forms:
+/// * `archive.NNNNN.pack` - masterchain package
+/// * `archive.NNNNN.WC:HHHHHHHHHHHHHHHH.pack` - shard package
+///
+/// Returns `Ok(None)` for names that do not end in `.pack` or do not start with `archive.`
+/// (this includes `key.archive.*.pack`), and an error when the archive id or the shard part
+/// of an `archive.*.pack` name cannot be parsed.
+fn parse_pack_filename(filename: &str) -> Result<Option<(u32, ShardIdent)>> {
+    let rest = match filename.strip_suffix(".pack").and_then(|stem| stem.strip_prefix("archive."))
+    {
+        Some(rest) => rest,
+        None => return Ok(None),
+    };
+
+    // Everything up to the first dot is the archive id, the remainder (if any) is the shard.
+    let (id_str, shard_str) = match rest.split_once('.') {
+        Some((id_str, shard_str)) => (id_str, Some(shard_str)),
+        None => (rest, None),
+    };
+
+    let archive_id: u32 =
+        id_str.parse().map_err(|_| error!("Invalid archive id in filename: {}", filename))?;
+
+    let shard = match shard_str {
+        // archive.NNNNN.WC:HHHHHHHHHHHHHHHH - shards
+        Some(shard_str) => ShardIdent::from_str(shard_str)
+            .map_err(|e| error!("Invalid shard in filename {}: {}", filename, e))?,
+        // archive.NNNNN — masterchain
+        None => ShardIdent::masterchain(),
+    };
+    Ok(Some((archive_id, shard)))
+}
+
+/// Scan the source directory for .pack files, parse filenames, sort by archive_id.
+///
+/// Only regular files are considered; entries whose name is not valid UTF-8 or is not an
+/// `archive.*.pack` name are ignored. Within one archive id the masterchain package comes
+/// first.
+pub fn scan_packages(archives_path: &Path) -> Result<Vec<PackageFile>> {
+    let entries = std::fs::read_dir(archives_path)
+        .map_err(|e| error!("Cannot read archives directory {}: {}", archives_path.display(), e))?;
+
+    let mut packages = Vec::new();
+
+    for entry in entries {
+        let entry = entry.map_err(|e| error!("Error reading directory entry: {}", e))?;
+        let path = entry.path();
+
+        if !path.is_file() {
+            continue;
+        }
+
+        let parsed = match path.file_name().and_then(|n| n.to_str()) {
+            Some(name) => parse_pack_filename(name)?,
+            None => continue,
+        };
+
+        if let Some((archive_id, shard)) = parsed {
+            packages.push(PackageFile { path, archive_id, shard });
+        }
+    }
+
+    // Sort by archive_id, then MC before shards (`false` sorts before `true`).
+    packages.sort_by_key(|pkg| (pkg.archive_id, !pkg.shard.is_masterchain()));
+
+    Ok(packages)
+}
+
+/// Group packages by archive_id: each group has one MC package and zero or more shard packages.
+///
+/// Groups are returned in ascending archive id order. Fails when an archive id has more than
+/// one masterchain package or none at all.
+pub fn group_by_archive_id(packages: Vec<PackageFile>) -> Result<Vec<PackageGroup>> {
+    let mut map: BTreeMap<u32, (Option<PackageFile>, Vec<PackageFile>)> = BTreeMap::new();
+
+    for pkg in packages {
+        let entry = map.entry(pkg.archive_id).or_default();
+        if pkg.shard.is_masterchain() {
+            if entry.0.is_some() {
+                fail!("Duplicate MC package for archive_id {}", pkg.archive_id);
+            }
+            entry.0 = Some(pkg);
+        } else {
+            entry.1.push(pkg);
+        }
+    }
+
+    map.into_iter()
+        .map(|(archive_id, (mc_package, shard_packages))| {
+            let mc_package = mc_package
+                .ok_or_else(|| error!("No MC package found for archive_id {}", archive_id))?;
+            Ok(PackageGroup { archive_id, mc_package, shard_packages })
+        })
+        .collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_parse_mc_filename() {
+        let (id, shard) = parse_pack_filename("archive.00100.pack").unwrap().unwrap();
+        assert_eq!(id, 100);
+        assert!(shard.is_masterchain());
+    }
+
+    #[test]
+    fn test_parse_shard_filename_with_wc() {
+        let (id, shard) =
+            parse_pack_filename("archive.00100.0:8000000000000000.pack").unwrap().unwrap();
+        assert_eq!(id, 100);
+        assert!(!shard.is_masterchain());
+        assert_eq!(shard.workchain_id(), 0);
+        assert_eq!(shard.shard_prefix_with_tag(), 0x8000000000000000);
+    }
+
+    #[test]
+    fn test_parse_key_filename_skipped() {
+        assert!(parse_pack_filename("key.archive.000000.pack").unwrap().is_none());
+    }
+
+    #[test]
+    fn test_parse_non_pack_file() {
+        assert!(parse_pack_filename("readme.txt").unwrap().is_none());
+    }
+}
diff --git 
a/src/node/src/archive_import/validator.rs b/src/node/src/archive_import/validator.rs new file mode 100644 index 0000000..9ff74fe --- /dev/null +++ b/src/node/src/archive_import/validator.rs @@ -0,0 +1,81 @@ +/* + * Copyright (C) 2025-2026 RSquad Blockchain Lab. + * + * Licensed under the GNU General Public License v3.0. + * See the LICENSE file in the root of this repository. + * + * This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND. + */ +use crate::{block_proof::BlockProofStuff, shard_state::ShardStateStuff}; +use std::sync::Arc; +use ton_block::{BlockIdExt, BlockInfo, Result}; + +pub struct ValidatorState { + zerostate: Arc, + current_key_block_proof: Option, + hardforks: Vec, +} + +impl ValidatorState { + pub fn new(zerostate: Arc, hardforks: Vec) -> Self { + Self { zerostate, current_key_block_proof: None, hardforks } + } + + pub(crate) fn is_hardfork(&self, block_id: &BlockIdExt) -> bool { + self.hardforks.iter().any(|hf| hf == block_id) + } + + pub(crate) fn zerostate(&self) -> &Arc { + &self.zerostate + } + + pub(crate) fn current_key_block_proof(&self) -> Option<&BlockProofStuff> { + self.current_key_block_proof.as_ref() + } + + pub fn set_key_block_proof(&mut self, proof: BlockProofStuff) { + self.current_key_block_proof = Some(proof); + } + + pub fn validate_mc_proof(&mut self, proof: &BlockProofStuff) -> Result { + let (virt_block, _virt_root) = proof.virtualize_block()?; + let info = virt_block.read_info()?; + + let prev_key_block_seqno = info.prev_key_block_seqno(); + + if prev_key_block_seqno == 0 { + proof.check_with_master_state(&self.zerostate)?; + } else { + let prev_key_proof = self.current_key_block_proof.as_ref().ok_or_else(|| { + ton_block::error!( + "No key block proof available for validation of block {} \ + (prev_key_block_seqno = {})", + proof.id(), + prev_key_block_seqno + ) + })?; + proof.check_with_prev_key_block_proof(prev_key_proof)?; + } + + if info.key_block() { + self.current_key_block_proof = Some(proof.clone()); + 
} + + Ok(info) + } + + pub fn extract_mc_info(&mut self, proof: &BlockProofStuff) -> Result { + let (virt_block, _virt_root) = proof.virtualize_block()?; + let info = virt_block.read_info()?; + + if info.key_block() { + self.current_key_block_proof = Some(proof.clone()); + } + + Ok(info) + } + + pub fn validate_shard_proof_link(&self, proof: &BlockProofStuff) -> Result<()> { + proof.check_proof_link() + } +} diff --git a/src/node/src/config.rs b/src/node/src/config.rs index 4ca45ab..db4bd8e 100644 --- a/src/node/src/config.rs +++ b/src/node/src/config.rs @@ -44,7 +44,7 @@ use std::{ }, time::Duration, }; -use storage::shardstate_db_async::CellsDbConfig; +use storage::{archives::epoch::ArchivalModeConfig, shardstate_db_async::CellsDbConfig}; use ton_api::{ ton::{ self, @@ -408,6 +408,8 @@ pub struct TonNodeConfig { sync_by_archives: bool, #[serde(default)] accelerated_consensus_disabled: bool, + #[serde(skip_serializing_if = "Option::is_none")] + archival_mode: Option, #[serde(skip)] custom_overlays: CustomOverlaysConfigBoxed, #[serde(default)] @@ -619,6 +621,10 @@ impl TonNodeConfig { } pub fn gc_archives_life_time_hours(&self) -> Option { + // GC disabled in archival mode + if self.archival_mode.is_some() { + return None; + } if let Some(gc) = &self.gc { if gc.enable_for_archives { return gc.archives_life_time_hours.or(Some(0)); @@ -627,6 +633,10 @@ impl TonNodeConfig { None } + pub fn archival_mode(&self) -> Option<&ArchivalModeConfig> { + self.archival_mode.as_ref() + } + pub fn internal_db_path(&self) -> &str { self.internal_db_path.as_deref().unwrap_or(Self::DEFAULT_DB_ROOT) } diff --git a/src/node/src/engine.rs b/src/node/src/engine.rs index 862497d..9bb2c63 100644 --- a/src/node/src/engine.rs +++ b/src/node/src/engine.rs @@ -518,7 +518,11 @@ impl Engine { }); let archives_life_time_hours = general_config.gc_archives_life_time_hours(); - let cells_lifetime_sec = general_config.cells_gc_config().cells_lifetime_sec; + let cells_lifetime_sec = if 
general_config.archival_mode().is_none() { + general_config.cells_gc_config().cells_lifetime_sec + } else { + u64::MAX + }; let enable_shard_state_persistent_gc = general_config.enable_shard_state_persistent_gc(); let skip_saving_persistent_states = general_config.skip_saving_persistent_states(); let states_cache_mode = general_config.states_cache_mode(); @@ -529,6 +533,7 @@ impl Engine { db_directory: general_config.internal_db_path().to_string(), cells_gc_interval_sec: general_config.cells_gc_config().gc_interval_sec, cells_db_config: cells_db_config.clone(), + archival_mode: general_config.archival_mode().cloned(), }; let control_config = general_config.control_server()?; let collator_config = general_config.collator_config().clone(); diff --git a/src/node/src/engine_operations.rs b/src/node/src/engine_operations.rs index 00d0fcc..668eaed 100644 --- a/src/node/src/engine_operations.rs +++ b/src/node/src/engine_operations.rs @@ -747,6 +747,16 @@ impl EngineOperations for Engine { Ok(state) } + async fn store_state_update( + &self, + handle: &Arc, + state_update: Cell, + ) -> Result<()> { + self.db().store_state_update(handle, state_update).await?; + self.shard_states_awaiters().shunt_async(handle.id(), self.load_state(handle.id())).await?; + Ok(()) + } + async fn store_zerostate( &self, state: Arc, @@ -1248,6 +1258,10 @@ impl EngineOperations for Engine { ) -> Result<()> { self.update_public_overlays(keyblock_id, config).await } + + fn is_archival_mode(&self) -> bool { + self.db().is_archival_mode() + } } async fn redirect_external_message( diff --git a/src/node/src/engine_traits.rs b/src/node/src/engine_traits.rs index b3158b0..6f6ca39 100644 --- a/src/node/src/engine_traits.rs +++ b/src/node/src/engine_traits.rs @@ -636,6 +636,13 @@ pub trait EngineOperations: Sync + Send { ) -> Result> { unimplemented!() } + async fn store_state_update( + &self, + handle: &Arc, + state_update: Cell, + ) -> Result<()> { + unimplemented!() + } async fn store_zerostate( &self, 
state: Arc, @@ -991,6 +998,10 @@ pub trait EngineOperations: Sync + Send { ) -> Result<()> { Ok(()) } + + fn is_archival_mode(&self) -> bool { + false + } } #[async_trait::async_trait] diff --git a/src/node/src/full_node/apply_block.rs b/src/node/src/full_node/apply_block.rs index f55e74e..8a58f03 100644 --- a/src/node/src/full_node/apply_block.rs +++ b/src/node/src/full_node/apply_block.rs @@ -37,7 +37,7 @@ pub async fn apply_block( check_prev_blocks(&prev_ids, engine, mc_seq_no, pre_apply, recursion_depth).await?; if !handle.has_state() { - calc_shard_state(handle, block, &prev_ids, engine).await?; + store_state_update(handle, block, &prev_ids, engine).await?; } set_prev_ids(handle, &prev_ids, engine.deref())?; if !pre_apply { @@ -92,75 +92,87 @@ async fn check_prev_blocks( Ok(()) } -// Gets prev block(s) state and applies merkle update from block to calculate new state -pub async fn calc_shard_state( +// Normal mode - gets prev block(s) state and applies merkle update from block to calculate new state +// Archival mode - just saves state update from block, without applying it +pub async fn store_state_update( handle: &Arc, block: &BlockStuff, prev_ids: &(BlockIdExt, Option), engine: &Arc, -) -> Result<(Arc, (Arc, Option>))> { +) -> Result<()> { let block_descr = fmt_block_id_short(block.id()); - log::debug!("({}): calc_shard_state: block: {}", block_descr, block.id()); - - let (prev_ss_root, prev_ss) = match prev_ids { - (prev1, Some(prev2)) => { - let ss1 = engine.clone().wait_state(prev1, None, true).await?; - let ss2 = engine.clone().wait_state(prev2, None, true).await?; - let root = ShardStateStuff::construct_split_root( - ss1.root_cell().clone(), - ss2.root_cell().clone(), - )?; - (root, (ss1, Some(ss2))) - } - (prev, None) => { - let ss = engine.clone().wait_state(prev, None, true).await?; - (ss.root_cell().clone(), (ss, None)) - } - }; + log::debug!("({}): store_state_update: block: {}", block_descr, block.id()); - let merkle_update = 
block.block()?.read_state_update()?; - let block_id = block.id().clone(); - let engine_cloned = engine.clone(); - - let block_descr_clone = block_descr.clone(); - let ss = tokio::task::spawn_blocking(move || -> Result> { - let now = std::time::Instant::now(); - let cf = engine_cloned.db_cells_factory()?; - let cl = engine_cloned.db_cells_loader()?; - let (ss_root, _metrics) = - merkle_update.apply_for_ex(&prev_ss_root, &cf, cl.deref()).map_err(|e| { - error!( - "Error applying Merkle update for block {}: {}\ - prev_ss_root: {:#.2}\ - merkle_update: {}", - block_id, e, prev_ss_root, merkle_update - ) - })?; - let elapsed = now.elapsed(); + if engine.is_archival_mode() { + log::debug!("({}): store_state_update: store_state_update: {}", block_descr, handle.id()); + engine.store_state_update(handle, block.block()?.read_state_update()?.new).await?; log::debug!( - "({}): TIME: calc_shard_state: applied Merkle update {}ms {}", - block_descr_clone, - elapsed.as_millis(), - block_id + "({}): store_state_update: store_state_update: {} done", + block_descr, + handle.id() ); - #[cfg(feature = "telemetry")] - log::debug!(target: "telemetry", "({}): applying Merkle update: \n{}", block_descr_clone, _metrics); - metrics::histogram!("ton_node_db_calc_merkle_update_seconds").record(elapsed); - ShardStateStuff::from_root_cell( - block_id.clone(), - ss_root, + } else { + let prev_ss_root = match prev_ids { + (prev1, Some(prev2)) => { + let ss1 = engine.clone().wait_state(prev1, None, true).await?; + let ss2 = engine.clone().wait_state(prev2, None, true).await?; + let root = ShardStateStuff::construct_split_root( + ss1.root_cell().clone(), + ss2.root_cell().clone(), + )?; + root + } + (prev, None) => { + let ss = engine.clone().wait_state(prev, None, true).await?; + ss.root_cell().clone() + } + }; + + let merkle_update = block.block()?.read_state_update()?; + let block_id = block.id().clone(); + let engine_cloned = engine.clone(); + + let block_descr_clone = block_descr.clone(); + let 
ss = tokio::task::spawn_blocking(move || -> Result> { + let now = std::time::Instant::now(); + let cf = engine_cloned.db_cells_factory()?; + let cl = engine_cloned.db_cells_loader()?; + let (ss_root, _metrics) = + merkle_update.apply_for_ex(&prev_ss_root, &cf, cl.deref()).map_err(|e| { + error!( + "Error applying Merkle update for block {}: {}\ + prev_ss_root: {:#.2}\ + merkle_update: {}", + block_id, e, prev_ss_root, merkle_update + ) + })?; + let elapsed = now.elapsed(); + log::debug!( + "({}): TIME: store_state_update: applied Merkle update {}ms {}", + block_descr_clone, + elapsed.as_millis(), + block_id + ); #[cfg(feature = "telemetry")] - engine_cloned.engine_telemetry(), - engine_cloned.engine_allocated(), - ) - }) - .await??; + log::debug!(target: "telemetry", "({}): applying Merkle update: \n{}", block_descr_clone, _metrics); + metrics::histogram!("ton_node_db_calc_merkle_update_seconds").record(elapsed); + ShardStateStuff::from_root_cell( + block_id.clone(), + ss_root, + #[cfg(feature = "telemetry")] + engine_cloned.engine_telemetry(), + engine_cloned.engine_allocated(), + ) + }) + .await??; - log::debug!("({}): calc_shard_state: store_state: {}", block_descr, handle.id()); - let ss = engine.store_state(handle, ss).await?; - log::debug!("({}): calc_shard_state: store_state: {} done", block_descr, handle.id()); - Ok((ss, prev_ss)) + log::debug!("({}): store_state_update: store_state: {}", block_descr, handle.id()); + engine.store_state(handle, ss).await?; + log::debug!("({}): store_state_update: store_state: {} done", block_descr, handle.id()); + } + + Ok(()) } // set next block ids for prev blocks diff --git a/src/node/src/internal_db/mod.rs b/src/node/src/internal_db/mod.rs index e80c567..6c0d0c9 100644 --- a/src/node/src/internal_db/mod.rs +++ b/src/node/src/internal_db/mod.rs @@ -34,16 +34,28 @@ use std::{ #[cfg(feature = "telemetry")] use storage::StorageTelemetry; use storage::{ - archives::{archive_manager::ArchiveManager, 
package_entry_id::PackageEntryId}, - block_handle_db::{self, BlockHandle, BlockHandleDb, BlockHandleStorage, NodeStateDb}, - block_info_db::BlockInfoDb, + archive_shardstate_db::ArchiveShardStateDb, + archives::{ + archive_manager::ArchiveManager, + db_provider::{ArchiveDbProvider, EpochDbProvider, SingleDbProvider}, + epoch::{ArchivalModeConfig, EpochRouter}, + package_entry_id::PackageEntryId, + }, + block_handle_db::{ + self, BlockHandle, BlockHandleDb, BlockHandleStorage, NodeStateDb, BLOCK_HANDLE_DB_NAME, + VALIDATOR_STATE_DB_NAME, + }, + block_info_db::{ + BlockInfoDb, NEXT1_BLOCK_DB_NAME, NEXT2_BLOCK_DB_NAME, PREV1_BLOCK_DB_NAME, + PREV2_BLOCK_DB_NAME, + }, db::{ filedb::FileDb, - rocksdb::{AccessType, RocksDb}, + rocksdb::{AccessType, RocksDb, CATCHAINS_DB_NAME, NODE_DB_NAME}, }, dynamic_boc_rc_db::{AsyncCellsStorageAdapter, DynamicBocDb}, - shard_top_blocks_db::ShardTopBlocksDb, - shardstate_db_async::{AllowStateGcResolver, CellsDbConfig, ShardStateDb}, + shard_top_blocks_db::{ShardTopBlocksDb, SHARD_TOP_BLOCKS_DB_NAME}, + shardstate_db_async::{AllowStateGcResolver, CellsDbConfig, Job, ShardStateDb}, traits::Serializable, types::{BlockMeta, PersistentStatePartId, PersistentStatePartKey}, StorageAlloc, TimeChecker, @@ -67,8 +79,13 @@ pub const DB_VERSION: &str = "DbVersion"; pub const DB_VERSION_7: u32 = 7; // with block indexes pub const CURRENT_DB_VERSION: u32 = DB_VERSION_7; +pub const SHARDSTATE_DB_NAME: &str = "shardstate_db"; const CELLS_CF_NAME: &str = "cells_db_v6"; const CELLSCOUNTERS_CF_NAME: &str = "cells_db_v6_counters"; +const SHARD_STATE_PERSISTENT_DB_NAME: &str = "shard_state_persistent_db"; +pub const ARCHIVE_STATES_DB_NAME: &str = "archive_states"; +pub const ARCHIVE_CELLS_CF_NAME: &str = "archive_cells_db"; +pub const ARCHIVE_SHARDSTATE_CF_NAME: &str = "archive_shardstate_db"; /// Validator state keys pub(crate) const LAST_ROTATION_MC_BLOCK: &str = "LastRotationBlockId"; @@ -176,6 +193,59 @@ pub struct InternalDbConfig { pub db_directory: 
String, pub cells_gc_interval_sec: u32, pub cells_db_config: CellsDbConfig, + pub archival_mode: Option, +} + +pub enum StateDb { + Dynamic(Arc), + Archive(Arc), +} + +impl StateDb { + pub fn get(&self, id: &BlockIdExt) -> Result { + match self { + StateDb::Dynamic(db) => db.get(id), + StateDb::Archive(db) => db.get(id), + } + } + + pub fn get_cell(&self, id: &UInt256) -> Result { + match self { + StateDb::Dynamic(db) => db.get_cell(id), + StateDb::Archive(db) => db.get_cell(id), + } + } + + pub fn cells_factory(&self) -> Result> { + match self { + StateDb::Dynamic(db) => db.cells_factory(), + StateDb::Archive(db) => Ok(db.cells_factory()), + } + } + + pub fn create_hashed_cell_storage( + &self, + root: Option<&Cell>, + max_inmemory_cells: usize, + ) -> Result> { + match self { + StateDb::Dynamic(db) => { + Ok(Arc::new(db.create_hashed_cell_storage(root, max_inmemory_cells)?)) + } + StateDb::Archive(db) => { + Ok(Arc::new(db.create_hashed_cell_storage(root, max_inmemory_cells)?)) + } + } + } +} + +impl Clone for StateDb { + fn clone(&self) -> Self { + match self { + StateDb::Dynamic(db) => StateDb::Dynamic(db.clone()), + StateDb::Archive(db) => StateDb::Archive(db.clone()), + } + } } pub struct InternalDb { @@ -186,7 +256,7 @@ pub struct InternalDb { next1_block_db: BlockInfoDb, next2_block_db: BlockInfoDb, shard_state_persistent_db: Arc, - shard_state_dynamic_db: Arc, + state_db: StateDb, archive_manager: Arc, shard_top_blocks_db: ShardTopBlocksDb, full_node_state_db: Arc, @@ -258,28 +328,39 @@ impl InternalDb { allocated: Arc, ) -> Result { let mut cfs_opts = HashMap::new(); - cfs_opts.insert( - CELLS_CF_NAME.to_string(), - DynamicBocDb::build_cells_cf_options(&config.cells_db_config), - ); - cfs_opts.insert( - CELLSCOUNTERS_CF_NAME.to_string(), - DynamicBocDb::build_counters_cf_options(&config.cells_db_config), - ); + if config.archival_mode.is_none() { + cfs_opts.insert( + CELLS_CF_NAME.to_string(), + 
DynamicBocDb::build_cells_cf_options(&config.cells_db_config), + ); + cfs_opts.insert( + CELLSCOUNTERS_CF_NAME.to_string(), + DynamicBocDb::build_counters_cf_options(&config.cells_db_config), + ); + } let access_type = access_type.unwrap_or(AccessType::ReadWrite); let can_create_db = access_type == AccessType::ReadWrite; - let db = RocksDb::new(config.db_directory.as_str(), "db", cfs_opts, access_type.clone())?; - let db_catchain = - RocksDb::new(config.db_directory.as_str(), "catchains", None, access_type)?; + let db = RocksDb::new( + config.db_directory.as_str(), + NODE_DB_NAME, + cfs_opts, + access_type.clone(), + )?; + let db_catchain = RocksDb::new( + config.db_directory.as_str(), + CATCHAINS_DB_NAME, + None, + access_type.clone(), + )?; let block_handle_db = - Arc::new(BlockHandleDb::with_db(db.clone(), "block_handle_db", can_create_db)?); + Arc::new(BlockHandleDb::with_db(db.clone(), BLOCK_HANDLE_DB_NAME, can_create_db)?); let full_node_state_db = Arc::new(NodeStateDb::with_db( db.clone(), storage::db::rocksdb::NODE_STATE_DB_NAME, can_create_db, )?); let validator_state_db = - Arc::new(NodeStateDb::with_db(db_catchain, "validator_state_db", can_create_db)?); + Arc::new(NodeStateDb::with_db(db_catchain, VALIDATOR_STATE_DB_NAME, can_create_db)?); let block_handle_storage = Arc::new(BlockHandleStorage::with_dbs( block_handle_db.clone(), full_node_state_db.clone(), @@ -289,19 +370,52 @@ impl InternalDb { allocated.storage.clone(), )); - let shard_state_dynamic_db = Self::create_shard_state_dynamic_db( - db.clone(), - &config, - #[cfg(feature = "telemetry")] - telemetry.storage.clone(), - allocated.storage.clone(), - )?; + let state_db = if config.archival_mode.is_some() { + let states_db = RocksDb::new( + &config.db_directory, + ARCHIVE_STATES_DB_NAME, + std::collections::HashMap::from([( + ARCHIVE_CELLS_CF_NAME.to_string(), + storage::cell_db::CellDb::build_cf_options( + config.cells_db_config.cells_cache_size_bytes, + ), + )]), + access_type.clone(), + )?; + 
StateDb::Archive(Arc::new(ArchiveShardStateDb::new( + states_db, + ARCHIVE_SHARDSTATE_CF_NAME, + ARCHIVE_CELLS_CF_NAME, + &config.db_directory, + &config.cells_db_config, + #[cfg(feature = "telemetry")] + telemetry.storage.clone(), + allocated.storage.clone(), + )?)) + } else { + StateDb::Dynamic(Self::create_shard_state_dynamic_db( + db.clone(), + &config, + #[cfg(feature = "telemetry")] + telemetry.storage.clone(), + allocated.storage.clone(), + )?) + }; let last_unneeded_key_block_id = block_handle_storage.load_full_node_state(LAST_UNNEEDED_KEY_BLOCK)?.unwrap_or_default(); + let db_root_path = Arc::new(PathBuf::from(&config.db_directory)); + let db_provider: Arc = + if let Some(ref archival_config) = config.archival_mode { + let router = Arc::new(EpochRouter::new(archival_config).await?); + Arc::new(EpochDbProvider::new(router)) + } else { + Arc::new(SingleDbProvider::new(db.clone(), db_root_path.clone())) + }; let archive_manager = Arc::new( ArchiveManager::with_data( db.clone(), - Arc::new(PathBuf::from(&config.db_directory)), + db_root_path, + db_provider, last_unneeded_key_block_id.seq_no(), monitor_min_split, #[cfg(feature = "telemetry")] @@ -314,18 +428,18 @@ impl InternalDb { let db = Self { db: db.clone(), block_handle_storage, - prev1_block_db: BlockInfoDb::with_db(db.clone(), "prev1_block_db", can_create_db)?, - prev2_block_db: BlockInfoDb::with_db(db.clone(), "prev2_block_db", can_create_db)?, - next1_block_db: BlockInfoDb::with_db(db.clone(), "next1_block_db", can_create_db)?, - next2_block_db: BlockInfoDb::with_db(db.clone(), "next2_block_db", can_create_db)?, + prev1_block_db: BlockInfoDb::with_db(db.clone(), PREV1_BLOCK_DB_NAME, can_create_db)?, + prev2_block_db: BlockInfoDb::with_db(db.clone(), PREV2_BLOCK_DB_NAME, can_create_db)?, + next1_block_db: BlockInfoDb::with_db(db.clone(), NEXT1_BLOCK_DB_NAME, can_create_db)?, + next2_block_db: BlockInfoDb::with_db(db.clone(), NEXT2_BLOCK_DB_NAME, can_create_db)?, shard_state_persistent_db: 
Arc::new(FileDb::with_path( - Path::new(config.db_directory.as_str()).join("shard_state_persistent_db"), + Path::new(config.db_directory.as_str()).join(SHARD_STATE_PERSISTENT_DB_NAME), )), - shard_state_dynamic_db, + state_db, archive_manager, shard_top_blocks_db: ShardTopBlocksDb::with_db( db.clone(), - "shard_top_blocks_db", + SHARD_TOP_BLOCKS_DB_NAME, can_create_db, )?, full_node_state_db, @@ -365,7 +479,7 @@ impl InternalDb { ) -> Result> { ShardStateDb::new( db, - "shardstate_db", + SHARDSTATE_DB_NAME, CELLS_CF_NAME, CELLSCOUNTERS_CF_NAME, &config.db_directory, @@ -377,36 +491,49 @@ impl InternalDb { } pub fn clean_shard_state_dynamic_db(&mut self) -> Result<()> { - if self.shard_state_dynamic_db.is_gc_run() { - fail!("It is forbidden to clear shard_state_dynamic_db while cells GC is running") - } + match &self.state_db { + StateDb::Dynamic(db) => { + if db.is_gc_run() { + fail!( + "It is forbidden to clear shard_state_dynamic_db while cells GC is running" + ) + } - if let Err(e) = self.db.drop_table_force("shardstate_db") { - log::warn!("Can't drop table \"shardstate_db\": {}", e); - } - if let Err(e) = self.db.drop_table_force(CELLS_CF_NAME) { - log::warn!("Can't drop table \"cells_db\": {}", e); - } - let _ = self.db.drop_table_force("cells_db1"); // depricated table, used in db versions 1 & 2 - self.full_node_state_db.put(&ASSUME_OLD_FORMAT_CELLS, &[0])?; + if let Err(e) = self.db.drop_table_force(SHARDSTATE_DB_NAME) { + log::warn!("Can't drop table \"shardstate_db\": {}", e); + } + if let Err(e) = self.db.drop_table_force(CELLS_CF_NAME) { + log::warn!("Can't drop table \"cells_db\": {}", e); + } + let _ = self.db.drop_table_force("cells_db1"); + self.full_node_state_db.put(&ASSUME_OLD_FORMAT_CELLS, &[0])?; - self.shard_state_dynamic_db = Self::create_shard_state_dynamic_db( - self.db.clone(), - &self.config, - #[cfg(feature = "telemetry")] - self.telemetry.storage.clone(), - self.allocated.storage.clone(), - )?; + self.state_db = 
StateDb::Dynamic(Self::create_shard_state_dynamic_db( + self.db.clone(), + &self.config, + #[cfg(feature = "telemetry")] + self.telemetry.storage.clone(), + self.allocated.storage.clone(), + )?); - Ok(()) + Ok(()) + } + StateDb::Archive(_) => { + fail!("clean_shard_state_dynamic_db is not supported in archival mode") + } + } } pub fn start_states_gc(&self, resolver: Arc) { - self.shard_state_dynamic_db.clone().start_gc(resolver, self.cells_gc_interval.clone()) + if let StateDb::Dynamic(db) = &self.state_db { + db.clone().start_gc(resolver, self.cells_gc_interval.clone()) + } } pub async fn stop_states_db(&self) { - self.shard_state_dynamic_db.stop().await + if let StateDb::Dynamic(db) = &self.state_db { + db.stop().await + } } fn store_block_handle( @@ -679,15 +806,35 @@ impl InternalDb { } let _lock = handle.saving_state_lock().lock().await; if force || !handle.has_saved_state() { - let callback = - SsCallback::new(handle.clone(), self.block_handle_storage.clone(), callback_ss); - let callback = - Some(Arc::new(callback) as Arc); - self.shard_state_dynamic_db - .put(state.block_id(), state.root_cell().clone(), callback) - .await?; - if handle.set_state() { - self.store_block_handle(handle, callback_handle)?; + match &self.state_db { + StateDb::Archive(db) => { + let state = state.clone(); + let db = db.clone(); + let state_root = state.root_cell().clone(); + tokio::task::spawn_blocking(move || { + db.put(state.block_id(), state.root_cell().clone()) + }) + .await??; + if let Some(callback) = callback_ss { + callback.invoke(Job::PutState(state_root, handle.id().clone()), true).await; + } + if handle.set_state() | handle.set_state_saved() { + self.store_block_handle(handle, callback_handle)?; + } + } + StateDb::Dynamic(db) => { + let callback = SsCallback::new( + handle.clone(), + self.block_handle_storage.clone(), + callback_ss, + ); + let callback = + Some(Arc::new(callback) as Arc); + db.put(state.block_id(), state.root_cell().clone(), callback).await?; + if 
handle.set_state() { + self.store_block_handle(handle, callback_handle)?; + } + } } Ok((state.clone(), true)) } else { @@ -702,17 +849,62 @@ impl InternalDb { callback_ss: Option>, ) -> Result { let timeout = 30; - let callback = - SsCallback::new(handle.clone(), self.block_handle_storage.clone(), callback_ss); - let callback = Some(Arc::new(callback) as Arc); let _tc = TimeChecker::new( format!("store_shard_state_dynamic_raw_force {}", handle.id()), timeout, ); let _lock = handle.saving_state_lock().lock().await; - self.shard_state_dynamic_db.put(handle.id(), state_root.clone(), callback).await?; - Ok(state_root) + match &self.state_db { + StateDb::Archive(db) => { + let db = db.clone(); + let id = handle.id().clone(); + let saved = tokio::task::spawn_blocking(move || db.put(&id, state_root)).await??; + if let Some(callback) = callback_ss { + callback.invoke(Job::PutState(saved.clone(), handle.id().clone()), true).await; + } + if handle.set_state() | handle.set_state_saved() { + self.store_block_handle(handle, None)?; + } + Ok(saved) + } + StateDb::Dynamic(db) => { + let callback = + SsCallback::new(handle.clone(), self.block_handle_storage.clone(), callback_ss); + let callback = + Some(Arc::new(callback) as Arc); + db.put(handle.id(), state_root.clone(), callback).await?; + Ok(state_root) + } + } + } + + pub async fn store_state_update( + &self, + handle: &Arc, + state_update: Cell, + ) -> Result<()> { + let timeout = 30; + let _tc = TimeChecker::new(format!("store_state_update {}", handle.id()), timeout); + + let _lock = handle.saving_state_lock().lock().await; + if !handle.has_saved_state() { + match &self.state_db { + StateDb::Archive(db) => { + let db = db.clone(); + let id = handle.id().clone(); + tokio::task::spawn_blocking(move || db.put_update(&id, state_update)).await??; + if handle.set_state() | handle.set_state_saved() { + self.store_block_handle(handle, None)?; + } + } + _ => { + fail!("store_state_update is only supported in archival mode") + } + } + 
} + + Ok(()) } pub fn load_shard_state_dynamic(&self, id: &BlockIdExt) -> Result> { @@ -726,7 +918,7 @@ impl InternalDb { fail!("ShardState is not saved for {}", id); } - let root_cell = self.shard_state_dynamic_db.get(handle.id())?; + let root_cell = self.state_db.get(handle.id())?; ShardStateStuff::from_root_cell( handle.id().clone(), @@ -739,7 +931,7 @@ impl InternalDb { pub fn load_cell(&self, id: &UInt256) -> Result { let _tc = TimeChecker::new(format!("load_cell {}", id), 30); - self.shard_state_dynamic_db.get_cell(id) + self.state_db.get_cell(id) } pub fn shard_state_persistent_write_obj( @@ -788,7 +980,7 @@ impl InternalDb { log::info!("store_shard_state_persistent {:x}: already saved", root_hash); } else { let id = handle.id().clone(); - let shard_state_dynamic_db = self.shard_state_dynamic_db.clone(); + let state_db = self.state_db.clone(); let shard_state_persistent_db = self.shard_state_persistent_db.clone(); tokio::task::spawn_blocking(move || -> Result<()> { let root_cell = state.root_cell().clone(); @@ -803,13 +995,13 @@ impl InternalDb { // in memory cells, as we do it while storing part (see store_shard_state_persistent_part). // It means we don't need to pass root cell into the adapter // and can set a zero limit for in-memory cells. 
- let cells_storage = shard_state_dynamic_db.create_hashed_cell_storage(None, 0)?; + let cells_storage = state_db.create_hashed_cell_storage(None, 0)?; let writer = BigBocWriter::with_params( [root_cell], MAX_SAFE_DEPTH, BocFlags::all(), abort.deref(), - Arc::new(cells_storage), + cells_storage, )?; let arrange_time = now.elapsed(); let cells_count = writer.cells_count(); @@ -855,7 +1047,7 @@ impl InternalDb { log::info!("store_shard_state_persistent_part {}: already saved", id); } else { tokio::task::spawn_blocking({ - let shard_state_dynamic_db = self.shard_state_dynamic_db.clone(); + let state_db = self.state_db.clone(); let shard_state_persistent_db = self.shard_state_persistent_db.clone(); let db_key: PersistentStatePartKey = id.into(); let id = id.clone(); @@ -889,14 +1081,14 @@ impl InternalDb { // and remembers their data in memory. // The adapter does not store the cell (don't keep references), only data. // The maximum number of cells to store in memory is limited - let cells_storage = shard_state_dynamic_db - .create_hashed_cell_storage(Some(&part), MAX_INMEMORY_CELLS)?; + let cells_storage = + state_db.create_hashed_cell_storage(Some(&part), MAX_INMEMORY_CELLS)?; let writer = BigBocWriter::with_params( [part], MAX_SAFE_DEPTH, BocFlags::all(), abort.deref(), - Arc::new(cells_storage), + cells_storage, )?; let arrange_time = now.elapsed(); let cells_count = writer.cells_count(); @@ -1453,7 +1645,12 @@ impl InternalDb { &self, index: Vec<(UInt256, u16)>, ) -> Result { - self.shard_state_dynamic_db.create_fast_cell_storage(index) + match &self.state_db { + StateDb::Dynamic(db) => db.create_fast_cell_storage(index), + StateDb::Archive(_) => { + fail!("create_fast_cell_storage is not supported in archival mode") + } + } } pub fn find_full_block_id(&self, root_hash: &UInt256) -> Result> { @@ -1461,13 +1658,17 @@ impl InternalDb { } pub fn cells_factory(&self) -> Result> { - self.shard_state_dynamic_db.cells_factory() + self.state_db.cells_factory() } pub fn 
cells_loader(&self) -> Result Result + Send + Sync>> { - let cs = self.shard_state_dynamic_db.create_hashed_cell_storage(None, 0)?; + let cs = self.state_db.create_hashed_cell_storage(None, 0)?; Ok(Arc::new(move |hash| cs.load_cell(hash))) } + + pub fn is_archival_mode(&self) -> bool { + matches!(self.state_db, StateDb::Archive(_)) + } } #[cfg(test)] diff --git a/src/node/src/internal_db/restore.rs b/src/node/src/internal_db/restore.rs index c85e77b..27c2d82 100644 --- a/src/node/src/internal_db/restore.rs +++ b/src/node/src/internal_db/restore.rs @@ -29,7 +29,7 @@ use std::{ time::Duration, }; use storage::{ - dynamic_boc_rc_db::BROKEN_CELL_BEACON_FILE, shardstate_db_async::SsNotificationCallback, + cell_db::BROKEN_CELL_BEACON_FILE, shardstate_db_async::SsNotificationCallback, traits::Serializable, }; use ton_block::{ @@ -350,6 +350,9 @@ async fn restore( log::info!("Fast restore successfully finished"); return Ok(db); } + if db.config.archival_mode.is_some() { + fail!("Refilling cells db is not supported in archival mode"); + } // If there was broken cell or special flag set - check blocks and restore cells db log::info!("Checking blocks..."); diff --git a/src/node/src/lib.rs b/src/node/src/lib.rs index 8cb3936..a82818e 100644 --- a/src/node/src/lib.rs +++ b/src/node/src/lib.rs @@ -8,6 +8,7 @@ * This file has been modified from its original version. * This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND. 
*/ +pub mod archive_import; pub mod block; pub mod block_proof; pub mod boot; diff --git a/src/node/src/network/liteserver.rs b/src/node/src/network/liteserver.rs index 8ddecdb..212af10 100644 --- a/src/node/src/network/liteserver.rs +++ b/src/node/src/network/liteserver.rs @@ -1656,7 +1656,7 @@ impl LiteServerQuerySubscriber { let result = BlockState { id: block_id, root_hash: state.root_cell().repr_hash(), - file_hash: state.block_id().file_hash.clone(), + file_hash: UInt256::calc_file_hash(&data), data, }; Ok(result) diff --git a/src/node/src/shard_blocks.rs b/src/node/src/shard_blocks.rs index ef10ed0..ff2ca81 100644 --- a/src/node/src/shard_blocks.rs +++ b/src/node/src/shard_blocks.rs @@ -259,7 +259,11 @@ impl ShardBlocksPool { } if last_mc_seq_no != mc_seqno { - log::debug!("get_shard_blocks: Given last_mc_seq_no {} is not actual", last_mc_seq_no); + log::debug!( + "get_shard_blocks: Given last_mc_seq_no {} is not actual {}", + last_mc_seq_no, + mc_seqno + ); fail!("Given last_mc_seq_no {} is not actual {}", last_mc_seq_no, mc_seqno); } else { let mut returned_list = string_builder::Builder::default(); @@ -340,6 +344,7 @@ async fn resend_top_shard_blocks(engine: &dyn EngineOperations) -> Result<()> { Err(e) => { if actual_last_mc_seqno != mc_state.block_id().seq_no { log::trace!("resend_top_shard_blocks: goto next attempt"); + futures_timer::Delay::new(Duration::from_millis(100)).await; continue; } fail!("resend_top_shard_blocks: {:?}", e); diff --git a/src/node/src/sync.rs b/src/node/src/sync.rs index 2737980..9a57a22 100644 --- a/src/node/src/sync.rs +++ b/src/node/src/sync.rs @@ -760,7 +760,7 @@ async fn import_shard_blocks( } }; if let Some(block) = block { - engine.apply_block(&handle, &block, mc_seq_no, false).await?; + engine.apply_block(&handle, &block, mc_handle.id().seq_no(), false).await?; return Ok(id); } } @@ -770,7 +770,7 @@ async fn import_shard_blocks( unapplied blocks. 
Will try to download it directly" ); absent_blocks.fetch_add(1, Ordering::Relaxed); - engine.download_and_apply_block(&id, mc_seq_no, false).await?; + engine.download_and_apply_block(&id, mc_handle.id().seq_no(), false).await?; Ok(id) }); tasks.push(task) diff --git a/src/node/src/tests/static/5E994FCF4D425C0A6CE6A792594B7173205F740A39CD56F537DEFD28B48A0F6E.boc b/src/node/src/tests/static/5E994FCF4D425C0A6CE6A792594B7173205F740A39CD56F537DEFD28B48A0F6E.boc new file mode 100644 index 0000000..2e61492 Binary files /dev/null and b/src/node/src/tests/static/5E994FCF4D425C0A6CE6A792594B7173205F740A39CD56F537DEFD28B48A0F6E.boc differ diff --git a/src/node/src/tests/static/EE0BEDFE4B32761FB35E9E1D8818EA720CAD1A0E7B4D2ED673C488E72E910342.boc b/src/node/src/tests/static/EE0BEDFE4B32761FB35E9E1D8818EA720CAD1A0E7B4D2ED673C488E72E910342.boc new file mode 100644 index 0000000..c95aea2 Binary files /dev/null and b/src/node/src/tests/static/EE0BEDFE4B32761FB35E9E1D8818EA720CAD1A0E7B4D2ED673C488E72E910342.boc differ diff --git a/src/node/src/tests/static/archives/archive.00000.0:8000000000000000.pack b/src/node/src/tests/static/archives/archive.00000.0:8000000000000000.pack new file mode 100644 index 0000000..503f698 Binary files /dev/null and b/src/node/src/tests/static/archives/archive.00000.0:8000000000000000.pack differ diff --git a/src/node/src/tests/static/archives/archive.00000.pack b/src/node/src/tests/static/archives/archive.00000.pack new file mode 100644 index 0000000..c1c06f6 Binary files /dev/null and b/src/node/src/tests/static/archives/archive.00000.pack differ diff --git a/src/node/src/tests/static/archives/archive.00100.0:8000000000000000.pack b/src/node/src/tests/static/archives/archive.00100.0:8000000000000000.pack new file mode 100644 index 0000000..6c05156 Binary files /dev/null and b/src/node/src/tests/static/archives/archive.00100.0:8000000000000000.pack differ diff --git a/src/node/src/tests/static/archives/archive.00100.pack 
b/src/node/src/tests/static/archives/archive.00100.pack new file mode 100644 index 0000000..d2fce83 Binary files /dev/null and b/src/node/src/tests/static/archives/archive.00100.pack differ diff --git a/src/node/src/tests/test_archive_import.rs b/src/node/src/tests/test_archive_import.rs new file mode 100644 index 0000000..0620be0 --- /dev/null +++ b/src/node/src/tests/test_archive_import.rs @@ -0,0 +1,270 @@ +/* + * Copyright (C) 2025-2026 RSquad Blockchain Lab. + * + * Licensed under the GNU General Public License v3.0. + * See the LICENSE file in the root of this repository. + * + * This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND. + */ +#[cfg(feature = "telemetry")] +use crate::collator_test_bundle::create_engine_telemetry; +use crate::{ + archive_import::{run_import, ImportConfig}, + block::{BlockIdExtExtention, BlockStuff}, + collator_test_bundle::create_engine_allocated, + internal_db::{ + InternalDb, InternalDbConfig, ARCHIVES_GC_BLOCK, LAST_APPLIED_MC_BLOCK, + PSS_KEEPER_MC_BLOCK, SHARD_CLIENT_MC_BLOCK, + }, + test_helper::init_test_log, +}; +use std::{ + path::{Path, PathBuf}, + sync::{atomic::AtomicU8, Arc}, +}; +use storage::{archives::epoch::ArchivalModeConfig, db::rocksdb::RocksDb}; +use ton_block::{ + read_single_root_boc, write_boc, AccountIdPrefixFull, BlockIdExt, Result, SHARD_FULL, +}; + +async fn wait_for_db_release(db: Arc) { + while Arc::strong_count(&db) > 1 { + tokio::time::sleep(std::time::Duration::from_millis(1)).await; + } + drop(db); +} + +const ARCHIVES_PATH: &str = "src/tests/static/archives"; +const MC_ZEROSTATE_PATH: &str = + "src/tests/static/5E994FCF4D425C0A6CE6A792594B7173205F740A39CD56F537DEFD28B48A0F6E.boc"; +const WC_ZEROSTATE_PATH: &str = + "src/tests/static/EE0BEDFE4B32761FB35E9E1D8818EA720CAD1A0E7B4D2ED673C488E72E910342.boc"; +const GLOBAL_CONFIG_PATH: &str = "src/tests/config/mainnet.json"; + +fn import_config(dir: &Path) -> ImportConfig { + ImportConfig { + archives_path: PathBuf::from(ARCHIVES_PATH), + 
epochs_path: dir.join("epochs"), + epoch_size: 20_000, + node_db_path: dir.join("node_db"), + mc_zerostate_path: PathBuf::from(MC_ZEROSTATE_PATH), + wc_zerostate_paths: vec![PathBuf::from(WC_ZEROSTATE_PATH)], + global_config_path: PathBuf::from(GLOBAL_CONFIG_PATH), + skip_validation: false, + move_files: false, + } +} + +async fn open_db(dir: &Path) -> Result { + let db_dir = dir.join("node_db"); + let epochs_path = dir.join("epochs"); + InternalDb::with_update( + InternalDbConfig { + db_directory: db_dir.to_string_lossy().to_string(), + archival_mode: Some(ArchivalModeConfig { + epoch_size: 20_000, + new_epochs_path: epochs_path, + existing_epochs: vec![], + }), + ..Default::default() + }, + false, + false, + false, + &|| Ok(()), + None, + Arc::new(AtomicU8::new(0)), + None, + #[cfg(feature = "telemetry")] + create_engine_telemetry(), + create_engine_allocated(), + ) + .await +} + +async fn check_imported_block( + db: &InternalDb, + block_id: &BlockIdExt, +) -> Result> { + let handle = + db.load_block_handle(block_id)?.expect("Block handle must exist for imported block"); + assert!(handle.has_state(), "Imported block must have state"); + assert!(handle.has_saved_state(), "Imported block must have saved state"); + assert!(handle.is_applied(), "Imported block must be applied"); + + let mut block_stuff = None; + if block_id.seq_no() > 0 { + assert!(handle.has_data(), "Imported block must have data"); + assert!(handle.has_prev1(), "Imported block must have prev1"); + if block_id.is_masterchain() { + assert!(handle.has_proof(), "Imported MC block must have proof"); + } else { + assert!(handle.has_proof_link(), "Imported shard block must have proof link"); + } + + let prev1 = db.load_block_prev1(&block_id)?; + assert_eq!(prev1.seq_no(), block_id.seq_no() - 1); + let prev_handle = db.load_block_handle(&prev1)?.expect("Prev block handle must exist"); + assert!(prev_handle.has_next1(), "Imported block must have next1"); + let next1 = db.load_block_next1(prev_handle.id())?; 
+ assert_eq!(&next1, block_id); + + block_stuff = Some(db.load_block_data(&handle).await?); + let _ = db.load_block_proof(&handle, !block_id.is_masterchain()).await?; + } + + let loaded_state = db.load_shard_state_dynamic(block_id)?; + let boc = write_boc(loaded_state.root_cell())?; + let deserialized_state = read_single_root_boc(&boc)?; + assert_eq!(loaded_state.root_cell().repr_hash(), deserialized_state.repr_hash()); + if block_id.seq_no() > 0 { + assert_eq!( + deserialized_state.repr_hash(), + block_stuff.as_ref().unwrap().block()?.read_state_update()?.new_hash + ); + } else { + assert_eq!(&deserialized_state.repr_hash(), block_id.root_hash()); + } + + Ok(block_stuff) +} + +#[tokio::test(flavor = "multi_thread")] +async fn test_import_and_verify() -> Result<()> { + init_test_log(); + let dir = tempfile::tempdir().unwrap(); + let config = import_config(dir.path()); + + run_import(config).await?; + + let db = open_db(dir.path()).await?; + + let last_mc = + db.load_full_node_state(LAST_APPLIED_MC_BLOCK)?.expect("LAST_APPLIED_MC_BLOCK must be set"); + assert_eq!(last_mc.seq_no(), 199); + assert!(last_mc.shard().is_masterchain()); + + let gc_block = db.load_full_node_state(ARCHIVES_GC_BLOCK)?; + assert_eq!(last_mc, gc_block.unwrap()); + + let pss_block = db.load_full_node_state(PSS_KEEPER_MC_BLOCK)?; + assert_eq!(last_mc, pss_block.unwrap()); + + let shard_client = db.load_full_node_state(SHARD_CLIENT_MC_BLOCK)?; + assert_eq!(last_mc, shard_client.unwrap()); + + let last_mc_block = check_imported_block(&db, &last_mc).await?.unwrap(); + + for shard_block in last_mc_block.top_blocks_all()? 
{ + check_imported_block(&db, &shard_block).await?; + } + + let first_mc = + db.lookup_block_by_seqno(&AccountIdPrefixFull::any_masterchain(), 1).await?.unwrap(); + let first_mc_block = check_imported_block(&db, &first_mc.0).await?.unwrap(); + // MC zerostate + check_imported_block(&db, &first_mc_block.construct_prev_id()?.0).await?; + + let first_wc = + db.lookup_block_by_seqno(&AccountIdPrefixFull::workchain(0, SHARD_FULL), 1).await?.unwrap(); + let first_wc_block = check_imported_block(&db, &first_wc.0).await?.unwrap(); + // WC zerostate + check_imported_block(&db, &first_wc_block.construct_prev_id()?.0).await?; + + db.stop_states_db().await; + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn test_import_resume() -> Result<()> { + init_test_log(); + let dir = tempfile::tempdir().unwrap(); + let partial_archives = dir.path().join("partial"); + std::fs::create_dir_all(&partial_archives)?; + + // Copy only the first group (archive.00000.*) + for entry in std::fs::read_dir(ARCHIVES_PATH)? { + let entry = entry?; + let name = entry.file_name().to_string_lossy().to_string(); + if name.starts_with("archive.00000.") { + std::fs::copy(entry.path(), partial_archives.join(&name))?; + } + } + + // First import — only first group + let config1 = ImportConfig { + archives_path: partial_archives.clone(), + epochs_path: dir.path().join("epochs"), + epoch_size: 20_000, + node_db_path: dir.path().join("node_db"), + mc_zerostate_path: PathBuf::from(MC_ZEROSTATE_PATH), + wc_zerostate_paths: vec![PathBuf::from(WC_ZEROSTATE_PATH)], + global_config_path: PathBuf::from(GLOBAL_CONFIG_PATH), + skip_validation: false, + move_files: true, + }; + let node_db = run_import(config1).await?; + wait_for_db_release(node_db).await; + + let db1 = open_db(dir.path()).await?; + let last_mc_1 = db1 + .load_full_node_state(LAST_APPLIED_MC_BLOCK)? 
+ .expect("After first import, LAST_APPLIED_MC_BLOCK must be set"); + assert_eq!(last_mc_1.seq_no(), 99); + drop(db1); + + // Copy remaining files for second import + for entry in std::fs::read_dir(ARCHIVES_PATH)? { + let entry = entry?; + let name = entry.file_name().to_string_lossy().to_string(); + if !name.starts_with("archive.00000.") { + std::fs::copy(entry.path(), partial_archives.join(&name))?; + } + } + + // Second import — should resume and process remaining groups + let config2 = ImportConfig { + archives_path: partial_archives, + epochs_path: dir.path().join("epochs"), + epoch_size: 20_000, + node_db_path: dir.path().join("node_db"), + mc_zerostate_path: PathBuf::from(MC_ZEROSTATE_PATH), + wc_zerostate_paths: vec![PathBuf::from(WC_ZEROSTATE_PATH)], + global_config_path: PathBuf::from(GLOBAL_CONFIG_PATH), + skip_validation: false, + move_files: false, + }; + run_import(config2).await?; + + let db2 = open_db(dir.path()).await?; + let last_mc_2 = db2 + .load_full_node_state(LAST_APPLIED_MC_BLOCK)? + .expect("After second import, LAST_APPLIED_MC_BLOCK must be set"); + assert_eq!(last_mc_2.seq_no(), 199); + db2.stop_states_db().await; + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn test_import_skip_validation() -> Result<()> { + init_test_log(); + let dir = tempfile::tempdir().unwrap(); + let mut config = import_config(dir.path()); + config.skip_validation = true; + + run_import(config).await?; + + let db = open_db(dir.path()).await?; + let last_mc = db.load_full_node_state(LAST_APPLIED_MC_BLOCK)?; + assert!(last_mc.is_some(), "Even with skip_validation, last MC must be set"); + + let last_mc = last_mc.unwrap(); + let handle = db + .load_block_handle(&last_mc)? 
+ .expect("Block handle must exist after skip_validation import"); + assert!(handle.has_data()); + + db.stop_states_db().await; + Ok(()) +} diff --git a/src/node/src/tests/test_sync.rs b/src/node/src/tests/test_sync.rs index ba0b3fc..2a37923 100644 --- a/src/node/src/tests/test_sync.rs +++ b/src/node/src/tests/test_sync.rs @@ -40,7 +40,10 @@ use std::{ }, }; use storage::{ - archives::archive_manager::ArchiveManager, + archives::{ + archive_manager::ArchiveManager, + db_provider::{ArchiveDbProvider, SingleDbProvider}, + }, block_handle_db::BlockHandleStorage, db::rocksdb::{AccessType, RocksDb}, types::{BlockMeta, PersistentStatePartId}, @@ -119,9 +122,13 @@ async fn test_sync() -> Result<()> { let allocated = create_engine_allocated(); #[cfg(feature = "telemetry")] let telemetry = create_engine_telemetry(); + let db_root_path = Arc::new(PathBuf::from(DB_PATH)); + let db_provider: Arc = + Arc::new(SingleDbProvider::new(db.clone(), db_root_path.clone())); let archive_manager = ArchiveManager::with_data( db.clone(), - Arc::new(PathBuf::from(DB_PATH)), + db_root_path, + db_provider, init_mc_block_id.seq_no(), monitor_min_split.clone(), #[cfg(feature = "telemetry")] diff --git a/src/node/src/types/awaiters_pool.rs b/src/node/src/types/awaiters_pool.rs index d27cbe3..894cdc4 100644 --- a/src/node/src/types/awaiters_pool.rs +++ b/src/node/src/types/awaiters_pool.rs @@ -161,6 +161,18 @@ where Ok(()) } + pub async fn shunt_async( + &self, + id: &I, + operation: impl futures::Future>, + ) -> Result<()> { + if let Some(op_awaiters) = self.ops_awaiters.get(id) { + let r = operation.await?; + let _ = op_awaiters.1.tx.send(Some(Ok(r))); + } + Ok(()) + } + async fn wait_operation( &self, id: &I, diff --git a/src/node/src/validator/accept_block.rs b/src/node/src/validator/accept_block.rs index e80dde3..4951a46 100644 --- a/src/node/src/validator/accept_block.rs +++ b/src/node/src/validator/accept_block.rs @@ -12,7 +12,7 @@ use crate::{ block::{construct_and_check_prev_stuff, 
BlockStuff}, block_proof::BlockProofStuff, engine_traits::EngineOperations, - full_node::apply_block::calc_shard_state, + full_node::apply_block::store_state_update, shard_state::ShardStateStuff, types::top_block_descr::TopBlockDescrStuff, validating_utils::{fmt_block_id_short, simplex_to_sign_checked, UNREGISTERED_CHAIN_MAX_LEN}, @@ -295,8 +295,7 @@ pub async fn accept_block_routine( } log::debug!(target: "validator", "({}): accept_block: calculating shard state", block_descr); - let _ss = - calc_shard_state(&handle, &block, &(prev[0].clone(), prev.get(1).cloned()), engine).await?; + store_state_update(&handle, &block, &(prev[0].clone(), prev.get(1).cloned()), engine).await?; // Create proof using variant-aware function if Simplex variant provided let (proof, signatures_out) = match signatures_variant { diff --git a/src/node/storage/Cargo.toml b/src/node/storage/Cargo.toml index 5b263b6..8119d15 100644 --- a/src/node/storage/Cargo.toml +++ b/src/node/storage/Cargo.toml @@ -23,6 +23,7 @@ rocksdb = '0.23' serde = '1.0' serde_cbor = '0.11' serde_derive = '1.0' +serde_json = '1.0' smallvec = { features = [ 'const_new', 'union', 'write' ], version = '1.10' } strum = '0.18' strum_macros = '0.18' @@ -38,6 +39,7 @@ ton_api = { path = '../../tl/ton_api' } cc = { features = [ 'parallel' ], version = '1.0.61' } [dev-dependencies] +tempfile = '3' zip = '2.2' [features] diff --git a/src/node/storage/src/archive_shardstate_db.rs b/src/node/storage/src/archive_shardstate_db.rs new file mode 100644 index 0000000..c1362ac --- /dev/null +++ b/src/node/storage/src/archive_shardstate_db.rs @@ -0,0 +1,127 @@ +/* + * Copyright (C) 2025-2026 RSquad Blockchain Lab. + * + * Licensed under the GNU General Public License v3.0. + * See the LICENSE file in the root of this repository. + * + * This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND. 
+ */ +#[cfg(feature = "telemetry")] +use crate::StorageTelemetry; +use crate::{ + cell_db::CellByHashStorageAdapter, + db::rocksdb::{RocksDb, RocksDbTable}, + dynamic_boc_archive_db::DynamicBocArchiveDb, + shardstate_db_async::{CellsDbConfig, DbEntry}, + traits::Serializable, + StorageAlloc, TARGET, +}; +use std::{path::Path, sync::Arc}; +use ton_block::{BlockIdExt, Cell, CellsFactory, CellsStorage, Result, UInt256, UnixTime}; + +pub struct ArchiveShardStateDb { + index: Arc>, + boc_db: Arc, +} + +impl ArchiveShardStateDb { + #[allow(clippy::too_many_arguments)] + pub fn new( + db: Arc, + index_cf: &str, + cells_cf: &str, + db_root_path: impl AsRef, + config: &CellsDbConfig, + #[cfg(feature = "telemetry")] telemetry: Arc, + allocated: Arc, + ) -> Result { + let boc_db = Arc::new(DynamicBocArchiveDb::with_db( + db.clone(), + cells_cf, + db_root_path.as_ref(), + config, + #[cfg(feature = "telemetry")] + telemetry, + allocated, + )?); + let index = Arc::new(RocksDbTable::with_db(db, index_cf, true)?); + Ok(Self { index, boc_db }) + } + + pub fn put(&self, id: &BlockIdExt, state_root: Cell) -> Result { + let cell_id = state_root.repr_hash(); + log::debug!( + target: TARGET, + "ArchiveShardStateDb::put id {} root_cell_id {:x}", id, cell_id + ); + + if self.index.contains(id)? 
{ + log::debug!( + target: TARGET, + "ArchiveShardStateDb::put ALREADY EXISTS id {}", id + ); + let data = self.index.get(id)?; + let db_entry = DbEntry::deserialize(&data)?; + return self.boc_db.cell_db().load_cell(&db_entry.cell_id, false); + } + + let saved = self.boc_db.save_boc(state_root, &|| Ok(()))?; + let save_utime = UnixTime::now(); + let db_entry = DbEntry::with_params(id.clone(), cell_id, save_utime); + self.index.put(id, &db_entry.serialize())?; + Ok(saved) + } + + pub fn put_update(&self, id: &BlockIdExt, state_root: Cell) -> Result<()> { + let state_root = state_root.virtualize(1); + let cell_id = state_root.repr_hash(); + log::debug!( + target: TARGET, + "ArchiveShardStateDb::put_update id {} root_cell_id {:x}", id, cell_id + ); + + if self.index.contains(id)? { + log::info!( + target: TARGET, + "ArchiveShardStateDb::put_update ALREADY EXISTS id {}", id + ); + return Ok(()); + } + + self.boc_db.save_update(state_root)?; + let save_utime = UnixTime::now(); + let db_entry = DbEntry::with_params(id.clone(), cell_id, save_utime); + self.index.put(id, &db_entry.serialize())?; + Ok(()) + } + + pub fn get(&self, id: &BlockIdExt) -> Result { + let data = self.index.get(id)?; + let db_entry = DbEntry::deserialize(&data)?; + log::debug!( + target: TARGET, + "ArchiveShardStateDb::get id {} cell_id {:x}", id, db_entry.cell_id + ); + self.boc_db.cell_db().load_cell(&db_entry.cell_id, false) + } + + pub fn get_cell(&self, id: &UInt256) -> Result { + self.boc_db.cell_db().load_cell(id, false) + } + + pub fn contains(&self, id: &BlockIdExt) -> Result { + self.index.contains(id) + } + + pub fn cells_factory(&self) -> Arc { + self.boc_db.cell_db().clone() as Arc + } + + pub fn create_hashed_cell_storage( + &self, + root: Option<&Cell>, + max_inmemory_cells: usize, + ) -> Result { + CellByHashStorageAdapter::new(self.boc_db.cell_db().clone(), root, max_inmemory_cells) + } +} diff --git a/src/node/storage/src/archives/archive_manager.rs 
b/src/node/storage/src/archives/archive_manager.rs
index c54ed3a..1910fa4 100644
--- a/src/node/storage/src/archives/archive_manager.rs
+++ b/src/node/storage/src/archives/archive_manager.rs
@@ -13,11 +13,13 @@ use crate::StorageTelemetry;
 use crate::{
     archives::{
         archive_slice::ArchiveSlice,
+        db_provider::ArchiveDbProvider,
         file_maps::{BlockRanges, FileDescription, FileMaps},
         get_mc_seq_no,
+        package::PKG_HEADER_SIZE,
         package_entry::PackageEntry,
         package_entry_id::{parse_short_filename, GetFileName, PackageEntryId},
-        package_id::PackageId,
+        package_id::{PackageId, PackageType},
         ARCHIVE_SLICE_SIZE, KEY_ARCHIVE_PACKAGE_SIZE,
     },
     block_handle_db::BlockHandle,
@@ -28,7 +30,7 @@ use std::{
     borrow::Borrow,
     hash::Hash,
     io::ErrorKind,
-    path::PathBuf,
+    path::{Path, PathBuf},
     sync::{
         atomic::{AtomicU8, Ordering},
         Arc,
@@ -38,12 +40,32 @@ use std::{
 use tokio::io::AsyncWriteExt;
 use ton_block::{error, fail, AccountIdPrefixFull, BlockIdExt, Result, ShardIdent, MASTERCHAIN_ID};
+/// Metadata about a block being imported into the archive.
+///
+/// The import path forwards these values to the slice's block index (`put_raw`) and to the
+/// file descriptor's block ranges (`update_block_ranges_raw`) in place of a `BlockHandle`.
+pub struct ImportBlockMeta {
+    // Block sequence number within `shard`.
+    pub seq_no: u32,
+    pub shard: ShardIdent,
+    pub gen_utime: u32,
+    pub end_lt: u64,
+    // Stored in the block index value next to the entry offset.
+    pub mc_ref_seq_no: u32,
+}
+
+/// A single entry from a .pack file being imported.
+pub struct ImportEntry {
+    pub entry_id: PackageEntryId,
+    /// Offset of the entry, written to the offsets table as is; for Block entries it is
+    /// also stored in the block index, which only accepts values that fit in `u32`.
+    pub offset: u64,
+    /// Metadata for Block entries. Must be Some for PackageEntryId::Block,
+    /// None for Proof/ProofLink.
+    pub block_meta: Option,
+}
+
 pub struct ArchiveManager {
     db: Arc,
     db_root_path: Arc,
+    // Supplies the RocksDb handle and root path for `PackageType::Blocks` slices.
+    db_provider: Arc,
     file_maps: FileMaps,
     shard_split_depth: Arc,
     unapplied_files_path: PathBuf,
+    // Held while a missing file descriptor is re-checked and created.
+    create_slice_mutex: tokio::sync::Mutex<()>,
     #[cfg(feature = "telemetry")]
     telemetry: Arc,
     allocated: Arc,
@@ -55,6 +77,7 @@ impl ArchiveManager {
     pub async fn with_data(
         db: Arc,
         db_root_path: Arc,
+        db_provider: Arc,
         last_unneeded_key_block: u32,
         shard_split_depth: Arc,
         #[cfg(feature = "telemetry")] telemetry: Arc,
@@ -63,6 +86,7 @@ impl ArchiveManager {
         let file_maps = FileMaps::new(
             db.clone(),
             &db_root_path,
+            &db_provider,
             last_unneeded_key_block,
             #[cfg(feature = "telemetry")]
             &telemetry,
@@ -76,9 +100,11 @@ impl ArchiveManager {
         let ret = Self {
             db,
             db_root_path,
+            db_provider,
             file_maps,
             shard_split_depth,
             unapplied_files_path,
+            create_slice_mutex: tokio::sync::Mutex::new(()),
             #[cfg(feature = "telemetry")]
             telemetry,
             allocated,
@@ -373,14 +399,14 @@ impl ArchiveManager {
             }
             Ok(read) => read,
         };
-        let data = self.move_file_to_archive(data, handle, entry_id, false).await?;
+        let data = self.add_block_data_to_package(data, handle, entry_id, false).await?;
         if handle.is_key_block()? {
-            self.move_file_to_archive(data, handle, entry_id, true).await?;
+            self.add_block_data_to_package(data, handle, entry_id, true).await?;
         }
         Ok(Some(filename))
     }
-    async fn move_file_to_archive + Hash>(
+    pub async fn add_block_data_to_package + Hash>(
         &self,
         data: Vec,
         handle: &BlockHandle,
@@ -404,8 +430,10 @@ impl ArchiveManager {
             .get_file_desc(&package_id, true)
             .await?
.ok_or_else(|| error!("Expected some value for {package_id:?}"))?;
-        if !key_archive && fd.update_block_ranges(handle) {
-            self.file_maps.files().update(fd.id().id(), &fd).await?;
+        if fd.update_block_ranges(handle) {
+            let file_map =
+                if key_archive { self.file_maps.key_files() } else { self.file_maps.files() };
+            file_map.update(fd.id().id(), &fd).await?;
         }
         fd.archive_slice().add_file(handle, entry_id, data).await
     }
@@ -436,7 +464,6 @@ impl ArchiveManager {
         id: &PackageId,
         force_create: bool,
     ) -> Result>> {
-        // TODO: Rewrite logics in order to handle multithreaded adding of packages
         if let Some(fd) = self.file_maps.get(id.package_type()).get(id.id()).await {
             if fd.deleted() {
                 return Ok(None);
@@ -444,6 +471,13 @@ impl ArchiveManager {
             return Ok(Some(fd));
         }
         if force_create {
+            let _guard = self.create_slice_mutex.lock().await;
+            if let Some(fd) = self.file_maps.get(id.package_type()).get(id.id()).await {
+                if fd.deleted() {
+                    return Ok(None);
+                }
+                return Ok(Some(fd));
+            }
             Ok(Some(self.add_file_desc(id).await?))
         } else {
             Ok(None)
@@ -451,14 +485,17 @@ impl ArchiveManager {
     }
     async fn add_file_desc(&self, id: &PackageId) -> Result> {
-        // TODO: Rewrite logics in order to handle multithreaded adding of packages
         let file_map = self.file_maps.get(id.package_type());
         assert!(file_map.get(id.id()).await.is_none());
-        let dir = self.db_root_path.join(id.path());
+        let (slice_db, slice_root_path) = match id.package_type() {
+            PackageType::KeyBlocks => (self.db.clone(), Arc::clone(&self.db_root_path)),
+            PackageType::Blocks => self.db_provider.db_for_archive(id.id()).await?,
+        };
+        let dir = slice_root_path.join(id.path());
         tokio::fs::create_dir_all(&dir).await?;
         let archive_slice = ArchiveSlice::new_empty(
-            self.db.clone(),
-            Arc::clone(&self.db_root_path),
+            slice_db,
+            slice_root_path,
             id.id(),
             id.package_type(),
             self.shard_split_depth.load(Ordering::Relaxed),
@@ -522,6 +559,137 @@ impl ArchiveManager {
         }
     }
+    /// Imports one existing `.pack` file into the archive and indexes its entries.
+    ///
+    /// The file at `source_path` is moved (`move_file == true`) or copied to the package
+    /// path derived from `archive_id` and `shard`, replacing a file that is already there.
+    /// `entries` are then registered with the owning slice, and the block ranges of the
+    /// file descriptor are extended with every entry that carries block metadata.
+    ///
+    /// # Errors
+    /// Fails if the source cannot be read or is shorter than a package header, if the
+    /// target cannot be replaced, or if registering the entries fails. The source is
+    /// checked before the target is touched, so a bad source never removes an existing
+    /// package file.
+    pub async fn import_package(
+        &self,
+        source_path: &Path,
+        archive_id: u32,
+        shard: &ShardIdent,
+        entries: &[ImportEntry],
+        move_file: bool,
+        contains_key_block: bool,
+    ) -> Result<()> {
+        // Validate the source first: the existing target is deleted below, and neither a
+        // move nor a copy changes the length, so it is safe to measure it up front.
+        let file_len = tokio::fs::metadata(source_path)
+            .await
+            .map_err(|e| error!("Cannot stat package file {}: {}", source_path.display(), e))?
+            .len();
+        let file_size = file_len.checked_sub(PKG_HEADER_SIZE as u64).ok_or_else(|| {
+            error!("Package file {} is too short ({} bytes)", source_path.display(), file_len)
+        })?;
+
+        let slice_id = self.get_package_id_force(archive_id, false, contains_key_block).await;
+        let fd = self.get_or_create_import_desc(&slice_id, shard.prefix_len()).await?;
+
+        let pkg_id = PackageId::for_block(archive_id);
+        let target_path = pkg_id.full_path(fd.archive_slice().db_root_path(), shard)?;
+
+        // Remove a previous file without a separate `exists()` probe, which would block the
+        // executor and race with the removal. A missing file just means the directory may
+        // have to be created.
+        match tokio::fs::remove_file(&target_path).await {
+            Ok(()) => {}
+            Err(e) if e.kind() == ErrorKind::NotFound => {
+                if let Some(parent) = target_path.parent() {
+                    tokio::fs::create_dir_all(parent).await?;
+                }
+            }
+            Err(e) => fail!("Failed to remove existing file {}: {}", target_path.display(), e),
+        }
+
+        if move_file {
+            tokio::fs::rename(source_path, &target_path).await.map_err(|e| {
+                error!(
+                    "Failed to move {} to {}: {}",
+                    source_path.display(),
+                    target_path.display(),
+                    e
+                )
+            })?;
+        } else {
+            tokio::fs::copy(source_path, &target_path).await.map_err(|e| {
+                error!(
+                    "Failed to copy {} to {}: {}",
+                    source_path.display(),
+                    target_path.display(),
+                    e
+                )
+            })?;
+        }
+
+        fd.archive_slice().import_package_entries(archive_id, shard, file_size, entries).await?;
+
+        // Every entry must be applied, so accumulate with `|=` instead of short-circuiting
+        let mut ranges_updated = false;
+        for meta in entries.iter().filter_map(|entry| entry.block_meta.as_ref()) {
+            ranges_updated |= fd.update_block_ranges_raw(
+                &meta.shard,
+                meta.seq_no,
+                meta.gen_utime,
+                meta.end_lt,
+            );
+        }
+        if ranges_updated {
+            // Persist through the map the descriptor was registered in
+            let file_map = self.file_maps.get(fd.id().package_type());
+            file_map.update(fd.id().id(), &fd).await?;
+        }
+
+        Ok(())
+    }
+
+    /// Returns the live file descriptor for `id`, creating an import slice for it when
+    /// there is none or the registered one is marked deleted.
+    async fn get_or_create_import_desc(
+        &self,
+        id: &PackageId,
+        shard_split_depth: u8,
+    ) -> Result> {
+        if let Some(fd) = self.file_maps.get(id.package_type()).get(id.id()).await {
+            if !fd.deleted() {
+                return Ok(fd);
+            }
+        }
+        // Creation is serialized by the mutex; look again once it is held, because another
+        // caller may have created the descriptor in the meantime.
+        let _guard = self.create_slice_mutex.lock().await;
+        if let Some(fd) = self.file_maps.get(id.package_type()).get(id.id()).await {
+            if !fd.deleted() {
+                return Ok(fd);
+            }
+        }
+        self.add_file_desc_for_import(id, shard_split_depth).await
+    }
+
+    /// Creates an import slice for `id` and registers its file descriptor in the file map
+    /// of the id's package type.
+    ///
+    /// Key-block slices use the manager's own database and root path; block slices use
+    /// whatever `db_provider` returns for the archive id.
+    async fn add_file_desc_for_import(
+        &self,
+        id: &PackageId,
+        shard_split_depth: u8,
+    ) -> Result> {
+        let file_map = self.file_maps.get(id.package_type());
+        let (slice_db, slice_root_path) = match id.package_type() {
+            PackageType::KeyBlocks => (self.db.clone(), Arc::clone(&self.db_root_path)),
+            PackageType::Blocks => self.db_provider.db_for_archive(id.id()).await?,
+        };
+        let dir = slice_root_path.join(id.path());
+        tokio::fs::create_dir_all(&dir).await?;
+        let archive_slice = ArchiveSlice::new_for_import(
+            slice_db,
+            slice_root_path,
+            id.id(),
+            id.package_type(),
+            shard_split_depth,
+            #[cfg(feature = "telemetry")]
+            self.telemetry.clone(),
+            self.allocated.clone(),
+        )
+        .await?;
+        // The descriptor starts with an empty block-range map; ranges are filled in by the
+        // import itself.
+        let fd = Arc::new(FileDescription::with_data(
+            id.clone(),
+            archive_slice,
+            false,
+            lockfree::map::Map::new(),
+        ));
+        file_map
+            .put(
+                id.id(),
+                Arc::clone(&fd),
+                #[cfg(feature = "telemetry")]
+                &self.telemetry,
+                &self.allocated,
+            )
+            .await?;
+        Ok(fd)
+    }
+
     pub async fn trunc bool>(
         &self,
         block_id: &BlockIdExt,
@@ -568,6 +736,31 @@ impl ArchiveManager {
         Ok(())
     }
+    /// Largest masterchain seqno recorded in the block ranges of the descriptor that
+    /// `files().get_closest(u32::MAX)` returns; `None` if there is no such descriptor or it
+    /// has no masterchain range.
+    pub async fn get_max_mc_seqno(&self) -> Option {
+        let fd = self.file_maps.files().get_closest(u32::MAX).await?;
+        let guard = fd.blocks_ranges().get(&ShardIdent::masterchain())?;
+        Some(guard.val().max_seqno.load(Ordering::Relaxed))
+    }
+
+    /// Same as `get_max_mc_seqno`, but looks in the key-block file map.
+    pub async fn get_max_key_block_seqno(&self) -> Option {
+        let fd = self.file_maps.key_files().get_closest(u32::MAX).await?;
+        let guard = fd.blocks_ranges().get(&ShardIdent::masterchain())?;
+        Some(guard.val().max_seqno.load(Ordering::Relaxed))
+    }
+
+    /// Finds the file descriptor whose block ranges cover `seqno` for `prefix` and asks its
+    /// slice for the proof; `Ok(None)` if no descriptor matches.
+    pub async fn lookup_proof_by_seqno(
+        &self,
+        prefix: &AccountIdPrefixFull,
+        seqno: u32,
+    ) -> Result)>> {
+        if let Some(fd) =
+            self.lookup_file_descr_by(prefix, &mut |br| br.compare_seqno(&seqno)).await
+        {
+            return fd.archive_slice().lookup_proof_by_seqno(prefix, seqno).await;
+        }
+        Ok(None)
+    }
+
     async fn lookup_file_descr_by(
         &self,
         prefix: &AccountIdPrefixFull,
diff --git a/src/node/storage/src/archives/archive_slice.rs b/src/node/storage/src/archives/archive_slice.rs
index 6bb346e..f49b21e 100644
--- a/src/node/storage/src/archives/archive_slice.rs
+++ b/src/node/storage/src/archives/archive_slice.rs
@@ -12,7 +12,7 @@ use crate::StorageTelemetry;
 use crate::{
     archives::{
-        archive_manager::ArchiveManager,
+        archive_manager::{ArchiveManager, ImportEntry},
         block_index_db::{BlockIndexDb, LookupResult},
         get_mc_seq_no,
         package::{read_package_from, Package},
@@ -161,6 +161,86 @@ impl ArchiveSlice {
         Ok(ret)
     }
+    /// Create a new archive slice for importing existing .pack files.
+    /// Unlike `new_empty()`, this does not create an initial package file.
+    /// Packages are registered later via `import_package_entries()`.
+    pub async fn new_for_import(
+        db: Arc,
+        db_root_path: Arc,
+        archive_id: u32,
+        package_type: PackageType,
+        shard_split_depth: u8,
+        #[cfg(feature = "telemetry")] telemetry: Arc,
+        allocated: Arc,
+    ) -> Result {
+        let mut ret = Self::create(
+            db,
+            db_root_path,
+            archive_id,
+            package_type,
+            true, // finalized: prevents truncation when opening packages
+            true, // create_if_not_exist
+            shard_split_depth,
+            #[cfg(feature = "telemetry")]
+            telemetry,
+            allocated,
+        )
+        .await?;
+        // Write the initial status of the (still empty) slice in a single transaction
+        let mut transaction = ret.package_status_db.begin_transaction()?;
+        if ret.sliced_mode {
+            ret.shard_separated = true;
+            transaction.put(&PackageStatusKey::SlicedMode, &true.serialize())?;
+            transaction.put(&PackageStatusKey::TotalSlices, &0u32.serialize())?;
+            transaction.put(&PackageStatusKey::SliceSize, &ret.slice_size.serialize())?;
+            transaction
+                .put(&PackageStatusKey::ShardSplitDepth, &ret.shard_split_depth.serialize())?;
+        } else {
+            transaction.put(&PackageStatusKey::SlicedMode, &false.serialize())?;
+            transaction.put(&PackageStatusKey::NonSlicedSize, &0u64.serialize())?;
+        }
+        transaction.commit()?;
+        Ok(ret)
+    }
+
+    /// Registers an imported package file with this slice and indexes its entries.
+    ///
+    /// The package identified by `package_archive_id` and `shard` is added with size
+    /// `file_size` unless it is already known. Then the offset of every entry is stored,
+    /// and every Block entry is additionally written to the block index.
+    ///
+    /// # Errors
+    /// The whole batch is validated before anything is written. It is rejected if a Block
+    /// entry has no `block_meta` or if the offset of a Block entry does not fit in `u32`.
+    /// Database errors are propagated as they occur.
+    pub async fn import_package_entries(
+        &self,
+        package_archive_id: u32,
+        shard: &ShardIdent,
+        file_size: u64,
+        entries: &[ImportEntry],
+    ) -> Result<()> {
+        // First pass: reject malformed input up front, so that a bad entry cannot leave the
+        // package registered with a partially written index.
+        let mut block_offsets = Vec::with_capacity(entries.len());
+        for import_entry in entries {
+            let block_offset = match (&import_entry.entry_id, &import_entry.block_meta) {
+                (PackageEntryId::Block(_), Some(_)) => {
+                    Some(u32::try_from(import_entry.offset).map_err(|_| {
+                        error!("entry offset {} exceeds u32 range", import_entry.offset)
+                    })?)
+                }
+                // Skipping such an entry would leave the block out of the block index
+                (PackageEntryId::Block(_), None) => {
+                    fail!("block entry at offset {} has no block metadata", import_entry.offset)
+                }
+                _ => None,
+            };
+            block_offsets.push(block_offset);
+        }
+
+        let entry = PackageEntryInfo { seqno: package_archive_id, shard: shard.clone() };
+
+        if self.package_store.get(&entry).is_none() {
+            self.add_package(entry, file_size).await?;
+        }
+
+        // Second pass: the actual writes
+        for (import_entry, block_offset) in entries.iter().zip(block_offsets) {
+            let offset_key = (&import_entry.entry_id).into();
+            self.offsets_db.put_value(&offset_key, &import_entry.offset)?;
+            if let (Some(bm), Some(offset)) = (&import_entry.block_meta, block_offset) {
+                self.block_index_db.put_raw(
+                    &bm.shard,
+                    bm.seq_no,
+                    bm.end_lt,
+                    bm.gen_utime,
+                    bm.mc_ref_seq_no,
+                    offset,
+                )?;
+            }
+        }
+
+        Ok(())
+    }
+
+    pub fn
db_root_path(&self) -> &std::path::Path {
+        self.db_root_path.as_path()
+    }
+
     #[allow(clippy::too_many_arguments)]
     pub async fn with_data(
         db: Arc,
@@ -344,6 +424,58 @@ impl ArchiveSlice {
         }
     }
+    /// Appends a package described by `entry` with initial size `size` to this slice.
+    ///
+    /// The top bit of `package_count` serves as a lock: it is set while one caller creates
+    /// the package and writes its metadata, and cleared together with publishing the new
+    /// count. Callers that find the bit set yield and retry. If `new_package` reports that
+    /// nothing was created, the count is left unchanged and the call still succeeds.
+    async fn add_package(&self, entry: PackageEntryInfo, size: u64) -> Result<()> {
+        let try_add_package = async |package_count, entry: &PackageEntryInfo| {
+            if self
+                .new_package(entry.clone(), Some(package_count), size, DEFAULT_PKG_VERSION)
+                .await?
+            {
+                let info = if self.shard_separated { Some(entry) } else { None };
+                self.entry_db.put_value(
+                    &package_count.into(),
+                    &PackageEntryMeta::with_data(size, DEFAULT_PKG_VERSION, info),
+                )?;
+                self.package_status_db
+                    .put_value(&PackageStatusKey::TotalSlices, &(package_count + 1))?;
+                Ok(true)
+            } else {
+                Ok(false)
+            }
+        };
+        loop {
+            const BUSY: u32 = 0x80000000;
+            // Acquire pairs with the Release below: whoever takes the lock next observes
+            // everything the previous holder did while it held the BUSY bit.
+            let package_count = self.package_count.fetch_or(BUSY, Ordering::Acquire);
+            if (package_count & BUSY) != 0 {
+                tokio::task::yield_now().await;
+                continue;
+            }
+            let result = try_add_package(package_count, &entry).await;
+            let new_count = match &result {
+                Err(_) | Ok(false) => package_count,
+                Ok(true) => package_count + 1,
+            };
+            // Clears BUSY and publishes the new count in one step
+            if self
+                .package_count
+                .compare_exchange(
+                    package_count | BUSY,
+                    new_count,
+                    Ordering::Release,
+                    Ordering::Relaxed,
+                )
+                .is_err()
+                && result.is_ok()
+            {
+                tokio::task::yield_now().await;
+                continue;
+            }
+            if let Err(e) = result {
+                break Err(e);
+            } else {
+                break Ok(());
+            }
+        }
+    }
+
     pub async fn add_file + Hash>(
         &self,
         block_handle: &BlockHandle,
@@ -372,55 +504,7 @@ impl ArchiveSlice {
                             mc_seq_no - (mc_seq_no - self.archive_id) / self.slice_size
                         )
                     }
-                    let try_add_package = async |package_count, entry: &PackageEntryInfo| {
-                        if self
-                            .new_package(entry.clone(), Some(package_count), 0, DEFAULT_PKG_VERSION)
-                            .await?
- { - let info = if self.shard_separated { Some(entry) } else { None }; - self.entry_db.put_value( - &package_count.into(), - &PackageEntryMeta::with_data(0, DEFAULT_PKG_VERSION, info), - )?; - self.package_status_db - .put_value(&PackageStatusKey::TotalSlices, &(package_count + 1))?; - Ok(true) - } else { - Ok(false) - } - }; - loop { - const BUSY: u32 = 0x80000000; - let package_count = self.package_count.fetch_or(BUSY, Ordering::Relaxed); - if (package_count & BUSY) != 0 { - tokio::task::yield_now().await; - continue; - } - let result = try_add_package(package_count, &entry).await; - let new_count = match &result { - Err(_) | Ok(false) => package_count, - Ok(true) => package_count + 1, - }; - if self - .package_count - .compare_exchange( - package_count | BUSY, - new_count, - Ordering::Relaxed, - Ordering::Relaxed, - ) - .is_err() - && result.is_ok() - { - tokio::task::yield_now().await; - continue; - } - if let Err(e) = result { - return Err(e); - } else { - break; - } - } + self.add_package(entry, 0).await?; } } }; @@ -451,14 +535,23 @@ impl ArchiveSlice { &self, block_handle: &BlockHandle, entry_id: &PackageEntryId, + ) -> Result> { + let mc_seq_no = get_mc_seq_no(block_handle); + let shard = block_handle.id().shard(); + self.get_file_raw(mc_seq_no, &shard, entry_id).await + } + + async fn get_file_raw + Hash>( + &self, + mc_seq_no: u32, + shard: &ShardIdent, + entry_id: &PackageEntryId, ) -> Result> { let offset_key = entry_id.into(); let offset = match self.offsets_db.try_get_value(&offset_key)? { Some(offset) => offset, None => return Ok(None), }; - let mc_seq_no = get_mc_seq_no(block_handle); - let shard = block_handle.id().shard(); let package_info = match self.choose_package(mc_seq_no, shard).await? 
{
             ChosenPackage::Info(info) => info,
             ChosenPackage::Slot(_) => {
@@ -569,6 +662,31 @@ impl ArchiveSlice {
         self.get_block_by_lookup_result(lr).await
     }
+    /// Looks up the block with `seqno` for `prefix` in the block index and returns its id
+    /// together with the raw proof data stored for it.
+    ///
+    /// Returns `Ok(None)` if the index has no such block, if the block itself cannot be
+    /// resolved, or if no proof entry is stored for it.
+    pub async fn lookup_proof_by_seqno(
+        &self,
+        prefix: &AccountIdPrefixFull,
+        seqno: u32,
+    ) -> Result)>> {
+        let Some(lr) = self.block_index_db.lookup_by_seqno(prefix, seqno)? else {
+            return Ok(None);
+        };
+        // Read before `lr` is consumed by the block lookup; needed to pick the package
+        let mc_seq_no = lr.mc_ref;
+        let Some((block_id, _)) = self.get_block_by_lookup_result(lr).await? else {
+            return Ok(None);
+        };
+
+        // Masterchain blocks store proofs under `Proof`, shard blocks under `ProofLink`.
+        let entry_id = if block_id.shard().is_masterchain() {
+            PackageEntryId::Proof(block_id.clone())
+        } else {
+            PackageEntryId::ProofLink(block_id.clone())
+        };
+
+        self.get_file_raw(mc_seq_no, block_id.shard(), &entry_id)
+            .await
+            .map(|opt_entry| opt_entry.map(|entry| (block_id, entry.take_data())))
+    }
+
     pub async fn lookup_block_by_lt(
         &self,
         prefix: &AccountIdPrefixFull,
@@ -928,7 +1046,7 @@ impl ArchiveSlice {
             .map_err(|e| error!("Cannot create directory {} : {e}", parent.display()))?;
         if add_unbound_object_to_map(&self.package_store, entry.clone(), || Ok(OnceLock::new()))? {
             let create_package = async || {
-                let package = match Package::open(path.clone(), false, true).await {
+                let package = match Package::open(path.clone(), self.finalized, true).await {
                     Ok(p) => p,
                     Err(e) => match tokio::fs::remove_file(path.as_path()).await {
                         Ok(_) => fail!(
diff --git a/src/node/storage/src/archives/block_index_db.rs b/src/node/storage/src/archives/block_index_db.rs
index e87fcee..6493868 100644
--- a/src/node/storage/src/archives/block_index_db.rs
+++ b/src/node/storage/src/archives/block_index_db.rs
@@ -164,25 +164,40 @@ impl BlockIndexDb {
             offset,
             block.masterchain_ref_seq_no()
         );
+        self.put_raw(
+            block.id().shard(),
+            block.id().seq_no(),
+            block.end_lt(),
+            block.gen_utime(),
+            block.masterchain_ref_seq_no(),
+            offset,
+        )
+    }
+    /// Write block index entries from raw values (for archive import).
+    ///
+    /// The same value, built from `mc_ref_seq_no` and `offset`, is stored under three keys
+    /// for `shard`: by `end_lt`, by `seq_no`, and by `gen_utime` combined with `seq_no`.
+    pub fn put_raw(
+        &self,
+        shard: &ShardIdent,
+        seq_no: u32,
+        end_lt: u64,
+        gen_utime: u32,
+        mc_ref_seq_no: u32,
+        offset: u32,
+    ) -> Result<()> {
         let cf = self.cf()?;
-        let value = Self::serialize_value(block.masterchain_ref_seq_no(), offset);
+        let value = Self::serialize_value(mc_ref_seq_no, offset);
         let mut transaction = rocksdb::WriteBatch::default();
-        let key = BlocksIndexKey::key_with_lt(block.id().shard(), block.end_lt());
+        let key = BlocksIndexKey::key_with_lt(shard, end_lt);
         log::trace!("Putting key: {}", key);
         transaction.put_cf(&cf, &key, value);
-        let key = BlocksIndexKey::key_with_seqno(block.id().shard(), block.id().seq_no());
+        let key = BlocksIndexKey::key_with_seqno(shard, seq_no);
         log::trace!("Putting key: {}", key);
         transaction.put_cf(&cf, &key, value);
-        let key = BlocksIndexKey::key_with_utime(
-            block.id().shard(),
-            block.gen_utime(),
-            block.id().seq_no(),
-        );
+        let key = BlocksIndexKey::key_with_utime(shard, gen_utime, seq_no);
         log::trace!("Putting key: {}", key);
         transaction.put_cf(&cf, &key, value);
diff --git a/src/node/storage/src/archives/db_provider.rs b/src/node/storage/src/archives/db_provider.rs
new file mode 100644
index 0000000..b574192
--- /dev/null
+++ b/src/node/storage/src/archives/db_provider.rs
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2025-2026 RSquad Blockchain Lab.
+ *
+ * Licensed under the GNU General Public License v3.0.
+ * See the LICENSE file in the root of this repository.
+ *
+ * This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND.
+ */
+use super::epoch::EpochRouter;
+use crate::db::rocksdb::RocksDb;
+use std::{path::PathBuf, sync::Arc};
+use ton_block::Result;
+
+/// Abstracts over single-db and epoch-based db selection for archive slices.
+/// Provides the correct RocksDb instance and root path for a given archive_id.
+#[async_trait::async_trait]
+pub trait ArchiveDbProvider: Send + Sync {
+    /// Get the RocksDb instance and the root path for the archive slice, in that order.
+    async fn db_for_archive(&self, archive_id: u32) -> Result<(Arc, Arc)>;
+}
+
+/// Single shared RocksDb, single root path.
+/// Used when archival_mode is not configured.
+pub struct SingleDbProvider {
+    db: Arc,
+    db_root_path: Arc,
+}
+
+impl SingleDbProvider {
+    /// Wraps the given database handle and root path.
+    pub fn new(db: Arc, db_root_path: Arc) -> Self {
+        Self { db, db_root_path }
+    }
+}
+
+#[async_trait::async_trait]
+impl ArchiveDbProvider for SingleDbProvider {
+    /// Returns the same database and root path for every archive id.
+    async fn db_for_archive(&self, _archive_id: u32) -> Result<(Arc, Arc)> {
+        Ok((self.db.clone(), self.db_root_path.clone()))
+    }
+}
+
+/// Epoch-based provider: routes archive requests to the correct epoch's RocksDb and path.
+pub struct EpochDbProvider {
+    router: Arc,
+}
+
+impl EpochDbProvider {
+    /// Wraps the given epoch router.
+    pub fn new(router: Arc) -> Self {
+        Self { router }
+    }
+
+    /// The router this provider delegates to.
+    pub fn router(&self) -> &Arc {
+        &self.router
+    }
+}
+
+#[async_trait::async_trait]
+impl ArchiveDbProvider for EpochDbProvider {
+    /// Resolves the epoch that `archive_id` falls into. This is not a pure lookup: the
+    /// router creates the epoch if it does not exist yet.
+    async fn db_for_archive(&self, archive_id: u32) -> Result<(Arc, Arc)> {
+        let epoch_db = self.router.resolve_or_create(archive_id).await?;
+        Ok((epoch_db.db().clone(), epoch_db.path().clone()))
+    }
+}
diff --git a/src/node/storage/src/archives/epoch.rs b/src/node/storage/src/archives/epoch.rs
new file mode 100644
index 0000000..5251d28
--- /dev/null
+++ b/src/node/storage/src/archives/epoch.rs
@@ -0,0 +1,276 @@
+/*
+ * Copyright (C) 2025-2026 RSquad Blockchain Lab.
+ *
+ * Licensed under the GNU General Public License v3.0.
+ * See the LICENSE file in the root of this repository.
+ *
+ * This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND.
+ */
+use crate::{
+    archives::ARCHIVE_SLICE_SIZE,
+    db::rocksdb::{AccessType, RocksDb},
+    TARGET,
+};
+use std::{
+    path::{Path, PathBuf},
+    sync::Arc,
+};
+use ton_block::{error, fail, Result};
+
+const EPOCH_META_FILENAME: &str = "epoch_meta.json";
+
+/// Persisted metadata for an epoch directory
+#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
+pub(crate) struct EpochMeta {
+    pub mc_seq_no_start: u32,
+    pub mc_seq_no_end: u32,
+}
+
+/// Reads and parses `epoch_meta.json` from `epoch_path`.
+async fn read_epoch_meta(epoch_path: &Path) -> Result {
+    let meta_path = epoch_path.join(EPOCH_META_FILENAME);
+    let data = tokio::fs::read_to_string(&meta_path)
+        .await
+        .map_err(|e| error!("Cannot read {}: {}", meta_path.display(), e))?;
+    serde_json::from_str(&data).map_err(|e| error!("Cannot parse {}: {}", meta_path.display(), e))
+}
+
+/// Writes `meta` as pretty-printed JSON to `epoch_meta.json` in `epoch_path`.
+pub(crate) async fn write_epoch_meta(epoch_path: &Path, meta: &EpochMeta) -> Result<()> {
+    let meta_path = epoch_path.join(EPOCH_META_FILENAME);
+    let data = serde_json::to_string_pretty(meta)
+        .map_err(|e| error!("Cannot serialize epoch meta: {}", e))?;
+    tokio::fs::write(&meta_path, data.as_bytes())
+        .await
+        .map_err(|e| error!("Cannot write {}: {}", meta_path.display(), e))
+}
+
+/// Configuration for a single existing epoch directory
+#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
+pub struct EpochEntry {
+    pub path: PathBuf,
+}
+
+/// Archival mode configuration.
+/// When present, archives are split into epochs and GC is disabled.
+#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
+pub struct ArchivalModeConfig {
+    /// Number of MC blocks per epoch. Must be a positive multiple of ARCHIVE_SLICE_SIZE (20_000).
+    pub epoch_size: u32,
+    /// Path where new epoch directories will be created
+    pub new_epochs_path: PathBuf,
+    /// List of existing epoch directories, ordered by ascending MC seq_no.
+    #[serde(default)]
+    pub existing_epochs: Vec,
+}
+
+/// Runtime state for a single epoch
+pub struct Epoch {
+    mc_seq_no_start: u32,
+    mc_seq_no_end: u32,
+    path: Arc,
+    db: Arc,
+}
+
+impl Epoch {
+    /// First masterchain seq_no of the epoch.
+    pub fn mc_seq_no_start(&self) -> u32 {
+        self.mc_seq_no_start
+    }
+
+    /// Last masterchain seq_no of the epoch (inclusive).
+    pub fn mc_seq_no_end(&self) -> u32 {
+        self.mc_seq_no_end
+    }
+
+    /// Directory of the epoch.
+    pub fn path(&self) -> &Arc {
+        &self.path
+    }
+
+    /// Database opened in the epoch directory.
+    pub fn db(&self) -> &Arc {
+        &self.db
+    }
+}
+
+/// Routes mc_seq_no to the appropriate epoch's RocksDb and filesystem path.
+///
+/// All epochs must have the same size (`epoch_size`), which allows O(1) arithmetic lookup
+/// without any map search.
+pub struct EpochRouter {
+    epochs: lockfree::map::Map>,
+    epoch_size: u32,
+    new_epochs_path: PathBuf,
+    creation_mutex: tokio::sync::Mutex<()>,
+}
+
+impl EpochRouter {
+    /// Opens every epoch listed in `config.existing_epochs`, then every epoch directory
+    /// found directly under `config.new_epochs_path` that was not listed explicitly.
+    ///
+    /// # Errors
+    /// Fails if `epoch_size` is zero or not a multiple of `ARCHIVE_SLICE_SIZE`, if a listed
+    /// path does not exist, if epoch metadata is missing, malformed or inconsistent with
+    /// `epoch_size`, if two listed epochs start at the same seq_no, or if a database or
+    /// directory cannot be opened.
+    pub async fn new(config: &ArchivalModeConfig) -> Result {
+        if config.epoch_size == 0 || config.epoch_size % ARCHIVE_SLICE_SIZE != 0 {
+            fail!(
+                "epoch_size must be a positive multiple of ARCHIVE_SLICE_SIZE ({}), got {}",
+                ARCHIVE_SLICE_SIZE,
+                config.epoch_size
+            );
+        }
+
+        let epochs = lockfree::map::Map::new();
+
+        for (i, entry) in config.existing_epochs.iter().enumerate() {
+            if !entry.path.exists() {
+                fail!("Epoch {} path does not exist: {}", i, entry.path.display());
+            }
+
+            let meta = read_epoch_meta(&entry.path).await?;
+            Self::validate_epoch_meta(&meta, config.epoch_size, &entry.path)?;
+
+            let db = RocksDb::new(&entry.path, "archive_db", None, AccessType::ReadWrite)?;
+
+            log::info!(
+                target: TARGET,
+                "Opened epoch {}: mc_seq_no [{}, {}], path: {}",
+                i, meta.mc_seq_no_start, meta.mc_seq_no_end, entry.path.display()
+            );
+
+            // The map is keyed by the epoch start, so only one directory per range can be
+            // routed to. `insert` hands back the entry it replaced: treat that as a
+            // configuration error rather than silently dropping the earlier directory.
+            let replaced = epochs.insert(
+                meta.mc_seq_no_start,
+                Arc::new(Epoch {
+                    mc_seq_no_start: meta.mc_seq_no_start,
+                    mc_seq_no_end: meta.mc_seq_no_end,
+                    path: Arc::new(entry.path.clone()),
+                    db,
+                }),
+            );
+            if replaced.is_some() {
+                fail!(
+                    "Epoch {} at {} has the same mc_seq_no_start={} as an earlier epoch",
+                    i,
+                    entry.path.display(),
+                    meta.mc_seq_no_start
+                );
+            }
+        }
+
+        tokio::fs::create_dir_all(&config.new_epochs_path).await.map_err(|e| {
+            error!("Cannot create new_epochs_path {}: {}", config.new_epochs_path.display(), e)
+        })?;
+
+        // Discover epochs previously created in new_epochs_path (survive restarts)
+        let mut read_dir = tokio::fs::read_dir(&config.new_epochs_path).await.map_err(|e| {
+            error!("Cannot read new_epochs_path {}: {}", config.new_epochs_path.display(), e)
+        })?;
+        let mut discovered = Vec::new();
+        while let Some(entry) = read_dir
+            .next_entry()
+            .await
+            .map_err(|e| error!("Error reading new_epochs_path: {}", e))?
+        {
+            let epoch_path = entry.path();
+            if epoch_path.is_dir() && epoch_path.join(EPOCH_META_FILENAME).exists() {
+                discovered.push(epoch_path);
+            }
+        }
+
+        for epoch_path in discovered {
+            let meta = read_epoch_meta(&epoch_path).await?;
+            Self::validate_epoch_meta(&meta, config.epoch_size, &epoch_path)?;
+
+            // Skip if already loaded from existing_epochs
+            if epochs.get(&meta.mc_seq_no_start).is_some() {
+                continue;
+            }
+
+            let db = RocksDb::new(&epoch_path, "archive_db", None, AccessType::ReadWrite)?;
+
+            log::info!(
+                target: TARGET,
+                "Discovered epoch: mc_seq_no [{}, {}], path: {}",
+                meta.mc_seq_no_start, meta.mc_seq_no_end, epoch_path.display()
+            );
+
+            epochs.insert(
+                meta.mc_seq_no_start,
+                Arc::new(Epoch {
+                    mc_seq_no_start: meta.mc_seq_no_start,
+                    mc_seq_no_end: meta.mc_seq_no_end,
+                    path: Arc::new(epoch_path),
+                    db,
+                }),
+            );
+        }
+
+        Ok(Self {
+            epochs,
+            epoch_size: config.epoch_size,
+            new_epochs_path: config.new_epochs_path.clone(),
+            creation_mutex: tokio::sync::Mutex::new(()),
+        })
+    }
+
+    /// Returns the already opened epoch that contains `mc_seq_no`, if any. The key is the
+    /// epoch start, i.e. `mc_seq_no` rounded down to a multiple of `epoch_size`.
+    pub fn resolve(&self, mc_seq_no: u32) -> Option> {
+        let start = (mc_seq_no / self.epoch_size) * self.epoch_size;
+        self.epochs.get(&start).map(|g| Arc::clone(g.val()))
+    }
+
+    /// Resolve the epoch for a given mc_seq_no, creating a new one if needed.
+    ///
+    /// A new epoch gets the directory `epoch_<index>` under `new_epochs_path`; its metadata
+    /// file is written before the database is opened. The inclusive end of the range is
+    /// clamped to `u32::MAX`, so the last epoch of the seq_no space does not overflow.
+    pub async fn resolve_or_create(&self, mc_seq_no: u32) -> Result> {
+        if let Some(epoch) = self.resolve(mc_seq_no) {
+            return Ok(epoch);
+        }
+
+        // Serialize creation to prevent concurrent RocksDb::new() on the same path
+        let _creation_guard = self.creation_mutex.lock().await;
+
+        // Double-check after acquiring the mutex — another caller may have created the epoch
+        if let Some(epoch) = self.resolve(mc_seq_no) {
+            return Ok(epoch);
+        }
+
+        let epoch_index = mc_seq_no / self.epoch_size;
+        // Cannot overflow: the product is at most `mc_seq_no`
+        let start = epoch_index * self.epoch_size;
+        // `epoch_size` is validated to be non-zero in `new`; saturate because the last
+        // epoch may not fit a full `epoch_size` before `u32::MAX`.
+        let end = start.saturating_add(self.epoch_size - 1);
+
+        let epoch_dir = self.new_epochs_path.join(format!("epoch_{}", epoch_index));
+        tokio::fs::create_dir_all(&epoch_dir)
+            .await
+            .map_err(|e| error!("Cannot create epoch directory {}: {}", epoch_dir.display(), e))?;
+
+        let meta = EpochMeta { mc_seq_no_start: start, mc_seq_no_end: end };
+        write_epoch_meta(&epoch_dir, &meta).await?;
+
+        let db = RocksDb::new(&epoch_dir, "archive_db", None, AccessType::ReadWrite)?;
+
+        log::info!(
+            target: TARGET,
+            "Created new epoch {}: mc_seq_no [{}, {}], path: {}",
+            epoch_index, start, end, epoch_dir.display()
+        );
+
+        let epoch = Arc::new(Epoch {
+            mc_seq_no_start: start,
+            mc_seq_no_end: end,
+            path: Arc::new(epoch_dir),
+            db,
+        });
+        self.epochs.insert(start, Arc::clone(&epoch));
+
+        Ok(epoch)
+    }
+
+    /// Number of masterchain blocks per epoch.
+    pub fn epoch_size(&self) -> u32 {
+        self.epoch_size
+    }
+
+    /// Checks that `meta` describes an epoch aligned to `epoch_size` whose inclusive end is
+    /// `start + epoch_size - 1`, clamped to `u32::MAX` exactly as `resolve_or_create` does
+    /// when it writes the metadata. `path` is used in error messages only.
+    ///
+    /// `epoch_size` must be non-zero; `new` validates it before calling this.
+    fn validate_epoch_meta(meta: &EpochMeta, epoch_size: u32, path: &Path) -> Result<()> {
+        if meta.mc_seq_no_start % epoch_size != 0 {
+            fail!(
+                "Epoch at {} has mc_seq_no_start={} which is not aligned to epoch_size={}",
+                path.display(),
+                meta.mc_seq_no_start,
+                epoch_size
+            );
+        }
+        // The metadata comes from a file on disk, so the start may be anywhere in the u32
+        // range; a plain `+` could overflow here.
+        let expected_end = meta.mc_seq_no_start.saturating_add(epoch_size - 1);
+        if meta.mc_seq_no_end != expected_end {
+            fail!(
+                "Epoch at {} has mc_seq_no_end={} but expected {} for epoch_size={}",
+                path.display(),
+                meta.mc_seq_no_end,
+                expected_end,
+                epoch_size
+            );
+        }
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+#[path =
"../tests/test_epoch.rs"] +mod tests; diff --git a/src/node/storage/src/archives/file_maps.rs b/src/node/storage/src/archives/file_maps.rs index fd66f4f..0a0d4ad 100644 --- a/src/node/storage/src/archives/file_maps.rs +++ b/src/node/storage/src/archives/file_maps.rs @@ -14,6 +14,7 @@ use crate::StorageTelemetry; use crate::{ archives::{ archive_slice::ArchiveSlice, + db_provider::{ArchiveDbProvider, SingleDbProvider}, package_id::{PackageId, PackageType}, package_index_db::{PackageIndexDb, PackageIndexEntry}, }, @@ -35,6 +36,9 @@ use std::{ }; use ton_block::{error, fail, BlockIdExt, Result, ShardIdent, LT_ALIGN}; +pub const FILES_DB_NAME: &str = "files"; +pub const KEY_FILES_DB_NAME: &str = "key_files"; + #[derive(serde::Serialize, serde::Deserialize)] pub struct BlockRanges { pub min_seqno: AtomicU32, @@ -71,17 +75,6 @@ impl Clone for BlockRanges { } } impl BlockRanges { - pub fn new(handle: &BlockHandle) -> Self { - Self { - min_seqno: AtomicU32::new(handle.id().seq_no()), - max_seqno: AtomicU32::new(handle.id().seq_no()), - min_utime: AtomicU32::new(handle.gen_utime()), - max_utime: AtomicU32::new(handle.gen_utime()), - min_lt: AtomicU64::new(handle.end_lt()), - max_lt: AtomicU64::new(handle.end_lt()), - } - } - pub fn compare_seqno(&self, seqno: &u32) -> std::cmp::Ordering { let min_sn = self.min_seqno.load(Ordering::Relaxed); let max_sn = self.max_seqno.load(Ordering::Relaxed); @@ -150,6 +143,21 @@ impl FileDescription { } pub fn update_block_ranges(&self, handle: &BlockHandle) -> bool { + self.update_block_ranges_raw( + handle.id().shard(), + handle.id().seq_no(), + handle.gen_utime(), + handle.end_lt(), + ) + } + + pub fn update_block_ranges_raw( + &self, + shard: &ShardIdent, + seq_no: u32, + gen_utime: u32, + end_lt: u64, + ) -> bool { macro_rules! 
update_atomic { ($atomic:expr, $new:expr, $cmp_fn:expr) => {{ let mut prev = $atomic.load(Ordering::Relaxed); @@ -181,26 +189,27 @@ impl FileDescription { } let mut updated = false; - let _ = add_unbound_object_to_map_with_update( - &self.blocks_ranges, - handle.id().shard().clone(), - |prev| { - if let Some(prev) = prev { - let sn = handle.id().seq_no(); - updated |= update_min_32(&prev.min_seqno, sn); - updated |= update_max_32(&prev.max_seqno, sn); - let ut = handle.gen_utime(); - updated |= update_min_32(&prev.min_utime, ut); - updated |= update_max_32(&prev.max_utime, ut); - let lt = handle.end_lt(); - updated |= update_min_64(&prev.min_lt, lt - lt % LT_ALIGN); - updated |= update_max_64(&prev.max_lt, lt); - Ok(None) - } else { - Ok(Some(BlockRanges::new(handle))) - } - }, - ); + let _ = add_unbound_object_to_map_with_update(&self.blocks_ranges, shard.clone(), |prev| { + if let Some(prev) = prev { + updated |= update_min_32(&prev.min_seqno, seq_no); + updated |= update_max_32(&prev.max_seqno, seq_no); + updated |= update_min_32(&prev.min_utime, gen_utime); + updated |= update_max_32(&prev.max_utime, gen_utime); + updated |= update_min_64(&prev.min_lt, end_lt - end_lt % LT_ALIGN); + updated |= update_max_64(&prev.max_lt, end_lt); + Ok(None) + } else { + updated = true; + Ok(Some(BlockRanges { + min_seqno: AtomicU32::new(seq_no), + max_seqno: AtomicU32::new(seq_no), + min_utime: AtomicU32::new(gen_utime), + max_utime: AtomicU32::new(gen_utime), + min_lt: AtomicU64::new(end_lt - end_lt % LT_ALIGN), + max_lt: AtomicU64::new(end_lt), + })) + } + }); updated } @@ -241,15 +250,15 @@ pub struct FileMap { impl FileMap { pub async fn new( - db: Arc, - db_root_path: &Arc, + index_db: Arc, + db_provider: &Arc, path: impl ToString, package_type: PackageType, last_unneeded_key_block: u32, #[cfg(feature = "telemetry")] telemetry: &Arc, allocated: &Arc, ) -> Result { - let storage = PackageIndexDb::with_db(db.clone(), path, true)?; + let storage = 
PackageIndexDb::with_db(index_db, path, true)?; let mut index_pairs = Vec::new(); storage.for_each_deserialized(|key, value| { @@ -258,19 +267,21 @@ impl FileMap { })?; index_pairs.sort_by_key(|pair| pair.0); + let last = index_pairs.last().map(|pair| pair.0); let mut elements = Vec::new(); for (key, value) in index_pairs { let unneeded = key < last_unneeded_key_block; - let finalized = value.finalized(); + let finalized = value.finalized() && Some(key) != last; log::info!( target: TARGET, "Opening archive slice {}, finalized {}, unneeded {}", key, finalized, unneeded ); + let (slice_db, slice_root_path) = db_provider.db_for_archive(key).await?; let archive_slice = match ArchiveSlice::with_data( - db.clone(), - db_root_path.clone(), + slice_db, + slice_root_path, key, package_type, finalized, @@ -509,15 +520,18 @@ impl FileMaps { pub async fn new( db: Arc, db_root_path: &Arc, + db_provider: &Arc, last_unneeded_key_block: u32, #[cfg(feature = "telemetry")] telemetry: &Arc, allocated: &Arc, ) -> Result { + let key_db_provider: Arc = + Arc::new(SingleDbProvider::new(db.clone(), db_root_path.clone())); Ok(Self { files: FileMap::new( db.clone(), - db_root_path, - "files", + db_provider, + FILES_DB_NAME, PackageType::Blocks, last_unneeded_key_block, #[cfg(feature = "telemetry")] @@ -527,15 +541,15 @@ impl FileMaps { .await?, key_files: FileMap::new( db.clone(), - db_root_path, - "key_files", + &key_db_provider, + KEY_FILES_DB_NAME, PackageType::KeyBlocks, 0, #[cfg(feature = "telemetry")] telemetry, allocated, ) - .await?, // temp_files: FileMap::new(db_root_path, path.join("temp_files"), PackageType::Temp).await?, + .await?, }) } diff --git a/src/node/storage/src/archives/mod.rs b/src/node/storage/src/archives/mod.rs index 20420b4..89d96d2 100644 --- a/src/node/storage/src/archives/mod.rs +++ b/src/node/storage/src/archives/mod.rs @@ -13,6 +13,8 @@ use crate::block_handle_db::BlockHandle; mod package_index_db; pub mod archive_manager; +pub mod db_provider; +pub mod 
epoch; pub mod package; pub mod package_entry; pub mod package_entry_id; @@ -21,7 +23,7 @@ mod archive_slice; mod block_index_db; mod file_maps; mod package_entry_meta_db; -mod package_id; +pub mod package_id; mod package_info; mod package_offsets_db; mod package_status_db; diff --git a/src/node/storage/src/archives/package.rs b/src/node/storage/src/archives/package.rs index 48eceda..97d523b 100644 --- a/src/node/storage/src/archives/package.rs +++ b/src/node/storage/src/archives/package.rs @@ -45,27 +45,36 @@ async fn read_header(reader: &mut R) -> Resu impl Package { pub async fn open(path: PathBuf, read_only: bool, create: bool) -> Result { - let mut file = Self::open_file_ext(read_only, create, path.as_path()).await?; - let mut size = file.metadata().await?.len(); - - file.seek(SeekFrom::Start(0)).await?; - if size < PKG_HEADER_SIZE as u64 { - if !create { + let (file, size) = if read_only { + let size = tokio::fs::metadata(&path).await?.len(); + if size < PKG_HEADER_SIZE as u64 { fail!("Package file is too short") } - file.write_all(&PKG_HEADER_MAGIC.to_le_bytes()).await?; - file.flush().await?; - size = PKG_HEADER_SIZE as u64; + (None, size) } else { - read_header(&mut file).await?; - file.seek(SeekFrom::End(0)).await?; - } + let mut file = Self::open_file_ext(read_only, create, path.as_path()).await?; + let mut size = file.metadata().await?.len(); + + file.seek(SeekFrom::Start(0)).await?; + if size < PKG_HEADER_SIZE as u64 { + if !create { + fail!("Package file is too short") + } + file.write_all(&PKG_HEADER_MAGIC.to_le_bytes()).await?; + file.flush().await?; + size = PKG_HEADER_SIZE as u64; + } else { + read_header(&mut file).await?; + file.seek(SeekFrom::End(0)).await?; + } + (Some(file), size) + }; Ok(Self { path, read_only, size: AtomicU64::new(size), - write_mutex: tokio::sync::Mutex::new(Some(file)), + write_mutex: tokio::sync::Mutex::new(file), }) } @@ -102,22 +111,21 @@ impl Package { pub async fn truncate(&self, size: u64) -> Result<()> { let 
new_size = PKG_HEADER_SIZE as u64 + size; - // let md = tokio::fs::metadata(self.path()).await?; - // if md.len() == new_size { - // return Ok(()) - // } - log::debug!( - target: TARGET, - "Truncating package {}, new size: {new_size} bytes", - self.path.display() - ); - self.size.store(new_size, Ordering::SeqCst); let Some(file) = &*self.write_mutex.lock().await else { fail!( "Cannot truncate package file {}, because it was not opened", self.path().display() ) }; + let old_raw = self.size.load(Ordering::SeqCst); + let old_file_len = file.metadata().await?.len(); + log::warn!( + target: TARGET, + "Truncating package {}: raw_size {old_raw} -> {new_size}, \ + file_len {old_file_len} -> {new_size}", + self.path.display() + ); + self.size.store(new_size, Ordering::SeqCst); file.set_len(new_size).await?; Ok(()) } @@ -164,23 +172,34 @@ impl Package { self.path().display() ) }; - let actual = file.metadata().await?.len(); - let entry_offset = self.size(); - if entry_offset + PKG_HEADER_SIZE as u64 != actual { + let actual_before = file.metadata().await?.len(); + let raw_size = self.size.load(Ordering::SeqCst); + let entry_offset = raw_size - PKG_HEADER_SIZE as u64; + if raw_size != actual_before { log::error!( target: TARGET, - "Package entry {} offset mismatch: expected {entry_offset} vs {actual}", - entry.filename() + "Package {} entry {} offset mismatch BEFORE write: \ + raw_size={raw_size}, file_len={actual_before}, \ + diff={}, entry_data_len={}, entry_filename={}", + self.path.display(), + entry.filename(), + actual_before as i64 - raw_size as i64, + entry.data().len(), + entry.filename(), ) } let entry_size = entry.write_to(file).await?; let total_size = self.size.fetch_add(entry_size, Ordering::SeqCst) + entry_size; - let actual = file.metadata().await?.len(); - if total_size != actual { + let actual_after = file.metadata().await?.len(); + if total_size != actual_after { log::error!( target: TARGET, - "Package entry {} size mismatch: expected {total_size} vs 
{actual}", - entry.filename() + "Package {} entry {} size mismatch AFTER write: \ + expected_total={total_size}, file_len={actual_after}, \ + diff={}, entry_size={entry_size}, raw_size_before={raw_size}", + self.path.display(), + entry.filename(), + actual_after as i64 - total_size as i64, ) } after_append(entry_offset, entry_offset + entry_size) diff --git a/src/node/storage/src/archives/package_entry.rs b/src/node/storage/src/archives/package_entry.rs index 2fcfdf8..1cb705e 100644 --- a/src/node/storage/src/archives/package_entry.rs +++ b/src/node/storage/src/archives/package_entry.rs @@ -109,4 +109,11 @@ impl PackageEntry { pub fn take_data(self) -> Vec { self.data } + + /// Returns the serialized size of this entry (header + filename + data). + pub fn serialized_size(&self) -> u64 { + PKG_ENTRY_HEADER_SIZE as u64 + + self.filename.as_bytes().len() as u64 + + self.data.len() as u64 + } } diff --git a/src/node/storage/src/archives/package_id.rs b/src/node/storage/src/archives/package_id.rs index 21e2988..7d59352 100644 --- a/src/node/storage/src/archives/package_id.rs +++ b/src/node/storage/src/archives/package_id.rs @@ -13,13 +13,13 @@ use std::path::{Path, PathBuf}; use ton_block::{fail, Result, ShardIdent}; #[derive(Clone, Copy, Debug, PartialEq, serde::Serialize, serde::Deserialize)] -pub(crate) enum PackageType { +pub enum PackageType { Blocks, KeyBlocks, //Temp } #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] -pub(crate) struct PackageId { +pub struct PackageId { id: u32, package_type: PackageType, } diff --git a/src/node/storage/src/block_handle_db.rs b/src/node/storage/src/block_handle_db.rs index ecc9837..f0f6037 100644 --- a/src/node/storage/src/block_handle_db.rs +++ b/src/node/storage/src/block_handle_db.rs @@ -29,25 +29,27 @@ use ton_block::{error, fail, BlockIdExt, Result, ShardIdent, UInt256}; #[path = "tests/test_block_handle_db.rs"] mod tests; -const FLAG_DATA: u32 = 0x00000001; -const FLAG_PROOF: u32 = 0x00000002; -const 
FLAG_PROOF_LINK: u32 = 0x00000004; +pub(crate) const FLAG_DATA: u32 = 0x00000001; +pub(crate) const FLAG_PROOF: u32 = 0x00000002; +pub(crate) const FLAG_PROOF_LINK: u32 = 0x00000004; //const FLAG_EXT_DB: u32 = 0x00000008; -const FLAG_STATE: u32 = 0x00000010; +pub(crate) const FLAG_STATE: u32 = 0x00000010; const FLAG_PERSISTENT_STATE: u32 = 0x00000020; const FLAG_NEXT_1: u32 = 0x00000040; const FLAG_NEXT_2: u32 = 0x00000080; -const FLAG_PREV_1: u32 = 0x00000100; -const FLAG_PREV_2: u32 = 0x00000200; -const FLAG_APPLIED: u32 = 0x00000400; +pub(crate) const FLAG_PREV_1: u32 = 0x00000100; +pub(crate) const FLAG_PREV_2: u32 = 0x00000200; +pub(crate) const FLAG_APPLIED: u32 = 0x00000400; pub(crate) const FLAG_KEY_BLOCK: u32 = 0x00000800; -const FLAG_MOVED_TO_ARCHIVE: u32 = 0x00002000; -const FLAG_STATE_SAVED: u32 = 0x00010000; +pub(crate) const FLAG_MOVED_TO_ARCHIVE: u32 = 0x00002000; +pub(crate) const FLAG_STATE_SAVED: u32 = 0x00010000; const FLAG_HAS_FULL_ID: u32 = 0x00020000; // not serializing flags (possible flags - 1, 2, 4, 8) const FLAG_ARCHIVING: u32 = 0x80000000; +pub const VALIDATOR_STATE_DB_NAME: &str = "validator_state_db"; + db_impl_base!(NodeStateDb, &'static str); /// Meta information related to block @@ -436,6 +438,8 @@ impl Drop for BlockHandle { // Real value is // - BlockMeta if FLAG_HAS_FULL_ID is not set // - BlockMeta + wc (i32) + shard (u64) + seqno (u32) + file_hash (UInt256) if FLAG_HAS_FULL_ID is set +pub const BLOCK_HANDLE_DB_NAME: &str = "block_handle_db"; + db_impl_base!(BlockHandleDb, BlockIdExt); declare_counted!( @@ -464,6 +468,7 @@ pub trait Callback: Sync + Send { pub struct BlockHandleStorage { handle_db: Arc, handle_cache: Arc, + no_cache: bool, full_node_state_db: Arc, validator_state_db: Arc, state_cache: lockfree::map::Map>, @@ -485,6 +490,7 @@ impl BlockHandleStorage { let ret = Self { handle_db: handle_db.clone(), handle_cache: Arc::new(lockfree::map::Map::new()), + no_cache: false, full_node_state_db: full_node_state_db.clone(), 
validator_state_db: validator_state_db.clone(), state_cache: lockfree::map::Map::new(), @@ -578,6 +584,10 @@ impl BlockHandleStorage { ret } + pub fn set_no_cache(&mut self) { + self.no_cache = true; + } + pub fn create_handle( &self, id: BlockIdExt, @@ -613,10 +623,13 @@ impl BlockHandleStorage { pub fn load_full_block_id(&self, root_hash: &UInt256) -> Result> { log::trace!(target: TARGET, "load_full_block_id {:x}", root_hash); - let weak = self.handle_cache.get(root_hash); - if let Some(Some(handle)) = weak.map(|weak| weak.val().object.upgrade()) { - Ok(Some(handle.id.clone())) - } else if let Some(data) = self.handle_db.try_get_raw(root_hash.as_slice())? { + if !self.no_cache { + let weak = self.handle_cache.get(root_hash); + if let Some(Some(handle)) = weak.map(|weak| weak.val().object.upgrade()) { + return Ok(Some(handle.id.clone())); + } + } + if let Some(data) = self.handle_db.try_get_raw(root_hash.as_slice())? { Ok(BlockHandle::deserialize_full_id(root_hash, &data)?) } else { Ok(None) @@ -642,8 +655,11 @@ impl BlockHandleStorage { pub fn save_handle( &self, handle: &Arc, - callback: Option>, + callback: Option>, // not invoked in no-cache mode ) -> Result<()> { + if self.no_cache { + return self.handle_db.put_raw(handle.id().root_hash().as_slice(), &handle.serialize()); + } self.storer .send((StoreJob::SaveHandle(handle.clone()), callback)) .map_err(|_| error!("Cannot store handle {}: storer thread dropped", handle.id())) @@ -705,23 +721,35 @@ impl BlockHandleStorage { ) -> Result>> { let rh = id.root_hash().clone(); let ret = Arc::new(BlockHandle::with_values(id, meta, self.handle_cache.clone())); - let added = add_counted_object_to_map(&self.handle_cache, rh, || { - let ret = HandleObject { - object: Arc::downgrade(&ret), - counter: self.allocated.handles.clone().into(), - }; - #[cfg(feature = "telemetry")] - self.telemetry.handles.update(self.allocated.handles.load(Ordering::Relaxed)); - Ok(ret) - })?; - if added { - if store { - self.save_handle(&ret, 
callback)? + let ret = if self.no_cache { + if self.handle_db.try_get_raw(rh.as_slice())?.is_some() { + None + } else { + if store { + self.save_handle(&ret, callback)? + } + Some(ret) } - Ok(Some(ret)) } else { - Ok(None) - } + let added = add_counted_object_to_map(&self.handle_cache, rh, || { + let ret = HandleObject { + object: Arc::downgrade(&ret), + counter: self.allocated.handles.clone().into(), + }; + #[cfg(feature = "telemetry")] + self.telemetry.handles.update(self.allocated.handles.load(Ordering::Relaxed)); + Ok(ret) + })?; + if added { + if store { + self.save_handle(&ret, callback)? + } + Some(ret) + } else { + None + } + }; + Ok(ret) } fn create_state(&self, key: String, id: &BlockIdExt) -> Result> { @@ -745,11 +773,7 @@ impl BlockHandleStorage { } else { log::trace!(target: TARGET, "load block handle by id {id}") } - let ret = loop { - let weak = self.handle_cache.get(id.root_hash()); - if let Some(Some(handle)) = weak.map(|weak| weak.val().object.upgrade()) { - break Some(handle); - } + let ret = if self.no_cache { if let Some(data) = self.handle_db.try_get_raw(id.root_hash().as_slice())? { let meta = if rh_only { BlockHandle::deserialize_nonchecked(&mut id, &data)? @@ -758,12 +782,31 @@ impl BlockHandleStorage { meta.set_flags(FLAG_HAS_FULL_ID); meta }; - let handle = self.create_handle_and_store(id.clone(), meta, None, false)?; - if let Some(handle) = handle { + Some(Arc::new(BlockHandle::with_values(id, meta, self.handle_cache.clone()))) + } else { + None + } + } else { + loop { + let weak = self.handle_cache.get(id.root_hash()); + if let Some(Some(handle)) = weak.map(|weak| weak.val().object.upgrade()) { break Some(handle); } - } else { - break None; + if let Some(data) = self.handle_db.try_get_raw(id.root_hash().as_slice())? { + let meta = if rh_only { + BlockHandle::deserialize_nonchecked(&mut id, &data)? 
+ } else { + let meta = BlockHandle::deserialize(&id, &data)?; + meta.set_flags(FLAG_HAS_FULL_ID); + meta + }; + let handle = self.create_handle_and_store(id.clone(), meta, None, false)?; + if let Some(handle) = handle { + break Some(handle); + } + } else { + break None; + } } }; Ok(ret) diff --git a/src/node/storage/src/block_info_db.rs b/src/node/storage/src/block_info_db.rs index 376c68d..de5f575 100644 --- a/src/node/storage/src/block_info_db.rs +++ b/src/node/storage/src/block_info_db.rs @@ -11,4 +11,9 @@ use crate::db_impl_base; use ton_block::BlockIdExt; +pub const PREV1_BLOCK_DB_NAME: &str = "prev1_block_db"; +pub const PREV2_BLOCK_DB_NAME: &str = "prev2_block_db"; +pub const NEXT1_BLOCK_DB_NAME: &str = "next1_block_db"; +pub const NEXT2_BLOCK_DB_NAME: &str = "next2_block_db"; + db_impl_base!(BlockInfoDb, BlockIdExt); diff --git a/src/node/storage/src/cell_db.rs b/src/node/storage/src/cell_db.rs new file mode 100644 index 0000000..1ad3c4f --- /dev/null +++ b/src/node/storage/src/cell_db.rs @@ -0,0 +1,439 @@ +/* + * Copyright (C) 2025-2026 RSquad Blockchain Lab. + * + * Licensed under the GNU General Public License v3.0. + * See the LICENSE file in the root of this repository. + * + * This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND. 
+ */ +#[cfg(feature = "telemetry")] +use crate::StorageTelemetry; +use crate::{ + db::rocksdb::RocksDb, + shardstate_db_async::CellsDbConfig, + types::{StoredCell, StoringCell}, + StorageAlloc, TARGET, +}; +#[cfg(feature = "telemetry")] +use std::sync::atomic::{AtomicU64, Ordering}; +use std::{ + fs::write, + io::Write, + ops::Deref, + path::{Path, PathBuf}, + sync::Arc, + time::Duration, +}; +use ton_block::{ + error, fail, merkle_update::CellsFactory, BuilderData, Cell, CellsStorage, Result, UInt256, +}; + +pub const BROKEN_CELL_BEACON_FILE: &str = "ton_node.broken_cell"; + +pub struct CellDb { + db: Arc, + cells_cf_name: String, + db_root_path: PathBuf, + storing_cells: Arc>, + #[cfg(feature = "telemetry")] + storing_cells_count: AtomicU64, + cell_cache: quick_cache::sync::Cache, + #[cfg(feature = "telemetry")] + telemetry: Arc, + allocated: Arc, +} + +impl CellDb { + pub fn with_db( + db: Arc, + cell_db_cf: &str, + db_root_path: impl AsRef, + config: &CellsDbConfig, + #[cfg(feature = "telemetry")] telemetry: Arc, + allocated: Arc, + ) -> Result { + if db.cf_handle(cell_db_cf).is_none() { + db.create_cf(cell_db_cf, &Self::build_cf_options(config.cells_cache_size_bytes))?; + } + Ok(Self { + db, + cells_cf_name: cell_db_cf.to_string(), + db_root_path: db_root_path.as_ref().to_path_buf(), + storing_cells: Arc::new(lockfree::map::Map::new()), + #[cfg(feature = "telemetry")] + storing_cells_count: AtomicU64::new(0), + cell_cache: quick_cache::sync::Cache::new(config.cells_lru_cache_capacity), + #[cfg(feature = "telemetry")] + telemetry, + allocated, + }) + } + + pub fn build_cf_options(cache_size: u64) -> rocksdb::Options { + let mut options = rocksdb::Options::default(); + let mut block_opts = rocksdb::BlockBasedOptions::default(); + + // specified cache for blocks. 
+ let cache = rocksdb::Cache::new_lru_cache(cache_size as usize); + block_opts.set_block_cache(&cache); + + // save in LRU block cache also indexes and bloom filters + block_opts.set_cache_index_and_filter_blocks(true); + + // keep indexes and filters in block cache until tablereader freed + block_opts.set_pin_l0_filter_and_index_blocks_in_cache(true); + + // Setup bloom filter with length of 10 bits per key. + // This length provides less than 1% false positive rate. + block_opts.set_bloom_filter(10.0, false); + + options.set_block_based_table_factory(&block_opts); + + // Enable whole key bloom filter in memtable. + options.set_memtable_whole_key_filtering(true); + + // Amount of data to build up in memory (backed by an unsorted log + // on disk) before converting to a sorted on-disk file. + // + // Larger values increase performance, especially during bulk loads. + // Up to max_write_buffer_number write buffers may be held in memory + // at the same time, + // so you may wish to adjust this parameter to control memory usage. + // Also, a larger write buffer will result in a longer recovery time + // the next time the database is opened. + options.set_write_buffer_size(1024 * 1024 * 1024); + + // The maximum number of write buffers that are built up in memory. + // The default and the minimum number is 2, so that when 1 write buffer + // is being flushed to storage, new writes can continue to the other + // write buffer. + // If max_write_buffer_number > 3, writing will be slowed down to + // options.delayed_write_rate if we are writing to the last write buffer + // allowed. + options.set_max_write_buffer_number(4); + + // if prefix_extractor is set and memtable_prefix_bloom_size_ratio is not 0, + // create prefix bloom for memtable with the size of + // write_buffer_size * memtable_prefix_bloom_size_ratio. + // If it is larger than 0.25, it is sanitized to 0.25. 
+ let transform = rocksdb::SliceTransform::create_fixed_prefix(32); + options.set_prefix_extractor(transform); + options.set_memtable_prefix_bloom_ratio(0.1); + + options + } + + pub fn db(&self) -> &Arc { + &self.db + } + + pub fn allocated(&self) -> &StorageAlloc { + &self.allocated + } + + pub fn cells_cf(&self) -> Result>> { + self.db + .cf_handle(&self.cells_cf_name) + .ok_or_else(|| error!("Can't get `{}` cf handle", self.cells_cf_name)) + } + + pub fn storing_cells(&self) -> &Arc> { + &self.storing_cells + } + + #[cfg(feature = "telemetry")] + pub fn telemetry(&self) -> &Arc { + &self.telemetry + } + + /// If root cell already exists in DB, load and return it. Otherwise return None. + pub fn try_load_existing_root( + self: &Arc, + root_id: &UInt256, + cells_cf: &impl rocksdb::AsColumnFamilyRef, + ) -> Result> { + #[cfg(feature = "telemetry")] + let now = std::time::Instant::now(); + if let Some(val) = self.db.get_pinned_cf(cells_cf, root_id.as_slice())? { + let cell = StoredCell::deserialize(self, root_id, &val)?; + #[cfg(feature = "telemetry")] + { + self.telemetry + .stored_cells + .update(self.allocated.storage_cells.load(Ordering::Relaxed)); + self.telemetry.loaded_cells_from_db.update(1); + self.telemetry.load_cell_from_db_time_nanos.update(now.elapsed().as_nanos() as u64); + } + Ok(Some(Cell::with_cell_impl(cell))) + } else { + Ok(None) + } + } + + /// Remove saved cell hashes from the storing_cells in-memory cache. 
+ pub fn cleanup_storing_cells<'a>(&self, saved_ids: impl Iterator) { + for id in saved_ids { + let mut stack = vec![id.clone()]; + while let Some(id) = stack.pop() { + if let Some(removed) = self.storing_cells.remove(&id) { + log::trace!( + target: TARGET, + "CellDb::cleanup_storing_cells {:x} removed from storing_cells", id + ); + #[cfg(feature = "telemetry")] + { + let _count = self.storing_cells_count.fetch_sub(1, Ordering::Relaxed); + self.telemetry.storing_cells.update(_count - 1); + } + + for i in 0..removed.val().references_count() { + if let Ok(ref_hash) = removed.val().reference_repr_hash(i) { + stack.push(ref_hash); + } + } + } + } + } + } + + #[cfg(test)] + pub fn count(&self) -> usize { + if let Ok(cf) = self.cells_cf() { + self.db.iterator_cf(&cf, rocksdb::IteratorMode::Start).count() + } else { + 0 + } + } + + pub(crate) fn load_cell(self: &Arc, cell_id: &UInt256, panic: bool) -> Result { + #[cfg(feature = "telemetry")] + let now = std::time::Instant::now(); + if let Some(cell) = self.cell_cache.get(cell_id) { + #[cfg(feature = "telemetry")] + { + self.telemetry.cell_cache_hits.update(1); + self.telemetry + .load_cell_from_cache_time_nanos + .update(now.elapsed().as_nanos() as u64); + } + return Ok(cell); + } + #[cfg(feature = "telemetry")] + self.telemetry.cell_cache_misses.update(1); + let cell = self.load_cell_uncached(cell_id, panic)?; + #[cfg(feature = "telemetry")] + let now_insert = std::time::Instant::now(); + self.cell_cache.insert(cell_id.clone(), cell.clone()); + #[cfg(feature = "telemetry")] + { + self.telemetry + .store_cell_to_cache_time_nanos + .update(now_insert.elapsed().as_nanos() as u64); + self.telemetry.cell_cache_len.update(self.cell_cache.len() as u64); + } + Ok(cell) + } + + fn load_cell_uncached(self: &Arc, cell_id: &UInt256, panic: bool) -> Result { + #[cfg(feature = "telemetry")] + let now = std::time::Instant::now(); + let storage_cell_data = match self.db.get_pinned_cf(&self.cells_cf()?, cell_id.as_slice()) { + 
Ok(Some(data)) => data, + _ => { + if let Some(guard) = self.storing_cells.get(cell_id) { + log::trace!( + target: TARGET, + "CellDb::load_cell from storing_cells by id {cell_id:x}", + ); + return Ok(guard.val().clone()); + } + + if !panic { + fail!("Can't load cell {:x} from db", cell_id); + } + + log::error!("FATAL!"); + log::error!("FATAL! Can't load cell {:x} from db", cell_id); + log::error!("FATAL!"); + + let path = Path::new(&self.db_root_path).join(BROKEN_CELL_BEACON_FILE); + write(path, "")?; + + std::thread::sleep(Duration::from_millis(100)); + std::process::exit(0xFF); + } + }; + + #[cfg(feature = "telemetry")] + let load_cell_from_db_time_nanos = now.elapsed().as_nanos() as u64; + + let storage_cell = match StoredCell::deserialize(self, cell_id, &storage_cell_data) { + Ok(cell) => Arc::new(cell), + Err(e) => { + if !panic { + fail!("Can't deserialize cell {:x} from db, error: {:?}", cell_id, e); + } + + log::error!("FATAL!"); + log::error!( + "FATAL! Can't deserialize cell {:x} from db, data: {}, error: {:?}", + cell_id, + hex::encode(&storage_cell_data), + e + ); + log::error!("FATAL!"); + + let path = Path::new(&self.db_root_path).join(BROKEN_CELL_BEACON_FILE); + write(path, "")?; + + std::thread::sleep(Duration::from_millis(100)); + std::process::exit(0xFF); + } + }; + + #[cfg(feature = "telemetry")] + { + self.telemetry + .stored_cells + .update(self.allocated.storage_cells.load(Ordering::Relaxed)); + self.telemetry.load_cell_from_db_time_nanos.update(load_cell_from_db_time_nanos); + self.telemetry.loaded_cells_from_db.update(1); + } + + log::trace!( + target: TARGET, + "CellDb::load_cell from DB id {cell_id:x}" + ); + + Ok(Cell::with_cell_impl_arc(storage_cell)) + } +} + +impl CellsFactory for CellDb { + fn create_cell(self: Arc, builder: BuilderData) -> Result { + let cell = StoringCell::with_cell(&*builder.into_cell()?, &self)?; + let cell = Cell::with_cell_impl(cell); + let repr_hash = cell.repr_hash(); + + let mut result_cell = None; + + let 
result = self.storing_cells.insert_with(repr_hash, |_, inserted, found| { + if let Some((_, found)) = found { + result_cell = Some(found.clone()); + lockfree::map::Preview::Discard + } else if let Some(inserted) = inserted { + result_cell = Some(inserted.clone()); + lockfree::map::Preview::Keep + } else { + result_cell = Some(cell.clone()); + lockfree::map::Preview::New(cell.clone()) + } + }); + + let result_cell = result_cell + .ok_or_else(|| error!("INTERNAL ERROR: result_cell {:x} is None", cell.repr_hash()))?; + + match result { + lockfree::map::Insertion::Created => { + log::trace!(target: TARGET, "CellDb::create_cell {:x} - created new", cell.repr_hash()); + #[cfg(feature = "telemetry")] + { + let storing_cells_count = + self.storing_cells_count.fetch_add(1, Ordering::Relaxed); + self.telemetry.storing_cells.update(storing_cells_count + 1); + } + } + lockfree::map::Insertion::Failed(_) => { + log::trace!(target: TARGET, "CellDb::create_cell {:x} - already exists", cell.repr_hash()); + } + lockfree::map::Insertion::Updated(old) => { + fail!( + "INTERNAL ERROR: storing_cells.insert_with {:x} returned Updated({:?})", + cell.repr_hash(), + old + ) + } + } + + Ok(result_cell) + } +} + +// This wrapper-struct is added because it is impossible +// to implement foreign trait (CellByHashStorage) for foreign type (Arc) +pub struct CellByHashStorageAdapter { + db: Arc, + root_cells_data: ahash::HashMap>, +} + +impl CellByHashStorageAdapter { + pub fn new( + db: Arc, + root_cell: Option<&Cell>, + max_inmemory_cells: usize, + ) -> Result { + let mut root_cells_data = ahash::HashMap::default(); + if let Some(root_cell) = root_cell { + if db.load_cell(&root_cell.repr_hash(), false).is_err() { + let mut stack = vec![root_cell.clone()]; + while let Some(cell) = stack.pop() { + if root_cells_data.len() >= max_inmemory_cells { + fail!( + "Too many cells in boc to store in memory: {}, max_inmemory_cells: {}", + root_cells_data.len(), + max_inmemory_cells + ); + } + let cell_data 
= StoredCell::serialize(cell.cell_impl().deref())?; + let cell_hash = cell.repr_hash(); + root_cells_data.insert(cell_hash, cell_data); + + for i in 0..cell.references_count() { + if db.load_cell(&cell.reference_repr_hash(i)?, false).is_err() { + stack.push(cell.reference(i)?); + } + } + } + } + } + Ok(Self { db, root_cells_data }) + } +} + +impl CellsStorage for CellByHashStorageAdapter { + fn load_cell(&self, hash: &UInt256) -> Result { + if let Ok(c) = self.db.clone().load_cell_uncached(hash, false) { + Ok(c) + } else if let Some(data) = self.root_cells_data.get(hash) { + StoredCell::deserialize(&self.db, hash, data).map(Cell::with_cell_impl) + } else { + fail!("Can't load cell {:x} from db", hash); + } + } + + fn load_cell_data( + &self, + hash: &UInt256, + write_hashes: bool, + dest: &mut dyn Write, + ) -> Result<()> { + #[cfg(feature = "telemetry")] + let now = std::time::Instant::now(); + if let Ok(Some(data)) = self.db.db.get_pinned_cf(&self.db.cells_cf()?, hash.as_slice()) { + #[cfg(feature = "telemetry")] + { + self.db + .telemetry + .load_cell_from_db_time_nanos + .update(now.elapsed().as_nanos() as u64); + self.db.telemetry.loaded_cells_from_db.update(1); + } + + StoredCell::write_cell_data(&data, hash, write_hashes, dest) + } else if let Some(data) = self.root_cells_data.get(hash) { + StoredCell::write_cell_data(data, hash, write_hashes, dest) + } else { + fail!("Can't load cell {:x} from db", hash); + } + } +} diff --git a/src/node/storage/src/db/rocksdb.rs b/src/node/storage/src/db/rocksdb.rs index 41596ca..5a487ca 100644 --- a/src/node/storage/src/db/rocksdb.rs +++ b/src/node/storage/src/db/rocksdb.rs @@ -35,6 +35,8 @@ pub enum AccessType { pub const LAST_UNNEEDED_KEY_BLOCK: &str = "LastUnneededKeyBlockId"; // Latest key block we can delete in archives GC pub const NODE_STATE_DB_NAME: &str = "node_state_db"; +pub const NODE_DB_NAME: &str = "db"; +pub const CATCHAINS_DB_NAME: &str = "catchains"; pub type DbPredicateMut<'a> = &'a mut dyn FnMut(&[u8], 
&[u8]) -> Result; @@ -340,7 +342,7 @@ impl RocksDbTable { /// Returns true, if collection is empty; false otherwise pub fn is_empty(&self) -> Result { - Ok(self.len()? == 0) + Ok(self.db.iterator_cf(&self.cf()?, IteratorMode::Start).next().is_none()) } pub fn destroy(&mut self) -> Result { diff --git a/src/node/storage/src/dynamic_boc_archive_db.rs b/src/node/storage/src/dynamic_boc_archive_db.rs new file mode 100644 index 0000000..2588bc7 --- /dev/null +++ b/src/node/storage/src/dynamic_boc_archive_db.rs @@ -0,0 +1,219 @@ +/* + * Copyright (C) 2025-2026 RSquad Blockchain Lab. + * + * Licensed under the GNU General Public License v3.0. + * See the LICENSE file in the root of this repository. + * + * This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND. + */ +#[cfg(feature = "telemetry")] +use crate::StorageTelemetry; +use crate::{ + cell_db::CellDb, db::rocksdb::RocksDb, shardstate_db_async::CellsDbConfig, types::StoredCell, + StorageAlloc, TARGET, +}; +use std::{ops::Deref, path::Path, sync::Arc}; +use ton_block::{Cell, Result, UInt256, MAX_LEVEL}; + +pub struct DynamicBocArchiveDb { + cell_db: Arc, +} + +impl DynamicBocArchiveDb { + pub fn with_db( + db: Arc, + cell_db_cf: &str, + db_root_path: impl AsRef, + config: &CellsDbConfig, + #[cfg(feature = "telemetry")] telemetry: Arc, + allocated: Arc, + ) -> Result { + let cell_db = Arc::new(CellDb::with_db( + db, + cell_db_cf, + db_root_path, + config, + #[cfg(feature = "telemetry")] + telemetry, + allocated, + )?); + Ok(Self { cell_db }) + } + + pub fn cell_db(&self) -> &Arc { + &self.cell_db + } + + /// Thread-safe append-only save. + pub fn save_boc( + &self, + root_cell: Cell, + check_stop: &(dyn Fn() -> Result<()> + Sync), + ) -> Result { + let root_id = root_cell.hash(MAX_LEVEL); + let cells_cf = self.cell_db.cells_cf()?; + + log::debug!(target: TARGET, "DynamicBocArchiveDb::save_boc {:x}", root_id); + + if let Some(existing) = self.cell_db.try_load_existing_root(&root_id, &cells_cf)? 
{ + log::info!(target: TARGET, "DynamicBocArchiveDb::save_boc ALREADY EXISTS {:x}", root_id); + return Ok(existing); + } + + let start = std::time::Instant::now(); + + // Traverse cell tree, collect new cells + let mut new_cells = fnv::FnvHashMap::default(); + let mut visited = fnv::FnvHashSet::default(); + self.collect_new_cells(&root_cell, &mut new_cells, &mut visited, &cells_cf, check_stop)?; + let cells_traverse_time = start.elapsed().as_micros(); + + // Batch write all new cells + let wrote_cells = new_cells.len(); + let write_start = std::time::Instant::now(); + if !new_cells.is_empty() { + let mut batch = rocksdb::WriteBatch::default(); + for (id, data) in &new_cells { + batch.put_cf(&cells_cf, id.as_slice(), data); + } + self.cell_db.db().write(batch)?; + } + #[cfg(feature = "telemetry")] + if wrote_cells > 0 { + self.cell_db + .telemetry() + .boc_db_element_write_nanos + .update(write_start.elapsed().as_nanos() as u64 / wrote_cells as u64); + } + let write_time = write_start.elapsed().as_micros(); + + let now4 = std::time::Instant::now(); + self.cell_db.cleanup_storing_cells(new_cells.keys()); + let storing_cells_cleanup_time = now4.elapsed().as_micros(); + + let total_time = start.elapsed().as_micros() as u64; + #[cfg(feature = "telemetry")] + { + self.cell_db.telemetry().stored_new_cells.update(wrote_cells as u64); + self.cell_db.telemetry().save_boc_total_micros.update(total_time); + self.cell_db.telemetry().save_boc_traverse_micros.update(cells_traverse_time as u64); + self.cell_db.telemetry().save_boc_commit_micros.update(write_time as u64); + self.cell_db + .telemetry() + .save_boc_cleanup_micros + .update(storing_cells_cleanup_time as u64); + } + + log::debug!( + target: TARGET, + "DynamicBocArchiveDb::save_boc {:x} wrote {}, visited {} TIME: {} (tr:{}|cmt:{}|scc:{})", + root_id, wrote_cells, visited.len(), total_time, cells_traverse_time, write_time, + storing_cells_cleanup_time + ); + + self.cell_db.load_cell(&root_id, true) + } + + fn 
collect_new_cells( + &self, + cell: &Cell, + new_cells: &mut fnv::FnvHashMap>, + visited: &mut fnv::FnvHashSet, + cells_cf: &impl rocksdb::AsColumnFamilyRef, + check_stop: &(dyn Fn() -> Result<()> + Sync), + ) -> Result<()> { + check_stop()?; + let cell_id = cell.repr_hash(); + + // Already visited in this traversal (new or existing) — skip + if !visited.insert(cell_id.clone()) { + return Ok(()); + } + + // Already a StoredCell (loaded from DB) + if cell.is::() { + return Ok(()); + } + + // Recurse into children first + for i in 0..cell.references_count() { + let reference = cell.reference(i)?; + self.collect_new_cells(&reference, new_cells, visited, cells_cf, check_stop)?; + } + + // Check if cell exists in DB + if self.cell_db.db().get_pinned_cf(cells_cf, cell_id.as_slice())?.is_some() { + return Ok(()); + } + + // Serialize and add to batch + let data = StoredCell::serialize(cell.deref())?; + new_cells.insert(cell_id, data); + Ok(()) + } + + /// Fast import-only save: writes all non-pruned cells from state update unconditionally, + /// without checking the DB. 
+    pub fn save_update(&self, root_cell: Cell) -> Result<()> {
+        // The root hash is only used to tag the two debug log lines of this call.
+        let root_id = root_cell.hash(MAX_LEVEL);
+        let cells_cf = self.cell_db.cells_cf()?;
+
+        log::debug!(target: TARGET, "DynamicBocArchiveDb::save_update {:x}", root_id);
+
+        let started_at = std::time::Instant::now();
+
+        // Step 1: walk the tree and serialize every cell that is going to be written.
+        let mut collected = fnv::FnvHashMap::default();
+        Self::collect_cells_from_update(&root_cell, &mut collected)?;
+        let cells_traverse_time = started_at.elapsed().as_micros();
+
+        // Step 2: commit everything that was collected in one write batch.
+        // Nothing is sent to the DB when the traversal produced no cells.
+        let wrote_cells = collected.len();
+        let commit_started_at = std::time::Instant::now();
+        if wrote_cells > 0 {
+            let mut batch = rocksdb::WriteBatch::default();
+            collected.iter().for_each(|(id, data)| {
+                batch.put_cf(&cells_cf, id.as_slice(), data);
+            });
+            self.cell_db.db().write(batch)?;
+        }
+        let write_time = commit_started_at.elapsed().as_micros();
+
+        log::debug!(
+            target: TARGET,
+            "DynamicBocArchiveDb::save_update {:x} wrote {} TIME: {} (tr:{}|cmt:{})",
+            root_id, wrote_cells, started_at.elapsed().as_micros(), cells_traverse_time, write_time,
+        );
+
+        Ok(())
+    }
+
+    /// Collect all non-pruned cells from the tree. No DB lookups — pruned branches
+    /// are the boundary (they represent unchanged subtrees already in the DB).
+ fn collect_cells_from_update( + cell: &Cell, + new_cells: &mut fnv::FnvHashMap>, + ) -> Result<()> { + let cell_id = cell.repr_hash(); + + if new_cells.contains_key(&cell_id) { + return Ok(()); + } + + // PrunedBranch = unchanged subtree, already in DB + if cell.is_pruned() && cell.level() == 0 { + return Ok(()); + } + + for i in 0..cell.references_count() { + let reference = cell.reference(i)?; + Self::collect_cells_from_update(&reference, new_cells)?; + } + + let data = StoredCell::serialize_virtual(cell.deref())?; + new_cells.insert(cell_id, data); + Ok(()) + } + + pub fn load_cell(self: &Arc, cell_id: &UInt256, panic: bool) -> Result { + self.cell_db.load_cell(cell_id, panic) + } +} diff --git a/src/node/storage/src/dynamic_boc_rc_db.rs b/src/node/storage/src/dynamic_boc_rc_db.rs index 64a63d0..1dc497b 100644 --- a/src/node/storage/src/dynamic_boc_rc_db.rs +++ b/src/node/storage/src/dynamic_boc_rc_db.rs @@ -11,29 +11,15 @@ #[cfg(feature = "telemetry")] use crate::StorageTelemetry; use crate::{ - db::rocksdb::RocksDb, - shardstate_db_async::CellsDbConfig, - types::{StoredCell, StoringCell}, + cell_db::CellDb, db::rocksdb::RocksDb, shardstate_db_async::CellsDbConfig, types::StoredCell, StorageAlloc, TARGET, }; -use std::{ - fs::write, - io::{Cursor, Write}, - ops::Deref, - path::{Path, PathBuf}, - sync::{ - atomic::{AtomicU64, Ordering}, - Arc, - }, - time::{Duration, Instant}, -}; +use std::{io::Cursor, ops::Deref, path::Path, sync::Arc, time::Instant}; use ton_block::{ - error, fail, merkle_update::CellsFactory, BuilderData, ByteOrderRead, Cell, CellData, - CellsStorage, CellsTempStorage, Result, UInt256, MAX_LEVEL, MAX_REFERENCES_COUNT, + error, fail, ByteOrderRead, Cell, CellData, CellsFactory, CellsTempStorage, Result, UInt256, + MAX_LEVEL, MAX_REFERENCES_COUNT, }; -pub const BROKEN_CELL_BEACON_FILE: &str = "ton_node.broken_cell"; - // FnvHashMap is a standard HashMap with FNV hasher. This hasher is bit faster than default one. 
pub type CellsCounters = fnv::FnvHashMap; @@ -111,17 +97,9 @@ impl VisitedCell { } pub struct DynamicBocDb { - db: Arc, - cells_cf_name: String, + cell_db: Arc, counters_cf_name: String, - db_root_path: PathBuf, - storing_cells: Arc>, - storing_cells_count: AtomicU64, cells_counters: Option>>, - cell_cache: quick_cache::sync::Cache, - #[cfg(feature = "telemetry")] - telemetry: Arc, - allocated: Arc, } impl DynamicBocDb { @@ -134,11 +112,17 @@ impl DynamicBocDb { #[cfg(feature = "telemetry")] telemetry: Arc, allocated: Arc, ) -> Result { - if db.cf_handle(cell_db_cf).is_none() { - db.create_cf(cell_db_cf, &Self::build_cells_cf_options(config))?; - } + let cell_db = CellDb::with_db( + db.clone(), + cell_db_cf, + db_root_path.as_ref(), + config, + #[cfg(feature = "telemetry")] + telemetry, + allocated, + )?; if db.cf_handle(counters_cf_name).is_none() { - db.create_cf(counters_cf_name, &Self::build_cells_cf_options(config))?; + db.create_cf(counters_cf_name, &Self::build_counters_cf_options(config))?; } let cells_counters = if config.prefill_cells_counters { let counters = CellsCounters::default(); @@ -147,86 +131,41 @@ impl DynamicBocDb { None }; Ok(Self { - db, - cells_cf_name: cell_db_cf.to_string(), + cell_db: Arc::new(cell_db), counters_cf_name: counters_cf_name.to_string(), - db_root_path: db_root_path.as_ref().to_path_buf(), - storing_cells: Arc::new(lockfree::map::Map::new()), - storing_cells_count: AtomicU64::new(0), cells_counters, - cell_cache: quick_cache::sync::Cache::new(config.cells_lru_cache_capacity), - #[cfg(feature = "telemetry")] - telemetry, - allocated, }) } + pub fn cell_db(&self) -> &Arc { + &self.cell_db + } + pub fn build_cells_cf_options(config: &CellsDbConfig) -> rocksdb::Options { - Self::build_cf_options(config.cells_cache_size_bytes) + CellDb::build_cf_options(config.cells_cache_size_bytes) } pub fn build_counters_cf_options(config: &CellsDbConfig) -> rocksdb::Options { - Self::build_cf_options(config.counters_cache_size_bytes) + 
CellDb::build_cf_options(config.counters_cache_size_bytes) + } + + pub(crate) fn load_cell(&self, cell_id: &UInt256, panic: bool) -> Result { + self.cell_db.load_cell(cell_id, panic) + } + + #[allow(dead_code)] + fn allocated(&self) -> &StorageAlloc { + self.cell_db.allocated() } - fn build_cf_options(cache_size: u64) -> rocksdb::Options { - let mut options = rocksdb::Options::default(); - let mut block_opts = rocksdb::BlockBasedOptions::default(); - - // specified cache for blocks. - let cache = rocksdb::Cache::new_lru_cache(cache_size as usize); - block_opts.set_block_cache(&cache); - - // save in LRU block cache also indexes and bloom filters - block_opts.set_cache_index_and_filter_blocks(true); - - // keep indexes and filters in block cache until tablereader freed - block_opts.set_pin_l0_filter_and_index_blocks_in_cache(true); - - // Setup bloom filter with length of 10 bits per key. - // This length provides less than 1% false positive rate. - block_opts.set_bloom_filter(10.0, false); - - options.set_block_based_table_factory(&block_opts); - - // Enable whole key bloom filter in memtable. - options.set_memtable_whole_key_filtering(true); - - // Amount of data to build up in memory (backed by an unsorted log - // on disk) before converting to a sorted on-disk file. - // - // Larger values increase performance, especially during bulk loads. - // Up to max_write_buffer_number write buffers may be held in memory - // at the same time, - // so you may wish to adjust this parameter to control memory usage. - // Also, a larger write buffer will result in a longer recovery time - // the next time the database is opened. - options.set_write_buffer_size(1024 * 1024 * 1024); - - // The maximum number of write buffers that are built up in memory. - // The default and the minimum number is 2, so that when 1 write buffer - // is being flushed to storage, new writes can continue to the other - // write buffer. 
- // If max_write_buffer_number > 3, writing will be slowed down to - // options.delayed_write_rate if we are writing to the last write buffer - // allowed. - options.set_max_write_buffer_number(4); - - // if prefix_extractor is set and memtable_prefix_bloom_size_ratio is not 0, - // create prefix bloom for memtable with the size of - // write_buffer_size * memtable_prefix_bloom_size_ratio. - // If it is larger than 0.25, it is sanitized to 0.25. - let transform = rocksdb::SliceTransform::create_fixed_prefix(32); - options.set_prefix_extractor(transform); - options.set_memtable_prefix_bloom_ratio(0.1); - - options + pub fn cells_factory(&self) -> Arc { + self.cell_db.clone() as Arc } #[cfg(test)] pub fn count(&self) -> usize { if let Ok(cf) = self.counters_cf() { - self.db.iterator_cf(&cf, rocksdb::IteratorMode::Start).count() + self.cell_db.db().iterator_cf(&cf, rocksdb::IteratorMode::Start).count() } else { 0 } @@ -241,28 +180,18 @@ impl DynamicBocDb { let root_id = root_cell.hash(MAX_LEVEL); log::debug!(target: TARGET, "DynamicBocDb::save_boc {:x}", root_id); - let cells_cf = self.cells_cf()?; + let cells_cf = self.cell_db.cells_cf()?; - #[cfg(feature = "telemetry")] - let now = Instant::now(); - if let Some(val) = self.db.get_pinned_cf(&cells_cf, root_id.as_slice())? { + if let Some(existing) = self.cell_db.try_load_existing_root(&root_id, &cells_cf)? 
{ log::info!(target: TARGET, "DynamicBocDb::save_boc ALREADY EXISTS {:x}", root_id); - let cell = StoredCell::deserialize(self, &root_id, &val)?; - #[cfg(feature = "telemetry")] - { - self.telemetry - .stored_cells - .update(self.allocated.storage_cells.load(Ordering::Relaxed)); - self.telemetry.loaded_cells_from_db.update(1); - self.telemetry.load_cell_from_db_time_nanos.update(now.elapsed().as_nanos() as u64); - } - return Ok(Cell::with_cell_impl(cell)); + return Ok(existing); } let mut guard = self.cells_counters.as_ref().map(|m| m.lock()); let mut cells_counters: Option<&mut CellsCounters> = guard.as_deref_mut(); #[cfg(feature = "telemetry")] - self.telemetry + self.cell_db + .telemetry() .cached_cells_counters .update(cells_counters.as_ref().map(|c| c.len()).unwrap_or_default() as u64); @@ -297,55 +226,40 @@ impl DynamicBocDb { let tr_build_time = now2.elapsed().as_micros(); let now3 = Instant::now(); - self.db.write(transaction)?; + self.cell_db.db().write(transaction)?; #[cfg(feature = "telemetry")] if !visited.is_empty() { - self.telemetry.boc_db_element_write_nanos.update( + self.cell_db.telemetry().boc_db_element_write_nanos.update( now3.elapsed().as_nanos() as u64 / (wrote_cells as u64 + wrote_counters as u64), ); } let tr_commit_time = now3.elapsed().as_micros(); let now4 = Instant::now(); - for (id, _) in visited.iter() { - let mut stack = vec![id.clone()]; - while let Some(id) = stack.pop() { - if let Some(removed) = self.storing_cells.remove(&id) { - log::trace!( - target: TARGET, - "DynamicBocDb::save_boc {:x} cell removed from storing_cells", id - ); - let _storing_cells_count = - self.storing_cells_count.fetch_sub(1, Ordering::Relaxed); - #[cfg(feature = "telemetry")] - self.telemetry.storing_cells.update(_storing_cells_count - 1); - - for i in 0..removed.val().references_count() { - stack.push(removed.val().reference_repr_hash(i)?); - } - } - } - } + self.cell_db.cleanup_storing_cells(visited.keys()); let storing_cells_cleanup_time = 
now4.elapsed().as_micros(); let saved_root = if let Some(c) = visited.get(&root_id).and_then(|vc| vc.cell()) { c.clone() } else { // only if the root cell was already saved (just updated counter) - we need to load it here - self.load_cell(&root_id, true)? + self.cell_db.load_cell(&root_id, true)? }; let updated = visited.len() - wrote_cells; let total_time = now.elapsed().as_micros() as u64; #[cfg(feature = "telemetry")] { - self.telemetry.stored_new_cells.update(wrote_cells as u64); - self.telemetry.updated_counters.update((wrote_counters - wrote_cells) as u64); - self.telemetry.save_boc_total_micros.update(total_time); - self.telemetry.save_boc_traverse_micros.update(cells_traverse_time as u64); - self.telemetry.save_boc_tr_build_micros.update(tr_build_time as u64); - self.telemetry.save_boc_commit_micros.update(tr_commit_time as u64); - self.telemetry.save_boc_cleanup_micros.update(storing_cells_cleanup_time as u64); + self.cell_db.telemetry().stored_new_cells.update(wrote_cells as u64); + self.cell_db.telemetry().updated_counters.update((wrote_counters - wrote_cells) as u64); + self.cell_db.telemetry().save_boc_total_micros.update(total_time); + self.cell_db.telemetry().save_boc_traverse_micros.update(cells_traverse_time as u64); + self.cell_db.telemetry().save_boc_tr_build_micros.update(tr_build_time as u64); + self.cell_db.telemetry().save_boc_commit_micros.update(tr_commit_time as u64); + self.cell_db + .telemetry() + .save_boc_cleanup_micros + .update(storing_cells_cleanup_time as u64); } log::debug!( @@ -368,7 +282,7 @@ impl DynamicBocDb { fail!("INTERNAL ERROR: fill_counters called with already filled counters cache"); } let counters_cf = self.counters_cf()?; - for kv in self.db.iterator_cf(&counters_cf, rocksdb::IteratorMode::Start) { + for kv in self.cell_db.db().iterator_cf(&counters_cf, rocksdb::IteratorMode::Start) { let (key, value) = kv?; let cell_id = UInt256::from_slice(key.as_ref()); let counter = Cursor::new(value).read_le_u32()?; @@ -412,7 
+326,8 @@ impl DynamicBocDb { let mut guard = self.cells_counters.as_ref().map(|m| m.lock()); let cells_counters: Option<&mut CellsCounters> = guard.as_deref_mut(); #[cfg(feature = "telemetry")] - self.telemetry + self.cell_db + .telemetry() .cached_cells_counters .update(cells_counters.as_ref().map(|c| c.len()).unwrap_or_default() as u64); self.delete_cells_recursive( @@ -427,7 +342,7 @@ impl DynamicBocDb { #[cfg(feature = "telemetry")] let now2 = std::time::Instant::now(); - let cells_cf = self.cells_cf()?; + let cells_cf = self.cell_db.cells_cf()?; let counters_cf = self.counters_cf()?; let mut deleted = 0; let mut transaction = rocksdb::WriteBatch::default(); @@ -452,7 +367,7 @@ impl DynamicBocDb { #[cfg(feature = "telemetry")] let now3 = Instant::now(); - self.db.write(transaction)?; + self.cell_db.db().write(transaction)?; #[cfg(feature = "telemetry")] let tr_commit_time = now3.elapsed().as_micros(); @@ -462,15 +377,16 @@ impl DynamicBocDb { let updated = visited.len() - deleted; #[cfg(feature = "telemetry")] if !visited.is_empty() { - self.telemetry + self.cell_db + .telemetry() .boc_db_element_write_nanos .update(now3.elapsed().as_nanos() as u64 / (visited.len() as u64 + deleted as u64)); - self.telemetry.deleted_cells.update(deleted as u64); - self.telemetry.updated_counters.update(updated as u64); - self.telemetry.delete_boc_total_micros.update(total_time); - self.telemetry.delete_boc_traverse_micros.update(traverse_time as u64); - self.telemetry.delete_boc_tr_build_micros.update(tr_build_time as u64); - self.telemetry.delete_boc_commit_micros.update(tr_commit_time as u64); + self.cell_db.telemetry().deleted_cells.update(deleted as u64); + self.cell_db.telemetry().updated_counters.update(updated as u64); + self.cell_db.telemetry().delete_boc_total_micros.update(total_time); + self.cell_db.telemetry().delete_boc_traverse_micros.update(traverse_time as u64); + self.cell_db.telemetry().delete_boc_tr_build_micros.update(tr_build_time as u64); + 
self.cell_db.telemetry().delete_boc_commit_micros.update(tr_commit_time as u64); } #[cfg(feature = "telemetry")] @@ -488,125 +404,9 @@ impl DynamicBocDb { Ok(()) } - pub(crate) fn load_cell(self: &Arc, cell_id: &UInt256, panic: bool) -> Result { - #[cfg(feature = "telemetry")] - let now = Instant::now(); - if let Some(cell) = self.cell_cache.get(cell_id) { - #[cfg(feature = "telemetry")] - { - self.telemetry.cell_cache_hits.update(1); - self.telemetry - .load_cell_from_cache_time_nanos - .update(now.elapsed().as_nanos() as u64); - } - return Ok(cell); - } - #[cfg(feature = "telemetry")] - self.telemetry.cell_cache_misses.update(1); - let cell = self.load_cell_uncached(cell_id, panic)?; - #[cfg(feature = "telemetry")] - let now_insert = Instant::now(); - self.cell_cache.insert(cell_id.clone(), cell.clone()); - #[cfg(feature = "telemetry")] - { - self.telemetry - .store_cell_to_cache_time_nanos - .update(now_insert.elapsed().as_nanos() as u64); - self.telemetry.cell_cache_len.update(self.cell_cache.len() as u64); - } - Ok(cell) - } - - pub(crate) fn load_cell_uncached( - self: &Arc, - cell_id: &UInt256, - panic: bool, - ) -> Result { - #[cfg(feature = "telemetry")] - let now = Instant::now(); - let storage_cell_data = match self.db.get_pinned_cf(&self.cells_cf()?, cell_id.as_slice()) { - Ok(Some(data)) => data, - _ => { - if let Some(guard) = self.storing_cells.get(cell_id) { - log::trace!( - target: TARGET, - "DynamicBocDb::load_cell from storing_cells by id {cell_id:x}", - ); - return Ok(guard.val().clone()); - } - - if !panic { - fail!("Can't load cell {:x} from db", cell_id); - } - - log::error!("FATAL!"); - log::error!("FATAL! 
Can't load cell {:x} from db", cell_id); - log::error!("FATAL!"); - - let path = Path::new(&self.db_root_path).join(BROKEN_CELL_BEACON_FILE); - write(path, "")?; - - std::thread::sleep(Duration::from_millis(100)); - std::process::exit(0xFF); - } - }; - - #[cfg(feature = "telemetry")] - let load_cell_from_db_time_nanos = now.elapsed().as_nanos() as u64; - - let storage_cell = match StoredCell::deserialize(self, cell_id, &storage_cell_data) { - Ok(cell) => Arc::new(cell), - Err(e) => { - if !panic { - fail!("Can't deserialize cell {:x} from db, error: {:?}", cell_id, e); - } - - log::error!("FATAL!"); - log::error!( - "FATAL! Can't deserialize cell {:x} from db, data: {}, error: {:?}", - cell_id, - hex::encode(&storage_cell_data), - e - ); - log::error!("FATAL!"); - - let path = Path::new(&self.db_root_path).join(BROKEN_CELL_BEACON_FILE); - write(path, "")?; - - std::thread::sleep(Duration::from_millis(100)); - std::process::exit(0xFF); - } - }; - - #[cfg(feature = "telemetry")] - { - self.telemetry - .stored_cells - .update(self.allocated.storage_cells.load(Ordering::Relaxed)); - self.telemetry.load_cell_from_db_time_nanos.update(load_cell_from_db_time_nanos); - self.telemetry.loaded_cells_from_db.update(1); - } - - log::trace!( - target: TARGET, - "DynamicBocDb::load_cell from DB id {cell_id:x}" - ); - - Ok(Cell::with_cell_impl_arc(storage_cell)) - } - - pub(crate) fn allocated(&self) -> &StorageAlloc { - &self.allocated - } - - fn cells_cf(&self) -> Result>> { - self.db - .cf_handle(&self.cells_cf_name) - .ok_or_else(|| error!("Can't get `{}` cf handle", self.cells_cf_name)) - } - fn counters_cf(&self) -> Result>> { - self.db + self.cell_db + .db() .cf_handle(&self.counters_cf_name) .ok_or_else(|| error!("Can't get `{}` cf handle", self.counters_cf_name)) } @@ -666,12 +466,15 @@ impl DynamicBocDb { } #[cfg(feature = "telemetry")] let now = Instant::now(); - if let Some(raw) = self.db.get_pinned_cf(counters_cf, cell_id.as_slice())? 
{ + if let Some(raw) = self.cell_db.db().get_pinned_cf(counters_cf, cell_id.as_slice())? { // Cell is existing #[cfg(feature = "telemetry")] { - self.telemetry.load_counter_time_nanos.update(now.elapsed().as_nanos() as u64); - self.telemetry.loaded_counters.update(1); + self.cell_db + .telemetry() + .load_counter_time_nanos + .update(now.elapsed().as_nanos() as u64); + self.cell_db.telemetry().loaded_counters.update(1); } let mut reader = Cursor::new(raw); return Ok((false, Some(reader.read_le_u32()?))); @@ -821,7 +624,7 @@ impl DynamicBocDb { let cell = if let Some(c) = cell { c } else { - match self.load_cell(&cell_id, true) { + match self.cell_db.load_cell(&cell_id, true) { Ok(cell) => cell, Err(e) => { log::warn!("DynamicBocDb::delete_cells_recursive {:?}", e); @@ -893,13 +696,18 @@ impl DynamicBocDb { if cells_counters.is_none() { #[cfg(feature = "telemetry")] let now = Instant::now(); - if let Some(counter_raw) = self.db.get_pinned_cf(counters_cf, cell_id.as_slice())? { + if let Some(counter_raw) = + self.cell_db.db().get_pinned_cf(counters_cf, cell_id.as_slice())? 
+ { // Cell's counter is in DB - load it and update #[cfg(feature = "telemetry")] { - self.telemetry.load_counter_time_nanos.update(now.elapsed().as_nanos() as u64); - self.telemetry.loaded_counters.update(1); + self.cell_db + .telemetry() + .load_counter_time_nanos + .update(now.elapsed().as_nanos() as u64); + self.cell_db.telemetry().loaded_counters.update(1); } let mut visited_cell = VisitedCell::with_raw_counter(&counter_raw)?; @@ -922,135 +730,6 @@ impl DynamicBocDb { } } -impl CellsFactory for DynamicBocDb { - fn create_cell(self: Arc, builder: BuilderData) -> Result { - let cell = StoringCell::with_cell(&*builder.into_cell()?, &self)?; - let cell = Cell::with_cell_impl(cell); - let repr_hash = cell.repr_hash(); - - let mut result_cell = None; - - let result = self.storing_cells.insert_with(repr_hash, |_, inserted, found| { - if let Some((_, found)) = found { - result_cell = Some(found.clone()); - lockfree::map::Preview::Discard - } else if let Some(inserted) = inserted { - result_cell = Some(inserted.clone()); - lockfree::map::Preview::Keep - } else { - result_cell = Some(cell.clone()); - lockfree::map::Preview::New(cell.clone()) - } - }); - - let result_cell = result_cell - .ok_or_else(|| error!("INTERNAL ERROR: result_cell {:x} is None", cell.repr_hash()))?; - - match result { - lockfree::map::Insertion::Created => { - log::trace!(target: TARGET, "DynamicBocDb::create_cell {:x} - created new", cell.repr_hash()); - #[cfg(feature = "telemetry")] - { - let storing_cells_count = - self.storing_cells_count.fetch_add(1, Ordering::Relaxed); - self.telemetry.storing_cells.update(storing_cells_count + 1); - } - } - lockfree::map::Insertion::Failed(_) => { - log::trace!(target: TARGET, "DynamicBocDb::create_cell {:x} - already exists", cell.repr_hash()); - } - lockfree::map::Insertion::Updated(old) => { - fail!( - "INTERNAL ERROR: storing_cells.insert_with {:x} returned Updated({:?})", - cell.repr_hash(), - old - ) - } - } - - Ok(result_cell) - } -} - -// This 
wrapper-struct is added because it is impossible -// to implement foreign trait (CellByHashStorage) for foreign type (Arc) -pub struct CellByHashStorageAdapter { - db: Arc, - root_cells_data: ahash::HashMap>, -} - -impl CellByHashStorageAdapter { - pub fn new( - db: Arc, - root_cell: Option<&Cell>, - max_inmemory_cells: usize, - ) -> Result { - let mut root_cells_data = ahash::HashMap::default(); - if let Some(root_cell) = root_cell { - if db.load_cell(&root_cell.repr_hash(), false).is_err() { - let mut stack = vec![root_cell.clone()]; - while let Some(cell) = stack.pop() { - if root_cells_data.len() >= max_inmemory_cells { - fail!( - "Too many cells in boc to store in memory: {}, max_inmemory_cells: {}", - root_cells_data.len(), - max_inmemory_cells - ); - } - let cell_data = StoredCell::serialize(cell.cell_impl().deref())?; - let cell_hash = cell.repr_hash(); - root_cells_data.insert(cell_hash, cell_data); - - for i in 0..cell.references_count() { - if db.load_cell(&cell.reference_repr_hash(i)?, false).is_err() { - stack.push(cell.reference(i)?); - } - } - } - } - } - Ok(Self { db, root_cells_data }) - } -} - -impl CellsStorage for CellByHashStorageAdapter { - fn load_cell(&self, hash: &UInt256) -> Result { - if let Ok(c) = self.db.clone().load_cell_uncached(hash, false) { - Ok(c) - } else if let Some(data) = self.root_cells_data.get(hash) { - StoredCell::deserialize(&self.db, hash, data).map(Cell::with_cell_impl) - } else { - fail!("Can't load cell {:x} from db", hash); - } - } - - fn load_cell_data( - &self, - hash: &UInt256, - write_hashes: bool, - dest: &mut dyn Write, - ) -> Result<()> { - #[cfg(feature = "telemetry")] - let now = std::time::Instant::now(); - if let Ok(Some(data)) = self.db.db.get_pinned_cf(&self.db.cells_cf()?, hash.as_slice()) { - #[cfg(feature = "telemetry")] - { - self.db - .telemetry - .load_cell_from_db_time_nanos - .update(now.elapsed().as_nanos() as u64); - self.db.telemetry.loaded_cells_from_db.update(1); - } - - 
StoredCell::write_cell_data(&data, hash, write_hashes, dest) - } else if let Some(data) = self.root_cells_data.get(hash) { - StoredCell::write_cell_data(data, hash, write_hashes, dest) - } else { - fail!("Can't load cell {:x} from db", hash); - } - } -} - pub struct AsyncCellsStorageAdapter { boc_db: Arc, index: Vec<(UInt256, u16)>, // hash & depth. @@ -1072,7 +751,7 @@ impl AsyncCellsStorageAdapter { let mut guard = boc_db_clone.cells_counters.as_ref().map(|m| m.lock()); let mut cells_counters: Option<&mut CellsCounters> = guard.as_deref_mut(); - let cells_cf = boc_db_clone.cells_cf()?; + let cells_cf = boc_db_clone.cell_db.cells_cf()?; let counters_cf = boc_db_clone.counters_cf()?; let mut visited = fnv::FnvHashMap::::default(); @@ -1088,7 +767,7 @@ impl AsyncCellsStorageAdapter { // counter transaction.put_cf(&counters_cf, id.as_slice(), vc.serialize_counter()); } - boc_db_clone.db.write(transaction)?; + boc_db_clone.cell_db.db().write(transaction)?; visited.clear(); Ok(()) }; @@ -1143,7 +822,7 @@ impl CellsTempStorage for AsyncCellsStorageAdapter { Ok(guard.val().clone()) } else { let (hash, _) = self.load_hash_and_depth(index)?; - let cell = self.boc_db.clone().load_cell(&hash, false)?; + let cell = self.boc_db.cell_db.load_cell(&hash, false)?; self.cache.insert(index, cell.clone()); Ok(cell) } @@ -1162,7 +841,8 @@ impl CellsTempStorage for AsyncCellsStorageAdapter { fail!("AsyncCellsStorageAdapter::store_simple_cell supports only zero level cells"); } self.index[index as usize] = (data.hash(0), data.depth(0)); - let cell = Cell::with_cell_impl(StoredCell::with_cell_data(data, refs, &self.boc_db)?); + let cell = + Cell::with_cell_impl(StoredCell::with_cell_data(data, refs, &self.boc_db.cell_db)?); self.cache.insert(index, cell.clone()); self.sender.blocking_send((index, cell))?; Ok(()) diff --git a/src/node/storage/src/lib.rs b/src/node/storage/src/lib.rs index 4059f99..dd82204 100644 --- a/src/node/storage/src/lib.rs +++ b/src/node/storage/src/lib.rs @@ -8,11 
+8,14 @@ * This file has been modified from its original version. * This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND. */ +pub mod archive_shardstate_db; pub mod archives; pub mod block_handle_db; pub mod block_info_db; pub mod catchain_persistent_db; +pub mod cell_db; pub mod db; +pub mod dynamic_boc_archive_db; pub mod dynamic_boc_rc_db; pub mod error; mod macros; diff --git a/src/node/storage/src/shard_top_blocks_db.rs b/src/node/storage/src/shard_top_blocks_db.rs index 23d2dbf..01619cd 100644 --- a/src/node/storage/src/shard_top_blocks_db.rs +++ b/src/node/storage/src/shard_top_blocks_db.rs @@ -10,4 +10,6 @@ */ use crate::db_impl_base; +pub const SHARD_TOP_BLOCKS_DB_NAME: &str = "shard_top_blocks_db"; + db_impl_base!(ShardTopBlocksDb, Vec); diff --git a/src/node/storage/src/shardstate_db_async.rs b/src/node/storage/src/shardstate_db_async.rs index a3eac99..78f8599 100644 --- a/src/node/storage/src/shardstate_db_async.rs +++ b/src/node/storage/src/shardstate_db_async.rs @@ -11,11 +11,12 @@ #[cfg(feature = "telemetry")] use crate::StorageTelemetry; use crate::{ + cell_db::CellByHashStorageAdapter, db::{ rocksdb::{RocksDb, RocksDbTable}, DbKey, }, - dynamic_boc_rc_db::{AsyncCellsStorageAdapter, CellByHashStorageAdapter, DynamicBocDb}, + dynamic_boc_rc_db::{AsyncCellsStorageAdapter, DynamicBocDb}, error::StorageError, traits::Serializable, StorageAlloc, TARGET, @@ -489,7 +490,11 @@ impl ShardStateDb { root: Option<&Cell>, max_inmemory_cells: usize, ) -> Result { - CellByHashStorageAdapter::new(self.dynamic_boc_db.clone(), root, max_inmemory_cells) + CellByHashStorageAdapter::new( + self.dynamic_boc_db.cell_db().clone(), + root, + max_inmemory_cells, + ) } pub fn create_fast_cell_storage( @@ -500,7 +505,7 @@ impl ShardStateDb { } pub fn cells_factory(&self) -> Result> { - Ok(self.dynamic_boc_db.clone() as Arc) + Ok(self.dynamic_boc_db.cells_factory()) } pub fn enumerate_ids( diff --git a/src/node/storage/src/tests/mod.rs 
b/src/node/storage/src/tests/mod.rs index 2abde60..1d864c8 100644 --- a/src/node/storage/src/tests/mod.rs +++ b/src/node/storage/src/tests/mod.rs @@ -9,6 +9,7 @@ * This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND. */ mod test_catchain_persistent_db; +mod test_dynamic_boc_archive_db; mod test_dynamic_boc_rc_db; mod test_shardstate_db_async; diff --git a/src/node/storage/src/tests/test_archive_manager.rs b/src/node/storage/src/tests/test_archive_manager.rs index 2d07c52..763f7d1 100644 --- a/src/node/storage/src/tests/test_archive_manager.rs +++ b/src/node/storage/src/tests/test_archive_manager.rs @@ -11,8 +11,10 @@ use crate::StorageTelemetry; use crate::{ archives::{ archive_manager::ArchiveManager, + db_provider::{ArchiveDbProvider, EpochDbProvider, SingleDbProvider}, + epoch::{ArchivalModeConfig, EpochRouter}, package_entry_id::{GetFileName, PackageEntryId}, - ARCHIVE_PACKAGE_SIZE, + ARCHIVE_PACKAGE_SIZE, ARCHIVE_SLICE_SIZE, }, block_handle_db::{BlockHandleStorage, FLAG_KEY_BLOCK}, db::rocksdb::{destroy_rocks_db, AccessType, RocksDb}, @@ -44,9 +46,12 @@ async fn create_manager( std::fs::remove_dir_all(&path).ok(); } let db = RocksDb::new(root, name, None, AccessType::ReadWrite)?; + let db_root_path = Arc::new(path); + let db_provider = Arc::new(SingleDbProvider::new(db.clone(), db_root_path.clone())); let manager = ArchiveManager::with_data( db.clone(), - Arc::new(path), + db_root_path, + db_provider, 0, Arc::new(AtomicU8::new(0)), #[cfg(feature = "telemetry")] @@ -363,6 +368,11 @@ async fn test_block_index() -> Result<()> { data.extend_from_slice(&id.seq_no().to_le_bytes()); data } + fn make_proof(id: &BlockIdExt) -> Vec { + let mut data = id.shard().shard_prefix_with_tag().to_be_bytes().to_vec(); + data.extend_from_slice(&id.seq_no().to_be_bytes()); + data + } const DB_NAME: &str = "test_block_index"; @@ -382,7 +392,7 @@ async fn test_block_index() -> Result<()> { for mc_seqno in 1..total_mc_blocks { let id = generate_block_id(-1, 
0x8000_0000_0000_0000, mc_seqno); manager.add_file(&PackageEntryId::Block(&id), &make_data(&id)).await?; - manager.add_file(&PackageEntryId::Proof(&id), &[1, 2, 3]).await?; + manager.add_file(&PackageEntryId::Proof(&id), &make_proof(&id)).await?; let flags = if rand::random::() % 12345 == 0 { FLAG_KEY_BLOCK } else { 0 }; let block_meta = BlockMeta::with_data(flags, gen_utime, lt, mc_seqno, 0); let handle = block_handle_storage.create_handle(id.clone(), block_meta, None)?.unwrap(); @@ -401,7 +411,7 @@ async fn test_block_index() -> Result<()> { *seqno, ); manager.add_file(&PackageEntryId::Block(&id), &make_data(&id)).await?; - manager.add_file(&PackageEntryId::ProofLink(&id), &[1, 2, 3]).await?; + manager.add_file(&PackageEntryId::ProofLink(&id), &make_proof(&id)).await?; let block_meta = BlockMeta::with_data(0, gen_utime, lt + i as u64 * 1_000_000, mc_seqno, 0); let handle = @@ -443,8 +453,11 @@ async fn test_block_index() -> Result<()> { let prefix = AccountIdPrefixFull { workchain_id: -1, prefix: rand::random::() }; let (id, data) = manager.lookup_block_by_seqno(&prefix, seqno).await?.unwrap(); assert_eq!(data, make_data(&id)); + let (id, data) = manager.lookup_proof_by_seqno(&prefix, seqno).await?.unwrap(); + assert_eq!(data, make_proof(&id)); let mut found = 0; + let mut ids = vec![]; let utime = init_utime + (rand::random::() % (gen_utime - init_utime)) - 100; log::info!("lookup by utime {}", utime); let prefix = AccountIdPrefixFull { workchain_id: 0, prefix: rand::random::() }; @@ -455,12 +468,21 @@ async fn test_block_index() -> Result<()> { Box::new(|id, data| { assert_eq!(data, make_data(&id)); found += 1; + ids.push(id); Ok(true) }), ) .await?; assert!(found > 0); + for id in ids { + let (id, data) = manager + .lookup_proof_by_seqno(&id.shard().account_id_prefix(), id.seq_no()) + .await? 
+ .unwrap(); + assert_eq!(data, make_proof(&id)); + } } + assert_eq!(manager.get_max_mc_seqno().await, Some(total_mc_blocks - 1)); drop(block_handle_storage); drop(manager); @@ -484,6 +506,8 @@ async fn test_block_index() -> Result<()> { assert_eq!(data, make_data(&id)); assert_eq!(id.seq_no(), 20_000); + assert_eq!(manager.get_max_mc_seqno().await, Some(total_mc_blocks - 1)); + for _ in 0..20_000 { let lt = rand::random::() % lt; log::info!("lookup by lt {}", lt); @@ -497,8 +521,12 @@ async fn test_block_index() -> Result<()> { if let Some((id, data)) = manager.lookup_block_by_seqno(&prefix, seqno).await? { assert_eq!(data, make_data(&id)); } + if let Some((id, data)) = manager.lookup_proof_by_seqno(&prefix, seqno).await? { + assert_eq!(data, make_proof(&id)); + } let mut found = 0; + let mut ids = vec![]; let utime = init_utime + rand::random::() % (gen_utime - init_utime) - 100; log::info!("lookup by utime {}", utime); let prefix = AccountIdPrefixFull { workchain_id: -1, prefix: rand::random::() }; @@ -509,11 +537,19 @@ async fn test_block_index() -> Result<()> { Box::new(|id, data| { assert_eq!(data, make_data(&id)); found += 1; + ids.push(id); Ok(true) }), ) .await?; assert!(found > 0); + for id in ids { + let (id, data) = manager + .lookup_proof_by_seqno(&id.shard().account_id_prefix(), id.seq_no()) + .await? 
+ .unwrap(); + assert_eq!(data, make_proof(&id)); + } } drop(manager); @@ -521,3 +557,180 @@ async fn test_block_index() -> Result<()> { destroy_rocks_db(DB_PATH, DB_NAME).await.unwrap(); Ok(()) } + +// --- Archival mode (epoch-based) tests --- + +fn mc_block_id(mc_seq_no: u32) -> BlockIdExt { + BlockIdExt::with_params( + ShardIdent::masterchain(), + mc_seq_no, + UInt256::from_le_bytes(&mc_seq_no.to_le_bytes()), + UInt256::default(), + ) +} + +async fn write_blocks( + manager: &ArchiveManager, + bhs: &BlockHandleStorage, + range: std::ops::Range, + data: &[u8], +) -> Result<()> { + for mc_seq_no in range { + let block_id = mc_block_id(mc_seq_no); + let meta = BlockMeta::with_data(0, 0, 0, 0, 0); + let handle = bhs + .create_handle(block_id.clone(), meta, None)? + .ok_or_else(|| error!("Cannot create handle for block {}", block_id))?; + manager.add_file(&PackageEntryId::Proof(&block_id), data).await?; + handle.set_proof(); + handle.set_block_applied(); + manager.move_to_archive(&handle, || Ok(())).await?; + handle.set_archived(); + bhs.save_handle(&handle, None)?; + } + Ok(()) +} + +async fn read_block( + manager: &ArchiveManager, + bhs: &BlockHandleStorage, + mc_seq_no: u32, +) -> Result> { + let block_id = mc_block_id(mc_seq_no); + let handle = bhs.load_handle_by_id(&block_id)?.unwrap(); + manager.get_file(&handle, &PackageEntryId::Proof(&block_id)).await +} + +async fn create_epoch_manager( + dir: &Path, +) -> Result<(ArchiveManager, Arc, Arc)> { + let db_root = dir.join("main_db"); + let new_epochs_path = dir.join("new_epochs"); + + let config = ArchivalModeConfig { + epoch_size: ARCHIVE_SLICE_SIZE, + new_epochs_path, + existing_epochs: vec![], + }; + + let db = RocksDb::new(&db_root, "db", None, AccessType::ReadWrite)?; + let db_root_path = Arc::new(db_root); + + let router = Arc::new(EpochRouter::new(&config).await?); + let db_provider: Arc = Arc::new(EpochDbProvider::new(router.clone())); + + let manager = ArchiveManager::with_data( + db.clone(), + 
db_root_path, + db_provider, + 0, + Arc::new(AtomicU8::new(0)), + #[cfg(feature = "telemetry")] + Arc::new(StorageTelemetry::default()), + Arc::new(StorageAlloc::default()), + ) + .await?; + + Ok((manager, db, router)) +} + +#[tokio::test] +async fn test_archival_mode_minimal() -> Result<()> { + let dir = tempfile::tempdir().unwrap(); + let (manager, db, router) = create_epoch_manager(dir.path()).await?; + let (bhs, _) = create_block_handle_storage(db.clone()); + router.resolve_or_create(0).await?; + + write_blocks(&manager, &bhs, 50..51, &[1, 2, 3]).await?; + + let result = read_block(&manager, &bhs, 50).await?; + assert_eq!(result, vec![1, 2, 3]); + Ok(()) +} + +#[tokio::test] +async fn test_archival_mode_write_and_read() -> Result<()> { + let dir = tempfile::tempdir().unwrap(); + let (manager, db, router) = create_epoch_manager(dir.path()).await?; + let (bhs, _) = create_block_handle_storage(db.clone()); + router.resolve_or_create(0).await?; + + let data = vec![1, 2, 3, 4, 5]; + write_blocks(&manager, &bhs, 0..150, &data).await?; + + for mc_seq_no in 0..150 { + assert_eq!(read_block(&manager, &bhs, mc_seq_no).await?, data); + } + + // Verify .pack files are in epoch directory, not main db + let epoch_dir = dir.path().join("new_epochs").join("epoch_0"); + assert!(epoch_dir.exists(), "Epoch directory should exist"); + assert!( + epoch_dir.join("archive").join("packages").exists(), + "Pack files should be in epoch directory" + ); + + Ok(()) +} + +#[tokio::test] +async fn test_archival_mode_multiple_epochs() -> Result<()> { + let dir = tempfile::tempdir().unwrap(); + let (manager, db, router) = create_epoch_manager(dir.path()).await?; + let (bhs, _) = create_block_handle_storage(db.clone()); + + router.resolve_or_create(0).await?; + router.resolve_or_create(20_000).await?; + + let data_epoch0 = vec![10, 20, 30]; + let data_epoch1 = vec![40, 50, 60]; + + write_blocks(&manager, &bhs, 0..100, &data_epoch0).await?; + assert_eq!(read_block(&manager, &bhs, 50).await?, 
data_epoch0); + + write_blocks(&manager, &bhs, 20_000..20_100, &data_epoch1).await?; + + assert_eq!(read_block(&manager, &bhs, 50).await?, data_epoch0); + assert_eq!(read_block(&manager, &bhs, 20_050).await?, data_epoch1); + + assert!(dir.path().join("new_epochs").join("epoch_0").exists()); + assert!(dir.path().join("new_epochs").join("epoch_1").exists()); + + Ok(()) +} + +#[tokio::test] +async fn test_archival_mode_restart_preserves_data() -> Result<()> { + let dir = tempfile::tempdir().unwrap(); + let data = vec![7, 8, 9]; + + // First "run": write some blocks + let db = { + let (manager, db, router) = create_epoch_manager(dir.path()).await?; + let (bhs, _bh_db) = create_block_handle_storage(db.clone()); + router.resolve_or_create(0).await?; + write_blocks(&manager, &bhs, 0..50, &data).await?; + db + }; + + // BlockHandleStorage has background task which holds RocksDB instance + while Arc::strong_count(&db) > 1 { + tokio::time::sleep(std::time::Duration::from_millis(1)).await; + } + drop(db); + + // Second "run": recreate manager, verify data is accessible + let (manager, db, _router) = create_epoch_manager(dir.path()).await?; + let (bhs, _) = create_block_handle_storage(db.clone()); + + for mc_seq_no in 0..50 { + assert_eq!( + read_block(&manager, &bhs, mc_seq_no).await?, + data, + "Block {} data mismatch after restart", + mc_seq_no + ); + } + + Ok(()) +} diff --git a/src/node/storage/src/tests/test_archive_slice.rs b/src/node/storage/src/tests/test_archive_slice.rs index 313042d..53cbaf1 100644 --- a/src/node/storage/src/tests/test_archive_slice.rs +++ b/src/node/storage/src/tests/test_archive_slice.rs @@ -23,7 +23,7 @@ use crate::{ StorageAlloc, }; use std::{future::Future, path::Path, pin::Pin, sync::Arc}; -use ton_block::{error, BlockIdExt, Result, ShardIdent, UInt256}; +use ton_block::{error, AccountIdPrefixFull, BlockIdExt, Result, ShardIdent, UInt256}; const DB_PATH: &str = "../../target/test"; @@ -41,6 +41,7 @@ async fn prepare_test( name: &str, 
package_type: PackageType, shard_split_depth: u8, + archive_id: u32, ) -> Result<(Arc, TestContext)> { let db_root = Path::new(DB_PATH).join(name); let _ = std::fs::remove_dir_all(&db_root); @@ -48,7 +49,7 @@ async fn prepare_test( let archive_slice = ArchiveSlice::new_empty( db.clone(), Arc::new(db_root), - 0, + archive_id, package_type, shard_split_depth, #[cfg(feature = "telemetry")] @@ -72,9 +73,11 @@ async fn run_test( name: &str, package_type: PackageType, shard_split_depth: u8, + archive_id: u32, scenario: impl Fn(TestContext) -> Pinned, ) -> Result<()> { - let (db, test_context) = prepare_test(name, package_type, shard_split_depth).await?; + let (db, test_context) = + prepare_test(name, package_type, shard_split_depth, archive_id).await?; scenario(test_context).await?; destroy_db(db, name).await; Ok(()) @@ -147,7 +150,7 @@ async fn test_scenario_gold() -> Result<()> { Ok(()) } - run_test("test_archive_slice_scenario_gold", PackageType::Blocks, 0, |ctx| { + run_test("test_archive_slice_scenario_gold", PackageType::Blocks, 0, 0, |ctx| { Box::pin(scenario(ctx)) }) .await @@ -184,6 +187,53 @@ async fn test_key_blocks_slice() -> Result<()> { Ok(()) } - run_test("test_key_blocks_slice", PackageType::KeyBlocks, 0, |ctx| Box::pin(scenario(ctx))) + run_test("test_key_blocks_slice", PackageType::KeyBlocks, 0, 0, |ctx| Box::pin(scenario(ctx))) .await } + +#[tokio::test] +async fn test_lookup_proof_by_seqno() -> Result<()> { + async fn scenario(test_context: TestContext) -> Result<()> { + let proof_data = vec![7u8, 8, 9]; + let mc_seqno = 55u32; + + let block_id = BlockIdExt::with_params( + ShardIdent::masterchain(), + mc_seqno, + UInt256::with_array([mc_seqno as u8; 32]), + UInt256::default(), + ); + let meta = BlockMeta::with_data(0, 1000, 100_000, mc_seqno, 0); + let handle = test_context + .block_handle_storage + .create_handle(block_id.clone(), meta, None)? 
+ .ok_or_else(|| error!("Cannot create handle"))?; + + test_context + .archive_slice + .add_file(&handle, &PackageEntryId::Block(&block_id), vec![1, 2, 3]) + .await?; + test_context + .archive_slice + .add_file(&handle, &PackageEntryId::Proof(&block_id), proof_data.clone()) + .await?; + + let prefix = AccountIdPrefixFull { workchain_id: -1, prefix: 0 }; + + let result = test_context.archive_slice.lookup_proof_by_seqno(&prefix, mc_seqno).await?; + let (found_id, found_data) = result.expect("proof should be found"); + assert_eq!(found_id, block_id); + assert_eq!(found_data, proof_data); + + let result = test_context.archive_slice.lookup_proof_by_seqno(&prefix, 999).await?; + assert!(result.is_none(), "lookup of non-existent seqno should return None"); + + drop(test_context); + Ok(()) + } + + run_test("test_lookup_proof_by_seqno", PackageType::Blocks, 0, 50, |ctx| { + Box::pin(scenario(ctx)) + }) + .await +} diff --git a/src/node/storage/src/tests/test_dynamic_boc_archive_db.rs b/src/node/storage/src/tests/test_dynamic_boc_archive_db.rs new file mode 100644 index 0000000..5f01ee9 --- /dev/null +++ b/src/node/storage/src/tests/test_dynamic_boc_archive_db.rs @@ -0,0 +1,261 @@ +/* + * Copyright (C) 2025-2026 RSquad Blockchain Lab. + * + * Licensed under the GNU General Public License v3.0. + * See the LICENSE file in the root of this repository. + * + * This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND. 
+ */ +#[cfg(feature = "telemetry")] +use crate::StorageTelemetry; +use crate::{ + archive_shardstate_db::ArchiveShardStateDb, + cell_db::CellByHashStorageAdapter, + db::rocksdb::{destroy_rocks_db, AccessType, RocksDb}, + dynamic_boc_archive_db::DynamicBocArchiveDb, + shardstate_db_async::CellsDbConfig, + tests::utils::{count_tree_unique_cells, get_test_tree_of_cells, init_test_log}, + StorageAlloc, +}; +use std::sync::Arc; +use ton_block::{ + read_single_root_boc, BigBocWriter, BlockIdExt, BocFlags, BuilderData, CellsFactory, + IBitstring, Result, ShardIdent, UInt256, MAX_SAFE_DEPTH, SHARD_FULL, +}; + +const DB_PATH: &str = "../../target/test"; + +fn make_block_id(seq_no: u32) -> BlockIdExt { + BlockIdExt::with_params( + ShardIdent::with_tagged_prefix(-1, SHARD_FULL).unwrap(), + seq_no, + UInt256::from([seq_no as u8; 32]), + UInt256::from([(seq_no + 100) as u8; 32]), + ) +} + +#[tokio::test(flavor = "multi_thread")] +async fn test_dynamic_boc_archive_db() -> Result<()> { + init_test_log(); + + const DB_NAME: &str = "test_dynamic_boc_archive_db"; + destroy_rocks_db(DB_PATH, DB_NAME).await.unwrap(); + + let db = RocksDb::new(DB_PATH, DB_NAME, None, AccessType::ReadWrite)?; + let boc_db = Arc::new(DynamicBocArchiveDb::with_db( + db.clone(), + "cells", + "", + &CellsDbConfig::default(), + #[cfg(feature = "telemetry")] + Arc::new(StorageTelemetry::default()), + Arc::new(StorageAlloc::default()), + )?); + + let root_cell = get_test_tree_of_cells(); + let initial_count = count_tree_unique_cells(root_cell.clone()); + + // Save and verify + boc_db.save_boc(root_cell.clone(), &|| Ok(()))?; + assert_eq!(boc_db.cell_db().count(), initial_count); + + // Load and verify + let loaded = boc_db.cell_db().load_cell(&root_cell.repr_hash(), false)?; + assert_eq!(count_tree_unique_cells(loaded), initial_count); + + drop(boc_db); + drop(db); + destroy_rocks_db(DB_PATH, DB_NAME).await.unwrap(); + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn 
test_archive_save_idempotent() -> Result<()> { + init_test_log(); + + const DB_NAME: &str = "test_archive_save_idempotent"; + destroy_rocks_db(DB_PATH, DB_NAME).await.unwrap(); + + let db = RocksDb::new(DB_PATH, DB_NAME, None, AccessType::ReadWrite)?; + let boc_db = Arc::new(DynamicBocArchiveDb::with_db( + db.clone(), + "cells", + "", + &CellsDbConfig::default(), + #[cfg(feature = "telemetry")] + Arc::new(StorageTelemetry::default()), + Arc::new(StorageAlloc::default()), + )?); + + let root_cell = get_test_tree_of_cells(); + let initial_count = count_tree_unique_cells(root_cell.clone()); + + // Save twice + boc_db.save_boc(root_cell.clone(), &|| Ok(()))?; + boc_db.save_boc(root_cell.clone(), &|| Ok(()))?; + + // Count should not change + assert_eq!(boc_db.cell_db().count(), initial_count); + + drop(boc_db); + drop(db); + destroy_rocks_db(DB_PATH, DB_NAME).await.unwrap(); + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn test_archive_shared_cells() -> Result<()> { + init_test_log(); + + const DB_NAME: &str = "test_archive_shared_cells"; + destroy_rocks_db(DB_PATH, DB_NAME).await.unwrap(); + + let db = RocksDb::new(DB_PATH, DB_NAME, None, AccessType::ReadWrite)?; + let boc_db = Arc::new(DynamicBocArchiveDb::with_db( + db.clone(), + "cells", + "", + &CellsDbConfig::default(), + #[cfg(feature = "telemetry")] + Arc::new(StorageTelemetry::default()), + Arc::new(StorageAlloc::default()), + )?); + + // Create shared cells via CellsFactory + let cells_factory = boc_db.cell_db().clone() as Arc; + let create_chain = |data_values: Vec<&str>| -> ton_block::Cell { + let mut child = None; + let mut cell = ton_block::Cell::default(); + for data in data_values.iter().rev() { + let mut builder = BuilderData::new(); + let mut data = data.as_bytes().to_vec(); + data.push(0x80); + builder.append_bitstring(&data).unwrap(); + if let Some(child) = child { + builder.checked_append_reference(child).unwrap(); + } + cell = cells_factory.clone().create_cell(builder).unwrap(); + 
child = Some(cell.clone()); + } + cell + }; + + let r1 = create_chain(vec!["r1", "shared", "leaf"]); + boc_db.save_boc(r1.clone(), &|| Ok(()))?; + let count_after_r1 = boc_db.cell_db().count(); + + let r2 = create_chain(vec!["r2", "shared", "leaf"]); + boc_db.save_boc(r2.clone(), &|| Ok(()))?; + let count_after_r2 = boc_db.cell_db().count(); + + // r2 shares "shared" and "leaf" with r1, so only 1 new cell ("r2") should be added + assert_eq!(count_after_r2, count_after_r1 + 1); + + // Both roots should be loadable + let _ = boc_db.cell_db().load_cell(&r1.repr_hash(), false)?; + let _ = boc_db.cell_db().load_cell(&r2.repr_hash(), false)?; + + drop(cells_factory); + drop(boc_db); + drop(db); + destroy_rocks_db(DB_PATH, DB_NAME).await.unwrap(); + Ok(()) +} + +#[tokio::test(flavor = "multi_thread")] +async fn test_archive_shardstate_db() -> Result<()> { + init_test_log(); + + const DB_NAME: &str = "test_archive_shardstate_db"; + destroy_rocks_db(DB_PATH, DB_NAME).await.unwrap(); + + let db = RocksDb::new(DB_PATH, DB_NAME, None, AccessType::ReadWrite)?; + let ss_db = ArchiveShardStateDb::new( + db.clone(), + "shardstate_idx", + "cells", + "", + &CellsDbConfig::default(), + #[cfg(feature = "telemetry")] + Arc::new(StorageTelemetry::default()), + Arc::new(StorageAlloc::default()), + )?; + + let root_cell = get_test_tree_of_cells(); + let block_id = make_block_id(1); + + // Put + assert!(!ss_db.contains(&block_id)?); + ss_db.put(&block_id, root_cell.clone())?; + assert!(ss_db.contains(&block_id)?); + + // Get + let loaded = ss_db.get(&block_id)?; + assert_eq!(count_tree_unique_cells(loaded), count_tree_unique_cells(root_cell)); + + // Put idempotent + ss_db.put(&block_id, ton_block::Cell::default())?; // should return existing, not overwrite + let loaded2 = ss_db.get(&block_id)?; + assert_eq!(loaded2.repr_hash(), ss_db.get(&block_id)?.repr_hash()); + + drop(ss_db); + drop(db); + destroy_rocks_db(DB_PATH, DB_NAME).await.unwrap(); + Ok(()) +} + +#[tokio::test(flavor = 
"multi_thread")] +async fn test_archive_cell_by_hash_storage() -> Result<()> { + init_test_log(); + + const DB_NAME: &str = "test_archive_cell_by_hash_storage"; + destroy_rocks_db(DB_PATH, DB_NAME).await?; + + let db = RocksDb::new(DB_PATH, DB_NAME, None, AccessType::ReadWrite)?; + let boc_db = Arc::new(DynamicBocArchiveDb::with_db( + db.clone(), + "cells", + "", + &CellsDbConfig::default(), + #[cfg(feature = "telemetry")] + Arc::new(StorageTelemetry::default()), + Arc::new(StorageAlloc::default()), + )?); + + let data = std::fs::read( + "../../block/src/tests/data/6A3BD5B96ABEA186BFEE202B70D510C29F85E126A522B08C1DCAD39F92CF5C51.boc", + )?; + let root_cell = read_single_root_boc(&data)?; + + // Repack without hashes (same as test_cell_by_hash_storage in test_dynamic_boc_rc_db.rs) + fn repack(cell: ton_block::Cell) -> Result { + let mut builder = BuilderData::with_raw(cell.data(), cell.bit_length())?; + builder.set_type(cell.cell_type()); + for r in cell.clone_references() { + builder.checked_append_reference(repack(r)?)?; + } + builder.finalize(MAX_SAFE_DEPTH) + } + let root_cell = repack(root_cell)?; + + boc_db.save_boc(root_cell.clone(), &|| Ok(()))?; + + let writer = BigBocWriter::with_params( + [root_cell.clone()], + MAX_SAFE_DEPTH, + BocFlags::all(), + &|| false, + Arc::new(CellByHashStorageAdapter::new(boc_db.cell_db().clone(), None, 0)?), + )?; + + let mut boc = Vec::new(); + writer.write(&mut boc)?; + + assert_eq!(boc.len(), data.len()); + assert_eq!(boc, data); + + drop(boc_db); + drop(db); + destroy_rocks_db(DB_PATH, DB_NAME).await.unwrap(); + Ok(()) +} diff --git a/src/node/storage/src/tests/test_dynamic_boc_rc_db.rs b/src/node/storage/src/tests/test_dynamic_boc_rc_db.rs index d543db0..bdda250 100644 --- a/src/node/storage/src/tests/test_dynamic_boc_rc_db.rs +++ b/src/node/storage/src/tests/test_dynamic_boc_rc_db.rs @@ -11,8 +11,9 @@ #[cfg(feature = "telemetry")] use crate::StorageTelemetry; use crate::{ + cell_db::CellByHashStorageAdapter, 
db::rocksdb::{destroy_rocks_db, AccessType, RocksDb}, - dynamic_boc_rc_db::{CellByHashStorageAdapter, DynamicBocDb}, + dynamic_boc_rc_db::DynamicBocDb, shardstate_db_async::CellsDbConfig, tests::utils::{ count_tree_unique_cells, get_another_test_tree_of_cells, get_test_tree_of_cells, @@ -22,8 +23,8 @@ use crate::{ }; use std::sync::Arc; use ton_block::{ - read_single_root_boc, BigBocWriter, BocFlags, BuilderData, Cell, CellsFactory, IBitstring, - Result, MAX_SAFE_DEPTH, + read_single_root_boc, BigBocWriter, BocFlags, BuilderData, Cell, IBitstring, Result, + MAX_SAFE_DEPTH, }; const DB_PATH: &str = "../../target/test"; @@ -94,7 +95,7 @@ async fn test_dynamic_boc_rc_db_2() -> Result<()> { Arc::new(StorageAlloc::default()), )?); - let cells_factory = boc_db.clone() as Arc; + let cells_factory = boc_db.cells_factory(); let create_ss = |cells_chain: Vec<&str>| -> Cell { let mut child = None; let mut cell = Cell::default(); @@ -180,7 +181,7 @@ async fn test_cell_by_hash_storage() -> Result<()> { MAX_SAFE_DEPTH, BocFlags::all(), &|| false, - Arc::new(CellByHashStorageAdapter::new(boc_db.clone(), None, 0)?), + Arc::new(CellByHashStorageAdapter::new(boc_db.cell_db().clone(), None, 0)?), )?; let mut boc = Vec::new(); diff --git a/src/node/storage/src/tests/test_epoch.rs b/src/node/storage/src/tests/test_epoch.rs new file mode 100644 index 0000000..e65e734 --- /dev/null +++ b/src/node/storage/src/tests/test_epoch.rs @@ -0,0 +1,156 @@ +/* + * Copyright (C) 2025-2026 RSquad Blockchain Lab. + * + * Licensed under the GNU General Public License v3.0. + * See the LICENSE file in the root of this repository. + * + * This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND. 
+ */ +use super::*; + +#[tokio::test] +async fn test_epoch_router_validation() { + let dir = tempfile::tempdir().unwrap(); + + let config = ArchivalModeConfig { + epoch_size: 0, + new_epochs_path: dir.path().to_path_buf(), + existing_epochs: vec![], + }; + assert!(EpochRouter::new(&config).await.is_err()); + + let config = ArchivalModeConfig { + epoch_size: 10_000, // not a multiple of ARCHIVE_SLICE_SIZE + new_epochs_path: dir.path().to_path_buf(), + existing_epochs: vec![], + }; + assert!(EpochRouter::new(&config).await.is_err()); +} + +#[tokio::test] +async fn test_epoch_router_resolve_and_create() { + let dir = tempfile::tempdir().unwrap(); + let new_epochs_path = dir.path().join("new_epochs"); + + let config = ArchivalModeConfig { + epoch_size: 40_000, + new_epochs_path: new_epochs_path.clone(), + existing_epochs: vec![], + }; + + let router = EpochRouter::new(&config).await.unwrap(); + + // No epochs exist yet + assert!(router.resolve(0).is_none()); + assert!(router.resolve(39_999).is_none()); + + // Create epoch for mc_seq_no 0 + let epoch = router.resolve_or_create(0).await.unwrap(); + assert_eq!(epoch.mc_seq_no_start(), 0); + assert_eq!(epoch.mc_seq_no_end(), 39_999); + assert!(epoch.path().starts_with(&new_epochs_path)); + + // Resolve same epoch + let epoch2 = router.resolve(20_000).unwrap(); + assert_eq!(epoch2.mc_seq_no_start(), 0); + + // Create second epoch + let epoch3 = router.resolve_or_create(50_000).await.unwrap(); + assert_eq!(epoch3.mc_seq_no_start(), 40_000); + assert_eq!(epoch3.mc_seq_no_end(), 79_999); + + // Verify both exist + assert!(router.resolve(0).is_some()); + assert!(router.resolve(50_000).is_some()); + assert!(router.resolve(80_000).is_none()); +} + +#[tokio::test] +async fn test_epoch_router_with_existing_epochs() { + let dir = tempfile::tempdir().unwrap(); + let epoch0_path = dir.path().join("epoch_0"); + let epoch1_path = dir.path().join("epoch_1"); + let new_epochs_path = dir.path().join("new_epochs"); + + 
std::fs::create_dir_all(&epoch0_path).unwrap(); + std::fs::create_dir_all(&epoch1_path).unwrap(); + + // Write metadata for existing epochs + let meta0 = EpochMeta { mc_seq_no_start: 0, mc_seq_no_end: 39_999 }; + let meta1 = EpochMeta { mc_seq_no_start: 40_000, mc_seq_no_end: 79_999 }; + write_epoch_meta(&epoch0_path, &meta0).await.unwrap(); + write_epoch_meta(&epoch1_path, &meta1).await.unwrap(); + + let config = ArchivalModeConfig { + epoch_size: 40_000, + new_epochs_path, + existing_epochs: vec![EpochEntry { path: epoch0_path }, EpochEntry { path: epoch1_path }], + }; + + let router = EpochRouter::new(&config).await.unwrap(); + + let e0 = router.resolve(0).unwrap(); + assert_eq!(e0.mc_seq_no_start(), 0); + assert_eq!(e0.mc_seq_no_end(), 39_999); + + let e1 = router.resolve(40_000).unwrap(); + assert_eq!(e1.mc_seq_no_start(), 40_000); + assert_eq!(e1.mc_seq_no_end(), 79_999); + + assert!(router.resolve(80_000).is_none()); +} + +#[tokio::test] +async fn test_epoch_router_rejects_misaligned_existing() { + let dir = tempfile::tempdir().unwrap(); + let epoch_path = dir.path().join("bad_epoch"); + std::fs::create_dir_all(&epoch_path).unwrap(); + + // Epoch with wrong size (60_000 != 40_000) + let meta = EpochMeta { mc_seq_no_start: 0, mc_seq_no_end: 59_999 }; + write_epoch_meta(&epoch_path, &meta).await.unwrap(); + + let config = ArchivalModeConfig { + epoch_size: 40_000, + new_epochs_path: dir.path().join("new_epochs"), + existing_epochs: vec![EpochEntry { path: epoch_path }], + }; + + assert!(EpochRouter::new(&config).await.is_err()); +} + +#[tokio::test] +async fn test_epoch_router_discovers_on_restart() { + let dir = tempfile::tempdir().unwrap(); + let new_epochs_path = dir.path().join("new_epochs"); + + // First "run": create epochs dynamically + let config = ArchivalModeConfig { + epoch_size: 40_000, + new_epochs_path: new_epochs_path.clone(), + existing_epochs: vec![], + }; + let router = EpochRouter::new(&config).await.unwrap(); + 
router.resolve_or_create(0).await.unwrap(); + router.resolve_or_create(50_000).await.unwrap(); + assert!(router.resolve(0).is_some()); + assert!(router.resolve(50_000).is_some()); + drop(router); + + // Second "run": new router should discover epochs from new_epochs_path + let config2 = ArchivalModeConfig { + epoch_size: 40_000, + new_epochs_path: new_epochs_path.clone(), + existing_epochs: vec![], + }; + let router2 = EpochRouter::new(&config2).await.unwrap(); + let e0 = router2.resolve(0).unwrap(); + assert_eq!(e0.mc_seq_no_start(), 0); + assert_eq!(e0.mc_seq_no_end(), 39_999); + + let e1 = router2.resolve(50_000).unwrap(); + assert_eq!(e1.mc_seq_no_start(), 40_000); + assert_eq!(e1.mc_seq_no_end(), 79_999); + + assert!(router2.resolve(80_000).is_none()); +} diff --git a/src/node/storage/src/types/block_meta.rs b/src/node/storage/src/types/block_meta.rs index 945eb61..e95aece 100644 --- a/src/node/storage/src/types/block_meta.rs +++ b/src/node/storage/src/types/block_meta.rs @@ -25,6 +25,35 @@ pub struct BlockMeta { } impl BlockMeta { + /// Create BlockMeta for archive import with all necessary flags pre-set. 
+ pub fn for_import( + gen_utime: u32, + end_lt: u64, + masterchain_ref_seq_no: u32, + is_key_block: bool, + is_masterchain: bool, + has_prev2: bool, + ) -> Self { + let mut flags = block_handle_db::FLAG_DATA + | block_handle_db::FLAG_APPLIED + | block_handle_db::FLAG_STATE + | block_handle_db::FLAG_STATE_SAVED + | block_handle_db::FLAG_MOVED_TO_ARCHIVE + | block_handle_db::FLAG_PREV_1; + if has_prev2 { + flags |= block_handle_db::FLAG_PREV_2; + } + if is_masterchain { + flags |= block_handle_db::FLAG_PROOF; + } else { + flags |= block_handle_db::FLAG_PROOF_LINK; + } + if is_key_block { + flags |= block_handle_db::FLAG_KEY_BLOCK; + } + Self::with_data(flags, gen_utime, end_lt, masterchain_ref_seq_no, 0) + } + pub fn from_block(block: &Block) -> Result { let info = block.read_info()?; let flags = if info.key_block() { block_handle_db::FLAG_KEY_BLOCK } else { 0 }; diff --git a/src/node/storage/src/types/storage_cell.rs b/src/node/storage/src/types/storage_cell.rs index 9264a8b..7dbd352 100644 --- a/src/node/storage/src/types/storage_cell.rs +++ b/src/node/storage/src/types/storage_cell.rs @@ -8,7 +8,8 @@ * This file has been modified from its original version. * This software is provided "AS IS", WITHOUT WARRANTY OF ANY KIND. 
*/ -use crate::{dynamic_boc_rc_db::DynamicBocDb, TARGET}; +use crate::{cell_db::CellDb, TARGET}; +use smallvec::SmallVec; use std::{ io::Write, sync::{ @@ -17,9 +18,9 @@ use std::{ }, }; use ton_block::{ - calc_d1, cell_type, error, fail, full_len, hashes_count, level, level_mask, refs_count, - store_hashes, Cell, CellData, CellImpl, CellType, LevelMask, Result, UInt256, DEPTH_SIZE, - MAX_LEVEL, SHA256_SIZE, + append_tag, calc_d1, cell_type, error, fail, full_len, hashes_count, level, level_mask, + refs_count, store_hashes, Cell, CellData, CellImpl, CellType, LevelMask, Result, UInt256, + DEPTH_SIZE, MAX_LEVEL, SHA256_SIZE, }; #[cfg(test)] @@ -28,6 +29,9 @@ mod tests; const NOT_INITIALIZED_DEPTH: u16 = u16::MAX; +// Max raw data: d1(1) + d2(1) + hashes(32*3) + depths(2*4) + data(128) + ref_hashes(32*4) + ref_depths(2*4) +pub const STORED_CELL_MAX_RAW_LEN: usize = 1 + 1 + 32 * 3 + 2 * 4 + 128 + 32 * 4 + 2 * 4; + struct Reference { hash: UInt256, depth: u16, @@ -37,7 +41,7 @@ struct Reference { pub struct StoredCell { cell_data: CellData, references: parking_lot::RwLock>, - boc_db: Weak, + boc_db: Weak, } static STORED_CELL_COUNT: AtomicU64 = AtomicU64::new(0); @@ -62,11 +66,7 @@ impl<'a> SliceReader<'a> { /// Represents Cell for storing in persistent storage impl StoredCell { - pub fn deserialize( - boc_db: &Arc, - repr_hash: &UInt256, - data: &[u8], - ) -> Result { + pub fn deserialize(boc_db: &Arc, repr_hash: &UInt256, data: &[u8]) -> Result { if data.len() < 2 { fail!("Buffer is too small to read description bytes"); } @@ -221,9 +221,35 @@ impl StoredCell { } pub fn serialize(cell: &dyn CellImpl) -> Result> { - let store_hashes = cell.store_hashes(); + Self::serialize_internal(cell, cell.raw_data()?, cell.store_hashes()) + } + + pub fn serialize_virtual(cell: &dyn CellImpl) -> Result> { + if cell.is_pruned() && cell.level() == 0 { + fail!("Virtual pruned cell can't be serialized"); + } + + let mut data = SmallVec::from_slice(cell.data()); + if cell.bit_length() % 
8 == 0 { + append_tag(&mut data, cell.bit_length()); + }; + let data = CellData::with_params( + cell.cell_type(), + data.as_slice(), + cell.level_mask().mask(), + cell.references_count() as u8, + )?; + + Self::serialize_internal(cell, data.raw_data(), false) + } + + fn serialize_internal( + cell: &dyn CellImpl, + raw_data: &[u8], + store_hashes: bool, + ) -> Result> { let data_size = Self::calc_serialized_size( - cell.raw_data()?.len(), + raw_data.len(), store_hashes, cell.level(), cell.references_count(), @@ -231,7 +257,7 @@ impl StoredCell { ); let mut data = Vec::with_capacity(data_size); - data.extend_from_slice(cell.raw_data()?); + data.extend_from_slice(raw_data); if !store_hashes { if cell.cell_type() != CellType::PrunedBranch { @@ -258,7 +284,7 @@ impl StoredCell { pub fn with_cell_data( cell_data: CellData, refs: &[(UInt256, u16)], - boc_db: &Arc, + boc_db: &Arc, ) -> Result { if cell_data.references_count() != refs.len() { fail!("References count mismatch: {} != {}", cell_data.references_count(), refs.len()); @@ -298,7 +324,7 @@ impl PartialEq for StoredCell { pub struct StoringCell { cell_data: CellData, references: parking_lot::RwLock>, - boc_db: Weak, + boc_db: Weak, } impl PartialEq for StoringCell { @@ -308,7 +334,7 @@ impl PartialEq for StoringCell { } impl StoringCell { - pub fn with_cell(cell: &dyn CellImpl, boc_db: &Arc) -> Result { + pub fn with_cell(cell: &dyn CellImpl, boc_db: &Arc) -> Result { let references_count = cell.references_count(); let mut references = Vec::with_capacity(references_count); for i in 0..references_count { @@ -436,7 +462,7 @@ define_CellImpl!(StoringCell); fn reference( index: usize, references: &parking_lot::RwLock>, - boc_db: &Weak, + boc_db: &Weak, repr_hash: &dyn Fn() -> UInt256, ) -> Result> { let hash = { diff --git a/src/node/storage/src/types/tests/test_storage_cell.rs b/src/node/storage/src/types/tests/test_storage_cell.rs index 8767dfa..bb2c531 100644 --- a/src/node/storage/src/types/tests/test_storage_cell.rs 
+++ b/src/node/storage/src/types/tests/test_storage_cell.rs @@ -21,13 +21,12 @@ use ton_block::{create_cell, BuilderData, IBitstring}; const DB_PATH: &str = "../../target/test"; -async fn init_boc_db(db_name: &str) -> Result> { +async fn init_cell_db(db_name: &str) -> Result> { destroy_rocks_db(DB_PATH, db_name).await?; let db = RocksDb::new(DB_PATH, db_name, None, AccessType::ReadWrite)?; - Ok(Arc::new(DynamicBocDb::with_db( + Ok(Arc::new(CellDb::with_db( db.clone(), "cells", - "counters", DB_PATH, &CellsDbConfig::default(), #[cfg(feature = "telemetry")] @@ -38,7 +37,7 @@ async fn init_boc_db(db_name: &str) -> Result> { #[tokio::test] async fn test_storage_cell_serde() -> Result<()> { - let boc_db = init_boc_db("test_storage_cell_serde").await?; + let cell_db = init_cell_db("test_storage_cell_serde").await?; let c1 = create_cell(vec![], &[1, 2, 45, 76, 200])?; let c2 = create_cell(vec![], &[10, 200, 45, 7, 20])?; @@ -52,20 +51,20 @@ async fn test_storage_cell_serde() -> Result<()> { b.append_u16(47)?; let c4 = b.into_cell()?; - let s1 = StoringCell::with_cell(c1.cell_impl().deref(), &boc_db)?; - let s2 = StoringCell::with_cell(c2.cell_impl().deref(), &boc_db)?; - let s3 = StoringCell::with_cell(c3.cell_impl().deref(), &boc_db)?; - let s4 = StoringCell::with_cell(c4.cell_impl().deref(), &boc_db)?; + let s1 = StoringCell::with_cell(c1.cell_impl().deref(), &cell_db)?; + let s2 = StoringCell::with_cell(c2.cell_impl().deref(), &cell_db)?; + let s3 = StoringCell::with_cell(c3.cell_impl().deref(), &cell_db)?; + let s4 = StoringCell::with_cell(c4.cell_impl().deref(), &cell_db)?; let d1 = StoredCell::serialize(&s1)?; let d2 = StoredCell::serialize(&s2)?; let d3 = StoredCell::serialize(&s3)?; let d4 = StoredCell::serialize(&s4)?; - assert!(s1.cell_data == StoredCell::deserialize(&boc_db, &c1.repr_hash(), &d1)?.cell_data); - assert!(s2.cell_data == StoredCell::deserialize(&boc_db, &c2.repr_hash(), &d2)?.cell_data); - assert!(s3.cell_data == StoredCell::deserialize(&boc_db, 
&c3.repr_hash(), &d3)?.cell_data); - assert!(s4.cell_data == StoredCell::deserialize(&boc_db, &c4.repr_hash(), &d4)?.cell_data); + assert!(s1.cell_data == StoredCell::deserialize(&cell_db, &c1.repr_hash(), &d1)?.cell_data); + assert!(s2.cell_data == StoredCell::deserialize(&cell_db, &c2.repr_hash(), &d2)?.cell_data); + assert!(s3.cell_data == StoredCell::deserialize(&cell_db, &c3.repr_hash(), &d3)?.cell_data); + assert!(s4.cell_data == StoredCell::deserialize(&cell_db, &c4.repr_hash(), &d4)?.cell_data); Ok(()) }