diff --git a/zebra-state/src/constants.rs b/zebra-state/src/constants.rs index 166d93dd..85ae1e77 100644 --- a/zebra-state/src/constants.rs +++ b/zebra-state/src/constants.rs @@ -28,6 +28,17 @@ pub const DATABASE_FORMAT_VERSION: u32 = 25; /// Zebra usually only has to check back a few blocks, but on testnet it can be a long time between v5 transactions. pub const MAX_LEGACY_CHAIN_BLOCKS: usize = 100_000; +/// The maximum number of non-finalized chain forks Zebra will track. +/// When this limit is exceeded, we drop the chain with the lowest work. +/// +/// When the network is under heavy transaction load, there are around 5 active forks in the last +/// 100 blocks. (1 fork per 20 blocks.) When block propagation is efficient, there is around +/// 1 fork per 300 blocks. +/// +/// This limits non-finalized chain memory to around: +/// `10 forks * 100 blocks * 2 MB per block = 2 GB` +pub const MAX_NON_FINALIZED_CHAIN_FORKS: usize = 10; + /// The maximum number of block hashes allowed in `getblocks` responses in the Zcash network protocol. pub const MAX_FIND_BLOCK_HASHES_RESULTS: u32 = 500; diff --git a/zebra-state/src/service/check/tests/nullifier.rs b/zebra-state/src/service/check/tests/nullifier.rs index 904426d4..7f45e1d7 100644 --- a/zebra-state/src/service/check/tests/nullifier.rs +++ b/zebra-state/src/service/check/tests/nullifier.rs @@ -115,7 +115,7 @@ proptest! 
{ prop_assert!(!non_finalized_state.eq_internal_state(&previous_mem)); // the non-finalized state has the nullifiers - prop_assert_eq!(non_finalized_state.chain_set.len(), 1); + prop_assert_eq!(non_finalized_state.chain_count(), 1); prop_assert!(non_finalized_state .best_contains_sprout_nullifier(&expected_nullifiers[0])); prop_assert!(non_finalized_state diff --git a/zebra-state/src/service/check/tests/utxo.rs b/zebra-state/src/service/check/tests/utxo.rs index 6d14b8a1..364bb0dc 100644 --- a/zebra-state/src/service/check/tests/utxo.rs +++ b/zebra-state/src/service/check/tests/utxo.rs @@ -207,10 +207,9 @@ proptest! { prop_assert!(!non_finalized_state.eq_internal_state(&previous_non_finalized_state)); // the non-finalized state has created and spent the UTXO - prop_assert_eq!(non_finalized_state.chain_set.len(), 1); + prop_assert_eq!(non_finalized_state.chain_count(), 1); let chain = non_finalized_state - .chain_set - .iter() + .chain_iter() .next() .unwrap(); prop_assert!(!chain.unspent_utxos().contains_key(&expected_outpoint)); @@ -294,10 +293,9 @@ proptest! { prop_assert!(!non_finalized_state.eq_internal_state(&previous_non_finalized_state)); // the UTXO is spent - prop_assert_eq!(non_finalized_state.chain_set.len(), 1); + prop_assert_eq!(non_finalized_state.chain_count(), 1); let chain = non_finalized_state - .chain_set - .iter() + .chain_iter() .next() .unwrap(); prop_assert!(!chain.unspent_utxos().contains_key(&expected_outpoint)); @@ -448,11 +446,10 @@ proptest! { // the finalized state has the UTXO prop_assert!(finalized_state.utxo(&expected_outpoint).is_some()); // the non-finalized state has no chains (so it can't have the UTXO) - prop_assert!(non_finalized_state.chain_set.iter().next().is_none()); + prop_assert!(non_finalized_state.chain_iter().next().is_none()); } else { let chain = non_finalized_state - .chain_set - .iter() + .chain_iter() .next() .unwrap(); // the non-finalized state has the UTXO @@ -534,11 +531,10 @@ proptest! 
{ // the finalized state has the UTXO prop_assert!(finalized_state.utxo(&expected_outpoint).is_some()); // the non-finalized state has no chains (so it can't have the UTXO) - prop_assert!(non_finalized_state.chain_set.iter().next().is_none()); + prop_assert!(non_finalized_state.chain_iter().next().is_none()); } else { let chain = non_finalized_state - .chain_set - .iter() + .chain_iter() .next() .unwrap(); // the non-finalized state has the UTXO @@ -637,10 +633,9 @@ proptest! { // the block data is in the non-finalized state prop_assert!(!non_finalized_state.eq_internal_state(&previous_non_finalized_state)); - prop_assert_eq!(non_finalized_state.chain_set.len(), 1); + prop_assert_eq!(non_finalized_state.chain_count(), 1); let chain = non_finalized_state - .chain_set - .iter() + .chain_iter() .next() .unwrap(); @@ -926,13 +921,12 @@ fn new_state_with_mainnet_transparent_data( // the block data is in the non-finalized state assert!(!non_finalized_state.eq_internal_state(&previous_non_finalized_state)); - assert_eq!(non_finalized_state.chain_set.len(), 1); + assert_eq!(non_finalized_state.chain_count(), 1); for expected_outpoint in expected_outpoints { // the non-finalized state has the unspent UTXOs assert!(non_finalized_state - .chain_set - .iter() + .chain_iter() .next() .unwrap() .unspent_utxos() diff --git a/zebra-state/src/service/non_finalized_state.rs b/zebra-state/src/service/non_finalized_state.rs index 27bc7a15..ad775c16 100644 --- a/zebra-state/src/service/non_finalized_state.rs +++ b/zebra-state/src/service/non_finalized_state.rs @@ -15,6 +15,7 @@ use zebra_chain::{ }; use crate::{ + constants::MAX_NON_FINALIZED_CHAIN_FORKS, request::{ContextuallyValidBlock, FinalizedWithTrees}, service::{check, finalized_state::ZebraDb}, PreparedBlock, ValidateContextError, @@ -38,8 +39,9 @@ pub(crate) use chain::Chain; pub struct NonFinalizedState { /// Verified, non-finalized chains, in ascending order. 
/// - /// The best chain is `chain_set.last()` or `chain_set.iter().next_back()`. - pub chain_set: BTreeSet>, + /// The best chain is `chain_iter().next()`. + /// Using `chain_set.last()` or `chain_set.iter().next_back()` is deprecated; callers should migrate to `chain_iter().next()`. + chain_set: BTreeSet>, /// The configured Zcash network. pub network: Network, @@ -86,6 +88,34 @@ impl NonFinalizedState { && self.network == other.network } + /// Returns an iterator over the non-finalized chains, with the best chain first. + // + // TODO: replace remaining uses of `chain_set.iter().rev()` with this method + pub fn chain_iter(&self) -> impl Iterator> { + self.chain_set.iter().rev() + } + + /// Insert `chain` into `self.chain_set`, apply `chain_filter` to the chains, + /// then limit the number of tracked chains to `MAX_NON_FINALIZED_CHAIN_FORKS`. + fn insert_with(&mut self, chain: Arc, chain_filter: F) + where + F: FnOnce(&mut BTreeSet>), + { + self.chain_set.insert(chain); + + chain_filter(&mut self.chain_set); + + while self.chain_set.len() > MAX_NON_FINALIZED_CHAIN_FORKS { + // The first chain is the chain with the lowest work. + self.chain_set.pop_first(); + } + } + + /// Insert `chain` into `self.chain_set`, then limit the number of tracked chains to `MAX_NON_FINALIZED_CHAIN_FORKS`. + fn insert(&mut self, chain: Arc) { + self.insert_with(chain, |_ignored_chain| { /* no filter */ }) + } + /// Finalize the lowest height block in the non-finalized portion of the best /// chain and update all side-chains to match. 
pub fn finalize(&mut self) -> FinalizedWithTrees { @@ -111,7 +141,7 @@ impl NonFinalizedState { // add best_chain back to `self.chain_set` if !best_chain.is_empty() { - self.chain_set.insert(best_chain); + self.insert(best_chain); } // for each remaining chain in side_chains @@ -134,7 +164,7 @@ impl NonFinalizedState { assert_eq!(side_chain_root.hash, best_chain_root.hash); // add the chain back to `self.chain_set` - self.chain_set.insert(side_chain); + self.insert(side_chain); } self.update_metrics_for_chains(); @@ -165,9 +195,9 @@ impl NonFinalizedState { // - add the new chain fork or updated chain to the set of recent chains // - remove the parent chain, if it was in the chain set // (if it was a newly created fork, it won't be in the chain set) - self.chain_set.insert(modified_chain); - self.chain_set - .retain(|chain| chain.non_finalized_tip_hash() != parent_hash); + self.insert_with(modified_chain, |chain_set| { + chain_set.retain(|chain| chain.non_finalized_tip_hash() != parent_hash) + }); self.update_metrics_for_committed_block(height, hash); @@ -206,7 +236,7 @@ impl NonFinalizedState { let chain = self.validate_and_commit(Arc::new(chain), prepared, finalized_state)?; // If the block is valid, add the new chain fork to the set of recent chains. - self.chain_set.insert(chain); + self.insert(chain); self.update_metrics_for_committed_block(height, hash); Ok(()) @@ -458,10 +488,15 @@ impl NonFinalizedState { } /// Return the non-finalized portion of the current best chain. - pub(crate) fn best_chain(&self) -> Option<&Arc> { + pub fn best_chain(&self) -> Option<&Arc> { self.chain_set.iter().next_back() } + /// Return the number of chains. + pub fn chain_count(&self) -> usize { + self.chain_set.len() + } + /// Return the chain whose tip block hash is `parent_hash`. 
/// /// The chain can be an existing chain in the non-finalized state, or a freshly diff --git a/zebra-state/src/service/read/find.rs b/zebra-state/src/service/read/find.rs index 78f9121d..3e1c4996 100644 --- a/zebra-state/src/service/read/find.rs +++ b/zebra-state/src/service/read/find.rs @@ -107,7 +107,7 @@ pub fn non_finalized_state_contains_block_hash( non_finalized_state: &NonFinalizedState, hash: block::Hash, ) -> Option { - let mut chains_iter = non_finalized_state.chain_set.iter().rev(); + let mut chains_iter = non_finalized_state.chain_iter(); let is_hash_in_chain = |chain: &Arc| chain.contains_block_hash(&hash); // Equivalent to `chain_set.iter().next_back()` in `NonFinalizedState.best_chain()` method. diff --git a/zebra-state/src/service/write.rs b/zebra-state/src/service/write.rs index ad04dd07..ab7b466c 100644 --- a/zebra-state/src/service/write.rs +++ b/zebra-state/src/service/write.rs @@ -43,7 +43,7 @@ const PARENT_ERROR_MAP_LIMIT: usize = MAX_BLOCK_REORG_HEIGHT as usize * 2; fields( height = ?prepared.height, hash = %prepared.hash, - chains = non_finalized_state.chain_set.len() + chains = non_finalized_state.chain_count() ) )] pub(crate) fn validate_and_commit_non_finalized( @@ -82,7 +82,7 @@ pub(crate) fn validate_and_commit_non_finalized( non_finalized_state_sender, last_zebra_mined_log_height ), - fields(chains = non_finalized_state.chain_set.len()) + fields(chains = non_finalized_state.chain_count()) )] fn update_latest_chain_channels( non_finalized_state: &NonFinalizedState,