diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dad99b43..80af5e0c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -37,6 +37,14 @@ jobs: with: command: test args: --verbose --all + # Explicitly run any tests that are usually #[ignored] + - name: Run zebrad large sync tests + env: + RUST_BACKTRACE: full + uses: actions-rs/cargo@v1 + with: + command: test + args: --verbose --manifest-path zebrad/Cargo.toml -- --ignored build-chain-no-features: name: Build zebra-chain w/o features on ubuntu-latest diff --git a/zebra-state/src/config.rs b/zebra-state/src/config.rs index 584063f7..a0c67a35 100644 --- a/zebra-state/src/config.rs +++ b/zebra-state/src/config.rs @@ -38,6 +38,11 @@ pub struct Config { /// /// [`cache_dir`]: struct.Config.html#structfield.cache_dir pub ephemeral: bool, + + /// Commit blocks to the finalized state up to this height, then exit Zebra. + /// + /// If `None`, continue syncing indefinitely. + pub debug_stop_at_height: Option, } impl Config { @@ -79,6 +84,7 @@ impl Default for Config { cache_dir, memory_cache_bytes: 512 * 1024 * 1024, ephemeral: false, + debug_stop_at_height: None, } } } diff --git a/zebra-state/src/sled_state.rs b/zebra-state/src/sled_state.rs index f5baa9d4..dd2a4207 100644 --- a/zebra-state/src/sled_state.rs +++ b/zebra-state/src/sled_state.rs @@ -44,6 +44,8 @@ pub struct FinalizedState { // sapling_nullifiers: sled::Tree, // sprout_anchors: sled::Tree, // sapling_anchors: sled::Tree, + /// Commit blocks to the finalized state up to this height, then exit Zebra. + debug_stop_at_height: Option, } /// Helper trait for inserting (Key, Value) pairs into sled when both the key and @@ -116,11 +118,20 @@ impl SledDeserialize for sled::Tree { } } +/// Where is the stop check being performed? 
+#[derive(Debug, Copy, Clone, Eq, PartialEq)] +enum StopCheckContext { + /// Checking when the state is loaded + OnLoad, + /// Checking when a block is committed + OnCommit, +} + impl FinalizedState { pub fn new(config: &Config, network: Network) -> Self { let db = config.sled_config(network).open().unwrap(); - Self { + let new_state = Self { queued_by_prev_hash: HashMap::new(), hash_by_height: db.open_tree(b"hash_by_height").unwrap(), height_by_hash: db.open_tree(b"height_by_hash").unwrap(), @@ -129,7 +140,97 @@ impl FinalizedState { utxo_by_outpoint: db.open_tree(b"utxo_by_outpoint").unwrap(), // sprout_nullifiers: db.open_tree(b"sprout_nullifiers").unwrap(), // sapling_nullifiers: db.open_tree(b"sapling_nullifiers").unwrap(), + debug_stop_at_height: config.debug_stop_at_height.map(block::Height), + }; + + if let Some(tip_height) = new_state.finalized_tip_height() { + new_state.stop_if_at_height_limit( + StopCheckContext::OnLoad, + tip_height, + new_state.finalized_tip_hash(), + ); } + + new_state + } + + /// Synchronously flushes all dirty IO buffers and calls fsync. + /// + /// Returns the number of bytes flushed during this call. + /// See sled's `Tree.flush` for more details. + pub fn flush(&self) -> sled::Result { + let mut total_flushed = 0; + + total_flushed += self.hash_by_height.flush()?; + total_flushed += self.height_by_hash.flush()?; + total_flushed += self.block_by_height.flush()?; + // total_flushed += self.tx_by_hash.flush()?; + total_flushed += self.utxo_by_outpoint.flush()?; + // total_flushed += self.sprout_nullifiers.flush()?; + // total_flushed += self.sapling_nullifiers.flush()?; + + Ok(total_flushed) + } + + /// If `block_height` is greater than or equal to the configured stop height, + /// stop the process. + /// + /// Flushes sled trees before exiting. + /// + /// `called_from` and `block_hash` are used for assertions and logging. 
+ fn stop_if_at_height_limit( + &self, + called_from: StopCheckContext, + block_height: block::Height, + block_hash: block::Hash, + ) { + let debug_stop_at_height = match self.debug_stop_at_height { + Some(debug_stop_at_height) => debug_stop_at_height, + None => return, + }; + + if block_height < debug_stop_at_height { + return; + } + + // this error is expected on load, but unexpected on commit + if block_height > debug_stop_at_height { + if called_from == StopCheckContext::OnLoad { + tracing::error!( + ?debug_stop_at_height, + ?called_from, + ?block_height, + ?block_hash, + "previous state height is greater than the stop height", + ); + } else { + unreachable!("committed blocks must be committed in order"); + } + } + + // Don't sync when the trees have just been opened + if called_from == StopCheckContext::OnCommit { + if let Err(e) = self.flush() { + tracing::error!( + ?e, + ?debug_stop_at_height, + ?called_from, + ?block_height, + ?block_hash, + "error flushing sled state before stopping" + ); + } + } + + tracing::info!( + ?debug_stop_at_height, + ?called_from, + ?block_height, + ?block_hash, + "stopping at configured height" + ); + + std::process::exit(0); } /// Queue a finalized block to be committed to the state. @@ -184,7 +285,7 @@ impl FinalizedState { trace!(?height, "Finalized block"); - ( + let result = ( &self.hash_by_height, &self.height_by_hash, &self.block_by_height, @@ -222,8 +323,13 @@ impl FinalizedState { // for some reason type inference fails here Ok::<_, sled::transaction::ConflictableTransactionError>(hash) }, - ) - .map_err(Into::into) + ); + + if result.is_ok() { + self.stop_if_at_height_limit(StopCheckContext::OnCommit, height, hash); + } + + result.map_err(Into::into) } /// Commit a finalized block to the state. 
diff --git a/zebra-test/src/command.rs b/zebra-test/src/command.rs index 0323e454..6b613f7e 100644 --- a/zebra-test/src/command.rs +++ b/zebra-test/src/command.rs @@ -149,7 +149,7 @@ impl TestStatus { #[derive(Debug)] pub struct TestChild { - dir: T, + pub dir: T, pub cmd: String, pub child: Child, pub stdout: Option>>, @@ -239,7 +239,8 @@ impl TestChild { } let report = eyre!("stdout of command did not contain any matches for the given regex") - .context_from(self); + .context_from(self) + .with_section(|| format!("{:?}", regex).header("Match Regex:")); Err(report) } @@ -296,6 +297,7 @@ impl TestOutput { "stdout of command did not contain any matches for the given regex" )) .context_from(self) + .with_section(|| format!("{:?}", regex).header("Match Regex:")) } #[instrument(skip(self))] @@ -306,7 +308,9 @@ impl TestOutput { return Ok(self); } - Err(eyre!("stdout of command is not equal the given string")).context_from(self) + Err(eyre!("stdout of command is not equal the given string")) + .context_from(self) + .with_section(|| format!("{:?}", s).header("Match String:")) } #[instrument(skip(self))] @@ -318,7 +322,9 @@ impl TestOutput { return Ok(self); } - Err(eyre!("stdout of command is not equal to the given regex")).context_from(self) + Err(eyre!("stdout of command is not equal to the given regex")) + .context_from(self) + .with_section(|| format!("{:?}", regex).header("Match Regex:")) } /// Returns Ok if the program was killed, Err(Report) if exit was by another diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index 36897e0f..5d4d051f 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -13,6 +13,7 @@ #![warn(warnings, missing_docs, trivial_casts, unused_qualifications)] #![forbid(unsafe_code)] +#![allow(clippy::try_err)] use color_eyre::eyre::Result; use eyre::WrapErr; @@ -20,7 +21,10 @@ use tempdir::TempDir; use std::{borrow::Borrow, env, fs, io::Write, time::Duration}; -use zebra_chain::parameters::Network::{self, 
*}; +use zebra_chain::{ + block::Height, + parameters::Network::{self, *}, +}; use zebra_test::{command::TestDirExt, prelude::*}; use zebrad::config::ZebradConfig; @@ -57,6 +61,10 @@ where /// Add the given config to the test directory and use it for all /// subsequently spawned processes. fn with_config(self, config: ZebradConfig) -> Result; + + /// Overwrite any existing config in the test directory and use it for all + /// subsequently spawned processes. + fn replace_config(self, config: ZebradConfig) -> Result; } impl ZebradTestDirExt for T @@ -97,6 +105,31 @@ where Ok(self) } + + fn replace_config(self, mut config: ZebradConfig) -> Result { + let dir = self.borrow().path(); + + if !config.state.ephemeral { + let cache_dir = dir.join("state"); + + // Create dir, ignoring existing directories + match fs::create_dir(&cache_dir) { + Ok(_) => {} + Err(e) if (e.kind() == std::io::ErrorKind::AlreadyExists) => {} + Err(e) => Err(e)?, + }; + + config.state.cache_dir = cache_dir; + } + + let config_file = dir.join("zebrad.toml"); + + // Remove any existing config before writing a new one + let _ = fs::remove_file(config_file.clone()); + fs::File::create(config_file)?.write_all(toml::to_string(&config)?.as_bytes())?; + + Ok(self) + } } #[test] @@ -440,16 +473,29 @@ fn valid_generated_config(command: &str, expected_output: &str) -> Result<()> { Ok(()) } +const LARGE_CHECKPOINT_TEST_HEIGHT: Height = + Height((zebra_consensus::MAX_CHECKPOINT_HEIGHT_GAP * 2) as u32); + +const STOP_AT_HEIGHT_REGEX: &str = "stopping at configured height"; + +const STOP_ON_LOAD_TIMEOUT: Duration = Duration::from_secs(5); +// usually it's much shorter than this +const SMALL_CHECKPOINT_TIMEOUT: Duration = Duration::from_secs(30); +const LARGE_CHECKPOINT_TIMEOUT: Duration = Duration::from_secs(180); + /// Test if `zebrad` can sync the first checkpoint on mainnet. /// /// The first checkpoint contains a single genesis block. 
#[test] fn sync_one_checkpoint_mainnet() -> Result<()> { sync_until( - "verified checkpoint range", + Height(0), Mainnet, - Duration::from_secs(20), + STOP_AT_HEIGHT_REGEX, + SMALL_CHECKPOINT_TIMEOUT, + None, ) + .map(|_tempdir| ()) } /// Test if `zebrad` can sync the first checkpoint on testnet. @@ -458,73 +504,131 @@ fn sync_one_checkpoint_mainnet() -> Result<()> { #[test] fn sync_one_checkpoint_testnet() -> Result<()> { sync_until( - "verified checkpoint range", + Height(0), Testnet, - Duration::from_secs(20), + STOP_AT_HEIGHT_REGEX, + SMALL_CHECKPOINT_TIMEOUT, + None, ) + .map(|_tempdir| ()) } -/// Test if `zebrad` can sync the second checkpoint on mainnet. +/// Test if `zebrad` can sync the first checkpoint, restart, and stop on load. +#[test] +fn restart_stop_at_height() -> Result<()> { + let reuse_tempdir = sync_until( + Height(0), + Mainnet, + STOP_AT_HEIGHT_REGEX, + SMALL_CHECKPOINT_TIMEOUT, + None, + )?; + // if stopping corrupts the sled database, zebrad might hang here + // if stopping does not sync the sled database, the logs will contain OnCommit + sync_until( + Height(0), + Mainnet, + "called_from=OnLoad", + STOP_ON_LOAD_TIMEOUT, + Some(reuse_tempdir), + )?; + + Ok(()) +} + +/// Test if `zebrad` can sync some larger checkpoints on mainnet. /// -/// The second checkpoint contains a large number of blocks. /// This test might fail or timeout on slow or unreliable networks, /// so we don't run it by default. It also takes a lot longer than /// our 10 second target time for default tests. 
#[test] #[ignore] -fn sync_two_checkpoints_mainnet() -> Result<()> { - sync_until( - "verified checkpoint range block_count=2000", +fn sync_large_checkpoints_mainnet() -> Result<()> { + let reuse_tempdir = sync_until( + LARGE_CHECKPOINT_TEST_HEIGHT, Mainnet, - Duration::from_secs(120), - ) + STOP_AT_HEIGHT_REGEX, + LARGE_CHECKPOINT_TIMEOUT, + None, + )?; + // if this sync fails, see the failure notes in `restart_stop_at_height` + sync_until( + (LARGE_CHECKPOINT_TEST_HEIGHT - 1).unwrap(), + Mainnet, + "previous state height is greater than the stop height", + STOP_ON_LOAD_TIMEOUT, + Some(reuse_tempdir), + )?; + + Ok(()) } -/// Test if `zebrad` can sync the second checkpoint on testnet. +/// Test if `zebrad` can sync some larger checkpoints on testnet. /// -/// This test does not run by default, see `sync_two_checkpoints_mainnet` +/// This test does not run by default, see `sync_large_checkpoints_mainnet` /// for details. #[test] #[ignore] -fn sync_two_checkpoints_testnet() -> Result<()> { +fn sync_large_checkpoints_testnet() -> Result<()> { sync_until( - "verified checkpoint range block_count=2000", + LARGE_CHECKPOINT_TEST_HEIGHT, Testnet, - Duration::from_secs(120), + STOP_AT_HEIGHT_REGEX, + LARGE_CHECKPOINT_TIMEOUT, + None, ) + .map(|_tempdir| ()) } -/// Sync `network` until `zebrad` outputs `regex`. -/// Returns an error if `timeout` elapses before `regex` is output. +/// Sync `network` until `zebrad` reaches `height`, and ensure that +/// the output contains `stop_regex`. If `reuse_tempdir` is supplied, +/// use it as the test's temporary directory. +/// +/// If `stop_regex` is encountered before the process exits, kills the +/// process, and marks the test as successful, even if `height` has not +/// been reached. +/// +/// On success, returns the associated `TempDir`. Returns an error if +/// the child exits or `timeout` elapses before `stop_regex` is found. 
/// /// If your test environment does not have network access, skip /// this test by setting the `ZEBRA_SKIP_NETWORK_TESTS` env var. -fn sync_until(regex: &str, network: Network, timeout: Duration) -> Result<()> { +fn sync_until( + height: Height, + network: Network, + stop_regex: &str, + timeout: Duration, + reuse_tempdir: Option, +) -> Result { zebra_test::init(); if env::var_os("ZEBRA_SKIP_NETWORK_TESTS").is_some() { // This message is captured by the test runner, use // `cargo test -- --nocapture` to see it. eprintln!("Skipping network test because '$ZEBRA_SKIP_NETWORK_TESTS' is set."); - return Ok(()); + return Ok(testdir()?); } // Use a persistent state, so we can handle large syncs let mut config = persistent_test_config()?; - // TODO: add a convenience method? + // TODO: add convenience methods? config.network.network = network; + config.state.debug_stop_at_height = Some(height.0); - let mut child = testdir()? - .with_config(config)? - .spawn_child(&["start"])? - .with_timeout(timeout); + let tempdir = if let Some(reuse_tempdir) = reuse_tempdir { + reuse_tempdir.replace_config(config)? + } else { + testdir()?.with_config(config)? + }; + + let mut child = tempdir.spawn_child(&["start"])?.with_timeout(timeout); // TODO: is there a way to check for testnet or mainnet here? // For example: "network=Mainnet" or "network=Testnet" - child.expect_stdout(regex)?; + child.expect_stdout(stop_regex)?; child.kill()?; - Ok(()) + Ok(child.dir) } #[tokio::test]