Run a block sync in CI with 2 large checkpoints (#1193)
* Run large checkpoint sync tests in CI * Improve test child output match error context * Add a debug_stop_at_height config * Use stop at height in acceptance tests And add some restart acceptance tests, to make sure the stop at height feature works correctly.
This commit is contained in:
parent
83c844abb5
commit
ea510b7d41
|
|
@ -37,6 +37,14 @@ jobs:
|
|||
with:
|
||||
command: test
|
||||
args: --verbose --all
|
||||
# Explicitly run any tests that are usually #[ignored]
|
||||
- name: Run zebrad large sync tests
|
||||
env:
|
||||
RUST_BACKTRACE: full
|
||||
uses: actions-rs/cargo@v1
|
||||
with:
|
||||
command: test
|
||||
args: --verbose --manifest-path zebrad/Cargo.toml -- --ignored
|
||||
|
||||
build-chain-no-features:
|
||||
name: Build zebra-chain w/o features on ubuntu-latest
|
||||
|
|
|
|||
|
|
@ -38,6 +38,11 @@ pub struct Config {
|
|||
///
|
||||
/// [`cache_dir`]: struct.Config.html#structfield.cache_dir
|
||||
pub ephemeral: bool,
|
||||
|
||||
/// Commit blocks to the finalized state up to this height, then exit Zebra.
|
||||
///
|
||||
/// If `None`, continue syncing indefinitely.
|
||||
pub debug_stop_at_height: Option<u32>,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
|
|
@ -79,6 +84,7 @@ impl Default for Config {
|
|||
cache_dir,
|
||||
memory_cache_bytes: 512 * 1024 * 1024,
|
||||
ephemeral: false,
|
||||
debug_stop_at_height: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -44,6 +44,8 @@ pub struct FinalizedState {
|
|||
// sapling_nullifiers: sled::Tree,
|
||||
// sprout_anchors: sled::Tree,
|
||||
// sapling_anchors: sled::Tree,
|
||||
/// Commit blocks to the finalized state up to this height, then exit Zebra.
|
||||
debug_stop_at_height: Option<block::Height>,
|
||||
}
|
||||
|
||||
/// Helper trait for inserting (Key, Value) pairs into sled when both the key and
|
||||
|
|
@ -116,11 +118,20 @@ impl SledDeserialize for sled::Tree {
|
|||
}
|
||||
}
|
||||
|
||||
/// Where is the stop check being performed?
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
||||
enum StopCheckContext {
|
||||
/// Checking when the state is loaded
|
||||
OnLoad,
|
||||
/// Checking when a block is committed
|
||||
OnCommit,
|
||||
}
|
||||
|
||||
impl FinalizedState {
|
||||
pub fn new(config: &Config, network: Network) -> Self {
|
||||
let db = config.sled_config(network).open().unwrap();
|
||||
|
||||
Self {
|
||||
let new_state = Self {
|
||||
queued_by_prev_hash: HashMap::new(),
|
||||
hash_by_height: db.open_tree(b"hash_by_height").unwrap(),
|
||||
height_by_hash: db.open_tree(b"height_by_hash").unwrap(),
|
||||
|
|
@ -129,7 +140,97 @@ impl FinalizedState {
|
|||
utxo_by_outpoint: db.open_tree(b"utxo_by_outpoint").unwrap(),
|
||||
// sprout_nullifiers: db.open_tree(b"sprout_nullifiers").unwrap(),
|
||||
// sapling_nullifiers: db.open_tree(b"sapling_nullifiers").unwrap(),
|
||||
debug_stop_at_height: config.debug_stop_at_height.map(block::Height),
|
||||
};
|
||||
|
||||
if let Some(tip_height) = new_state.finalized_tip_height() {
|
||||
new_state.stop_if_at_height_limit(
|
||||
StopCheckContext::OnLoad,
|
||||
tip_height,
|
||||
new_state.finalized_tip_hash(),
|
||||
);
|
||||
}
|
||||
|
||||
new_state
|
||||
}
|
||||
|
||||
/// Synchronously flushes all dirty IO buffers and calls fsync.
|
||||
///
|
||||
/// Returns the number of bytes flushed during this call.
|
||||
/// See sled's `Tree.flush` for more details.
|
||||
pub fn flush(&self) -> sled::Result<usize> {
|
||||
let mut total_flushed = 0;
|
||||
|
||||
total_flushed += self.hash_by_height.flush()?;
|
||||
total_flushed += self.height_by_hash.flush()?;
|
||||
total_flushed += self.block_by_height.flush()?;
|
||||
// total_flushed += self.tx_by_hash.flush()?;
|
||||
total_flushed += self.utxo_by_outpoint.flush()?;
|
||||
// total_flushed += self.sprout_nullifiers.flush()?;
|
||||
// total_flushed += self.sapling_nullifiers.flush()?;
|
||||
|
||||
Ok(total_flushed)
|
||||
}
|
||||
|
||||
/// If `block_height` is greater than or equal to the configured stop height,
|
||||
/// stop the process.
|
||||
///
|
||||
/// Flushes sled trees before exiting.
|
||||
///
|
||||
/// `called_from` and `block_hash` are used for assertions and logging.
|
||||
fn stop_if_at_height_limit(
|
||||
&self,
|
||||
called_from: StopCheckContext,
|
||||
block_height: block::Height,
|
||||
block_hash: block::Hash,
|
||||
) {
|
||||
let debug_stop_at_height = match self.debug_stop_at_height {
|
||||
Some(debug_stop_at_height) => debug_stop_at_height,
|
||||
None => return,
|
||||
};
|
||||
|
||||
if block_height < debug_stop_at_height {
|
||||
return;
|
||||
}
|
||||
|
||||
// this error is expected on load, but unexpected on commit
|
||||
if block_height > debug_stop_at_height {
|
||||
if called_from == StopCheckContext::OnLoad {
|
||||
tracing::error!(
|
||||
?debug_stop_at_height,
|
||||
?called_from,
|
||||
?block_height,
|
||||
?block_hash,
|
||||
"previous state height is greater than the stop height",
|
||||
);
|
||||
} else {
|
||||
unreachable!("committed blocks must be committed in order");
|
||||
}
|
||||
}
|
||||
|
||||
// Don't sync when the trees have just been opened
|
||||
if called_from == StopCheckContext::OnCommit {
|
||||
if let Err(e) = self.flush() {
|
||||
tracing::error!(
|
||||
?e,
|
||||
?debug_stop_at_height,
|
||||
?called_from,
|
||||
?block_height,
|
||||
?block_hash,
|
||||
"error flushing sled state before stopping"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
tracing::info!(
|
||||
?debug_stop_at_height,
|
||||
?called_from,
|
||||
?block_height,
|
||||
?block_hash,
|
||||
"stopping at configured height"
|
||||
);
|
||||
|
||||
std::process::exit(0);
|
||||
}
|
||||
|
||||
/// Queue a finalized block to be committed to the state.
|
||||
|
|
@ -184,7 +285,7 @@ impl FinalizedState {
|
|||
|
||||
trace!(?height, "Finalized block");
|
||||
|
||||
(
|
||||
let result = (
|
||||
&self.hash_by_height,
|
||||
&self.height_by_hash,
|
||||
&self.block_by_height,
|
||||
|
|
@ -222,8 +323,13 @@ impl FinalizedState {
|
|||
// for some reason type inference fails here
|
||||
Ok::<_, sled::transaction::ConflictableTransactionError>(hash)
|
||||
},
|
||||
)
|
||||
.map_err(Into::into)
|
||||
);
|
||||
|
||||
if result.is_ok() {
|
||||
self.stop_if_at_height_limit(StopCheckContext::OnCommit, height, hash);
|
||||
}
|
||||
|
||||
result.map_err(Into::into)
|
||||
}
|
||||
|
||||
/// Commit a finalized block to the state.
|
||||
|
|
|
|||
|
|
@ -149,7 +149,7 @@ impl TestStatus {
|
|||
|
||||
#[derive(Debug)]
|
||||
pub struct TestChild<T> {
|
||||
dir: T,
|
||||
pub dir: T,
|
||||
pub cmd: String,
|
||||
pub child: Child,
|
||||
pub stdout: Option<Lines<BufReader<ChildStdout>>>,
|
||||
|
|
@ -239,7 +239,8 @@ impl<T> TestChild<T> {
|
|||
}
|
||||
|
||||
let report = eyre!("stdout of command did not contain any matches for the given regex")
|
||||
.context_from(self);
|
||||
.context_from(self)
|
||||
.with_section(|| format!("{:?}", regex).header("Match Regex:"));
|
||||
|
||||
Err(report)
|
||||
}
|
||||
|
|
@ -296,6 +297,7 @@ impl<T> TestOutput<T> {
|
|||
"stdout of command did not contain any matches for the given regex"
|
||||
))
|
||||
.context_from(self)
|
||||
.with_section(|| format!("{:?}", regex).header("Match Regex:"))
|
||||
}
|
||||
|
||||
#[instrument(skip(self))]
|
||||
|
|
@ -306,7 +308,9 @@ impl<T> TestOutput<T> {
|
|||
return Ok(self);
|
||||
}
|
||||
|
||||
Err(eyre!("stdout of command is not equal the given string")).context_from(self)
|
||||
Err(eyre!("stdout of command is not equal the given string"))
|
||||
.context_from(self)
|
||||
.with_section(|| format!("{:?}", s).header("Match String:"))
|
||||
}
|
||||
|
||||
#[instrument(skip(self))]
|
||||
|
|
@ -318,7 +322,9 @@ impl<T> TestOutput<T> {
|
|||
return Ok(self);
|
||||
}
|
||||
|
||||
Err(eyre!("stdout of command is not equal to the given regex")).context_from(self)
|
||||
Err(eyre!("stdout of command is not equal to the given regex"))
|
||||
.context_from(self)
|
||||
.with_section(|| format!("{:?}", regex).header("Match Regex:"))
|
||||
}
|
||||
|
||||
/// Returns Ok if the program was killed, Err(Report) if exit was by another
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@
|
|||
|
||||
#![warn(warnings, missing_docs, trivial_casts, unused_qualifications)]
|
||||
#![forbid(unsafe_code)]
|
||||
#![allow(clippy::try_err)]
|
||||
|
||||
use color_eyre::eyre::Result;
|
||||
use eyre::WrapErr;
|
||||
|
|
@ -20,7 +21,10 @@ use tempdir::TempDir;
|
|||
|
||||
use std::{borrow::Borrow, env, fs, io::Write, time::Duration};
|
||||
|
||||
use zebra_chain::parameters::Network::{self, *};
|
||||
use zebra_chain::{
|
||||
block::Height,
|
||||
parameters::Network::{self, *},
|
||||
};
|
||||
use zebra_test::{command::TestDirExt, prelude::*};
|
||||
use zebrad::config::ZebradConfig;
|
||||
|
||||
|
|
@ -57,6 +61,10 @@ where
|
|||
/// Add the given config to the test directory and use it for all
|
||||
/// subsequently spawned processes.
|
||||
fn with_config(self, config: ZebradConfig) -> Result<Self>;
|
||||
|
||||
/// Overwrite any existing config in the test directory with `config`, and use it for all
|
||||
/// subsequently spawned processes.
|
||||
fn replace_config(self, config: ZebradConfig) -> Result<Self>;
|
||||
}
|
||||
|
||||
impl<T> ZebradTestDirExt for T
|
||||
|
|
@ -97,6 +105,31 @@ where
|
|||
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
fn replace_config(self, mut config: ZebradConfig) -> Result<Self> {
|
||||
let dir = self.borrow().path();
|
||||
|
||||
if !config.state.ephemeral {
|
||||
let cache_dir = dir.join("state");
|
||||
|
||||
// Create dir, ignoring existing directories
|
||||
match fs::create_dir(&cache_dir) {
|
||||
Ok(_) => {}
|
||||
Err(e) if (e.kind() == std::io::ErrorKind::AlreadyExists) => {}
|
||||
Err(e) => Err(e)?,
|
||||
};
|
||||
|
||||
config.state.cache_dir = cache_dir;
|
||||
}
|
||||
|
||||
let config_file = dir.join("zebrad.toml");
|
||||
|
||||
// Remove any existing config before writing a new one
|
||||
let _ = fs::remove_file(config_file.clone());
|
||||
fs::File::create(config_file)?.write_all(toml::to_string(&config)?.as_bytes())?;
|
||||
|
||||
Ok(self)
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -440,16 +473,29 @@ fn valid_generated_config(command: &str, expected_output: &str) -> Result<()> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
const LARGE_CHECKPOINT_TEST_HEIGHT: Height =
|
||||
Height((zebra_consensus::MAX_CHECKPOINT_HEIGHT_GAP * 2) as u32);
|
||||
|
||||
const STOP_AT_HEIGHT_REGEX: &str = "stopping at configured height";
|
||||
|
||||
const STOP_ON_LOAD_TIMEOUT: Duration = Duration::from_secs(5);
|
||||
// usually it's much shorter than this
|
||||
const SMALL_CHECKPOINT_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
const LARGE_CHECKPOINT_TIMEOUT: Duration = Duration::from_secs(180);
|
||||
|
||||
/// Test if `zebrad` can sync the first checkpoint on mainnet.
|
||||
///
|
||||
/// The first checkpoint contains a single genesis block.
|
||||
#[test]
|
||||
fn sync_one_checkpoint_mainnet() -> Result<()> {
|
||||
sync_until(
|
||||
"verified checkpoint range",
|
||||
Height(0),
|
||||
Mainnet,
|
||||
Duration::from_secs(20),
|
||||
STOP_AT_HEIGHT_REGEX,
|
||||
SMALL_CHECKPOINT_TIMEOUT,
|
||||
None,
|
||||
)
|
||||
.map(|_tempdir| ())
|
||||
}
|
||||
|
||||
/// Test if `zebrad` can sync the first checkpoint on testnet.
|
||||
|
|
@ -458,73 +504,131 @@ fn sync_one_checkpoint_mainnet() -> Result<()> {
|
|||
#[test]
|
||||
fn sync_one_checkpoint_testnet() -> Result<()> {
|
||||
sync_until(
|
||||
"verified checkpoint range",
|
||||
Height(0),
|
||||
Testnet,
|
||||
Duration::from_secs(20),
|
||||
STOP_AT_HEIGHT_REGEX,
|
||||
SMALL_CHECKPOINT_TIMEOUT,
|
||||
None,
|
||||
)
|
||||
.map(|_tempdir| ())
|
||||
}
|
||||
|
||||
/// Test if `zebrad` can sync the second checkpoint on mainnet.
|
||||
/// Test if `zebrad` can sync the first checkpoint, restart, and stop on load.
|
||||
#[test]
|
||||
fn restart_stop_at_height() -> Result<()> {
|
||||
let reuse_tempdir = sync_until(
|
||||
Height(0),
|
||||
Mainnet,
|
||||
STOP_AT_HEIGHT_REGEX,
|
||||
SMALL_CHECKPOINT_TIMEOUT,
|
||||
None,
|
||||
)?;
|
||||
// if stopping corrupts the sled database, zebrad might hang here
|
||||
// if stopping does not sync the sled database, the logs will contain OnCommit
|
||||
sync_until(
|
||||
Height(0),
|
||||
Mainnet,
|
||||
"called_from=OnLoad",
|
||||
STOP_ON_LOAD_TIMEOUT,
|
||||
Some(reuse_tempdir),
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Test if `zebrad` can sync some larger checkpoints on mainnet.
|
||||
///
|
||||
/// The second checkpoint contains a large number of blocks.
|
||||
/// This test might fail or timeout on slow or unreliable networks,
|
||||
/// so we don't run it by default. It also takes a lot longer than
|
||||
/// our 10 second target time for default tests.
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn sync_two_checkpoints_mainnet() -> Result<()> {
|
||||
sync_until(
|
||||
"verified checkpoint range block_count=2000",
|
||||
fn sync_large_checkpoints_mainnet() -> Result<()> {
|
||||
let reuse_tempdir = sync_until(
|
||||
LARGE_CHECKPOINT_TEST_HEIGHT,
|
||||
Mainnet,
|
||||
Duration::from_secs(120),
|
||||
)
|
||||
STOP_AT_HEIGHT_REGEX,
|
||||
LARGE_CHECKPOINT_TIMEOUT,
|
||||
None,
|
||||
)?;
|
||||
// if this sync fails, see the failure notes in `restart_stop_at_height`
|
||||
sync_until(
|
||||
(LARGE_CHECKPOINT_TEST_HEIGHT - 1).unwrap(),
|
||||
Mainnet,
|
||||
"previous state height is greater than the stop height",
|
||||
STOP_ON_LOAD_TIMEOUT,
|
||||
Some(reuse_tempdir),
|
||||
)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Test if `zebrad` can sync the second checkpoint on testnet.
|
||||
/// Test if `zebrad` can sync some larger checkpoints on testnet.
|
||||
///
|
||||
/// This test does not run by default, see `sync_two_checkpoints_mainnet`
|
||||
/// This test does not run by default, see `sync_large_checkpoints_mainnet`
|
||||
/// for details.
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn sync_two_checkpoints_testnet() -> Result<()> {
|
||||
fn sync_large_checkpoints_testnet() -> Result<()> {
|
||||
sync_until(
|
||||
"verified checkpoint range block_count=2000",
|
||||
LARGE_CHECKPOINT_TEST_HEIGHT,
|
||||
Testnet,
|
||||
Duration::from_secs(120),
|
||||
STOP_AT_HEIGHT_REGEX,
|
||||
LARGE_CHECKPOINT_TIMEOUT,
|
||||
None,
|
||||
)
|
||||
.map(|_tempdir| ())
|
||||
}
|
||||
|
||||
/// Sync `network` until `zebrad` outputs `regex`.
|
||||
/// Returns an error if `timeout` elapses before `regex` is output.
|
||||
/// Sync `network` until `zebrad` reaches `height`, and ensure that
|
||||
/// the output contains `stop_regex`. If `reuse_tempdir` is supplied,
|
||||
/// use it as the test's temporary directory.
|
||||
///
|
||||
/// If `stop_regex` is encountered before the process exits, kill the
|
||||
/// process, and mark the test as successful, even if `height` has not
|
||||
/// been reached.
|
||||
///
|
||||
/// On success, returns the associated `TempDir`. Returns an error if
|
||||
/// the child exits or `timeout` elapses before `stop_regex` is found.
|
||||
///
|
||||
/// If your test environment does not have network access, skip
|
||||
/// this test by setting the `ZEBRA_SKIP_NETWORK_TESTS` env var.
|
||||
fn sync_until(regex: &str, network: Network, timeout: Duration) -> Result<()> {
|
||||
fn sync_until(
|
||||
height: Height,
|
||||
network: Network,
|
||||
stop_regex: &str,
|
||||
timeout: Duration,
|
||||
reuse_tempdir: Option<TempDir>,
|
||||
) -> Result<TempDir> {
|
||||
zebra_test::init();
|
||||
|
||||
if env::var_os("ZEBRA_SKIP_NETWORK_TESTS").is_some() {
|
||||
// This message is captured by the test runner, use
|
||||
// `cargo test -- --nocapture` to see it.
|
||||
eprintln!("Skipping network test because '$ZEBRA_SKIP_NETWORK_TESTS' is set.");
|
||||
return Ok(());
|
||||
return Ok(testdir()?);
|
||||
}
|
||||
|
||||
// Use a persistent state, so we can handle large syncs
|
||||
let mut config = persistent_test_config()?;
|
||||
// TODO: add a convenience method?
|
||||
// TODO: add convenience methods?
|
||||
config.network.network = network;
|
||||
config.state.debug_stop_at_height = Some(height.0);
|
||||
|
||||
let mut child = testdir()?
|
||||
.with_config(config)?
|
||||
.spawn_child(&["start"])?
|
||||
.with_timeout(timeout);
|
||||
let tempdir = if let Some(reuse_tempdir) = reuse_tempdir {
|
||||
reuse_tempdir.replace_config(config)?
|
||||
} else {
|
||||
testdir()?.with_config(config)?
|
||||
};
|
||||
|
||||
let mut child = tempdir.spawn_child(&["start"])?.with_timeout(timeout);
|
||||
|
||||
// TODO: is there a way to check for testnet or mainnet here?
|
||||
// For example: "network=Mainnet" or "network=Testnet"
|
||||
child.expect_stdout(regex)?;
|
||||
child.expect_stdout(stop_regex)?;
|
||||
child.kill()?;
|
||||
|
||||
Ok(())
|
||||
Ok(child.dir)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
|
|||
Loading…
Reference in New Issue