From 7348d080d6dd0bb2e7153569ce4158d86861524f Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 22 Sep 2023 23:58:41 +1000 Subject: [PATCH] change(state): Check database format is valid on shutdown, to catch format errors in new block code (#7606) * Provide a user hint when the database format is invalid * Split a path-based database version method * Check the database format before Zebra shuts down * Fix a typo in zebra-state/src/service/finalized_state/zebra_db.rs Co-authored-by: Marek * Fix a typo in upgrade.rs --------- Co-authored-by: Marek --- zebra-state/src/config.rs | 13 ++++- .../finalized_state/disk_format/upgrade.rs | 12 +++-- .../src/service/finalized_state/zebra_db.rs | 47 ++++++++++++++++++- 3 files changed, 65 insertions(+), 7 deletions(-) diff --git a/zebra-state/src/config.rs b/zebra-state/src/config.rs index 3e305dc2..65461968 100644 --- a/zebra-state/src/config.rs +++ b/zebra-state/src/config.rs @@ -323,7 +323,18 @@ pub fn database_format_version_on_disk( network: Network, ) -> Result, BoxError> { let version_path = config.version_file_path(network); + let db_path = config.db_path(network); + database_format_version_at_path(&version_path, &db_path) +} + +/// Returns the full semantic version of the on-disk database at `version_path`. +/// +/// See [`database_format_version_on_disk()`] for details. +pub(crate) fn database_format_version_at_path( + version_path: &Path, + db_path: &Path, +) -> Result, BoxError> { let disk_version_file = match fs::read_to_string(version_path) { Ok(version) => Some(version), Err(e) if e.kind() == ErrorKind::NotFound => { @@ -346,8 +357,6 @@ pub fn database_format_version_on_disk( ))); } - let db_path = config.db_path(network); - // There's no version file on disk, so we need to guess the version // based on the database content match fs::metadata(db_path) { diff --git a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs index 60afb696..15f28eb0 100644 --- a/zebra-state/src/service/finalized_state/disk_format/upgrade.rs +++ b/zebra-state/src/service/finalized_state/disk_format/upgrade.rs @@ -104,7 +104,7 @@ pub struct DbFormatChangeThreadHandle { cancel_handle: mpsc::SyncSender, } -/// Marker type that is sent to cancel a format upgrade, and returned as a error on cancellation. +/// Marker type that is sent to cancel a format upgrade, and returned as an error on cancellation. #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub struct CancelFormatChange; @@ -300,7 +300,7 @@ impl DbFormatChange { /// Run a format change in the database, or check the format of the database once. #[allow(clippy::unwrap_in_result)] - fn run_format_change_or_check( + pub(crate) fn run_format_change_or_check( &self, config: &Config, network: Network, @@ -369,8 +369,12 @@ impl DbFormatChange { // (unless a future upgrade breaks these format checks) // - re-opening the current version should be valid, regardless of whether the upgrade // or new block code created the format (or any combination). - Self::format_validity_checks_detailed(upgrade_db, cancel_receiver)? - .expect("new, upgraded, or downgraded database format is valid"); + Self::format_validity_checks_detailed(upgrade_db, cancel_receiver)?.unwrap_or_else(|_| { + panic!( + "unexpected invalid database format: delete and re-sync the database at '{:?}'", + upgrade_db.path() + ) + }); let inital_disk_version = self .initial_disk_version() diff --git a/zebra-state/src/service/finalized_state/zebra_db.rs b/zebra-state/src/service/finalized_state/zebra_db.rs index 687c7afb..c370c980 100644 --- a/zebra-state/src/service/finalized_state/zebra_db.rs +++ b/zebra-state/src/service/finalized_state/zebra_db.rs @@ -9,7 +9,10 @@ //! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must //! be incremented each time the database format (column, serialization, etc) changes. -use std::path::Path; +use std::{ + path::Path, + sync::{mpsc, Arc}, +}; use zebra_chain::parameters::Network; @@ -46,6 +49,11 @@ pub struct ZebraDb { // This configuration cannot be modified after the database is initialized, // because some clones would have different values. // + /// The configuration for the database. + // + // TODO: use the database and version paths instead, and refactor the upgrade code to use paths + config: Arc, + /// Should format upgrades and format checks be skipped for this instance? /// Only used in test code. debug_skip_format_upgrades: bool, @@ -82,6 +90,7 @@ impl ZebraDb { // Open the database and do initial checks. let mut db = ZebraDb { + config: Arc::new(config.clone()), debug_skip_format_upgrades, format_change_handle: None, // After the database directory is created, a newly created database temporarily @@ -169,6 +178,42 @@ impl ZebraDb { if let Some(format_change_handle) = self.format_change_handle.as_mut() { format_change_handle.force_cancel(); } + + // # Correctness + // + // Check that the database format is correct before shutting down. + // This lets users know to delete and re-sync their database immediately, + // rather than surprising them next time Zebra starts up. + // + // # Testinng + // + // In Zebra's CI, panicking here stops us writing invalid cached states, + // which would then make unrelated PRs fail when Zebra starts up. + + // If the upgrade has completed, or we've done a downgrade, check the state is valid. + let disk_version = database_format_version_on_disk(&self.config, self.network()) + .expect("unexpected invalid or unreadable database version file"); + + if let Some(disk_version) = disk_version { + // We need to keep the cancel handle until the format check has finished, + // because dropping it cancels the format check. + let (_never_cancel_handle, never_cancel_receiver) = mpsc::sync_channel(1); + + // We block here because the checks are quick and database validity is + // consensus-critical. + if disk_version >= database_format_version_in_code() { + DbFormatChange::check_new_blocks() + .run_format_change_or_check( + &self.config, + self.network(), + // This argument is not used by the format check. + None, + self, + &never_cancel_receiver, + ) + .expect("cancel handle is never used"); + } + } } self.check_for_panics();