change(state): Check database format is valid on shutdown, to catch format errors in new block code (#7606)

* Provide a user hint when the database format is invalid

* Split a path-based database version method

* Check the database format before Zebra shuts down

* Fix a typo in zebra-state/src/service/finalized_state/zebra_db.rs

Co-authored-by: Marek <mail@marek.onl>

* Fix a typo in upgrade.rs

---------

Co-authored-by: Marek <mail@marek.onl>
This commit is contained in:
teor 2023-09-22 23:58:41 +10:00 committed by GitHub
parent b737ccf570
commit 7348d080d6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 65 additions and 7 deletions

View File

@ -323,7 +323,18 @@ pub fn database_format_version_on_disk(
network: Network, network: Network,
) -> Result<Option<Version>, BoxError> { ) -> Result<Option<Version>, BoxError> {
let version_path = config.version_file_path(network); let version_path = config.version_file_path(network);
let db_path = config.db_path(network);
database_format_version_at_path(&version_path, &db_path)
}
/// Returns the full semantic version of the on-disk database at `version_path`.
///
/// See [`database_format_version_on_disk()`] for details.
pub(crate) fn database_format_version_at_path(
version_path: &Path,
db_path: &Path,
) -> Result<Option<Version>, BoxError> {
let disk_version_file = match fs::read_to_string(version_path) { let disk_version_file = match fs::read_to_string(version_path) {
Ok(version) => Some(version), Ok(version) => Some(version),
Err(e) if e.kind() == ErrorKind::NotFound => { Err(e) if e.kind() == ErrorKind::NotFound => {
@ -346,8 +357,6 @@ pub fn database_format_version_on_disk(
))); )));
} }
let db_path = config.db_path(network);
// There's no version file on disk, so we need to guess the version // There's no version file on disk, so we need to guess the version
// based on the database content // based on the database content
match fs::metadata(db_path) { match fs::metadata(db_path) {

View File

@ -104,7 +104,7 @@ pub struct DbFormatChangeThreadHandle {
cancel_handle: mpsc::SyncSender<CancelFormatChange>, cancel_handle: mpsc::SyncSender<CancelFormatChange>,
} }
/// Marker type that is sent to cancel a format upgrade, and returned as a error on cancellation. /// Marker type that is sent to cancel a format upgrade, and returned as an error on cancellation.
#[derive(Copy, Clone, Debug, Eq, PartialEq)] #[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct CancelFormatChange; pub struct CancelFormatChange;
@ -300,7 +300,7 @@ impl DbFormatChange {
/// Run a format change in the database, or check the format of the database once. /// Run a format change in the database, or check the format of the database once.
#[allow(clippy::unwrap_in_result)] #[allow(clippy::unwrap_in_result)]
fn run_format_change_or_check( pub(crate) fn run_format_change_or_check(
&self, &self,
config: &Config, config: &Config,
network: Network, network: Network,
@ -369,8 +369,12 @@ impl DbFormatChange {
// (unless a future upgrade breaks these format checks) // (unless a future upgrade breaks these format checks)
// - re-opening the current version should be valid, regardless of whether the upgrade // - re-opening the current version should be valid, regardless of whether the upgrade
// or new block code created the format (or any combination). // or new block code created the format (or any combination).
Self::format_validity_checks_detailed(upgrade_db, cancel_receiver)? Self::format_validity_checks_detailed(upgrade_db, cancel_receiver)?.unwrap_or_else(|_| {
.expect("new, upgraded, or downgraded database format is valid"); panic!(
"unexpected invalid database format: delete and re-sync the database at '{:?}'",
upgrade_db.path()
)
});
let inital_disk_version = self let inital_disk_version = self
.initial_disk_version() .initial_disk_version()

View File

@ -9,7 +9,10 @@
//! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must //! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must
//! be incremented each time the database format (column, serialization, etc) changes. //! be incremented each time the database format (column, serialization, etc) changes.
use std::path::Path; use std::{
path::Path,
sync::{mpsc, Arc},
};
use zebra_chain::parameters::Network; use zebra_chain::parameters::Network;
@ -46,6 +49,11 @@ pub struct ZebraDb {
// This configuration cannot be modified after the database is initialized, // This configuration cannot be modified after the database is initialized,
// because some clones would have different values. // because some clones would have different values.
// //
/// The configuration for the database.
//
// TODO: use the database and version paths instead, and refactor the upgrade code to use paths
config: Arc<Config>,
/// Should format upgrades and format checks be skipped for this instance? /// Should format upgrades and format checks be skipped for this instance?
/// Only used in test code. /// Only used in test code.
debug_skip_format_upgrades: bool, debug_skip_format_upgrades: bool,
@ -82,6 +90,7 @@ impl ZebraDb {
// Open the database and do initial checks. // Open the database and do initial checks.
let mut db = ZebraDb { let mut db = ZebraDb {
config: Arc::new(config.clone()),
debug_skip_format_upgrades, debug_skip_format_upgrades,
format_change_handle: None, format_change_handle: None,
// After the database directory is created, a newly created database temporarily // After the database directory is created, a newly created database temporarily
@ -169,6 +178,42 @@ impl ZebraDb {
if let Some(format_change_handle) = self.format_change_handle.as_mut() { if let Some(format_change_handle) = self.format_change_handle.as_mut() {
format_change_handle.force_cancel(); format_change_handle.force_cancel();
} }
// # Correctness
//
// Check that the database format is correct before shutting down.
// This lets users know to delete and re-sync their database immediately,
// rather than surprising them next time Zebra starts up.
//
// # Testinng
//
// In Zebra's CI, panicking here stops us writing invalid cached states,
// which would then make unrelated PRs fail when Zebra starts up.
// If the upgrade has completed, or we've done a downgrade, check the state is valid.
let disk_version = database_format_version_on_disk(&self.config, self.network())
.expect("unexpected invalid or unreadable database version file");
if let Some(disk_version) = disk_version {
// We need to keep the cancel handle until the format check has finished,
// because dropping it cancels the format check.
let (_never_cancel_handle, never_cancel_receiver) = mpsc::sync_channel(1);
// We block here because the checks are quick and database validity is
// consensus-critical.
if disk_version >= database_format_version_in_code() {
DbFormatChange::check_new_blocks()
.run_format_change_or_check(
&self.config,
self.network(),
// This argument is not used by the format check.
None,
self,
&never_cancel_receiver,
)
.expect("cancel handle is never used");
}
}
} }
self.check_for_panics(); self.check_for_panics();