change(state): Check database format is valid on shutdown, to catch format errors in new block code (#7606)
* Provide a user hint when the database format is invalid * Split a path-based database version method * Check the database format before Zebra shuts down * Fix a typo in zebra-state/src/service/finalized_state/zebra_db.rs Co-authored-by: Marek <mail@marek.onl> * Fix a typo in upgrade.rs --------- Co-authored-by: Marek <mail@marek.onl>
This commit is contained in:
parent
b737ccf570
commit
7348d080d6
|
|
@ -323,7 +323,18 @@ pub fn database_format_version_on_disk(
|
|||
network: Network,
|
||||
) -> Result<Option<Version>, BoxError> {
|
||||
let version_path = config.version_file_path(network);
|
||||
let db_path = config.db_path(network);
|
||||
|
||||
database_format_version_at_path(&version_path, &db_path)
|
||||
}
|
||||
|
||||
/// Returns the full semantic version of the on-disk database at `version_path`.
|
||||
///
|
||||
/// See [`database_format_version_on_disk()`] for details.
|
||||
pub(crate) fn database_format_version_at_path(
|
||||
version_path: &Path,
|
||||
db_path: &Path,
|
||||
) -> Result<Option<Version>, BoxError> {
|
||||
let disk_version_file = match fs::read_to_string(version_path) {
|
||||
Ok(version) => Some(version),
|
||||
Err(e) if e.kind() == ErrorKind::NotFound => {
|
||||
|
|
@ -346,8 +357,6 @@ pub fn database_format_version_on_disk(
|
|||
)));
|
||||
}
|
||||
|
||||
let db_path = config.db_path(network);
|
||||
|
||||
// There's no version file on disk, so we need to guess the version
|
||||
// based on the database content
|
||||
match fs::metadata(db_path) {
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ pub struct DbFormatChangeThreadHandle {
|
|||
cancel_handle: mpsc::SyncSender<CancelFormatChange>,
|
||||
}
|
||||
|
||||
/// Marker type that is sent to cancel a format upgrade, and returned as a error on cancellation.
|
||||
/// Marker type that is sent to cancel a format upgrade, and returned as an error on cancellation.
|
||||
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
|
||||
pub struct CancelFormatChange;
|
||||
|
||||
|
|
@ -300,7 +300,7 @@ impl DbFormatChange {
|
|||
|
||||
/// Run a format change in the database, or check the format of the database once.
|
||||
#[allow(clippy::unwrap_in_result)]
|
||||
fn run_format_change_or_check(
|
||||
pub(crate) fn run_format_change_or_check(
|
||||
&self,
|
||||
config: &Config,
|
||||
network: Network,
|
||||
|
|
@ -369,8 +369,12 @@ impl DbFormatChange {
|
|||
// (unless a future upgrade breaks these format checks)
|
||||
// - re-opening the current version should be valid, regardless of whether the upgrade
|
||||
// or new block code created the format (or any combination).
|
||||
Self::format_validity_checks_detailed(upgrade_db, cancel_receiver)?
|
||||
.expect("new, upgraded, or downgraded database format is valid");
|
||||
Self::format_validity_checks_detailed(upgrade_db, cancel_receiver)?.unwrap_or_else(|_| {
|
||||
panic!(
|
||||
"unexpected invalid database format: delete and re-sync the database at '{:?}'",
|
||||
upgrade_db.path()
|
||||
)
|
||||
});
|
||||
|
||||
let inital_disk_version = self
|
||||
.initial_disk_version()
|
||||
|
|
|
|||
|
|
@ -9,7 +9,10 @@
|
|||
//! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must
|
||||
//! be incremented each time the database format (column, serialization, etc) changes.
|
||||
|
||||
use std::path::Path;
|
||||
use std::{
|
||||
path::Path,
|
||||
sync::{mpsc, Arc},
|
||||
};
|
||||
|
||||
use zebra_chain::parameters::Network;
|
||||
|
||||
|
|
@ -46,6 +49,11 @@ pub struct ZebraDb {
|
|||
// This configuration cannot be modified after the database is initialized,
|
||||
// because some clones would have different values.
|
||||
//
|
||||
/// The configuration for the database.
|
||||
//
|
||||
// TODO: use the database and version paths instead, and refactor the upgrade code to use paths
|
||||
config: Arc<Config>,
|
||||
|
||||
/// Should format upgrades and format checks be skipped for this instance?
|
||||
/// Only used in test code.
|
||||
debug_skip_format_upgrades: bool,
|
||||
|
|
@ -82,6 +90,7 @@ impl ZebraDb {
|
|||
|
||||
// Open the database and do initial checks.
|
||||
let mut db = ZebraDb {
|
||||
config: Arc::new(config.clone()),
|
||||
debug_skip_format_upgrades,
|
||||
format_change_handle: None,
|
||||
// After the database directory is created, a newly created database temporarily
|
||||
|
|
@ -169,6 +178,42 @@ impl ZebraDb {
|
|||
if let Some(format_change_handle) = self.format_change_handle.as_mut() {
|
||||
format_change_handle.force_cancel();
|
||||
}
|
||||
|
||||
// # Correctness
|
||||
//
|
||||
// Check that the database format is correct before shutting down.
|
||||
// This lets users know to delete and re-sync their database immediately,
|
||||
// rather than surprising them next time Zebra starts up.
|
||||
//
|
||||
// # Testinng
|
||||
//
|
||||
// In Zebra's CI, panicking here stops us writing invalid cached states,
|
||||
// which would then make unrelated PRs fail when Zebra starts up.
|
||||
|
||||
// If the upgrade has completed, or we've done a downgrade, check the state is valid.
|
||||
let disk_version = database_format_version_on_disk(&self.config, self.network())
|
||||
.expect("unexpected invalid or unreadable database version file");
|
||||
|
||||
if let Some(disk_version) = disk_version {
|
||||
// We need to keep the cancel handle until the format check has finished,
|
||||
// because dropping it cancels the format check.
|
||||
let (_never_cancel_handle, never_cancel_receiver) = mpsc::sync_channel(1);
|
||||
|
||||
// We block here because the checks are quick and database validity is
|
||||
// consensus-critical.
|
||||
if disk_version >= database_format_version_in_code() {
|
||||
DbFormatChange::check_new_blocks()
|
||||
.run_format_change_or_check(
|
||||
&self.config,
|
||||
self.network(),
|
||||
// This argument is not used by the format check.
|
||||
None,
|
||||
self,
|
||||
&never_cancel_receiver,
|
||||
)
|
||||
.expect("cancel handle is never used");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.check_for_panics();
|
||||
|
|
|
|||
Loading…
Reference in New Issue