change(state): Check database format is valid on shutdown, to catch format errors in new block code (#7606)

* Provide a user hint when the database format is invalid

* Split a path-based database version method

* Check the database format before Zebra shuts down

* Fix a typo in zebra-state/src/service/finalized_state/zebra_db.rs

Co-authored-by: Marek <mail@marek.onl>

* Fix a typo in upgrade.rs

---------

Co-authored-by: Marek <mail@marek.onl>
This commit is contained in:
teor 2023-09-22 23:58:41 +10:00 committed by GitHub
parent b737ccf570
commit 7348d080d6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 65 additions and 7 deletions

View File

@ -323,7 +323,18 @@ pub fn database_format_version_on_disk(
network: Network,
) -> Result<Option<Version>, BoxError> {
let version_path = config.version_file_path(network);
let db_path = config.db_path(network);
database_format_version_at_path(&version_path, &db_path)
}
/// Returns the full semantic version of the on-disk database at `version_path`.
///
/// See [`database_format_version_on_disk()`] for details.
pub(crate) fn database_format_version_at_path(
version_path: &Path,
db_path: &Path,
) -> Result<Option<Version>, BoxError> {
let disk_version_file = match fs::read_to_string(version_path) {
Ok(version) => Some(version),
Err(e) if e.kind() == ErrorKind::NotFound => {
@ -346,8 +357,6 @@ pub fn database_format_version_on_disk(
)));
}
let db_path = config.db_path(network);
// There's no version file on disk, so we need to guess the version
// based on the database content
match fs::metadata(db_path) {

View File

@ -104,7 +104,7 @@ pub struct DbFormatChangeThreadHandle {
cancel_handle: mpsc::SyncSender<CancelFormatChange>,
}
/// Marker type that is sent to cancel a format upgrade, and returned as a error on cancellation.
/// Marker type that is sent to cancel a format upgrade, and returned as an error on cancellation.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct CancelFormatChange;
@ -300,7 +300,7 @@ impl DbFormatChange {
/// Run a format change in the database, or check the format of the database once.
#[allow(clippy::unwrap_in_result)]
fn run_format_change_or_check(
pub(crate) fn run_format_change_or_check(
&self,
config: &Config,
network: Network,
@ -369,8 +369,12 @@ impl DbFormatChange {
// (unless a future upgrade breaks these format checks)
// - re-opening the current version should be valid, regardless of whether the upgrade
// or new block code created the format (or any combination).
Self::format_validity_checks_detailed(upgrade_db, cancel_receiver)?
.expect("new, upgraded, or downgraded database format is valid");
Self::format_validity_checks_detailed(upgrade_db, cancel_receiver)?.unwrap_or_else(|_| {
panic!(
"unexpected invalid database format: delete and re-sync the database at '{:?}'",
upgrade_db.path()
)
});
let inital_disk_version = self
.initial_disk_version()

View File

@ -9,7 +9,10 @@
//! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must
//! be incremented each time the database format (column, serialization, etc) changes.
use std::path::Path;
use std::{
path::Path,
sync::{mpsc, Arc},
};
use zebra_chain::parameters::Network;
@ -46,6 +49,11 @@ pub struct ZebraDb {
// This configuration cannot be modified after the database is initialized,
// because some clones would have different values.
//
/// The configuration for the database.
//
// TODO: use the database and version paths instead, and refactor the upgrade code to use paths
config: Arc<Config>,
/// Should format upgrades and format checks be skipped for this instance?
/// Only used in test code.
debug_skip_format_upgrades: bool,
@ -82,6 +90,7 @@ impl ZebraDb {
// Open the database and do initial checks.
let mut db = ZebraDb {
config: Arc::new(config.clone()),
debug_skip_format_upgrades,
format_change_handle: None,
// After the database directory is created, a newly created database temporarily
@ -169,6 +178,42 @@ impl ZebraDb {
if let Some(format_change_handle) = self.format_change_handle.as_mut() {
format_change_handle.force_cancel();
}
// # Correctness
//
// Check that the database format is correct before shutting down.
// This lets users know to delete and re-sync their database immediately,
// rather than surprising them next time Zebra starts up.
//
// # Testinng
//
// In Zebra's CI, panicking here stops us writing invalid cached states,
// which would then make unrelated PRs fail when Zebra starts up.
// If the upgrade has completed, or we've done a downgrade, check the state is valid.
let disk_version = database_format_version_on_disk(&self.config, self.network())
.expect("unexpected invalid or unreadable database version file");
if let Some(disk_version) = disk_version {
// We need to keep the cancel handle until the format check has finished,
// because dropping it cancels the format check.
let (_never_cancel_handle, never_cancel_receiver) = mpsc::sync_channel(1);
// We block here because the checks are quick and database validity is
// consensus-critical.
if disk_version >= database_format_version_in_code() {
DbFormatChange::check_new_blocks()
.run_format_change_or_check(
&self.config,
self.network(),
// This argument is not used by the format check.
None,
self,
&never_cancel_receiver,
)
.expect("cancel handle is never used");
}
}
}
self.check_for_panics();