From 43e80fd61c4f1147c1e3b957ccda5207212a8a90 Mon Sep 17 00:00:00 2001 From: teor Date: Wed, 13 Apr 2022 14:06:52 +1000 Subject: [PATCH] 6. feat(db): Add a transparent address UTXO index (#3999) * Add test-only serialization, and make existing serialization test-only * Make AddressLocations clearer in the API * Add UnspentOutputAddressLocation * Add the AddressLocation to the UTXO database value * Update the snapshot test code for UnspentOutputAddressLocation * Update the raw data snapshots * Update the high-level data snapshots * Increment the database version * Make serialization clearer Co-authored-by: Janito Vaqueiro Ferreira Filho * Fix code formatting Co-authored-by: Janito Vaqueiro Ferreira Filho * Add an empty utxo_by_transparent_addr_loc column family * Update snapshot data for the new column family * Add an AddressUnspentOutputs type * Add round-trip tests for AddressUnspentOutputs * Move address balances into their own method * Simplify updating address balances * Fix utxo_by_out_loc column family name * Implement reads and writes of address UTXOs * Update raw data snapshots * Update the snapshot tests for high-level address UTXOs * Assert rather than taking empty address snapshots for genesis * Update high-level address UTXO snapshot data, and delete empty snapshots * Increment the database version * Use typed values for all ReadDisk methods * Implement test-only serialization for transparent::Address * Implement FromDisk for () * Store AddressUnspentOutput as the column family key * Update round-trip serialization tests for AddressUnspentOutput * Update snapshot test code, and add a UTXO data snapshot * Update existing snapshot data * Add new UTXO snapshot data * Update column family name ```sh fastmod utxo_by_transparent_addr_loc utxo_loc_by_transparent_addr_loc zebra* ``` * cargo fmt --all * cargo insta test --review --delete-unreferenced-snapshots * Explain why it is ok to use invalid database iterator indexes Co-authored-by: Conrado Gouvea * Add 
explanations of UTXO database updates * Simplify an assertion * Remove UnspentOutputAddressLocation and just store transparent::Output * Update snapshot test data Co-authored-by: Janito Vaqueiro Ferreira Filho Co-authored-by: Conrado Gouvea Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com> --- Cargo.lock | 59 ++++++ deny.toml | 1 + zebra-chain/Cargo.toml | 8 +- zebra-chain/src/transparent.rs | 5 +- zebra-chain/src/transparent/address.rs | 7 +- zebra-chain/src/transparent/script.rs | 2 +- zebra-state/src/constants.rs | 2 +- .../src/service/finalized_state/disk_db.rs | 82 ++++---- .../service/finalized_state/disk_format.rs | 13 ++ .../finalized_state/disk_format/block.rs | 18 +- .../finalized_state/disk_format/tests/prop.rs | 30 ++- .../tests/snapshots/column_family_names.snap | 1 + .../empty_column_families@mainnet_0.snap | 1 + .../empty_column_families@no_blocks.snap | 1 + .../empty_column_families@testnet_0.snap | 1 + ...ansparent_addr_loc_raw_data@mainnet_1.snap | 10 + ...ansparent_addr_loc_raw_data@mainnet_2.snap | 14 ++ ...ansparent_addr_loc_raw_data@testnet_1.snap | 10 + ...ansparent_addr_loc_raw_data@testnet_2.snap | 14 ++ .../disk_format/transparent.rs | 181 ++++++++++++++++-- .../service/finalized_state/zebra_db/block.rs | 30 +-- .../zebra_db/block/tests/snapshot.rs | 77 ++++++-- .../snapshots/address_balances@mainnet_0.snap | 5 - .../snapshots/address_balances@testnet_0.snap | 5 - .../address_utxo_data@mainnet_1.snap | 16 ++ .../address_utxo_data@mainnet_2.snap | 24 +++ .../address_utxo_data@testnet_1.snap | 16 ++ .../address_utxo_data@testnet_2.snap | 24 +++ .../snapshots/address_utxos@mainnet_1.snap | 21 ++ .../snapshots/address_utxos@mainnet_2.snap | 28 +++ .../snapshots/address_utxos@testnet_1.snap | 21 ++ .../snapshots/address_utxos@testnet_2.snap | 28 +++ .../finalized_state/zebra_db/transparent.rs | 181 +++++++++++++++--- 33 files changed, 797 insertions(+), 139 deletions(-) create mode 100644 
zebra-state/src/service/finalized_state/disk_format/tests/snapshots/utxo_loc_by_transparent_addr_loc_raw_data@mainnet_1.snap create mode 100644 zebra-state/src/service/finalized_state/disk_format/tests/snapshots/utxo_loc_by_transparent_addr_loc_raw_data@mainnet_2.snap create mode 100644 zebra-state/src/service/finalized_state/disk_format/tests/snapshots/utxo_loc_by_transparent_addr_loc_raw_data@testnet_1.snap create mode 100644 zebra-state/src/service/finalized_state/disk_format/tests/snapshots/utxo_loc_by_transparent_addr_loc_raw_data@testnet_2.snap delete mode 100644 zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_balances@mainnet_0.snap delete mode 100644 zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_balances@testnet_0.snap create mode 100644 zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxo_data@mainnet_1.snap create mode 100644 zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxo_data@mainnet_2.snap create mode 100644 zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxo_data@testnet_1.snap create mode 100644 zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxo_data@testnet_2.snap create mode 100644 zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxos@mainnet_1.snap create mode 100644 zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxos@mainnet_2.snap create mode 100644 zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxos@testnet_1.snap create mode 100644 zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxos@testnet_2.snap diff --git a/Cargo.lock b/Cargo.lock index f4880e3b..1ba6a0fb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1037,6 +1037,16 @@ dependencies = [ "darling_macro 0.12.4", ] +[[package]] +name = "darling" +version = "0.13.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a01d95850c592940db9b8194bc39f4bc0e89dee5c4265e4b1807c34a9aba453c" +dependencies = [ + "darling_core 0.13.4", + "darling_macro 0.13.4", +] + [[package]] name = "darling_core" version = "0.10.2" @@ -1065,6 +1075,20 @@ dependencies = [ "syn 1.0.86", ] +[[package]] +name = "darling_core" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "859d65a907b6852c9361e3185c862aae7fafd2887876799fa55f5f99dc40d610" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2 1.0.36", + "quote 1.0.15", + "strsim 0.10.0", + "syn 1.0.86", +] + [[package]] name = "darling_macro" version = "0.10.2" @@ -1087,6 +1111,17 @@ dependencies = [ "syn 1.0.86", ] +[[package]] +name = "darling_macro" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c972679f83bdf9c42bd905396b6c3588a843a17f0f16dfcfa3e2c5d57441835" +dependencies = [ + "darling_core 0.13.4", + "quote 1.0.15", + "syn 1.0.86", +] + [[package]] name = "dashmap" version = "4.0.2" @@ -3887,6 +3922,29 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_with" +version = "1.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "946fa04a8ac43ff78a1f4b811990afb9ddbdf5890b46d6dda0ba1998230138b7" +dependencies = [ + "rustversion", + "serde", + "serde_with_macros", +] + +[[package]] +name = "serde_with_macros" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e182d6ec6f05393cc0e5ed1bf81ad6db3a8feedf8ee515ecdd369809bcce8082" +dependencies = [ + "darling 0.13.4", + "proc-macro2 1.0.36", + "quote 1.0.15", + "syn 1.0.86", +] + [[package]] name = "serde_yaml" version = "0.8.23" @@ -5639,6 +5697,7 @@ dependencies = [ "secp256k1", "serde", "serde-big-array", + "serde_with", "sha2", "spandoc", "static_assertions", diff --git a/deny.toml b/deny.toml index 2a4b6823..a216d9ad 100644 --- a/deny.toml +++ 
b/deny.toml @@ -61,6 +61,7 @@ skip-tree = [ # upgrade abscissa and arti { name = "darling", version = "=0.10.2" }, + { name = "darling", version = "=0.12.4" }, # recent major version bumps # we should re-check these dependencies in February 2022 diff --git a/zebra-chain/Cargo.toml b/zebra-chain/Cargo.toml index 26a2922a..bc04a9c2 100644 --- a/zebra-chain/Cargo.toml +++ b/zebra-chain/Cargo.toml @@ -10,7 +10,7 @@ edition = "2021" [features] default = [] proptest-impl = ["proptest", "proptest-derive", "zebra-test", "rand", "rand_chacha", "tokio", -"hex/serde"] +"hex/serde", "serde_with"] bench = ["zebra-test"] [dependencies] @@ -37,9 +37,12 @@ jubjub = "0.8.0" lazy_static = "1.4.0" rand_core = "0.6.3" ripemd160 = "0.9" + serde = { version = "1.0.136", features = ["serde_derive", "rc"] } -secp256k1 = { version = "0.21.3", features = ["serde"] } +serde_with = { version = "1.12.0", optional = true } serde-big-array = "0.4.1" + +secp256k1 = { version = "0.21.3", features = ["serde"] } sha2 = { version = "0.9.9", features=["compress"] } static_assertions = "1.1.0" subtle = "2.4.1" @@ -78,6 +81,7 @@ spandoc = "0.2.1" tracing = "0.1.31" hex = { version = "0.4.3", features = ["serde"] } +serde_with = "1.12.0" proptest = "0.10.1" proptest-derive = "0.3.0" diff --git a/zebra-chain/src/transparent.rs b/zebra-chain/src/transparent.rs index f2b97b11..2a970f90 100644 --- a/zebra-chain/src/transparent.rs +++ b/zebra-chain/src/transparent.rs @@ -314,7 +314,10 @@ impl Input { /// that spends my UTXO and sends 1 ZEC to you and 1 ZEC back to me /// (just like receiving change). #[derive(Clone, Debug, Eq, PartialEq, Hash)] -#[cfg_attr(any(test, feature = "proptest-impl"), derive(Arbitrary, Serialize))] +#[cfg_attr( + any(test, feature = "proptest-impl"), + derive(Arbitrary, Serialize, Deserialize) +)] pub struct Output { /// Transaction value. // At https://en.bitcoin.it/wiki/Protocol_documentation#tx, this is an i64. 
diff --git a/zebra-chain/src/transparent/address.rs b/zebra-chain/src/transparent/address.rs index fda1bb28..0780532d 100644 --- a/zebra-chain/src/transparent/address.rs +++ b/zebra-chain/src/transparent/address.rs @@ -43,7 +43,11 @@ mod magics { #[derive(Copy, Clone, Eq, PartialEq, Hash)] #[cfg_attr( any(test, feature = "proptest-impl"), - derive(proptest_derive::Arbitrary) + derive( + proptest_derive::Arbitrary, + serde_with::SerializeDisplay, + serde_with::DeserializeFromStr + ) )] pub enum Address { /// P2SH (Pay to Script Hash) addresses @@ -53,6 +57,7 @@ pub enum Address { /// 20 bytes specifying a script hash. script_hash: [u8; 20], }, + /// P2PKH (Pay to Public Key Hash) addresses PayToPublicKeyHash { /// Production, test, or other network diff --git a/zebra-chain/src/transparent/script.rs b/zebra-chain/src/transparent/script.rs index c6e02db1..c2517285 100644 --- a/zebra-chain/src/transparent/script.rs +++ b/zebra-chain/src/transparent/script.rs @@ -10,7 +10,7 @@ use crate::serialization::{ #[derive(Clone, Eq, PartialEq, Hash)] #[cfg_attr( any(test, feature = "proptest-impl"), - derive(proptest_derive::Arbitrary, serde::Serialize) + derive(proptest_derive::Arbitrary, Serialize, Deserialize) )] pub struct Script( /// # Correctness diff --git a/zebra-state/src/constants.rs b/zebra-state/src/constants.rs index 4144d25b..48df90ec 100644 --- a/zebra-state/src/constants.rs +++ b/zebra-state/src/constants.rs @@ -18,7 +18,7 @@ pub use zebra_chain::transparent::MIN_TRANSPARENT_COINBASE_MATURITY; pub const MAX_BLOCK_REORG_HEIGHT: u32 = MIN_TRANSPARENT_COINBASE_MATURITY - 1; /// The database format version, incremented each time the database format changes. -pub const DATABASE_FORMAT_VERSION: u32 = 19; +pub const DATABASE_FORMAT_VERSION: u32 = 21; /// The maximum number of blocks to check for NU5 transactions, /// before we assume we are on a pre-NU5 legacy chain. 
diff --git a/zebra-state/src/service/finalized_state/disk_db.rs b/zebra-state/src/service/finalized_state/disk_db.rs index d336a464..141b0029 100644 --- a/zebra-state/src/service/finalized_state/disk_db.rs +++ b/zebra-state/src/service/finalized_state/disk_db.rs @@ -149,42 +149,40 @@ pub trait ReadDisk { /// Returns the lowest key in `cf`, and the corresponding value. /// /// Returns `None` if the column family is empty. - fn zs_first_key_value(&self, cf: &C) -> Option<(Box<[u8]>, Box<[u8]>)> + fn zs_first_key_value(&self, cf: &C) -> Option<(K, V)> where - C: rocksdb::AsColumnFamilyRef; + C: rocksdb::AsColumnFamilyRef, + K: FromDisk, + V: FromDisk; /// Returns the highest key in `cf`, and the corresponding value. /// /// Returns `None` if the column family is empty. - fn zs_last_key_value(&self, cf: &C) -> Option<(Box<[u8]>, Box<[u8]>)> + fn zs_last_key_value(&self, cf: &C) -> Option<(K, V)> where - C: rocksdb::AsColumnFamilyRef; + C: rocksdb::AsColumnFamilyRef, + K: FromDisk, + V: FromDisk; /// Returns the first key greater than or equal to `lower_bound` in `cf`, /// and the corresponding value. /// /// Returns `None` if there are no keys greater than or equal to `lower_bound`. - fn zs_next_key_value_from( - &self, - cf: &C, - lower_bound: &K, - ) -> Option<(Box<[u8]>, Box<[u8]>)> + fn zs_next_key_value_from(&self, cf: &C, lower_bound: &K) -> Option<(K, V)> where C: rocksdb::AsColumnFamilyRef, - K: IntoDisk; + K: IntoDisk + FromDisk, + V: FromDisk; /// Returns the first key less than or equal to `upper_bound` in `cf`, /// and the corresponding value. /// /// Returns `None` if there are no keys less than or equal to `upper_bound`. 
- fn zs_prev_key_value_back_from( - &self, - cf: &C, - upper_bound: &K, - ) -> Option<(Box<[u8]>, Box<[u8]>)> + fn zs_prev_key_value_back_from(&self, cf: &C, upper_bound: &K) -> Option<(K, V)> where C: rocksdb::AsColumnFamilyRef, - K: IntoDisk; + K: IntoDisk + FromDisk, + V: FromDisk; } impl PartialEq for DiskDb { @@ -255,52 +253,62 @@ impl ReadDisk for DiskDb { .is_some() } - fn zs_first_key_value(&self, cf: &C) -> Option<(Box<[u8]>, Box<[u8]>)> + fn zs_first_key_value(&self, cf: &C) -> Option<(K, V)> where C: rocksdb::AsColumnFamilyRef, + K: FromDisk, + V: FromDisk, { // Reading individual values from iterators does not seem to cause database hangs. - self.db.iterator_cf(cf, rocksdb::IteratorMode::Start).next() + self.db + .iterator_cf(cf, rocksdb::IteratorMode::Start) + .next() + .map(|(key_bytes, value_bytes)| (K::from_bytes(key_bytes), V::from_bytes(value_bytes))) } - fn zs_last_key_value(&self, cf: &C) -> Option<(Box<[u8]>, Box<[u8]>)> + fn zs_last_key_value(&self, cf: &C) -> Option<(K, V)> where C: rocksdb::AsColumnFamilyRef, + K: FromDisk, + V: FromDisk, { // Reading individual values from iterators does not seem to cause database hangs. - self.db.iterator_cf(cf, rocksdb::IteratorMode::End).next() + self.db + .iterator_cf(cf, rocksdb::IteratorMode::End) + .next() + .map(|(key_bytes, value_bytes)| (K::from_bytes(key_bytes), V::from_bytes(value_bytes))) } - fn zs_next_key_value_from( - &self, - cf: &C, - lower_bound: &K, - ) -> Option<(Box<[u8]>, Box<[u8]>)> + fn zs_next_key_value_from(&self, cf: &C, lower_bound: &K) -> Option<(K, V)> where C: rocksdb::AsColumnFamilyRef, - K: IntoDisk, + K: IntoDisk + FromDisk, + V: FromDisk, { let lower_bound = lower_bound.as_bytes(); let from = rocksdb::IteratorMode::From(lower_bound.as_ref(), rocksdb::Direction::Forward); // Reading individual values from iterators does not seem to cause database hangs. 
- self.db.iterator_cf(cf, from).next() + self.db + .iterator_cf(cf, from) + .next() + .map(|(key_bytes, value_bytes)| (K::from_bytes(key_bytes), V::from_bytes(value_bytes))) } - fn zs_prev_key_value_back_from( - &self, - cf: &C, - upper_bound: &K, - ) -> Option<(Box<[u8]>, Box<[u8]>)> + fn zs_prev_key_value_back_from(&self, cf: &C, upper_bound: &K) -> Option<(K, V)> where C: rocksdb::AsColumnFamilyRef, - K: IntoDisk, + K: IntoDisk + FromDisk, + V: FromDisk, { let upper_bound = upper_bound.as_bytes(); let from = rocksdb::IteratorMode::From(upper_bound.as_ref(), rocksdb::Direction::Reverse); // Reading individual values from iterators does not seem to cause database hangs. - self.db.iterator_cf(cf, from).next() + self.db + .iterator_cf(cf, from) + .next() + .map(|(key_bytes, value_bytes)| (K::from_bytes(key_bytes), V::from_bytes(value_bytes))) } } @@ -374,8 +382,10 @@ impl DiskDb { //rocksdb::ColumnFamilyDescriptor::new("tx_by_transparent_addr_loc", db_options.clone()), // TODO: rename to utxo_by_out_loc (#3952) rocksdb::ColumnFamilyDescriptor::new("utxo_by_outpoint", db_options.clone()), - // TODO: #3953 - //rocksdb::ColumnFamilyDescriptor::new("utxo_by_transparent_addr_loc", db_options.clone()), + rocksdb::ColumnFamilyDescriptor::new( + "utxo_loc_by_transparent_addr_loc", + db_options.clone(), + ), // Sprout rocksdb::ColumnFamilyDescriptor::new("sprout_nullifiers", db_options.clone()), rocksdb::ColumnFamilyDescriptor::new("sprout_anchors", db_options.clone()), diff --git a/zebra-state/src/service/finalized_state/disk_format.rs b/zebra-state/src/service/finalized_state/disk_format.rs index 03d60fd3..858506db 100644 --- a/zebra-state/src/service/finalized_state/disk_format.rs +++ b/zebra-state/src/service/finalized_state/disk_format.rs @@ -91,6 +91,19 @@ impl IntoDisk for () { } } +impl FromDisk for () { + #[allow(clippy::unused_unit)] + fn from_bytes(bytes: impl AsRef<[u8]>) -> Self { + assert_eq!( + bytes.as_ref().len(), + 0, + "unexpected data in zero-sized column 
family type", + ); + + () + } +} + // Serialization Modification Functions /// Truncates `mem_bytes` to `disk_len`, by removing zero bytes from the start of the slice. diff --git a/zebra-state/src/service/finalized_state/disk_format/block.rs b/zebra-state/src/service/finalized_state/disk_format/block.rs index 1bf67471..735ccfb8 100644 --- a/zebra-state/src/service/finalized_state/disk_format/block.rs +++ b/zebra-state/src/service/finalized_state/disk_format/block.rs @@ -5,8 +5,6 @@ //! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must //! be incremented each time the database format (column, serialization, etc) changes. -use serde::{Deserialize, Serialize}; - use zebra_chain::{ block::{self, Height}, serialization::{ZcashDeserializeInto, ZcashSerialize}, @@ -19,6 +17,8 @@ use crate::service::finalized_state::disk_format::{ #[cfg(any(test, feature = "proptest-impl"))] use proptest_derive::Arbitrary; +#[cfg(any(test, feature = "proptest-impl"))] +use serde::{Deserialize, Serialize}; /// The maximum value of an on-disk serialized [`Height`]. /// @@ -62,8 +62,11 @@ pub const TRANSACTION_LOCATION_DISK_BYTES: usize = HEIGHT_DISK_BYTES + TX_INDEX_ /// blocks larger than this size are rejected before reaching the database. /// /// (The maximum transaction count is tested by the large generated block serialization tests.) -#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)] -#[cfg_attr(any(test, feature = "proptest-impl"), derive(Arbitrary))] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +#[cfg_attr( + any(test, feature = "proptest-impl"), + derive(Arbitrary, Serialize, Deserialize) +)] pub struct TransactionIndex(u16); impl TransactionIndex { @@ -114,8 +117,11 @@ impl TransactionIndex { /// A transaction's location in the chain, by block height and transaction index. /// /// This provides a chain-order list of transactions. 
-#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)] -#[cfg_attr(any(test, feature = "proptest-impl"), derive(Arbitrary))] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +#[cfg_attr( + any(test, feature = "proptest-impl"), + derive(Arbitrary, Serialize, Deserialize) +)] pub struct TransactionLocation { /// The block height of the transaction. pub height: Height, diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/prop.rs b/zebra-state/src/service/finalized_state/disk_format/tests/prop.rs index 54051b82..9dff1739 100644 --- a/zebra-state/src/service/finalized_state/disk_format/tests/prop.rs +++ b/zebra-state/src/service/finalized_state/disk_format/tests/prop.rs @@ -15,11 +15,23 @@ use crate::service::finalized_state::{ arbitrary::assert_value_properties, disk_format::{ block::MAX_ON_DISK_HEIGHT, - transparent::{AddressBalanceLocation, AddressLocation, OutputLocation}, + transparent::{ + AddressBalanceLocation, AddressLocation, AddressUnspentOutput, OutputLocation, + }, IntoDisk, TransactionLocation, }, }; +// Common + +// TODO: turn this into a unit test, it has a fixed value +#[test] +fn roundtrip_unit_type() { + zebra_test::init(); + + proptest!(|(val in any::<()>())| assert_value_properties(val)); +} + // Block // TODO: split these tests into the disk_format sub-modules @@ -152,7 +164,7 @@ fn roundtrip_address_balance_location() { proptest!( |(mut val in any::())| { - *val.height_mut() = val.location().height().clamp(Height(0), MAX_ON_DISK_HEIGHT); + *val.height_mut() = val.address_location().height().clamp(Height(0), MAX_ON_DISK_HEIGHT); assert_value_properties(val) } ); @@ -165,6 +177,20 @@ fn roundtrip_transparent_output() { proptest!(|(val in any::())| assert_value_properties(val)); } +#[test] +fn roundtrip_address_unspent_output() { + zebra_test::init(); + + proptest!( + |(mut val in any::())| { + *val.address_location_mut().height_mut() = 
val.address_location().height().clamp(Height(0), MAX_ON_DISK_HEIGHT); + *val.unspent_output_location_mut().height_mut() = val.unspent_output_location().height().clamp(Height(0), MAX_ON_DISK_HEIGHT); + + assert_value_properties(val) + } + ); +} + #[test] fn roundtrip_amount() { zebra_test::init(); diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/column_family_names.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/column_family_names.snap index cde7c9aa..f2d5c38b 100644 --- a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/column_family_names.snap +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/column_family_names.snap @@ -23,4 +23,5 @@ expression: cf_names "tx_by_hash", "tx_by_loc", "utxo_by_outpoint", + "utxo_loc_by_transparent_addr_loc", ] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@mainnet_0.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@mainnet_0.snap index 1b887c09..08724f90 100644 --- a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@mainnet_0.snap +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@mainnet_0.snap @@ -13,4 +13,5 @@ expression: empty_column_families "sprout_nullifiers: no entries", "tip_chain_value_pool: no entries", "utxo_by_outpoint: no entries", + "utxo_loc_by_transparent_addr_loc: no entries", ] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@no_blocks.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@no_blocks.snap index f83855a2..1f38d2bc 100644 --- a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@no_blocks.snap +++ 
b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@no_blocks.snap @@ -22,4 +22,5 @@ expression: empty_column_families "tx_by_hash: no entries", "tx_by_loc: no entries", "utxo_by_outpoint: no entries", + "utxo_loc_by_transparent_addr_loc: no entries", ] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@testnet_0.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@testnet_0.snap index 1b887c09..08724f90 100644 --- a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@testnet_0.snap +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/empty_column_families@testnet_0.snap @@ -13,4 +13,5 @@ expression: empty_column_families "sprout_nullifiers: no entries", "tip_chain_value_pool: no entries", "utxo_by_outpoint: no entries", + "utxo_loc_by_transparent_addr_loc: no entries", ] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/utxo_loc_by_transparent_addr_loc_raw_data@mainnet_1.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/utxo_loc_by_transparent_addr_loc_raw_data@mainnet_1.snap new file mode 100644 index 00000000..dccf08d2 --- /dev/null +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/utxo_loc_by_transparent_addr_loc_raw_data@mainnet_1.snap @@ -0,0 +1,10 @@ +--- +source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs +expression: cf_data +--- +[ + KV( + k: "00000100000000010000010000000001", + v: "", + ), +] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/utxo_loc_by_transparent_addr_loc_raw_data@mainnet_2.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/utxo_loc_by_transparent_addr_loc_raw_data@mainnet_2.snap new file mode 100644 index 00000000..39f028bb --- /dev/null +++ 
b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/utxo_loc_by_transparent_addr_loc_raw_data@mainnet_2.snap @@ -0,0 +1,14 @@ +--- +source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs +expression: cf_data +--- +[ + KV( + k: "00000100000000010000010000000001", + v: "", + ), + KV( + k: "00000100000000010000020000000001", + v: "", + ), +] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/utxo_loc_by_transparent_addr_loc_raw_data@testnet_1.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/utxo_loc_by_transparent_addr_loc_raw_data@testnet_1.snap new file mode 100644 index 00000000..dccf08d2 --- /dev/null +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/utxo_loc_by_transparent_addr_loc_raw_data@testnet_1.snap @@ -0,0 +1,10 @@ +--- +source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs +expression: cf_data +--- +[ + KV( + k: "00000100000000010000010000000001", + v: "", + ), +] diff --git a/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/utxo_loc_by_transparent_addr_loc_raw_data@testnet_2.snap b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/utxo_loc_by_transparent_addr_loc_raw_data@testnet_2.snap new file mode 100644 index 00000000..39f028bb --- /dev/null +++ b/zebra-state/src/service/finalized_state/disk_format/tests/snapshots/utxo_loc_by_transparent_addr_loc_raw_data@testnet_2.snap @@ -0,0 +1,14 @@ +--- +source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs +expression: cf_data +--- +[ + KV( + k: "00000100000000010000010000000001", + v: "", + ), + KV( + k: "00000100000000010000020000000001", + v: "", + ), +] diff --git a/zebra-state/src/service/finalized_state/disk_format/transparent.rs b/zebra-state/src/service/finalized_state/disk_format/transparent.rs index c675bf1d..052e4e6b 100644 --- a/zebra-state/src/service/finalized_state/disk_format/transparent.rs 
+++ b/zebra-state/src/service/finalized_state/disk_format/transparent.rs @@ -7,10 +7,8 @@ use std::fmt::Debug; -use serde::{Deserialize, Serialize}; - use zebra_chain::{ - amount::{Amount, NonNegative}, + amount::{self, Amount, NonNegative}, block::Height, parameters::Network::*, serialization::{ZcashDeserializeInto, ZcashSerialize}, @@ -24,6 +22,8 @@ use crate::service::finalized_state::disk_format::{ #[cfg(any(test, feature = "proptest-impl"))] use proptest_derive::Arbitrary; +#[cfg(any(test, feature = "proptest-impl"))] +use serde::{Deserialize, Serialize}; #[cfg(any(test, feature = "proptest-impl"))] mod arbitrary; @@ -46,7 +46,8 @@ pub const OUTPUT_LOCATION_DISK_BYTES: usize = // Transparent types /// A transparent output's index in its transaction. -#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +#[cfg_attr(any(test, feature = "proptest-impl"), derive(Serialize, Deserialize))] pub struct OutputIndex(u32); impl OutputIndex { @@ -101,8 +102,11 @@ impl OutputIndex { /// /// [`OutputLocation`]s are sorted in increasing chain order, by height, transaction index, /// and output index. -#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)] -#[cfg_attr(any(test, feature = "proptest-impl"), derive(Arbitrary))] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +#[cfg_attr( + any(test, feature = "proptest-impl"), + derive(Arbitrary, Serialize, Deserialize) +)] pub struct OutputLocation { /// The location of the transparent input's transaction. 
transaction_location: TransactionLocation, @@ -194,13 +198,16 @@ pub type AddressLocation = OutputLocation; /// /// Currently, Zebra tracks this data 1:1 for each address: /// - the balance [`Amount`] for a transparent address, and -/// - the [`OutputLocation`] for the first [`transparent::Output`] sent to that address +/// - the [`AddressLocation`] for the first [`transparent::Output`] sent to that address /// (regardless of whether that output is spent or unspent). /// /// All other address data is tracked multiple times for each address /// (UTXOs and transactions). -#[derive(Copy, Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -#[cfg_attr(any(test, feature = "proptest-impl"), derive(Arbitrary))] +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[cfg_attr( + any(test, feature = "proptest-impl"), + derive(Arbitrary, Serialize, Deserialize) +)] pub struct AddressBalanceLocation { /// The total balance of all UTXOs sent to an address. balance: Amount, @@ -231,8 +238,28 @@ impl AddressBalanceLocation { &mut self.balance } + /// Updates the current balance by adding the supplied output's value. + pub fn receive_output( + &mut self, + unspent_output: &transparent::Output, + ) -> Result<(), amount::Error> { + self.balance = (self.balance + unspent_output.value())?; + + Ok(()) + } + + /// Updates the current balance by subtracting the supplied output's value. + pub fn spend_output( + &mut self, + spent_output: &transparent::Output, + ) -> Result<(), amount::Error> { + self.balance = (self.balance - spent_output.value())?; + + Ok(()) + } + /// Returns the location of the first [`transparent::Output`] sent to an address. - pub fn location(&self) -> AddressLocation { + pub fn address_location(&self) -> AddressLocation { self.location } @@ -244,6 +271,96 @@ impl AddressBalanceLocation { } } +/// A single unspent output for a [`transparent::Address`]. 
+/// +/// We store both the address location key and unspent output location value +/// in the RocksDB column family key. This improves insert and delete performance. +/// +/// This requires 8 extra bytes for each unspent output, +/// because we repeat the key for each value. +/// But RocksDB compression reduces the duplicate data size on disk. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)] +#[cfg_attr( + any(test, feature = "proptest-impl"), + derive(Arbitrary, Serialize, Deserialize) +)] +pub struct AddressUnspentOutput { + /// The location of the first [`transparent::Output`] sent to the address in `output`. + address_location: AddressLocation, + + /// The location of this unspent output. + unspent_output_location: OutputLocation, +} + +impl AddressUnspentOutput { + /// Create a new [`AddressUnspentOutput`] from an address location, + /// and an unspent output location. + pub fn new( + address_location: AddressLocation, + unspent_output_location: OutputLocation, + ) -> AddressUnspentOutput { + AddressUnspentOutput { + address_location, + unspent_output_location, + } + } + + /// Create an [`AddressUnspentOutput`] which starts iteration for the supplied address. + /// Used to look up the first output with [`ReadDisk::zs_next_key_value_from`]. + /// + /// The unspent output location is before all unspent output locations in the index. + /// It is always invalid, due to the genesis consensus rules. But this is not an issue + /// since [`ReadDisk::zs_next_key_value_from`] will fetch the next existing (valid) value. + pub fn address_iterator_start(address_location: AddressLocation) -> AddressUnspentOutput { + // Iterating from the lowest possible output location gets us the first output.
+ let zero_output_location = OutputLocation::from_usize(Height(0), 0, 0); + + AddressUnspentOutput { + address_location, + unspent_output_location: zero_output_location, + } + } + + /// Update the unspent output location to the next possible output for the supplied address. + /// Used to look up the next output with [`ReadDisk::zs_next_key_value_from`]. + /// + /// The updated unspent output location may be invalid, which is not an issue + /// since [`ReadDisk::zs_next_key_value_from`] will fetch the next existing (valid) value. + pub fn address_iterator_next(&mut self) { + // Iterating from the next possible output location gets us the next output, + // even if it is in a later block or transaction. + // + // Consensus: the block size limit is 2MB, which is much lower than the index range. + self.unspent_output_location.output_index.0 += 1; + } + + /// The location of the first [`transparent::Output`] sent to the address of this output. + /// + /// This can be used to look up the address. + pub fn address_location(&self) -> AddressLocation { + self.address_location + } + + /// The location of this unspent output. + pub fn unspent_output_location(&self) -> OutputLocation { + self.unspent_output_location + } + + /// Allows tests to modify the address location. + #[cfg(any(test, feature = "proptest-impl"))] + #[allow(dead_code)] + pub fn address_location_mut(&mut self) -> &mut AddressLocation { + &mut self.address_location + } + + /// Allows tests to modify the unspent output location. + #[cfg(any(test, feature = "proptest-impl"))] + #[allow(dead_code)] + pub fn unspent_output_location_mut(&mut self) -> &mut OutputLocation { + &mut self.unspent_output_location + } +} + // Transparent trait impls /// Returns a byte representing the [`transparent::Address`] variant. 
@@ -367,23 +484,27 @@ impl IntoDisk for AddressBalanceLocation {
 
     fn as_bytes(&self) -> Self::Bytes {
         let balance_bytes = self.balance().as_bytes().to_vec();
-        let location_bytes = self.location().as_bytes().to_vec();
+        let address_location_bytes = self.address_location().as_bytes().to_vec();
 
-        [balance_bytes, location_bytes].concat().try_into().unwrap()
+        [balance_bytes, address_location_bytes]
+            .concat()
+            .try_into()
+            .unwrap()
     }
 }
 
 impl FromDisk for AddressBalanceLocation {
     fn from_bytes(disk_bytes: impl AsRef<[u8]>) -> Self {
-        let (balance_bytes, location_bytes) = disk_bytes.as_ref().split_at(BALANCE_DISK_BYTES);
+        let (balance_bytes, address_location_bytes) =
+            disk_bytes.as_ref().split_at(BALANCE_DISK_BYTES);
 
         let balance = Amount::from_bytes(balance_bytes.try_into().unwrap()).unwrap();
-        let location = AddressLocation::from_bytes(location_bytes);
+        let address_location = AddressLocation::from_bytes(address_location_bytes);
 
-        let mut balance_location = AddressBalanceLocation::new(location);
-        *balance_location.balance_mut() = balance;
+        let mut address_balance_location = AddressBalanceLocation::new(address_location);
+        *address_balance_location.balance_mut() = balance;
 
-        balance_location
+        address_balance_location
     }
 }
 
@@ -400,3 +521,29 @@ impl FromDisk for transparent::Output {
         bytes.as_ref().zcash_deserialize_into().unwrap()
     }
 }
+
+impl IntoDisk for AddressUnspentOutput {
+    type Bytes = [u8; OUTPUT_LOCATION_DISK_BYTES + OUTPUT_LOCATION_DISK_BYTES];
+
+    fn as_bytes(&self) -> Self::Bytes {
+        let address_location_bytes = self.address_location().as_bytes();
+        let unspent_output_location_bytes = self.unspent_output_location().as_bytes();
+
+        [address_location_bytes, unspent_output_location_bytes]
+            .concat()
+            .try_into()
+            .unwrap()
+    }
+}
+
+impl FromDisk for AddressUnspentOutput {
+    fn from_bytes(disk_bytes: impl AsRef<[u8]>) -> Self {
+        let (address_location_bytes, unspent_output_location_bytes) =
+            disk_bytes.as_ref().split_at(OUTPUT_LOCATION_DISK_BYTES);
+
+        let address_location = AddressLocation::from_bytes(address_location_bytes);
+        let unspent_output_location = OutputLocation::from_bytes(unspent_output_location_bytes);
+
+        AddressUnspentOutput::new(address_location, unspent_output_location)
+    }
+}
diff --git a/zebra-state/src/service/finalized_state/zebra_db/block.rs b/zebra-state/src/service/finalized_state/zebra_db/block.rs
index a3d820eb..33addbb2 100644
--- a/zebra-state/src/service/finalized_state/zebra_db/block.rs
+++ b/zebra-state/src/service/finalized_state/zebra_db/block.rs
@@ -10,7 +10,7 @@
 //! be incremented each time the database format (column, serialization, etc) changes.
 
 use std::{
-    collections::{BTreeMap, HashMap},
+    collections::{BTreeMap, HashMap, HashSet},
     sync::Arc,
 };
 
@@ -33,7 +33,6 @@ use crate::{
         disk_format::{
             block::TransactionLocation,
             transparent::{AddressBalanceLocation, OutputLocation},
-            FromDisk,
         },
         zebra_db::{metrics::block_precommit_metrics, shielded::NoteCommitmentTrees, ZebraDb},
         FinalizedBlock,
@@ -60,14 +59,7 @@ impl ZebraDb {
     // TODO: move this method to the tip section
     pub fn tip(&self) -> Option<(block::Height, block::Hash)> {
         let hash_by_height = self.db.cf_handle("hash_by_height").unwrap();
-        self.db
-            .zs_last_key_value(&hash_by_height)
-            .map(|(height_bytes, hash_bytes)| {
-                let height = block::Height::from_bytes(height_bytes);
-                let hash = block::Hash::from_bytes(hash_bytes);
-
-                (height, hash)
-            })
+        self.db.zs_last_key_value(&hash_by_height)
     }
 
     /// Returns the finalized hash for a given `block::Height` if it is present.
@@ -98,7 +90,7 @@ impl ZebraDb {
         // Transactions
         let tx_by_loc = self.db.cf_handle("tx_by_loc").unwrap();
 
-        // Fetch the entire block's transactions
+        // Manually fetch the entire block's transactions
         let mut transactions = Vec::new();
 
         // TODO: is this loop more efficient if we store the number of transactions?
@@ -250,15 +242,21 @@ impl ZebraDb {
             .map(|(_outpoint, out_loc, utxo)| (out_loc, utxo))
             .collect();
 
-        // Get the current address balances, before the transactions in this block
-        let address_balances = spent_utxos_by_out_loc
+        // Get the transparent addresses with changed balances/UTXOs
+        let changed_addresses: HashSet<transparent::Address> = spent_utxos_by_out_loc
             .values()
             .chain(finalized.new_outputs.values())
             .filter_map(|utxo| utxo.output.address(network))
             .unique()
-            .filter_map(|address| Some((address, self.address_balance_location(&address)?)))
             .collect();
 
+        // Get the current address balances, before the transactions in this block
+        let address_balances: HashMap<transparent::Address, AddressBalanceLocation> =
+            changed_addresses
+                .into_iter()
+                .filter_map(|address| Some((address, self.address_balance_location(&address)?)))
+                .collect();
+
         let mut batch = DiskWriteBatch::new(network);
 
         // In case of errors, propagate and do not write the batch.
@@ -311,6 +309,8 @@ impl DiskWriteBatch {
     /// # Errors
     ///
     /// - Propagates any errors from updating history tree, note commitment trees, or value pools
+    //
+    // TODO: move db, finalized, and maybe other arguments into DiskWriteBatch
     #[allow(clippy::too_many_arguments)]
     pub fn prepare_block_batch(
         &mut self,
@@ -457,6 +457,8 @@ impl DiskWriteBatch {
     /// # Errors
     ///
     /// - Propagates any errors from updating note commitment trees
+    //
+    // TODO: move db, finalized, and maybe other arguments into DiskWriteBatch
     pub fn prepare_transaction_index_batch(
         &mut self,
         db: &DiskDb,
diff --git a/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs
index 48dd0324..2da0349a 100644
--- a/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs
+++ b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs
@@ -196,7 +196,7 @@ fn test_block_and_transaction_data_with_network(network: Network) {
         settings.set_snapshot_suffix(format!("{}_{}", net_suffix, height));
 
         settings.bind(|| snapshot_block_and_transaction_data(&state));
-        settings.bind(|| snapshot_transparent_address_data(&state));
+        settings.bind(|| snapshot_transparent_address_data(&state, height));
     }
 }
 
@@ -355,7 +355,6 @@ fn snapshot_block_and_transaction_data(state: &FinalizedState) {
                     let output = &stored_block.transactions[tx_index].outputs()[output_index];
                     let outpoint =
                         transparent::OutPoint::from_usize(transaction_hash, output_index);
-
                     let output_location =
                         OutputLocation::from_usize(query_height, tx_index, output_index);
 
@@ -432,44 +431,86 @@ fn snapshot_block_and_transaction_data(state: &FinalizedState) {
 }
 
 /// Snapshot transparent address data, using `cargo insta` and RON serialization.
-fn snapshot_transparent_address_data(state: &FinalizedState) {
+fn snapshot_transparent_address_data(state: &FinalizedState, height: u32) {
+    // TODO: transactions for each address (#3951)
+
     let balance_by_transparent_addr = state.cf_handle("balance_by_transparent_addr").unwrap();
+    let utxo_loc_by_transparent_addr_loc =
+        state.cf_handle("utxo_loc_by_transparent_addr_loc").unwrap();
 
     let mut stored_address_balances = Vec::new();
-
-    // TODO: UTXOs for each address (#3953)
-    //       transactions for each address (#3951)
+    let mut stored_address_utxo_locations = Vec::new();
+    let mut stored_address_utxos = Vec::new();
 
     // Correctness: Multi-key iteration causes hangs in concurrent code, but seems ok in tests.
     let addresses =
         state.full_iterator_cf(&balance_by_transparent_addr, rocksdb::IteratorMode::Start);
+    let utxo_address_location_count = state
+        .full_iterator_cf(
+            &utxo_loc_by_transparent_addr_loc,
+            rocksdb::IteratorMode::Start,
+        )
+        .count();
 
-    // The default raw data serialization is very verbose, so we hex-encode the bytes.
     let addresses: Vec<transparent::Address> = addresses
         .map(|(key, _value)| transparent::Address::from_bytes(key))
         .collect();
 
+    // # Consensus
+    //
+    // The genesis transaction's UTXO is not indexed.
+    // This check also ignores spent UTXOs.
+    if height == 0 {
+        assert_eq!(addresses.len(), 0);
+        assert_eq!(utxo_address_location_count, 0);
+        return;
+    }
+
     for address in addresses {
-        let stored_address_balance = state
+        let stored_address_balance_location = state
             .address_balance_location(&address)
             .expect("address indexes are consistent");
 
-        stored_address_balances.push((address.to_string(), stored_address_balance));
-    }
+        let stored_address_location = stored_address_balance_location.address_location();
 
-    // TODO: check that the UTXO and transaction lists are in chain order.
-    /*
-    assert!(
-        is_sorted(&stored_address_utxos),
-        "unsorted: {:?}",
-        stored_address_utxos,
-    );
-    */
+        let mut stored_utxo_locations = Vec::new();
+        for address_utxo_loc in state.address_utxo_locations(stored_address_location) {
+            assert_eq!(address_utxo_loc.address_location(), stored_address_location);
+
+            stored_utxo_locations.push(address_utxo_loc.unspent_output_location());
+        }
+
+        let mut stored_utxos = Vec::new();
+        for (utxo_loc, utxo) in state.address_utxos(&address) {
+            assert!(stored_utxo_locations.contains(&utxo_loc));
+
+            stored_utxos.push(utxo);
+        }
+
+        // Check that the lists are in chain order
+        //
+        // TODO: check that the transaction list is in chain order (#3951)
+        assert!(
+            is_sorted(&stored_utxo_locations),
+            "unsorted: {:?}\n\
+             for address: {:?}",
+            stored_utxo_locations,
+            address,
+        );
+
+        // The default raw data serialization is very verbose, so we hex-encode the bytes.
+        stored_address_balances.push((address.to_string(), stored_address_balance_location));
+        stored_address_utxo_locations.push((stored_address_location, stored_utxo_locations));
+        stored_address_utxos.push((address, stored_utxos));
+    }
 
     // We want to snapshot the order in the database,
     // because sometimes it is significant for performance or correctness.
     // So we don't sort the vectors before snapshotting.
     insta::assert_ron_snapshot!("address_balances", stored_address_balances);
+    // TODO: change these names to address_utxo_locations and address_utxos
+    insta::assert_ron_snapshot!("address_utxos", stored_address_utxo_locations);
+    insta::assert_ron_snapshot!("address_utxo_data", stored_address_utxos);
 }
 
 /// Return true if `list` is sorted in ascending order.
diff --git a/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_balances@mainnet_0.snap b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_balances@mainnet_0.snap
deleted file mode 100644
index 7dbefb77..00000000
--- a/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_balances@mainnet_0.snap
+++ /dev/null
@@ -1,5 +0,0 @@
----
-source: zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs
-expression: stored_address_balances
----
-[]
diff --git a/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_balances@testnet_0.snap b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_balances@testnet_0.snap
deleted file mode 100644
index 7dbefb77..00000000
--- a/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_balances@testnet_0.snap
+++ /dev/null
@@ -1,5 +0,0 @@
----
-source: zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs
-expression: stored_address_balances
----
-[]
diff --git a/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxo_data@mainnet_1.snap b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxo_data@mainnet_1.snap
new file mode 100644
index 00000000..3eed7e01
--- /dev/null
+++ b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxo_data@mainnet_1.snap
@@ -0,0 +1,16 @@
+---
+source: zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs
+expression: stored_address_utxos
+---
+[
+
("t3Vz22vK5z2LcKEdg16Yv4FFneEL1zg9ojd", [ + Utxo( + output: Output( + value: Amount(12500), + lock_script: Script("a9147d46a730d31f97b1930d3368a967c309bd4d136a87"), + ), + height: Height(1), + from_coinbase: true, + ), + ]), +] diff --git a/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxo_data@mainnet_2.snap b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxo_data@mainnet_2.snap new file mode 100644 index 00000000..7176fa29 --- /dev/null +++ b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxo_data@mainnet_2.snap @@ -0,0 +1,24 @@ +--- +source: zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs +expression: stored_address_utxos +--- +[ + ("t3Vz22vK5z2LcKEdg16Yv4FFneEL1zg9ojd", [ + Utxo( + output: Output( + value: Amount(12500), + lock_script: Script("a9147d46a730d31f97b1930d3368a967c309bd4d136a87"), + ), + height: Height(1), + from_coinbase: true, + ), + Utxo( + output: Output( + value: Amount(25000), + lock_script: Script("a9147d46a730d31f97b1930d3368a967c309bd4d136a87"), + ), + height: Height(2), + from_coinbase: true, + ), + ]), +] diff --git a/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxo_data@testnet_1.snap b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxo_data@testnet_1.snap new file mode 100644 index 00000000..6b758ac6 --- /dev/null +++ b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxo_data@testnet_1.snap @@ -0,0 +1,16 @@ +--- +source: zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs +expression: stored_address_utxos +--- +[ + ("t2UNzUUx8mWBCRYPRezvA363EYXyEpHokyi", [ + Utxo( + output: Output( + value: Amount(12500), + lock_script: Script("a914ef775f1f997f122a062fff1a2d7443abd1f9c64287"), + ), + height: Height(1), + from_coinbase: true, + ), + ]), +] diff --git 
a/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxo_data@testnet_2.snap b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxo_data@testnet_2.snap new file mode 100644 index 00000000..4fa3cc5b --- /dev/null +++ b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxo_data@testnet_2.snap @@ -0,0 +1,24 @@ +--- +source: zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs +expression: stored_address_utxos +--- +[ + ("t2UNzUUx8mWBCRYPRezvA363EYXyEpHokyi", [ + Utxo( + output: Output( + value: Amount(12500), + lock_script: Script("a914ef775f1f997f122a062fff1a2d7443abd1f9c64287"), + ), + height: Height(1), + from_coinbase: true, + ), + Utxo( + output: Output( + value: Amount(25000), + lock_script: Script("a914ef775f1f997f122a062fff1a2d7443abd1f9c64287"), + ), + height: Height(2), + from_coinbase: true, + ), + ]), +] diff --git a/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxos@mainnet_1.snap b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxos@mainnet_1.snap new file mode 100644 index 00000000..89f67655 --- /dev/null +++ b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxos@mainnet_1.snap @@ -0,0 +1,21 @@ +--- +source: zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs +expression: stored_address_utxo_locations +--- +[ + (OutputLocation( + transaction_location: TransactionLocation( + height: Height(1), + index: TransactionIndex(0), + ), + output_index: OutputIndex(1), + ), [ + OutputLocation( + transaction_location: TransactionLocation( + height: Height(1), + index: TransactionIndex(0), + ), + output_index: OutputIndex(1), + ), + ]), +] diff --git a/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxos@mainnet_2.snap 
b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxos@mainnet_2.snap new file mode 100644 index 00000000..821fb94f --- /dev/null +++ b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxos@mainnet_2.snap @@ -0,0 +1,28 @@ +--- +source: zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs +expression: stored_address_utxo_locations +--- +[ + (OutputLocation( + transaction_location: TransactionLocation( + height: Height(1), + index: TransactionIndex(0), + ), + output_index: OutputIndex(1), + ), [ + OutputLocation( + transaction_location: TransactionLocation( + height: Height(1), + index: TransactionIndex(0), + ), + output_index: OutputIndex(1), + ), + OutputLocation( + transaction_location: TransactionLocation( + height: Height(2), + index: TransactionIndex(0), + ), + output_index: OutputIndex(1), + ), + ]), +] diff --git a/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxos@testnet_1.snap b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxos@testnet_1.snap new file mode 100644 index 00000000..89f67655 --- /dev/null +++ b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxos@testnet_1.snap @@ -0,0 +1,21 @@ +--- +source: zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs +expression: stored_address_utxo_locations +--- +[ + (OutputLocation( + transaction_location: TransactionLocation( + height: Height(1), + index: TransactionIndex(0), + ), + output_index: OutputIndex(1), + ), [ + OutputLocation( + transaction_location: TransactionLocation( + height: Height(1), + index: TransactionIndex(0), + ), + output_index: OutputIndex(1), + ), + ]), +] diff --git a/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxos@testnet_2.snap b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxos@testnet_2.snap new file mode 100644 
index 00000000..821fb94f --- /dev/null +++ b/zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshots/address_utxos@testnet_2.snap @@ -0,0 +1,28 @@ +--- +source: zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs +expression: stored_address_utxo_locations +--- +[ + (OutputLocation( + transaction_location: TransactionLocation( + height: Height(1), + index: TransactionIndex(0), + ), + output_index: OutputIndex(1), + ), [ + OutputLocation( + transaction_location: TransactionLocation( + height: Height(1), + index: TransactionIndex(0), + ), + output_index: OutputIndex(1), + ), + OutputLocation( + transaction_location: TransactionLocation( + height: Height(2), + index: TransactionIndex(0), + ), + output_index: OutputIndex(1), + ), + ]), +] diff --git a/zebra-state/src/service/finalized_state/zebra_db/transparent.rs b/zebra-state/src/service/finalized_state/zebra_db/transparent.rs index d8583a9a..5d8708c9 100644 --- a/zebra-state/src/service/finalized_state/zebra_db/transparent.rs +++ b/zebra-state/src/service/finalized_state/zebra_db/transparent.rs @@ -11,7 +11,7 @@ //! The [`crate::constants::DATABASE_FORMAT_VERSION`] constant must //! be incremented each time the database format (column, serialization, etc) changes. -use std::collections::{BTreeMap, HashMap}; +use std::collections::{BTreeMap, BTreeSet, HashMap}; use zebra_chain::{ amount::{Amount, NonNegative}, @@ -21,7 +21,9 @@ use zebra_chain::{ use crate::{ service::finalized_state::{ disk_db::{DiskDb, DiskWriteBatch, ReadDisk, WriteDisk}, - disk_format::transparent::{AddressBalanceLocation, AddressLocation, OutputLocation}, + disk_format::transparent::{ + AddressBalanceLocation, AddressLocation, AddressUnspentOutput, OutputLocation, + }, zebra_db::ZebraDb, }, BoxError, @@ -53,10 +55,9 @@ impl ZebraDb { /// if it is in the finalized state. /// /// This location is used as an efficient index key for addresses. 
-    #[allow(dead_code)]
     pub fn address_location(&self, address: &transparent::Address) -> Option<AddressLocation> {
         self.address_balance_location(address)
-            .map(|abl| abl.location())
+            .map(|abl| abl.address_location())
     }
 
     /// Returns the [`OutputLocation`] for a [`transparent::OutPoint`].
@@ -96,16 +97,86 @@ impl ZebraDb {
 
         Some(utxo)
     }
+
+    /// Returns the unspent transparent outputs for a [`transparent::Address`],
+    /// if they are in the finalized state.
+    #[allow(dead_code)]
+    pub fn address_utxos(
+        &self,
+        address: &transparent::Address,
+    ) -> BTreeMap<OutputLocation, transparent::Utxo> {
+        let address_location = match self.address_location(address) {
+            Some(address_location) => address_location,
+            None => return BTreeMap::new(),
+        };
+
+        let output_locations = self.address_utxo_locations(address_location);
+
+        // Ignore any outputs spent by blocks committed during this query
+        output_locations
+            .iter()
+            .flat_map(|&addr_out_loc| {
+                Some((
+                    addr_out_loc.unspent_output_location(),
+                    self.utxo_by_location(addr_out_loc.unspent_output_location())?
+                        .utxo,
+                ))
+            })
+            .collect()
+    }
+
+    /// Returns the unspent transparent output locations for a [`transparent::Address`],
+    /// if they are in the finalized state.
+    pub fn address_utxo_locations(
+        &self,
+        address_location: AddressLocation,
+    ) -> BTreeSet<AddressUnspentOutput> {
+        let utxo_loc_by_transparent_addr_loc = self
+            .db
+            .cf_handle("utxo_loc_by_transparent_addr_loc")
+            .unwrap();
+
+        // Manually fetch the entire addresses' UTXO locations
+        let mut addr_unspent_outputs = BTreeSet::new();
+
+        // An invalid key representing the minimum possible output
+        let mut unspent_output = AddressUnspentOutput::address_iterator_start(address_location);
+
+        loop {
+            // A valid key representing an entry for this address or the next
+            unspent_output = match self
+                .db
+                .zs_next_key_value_from(&utxo_loc_by_transparent_addr_loc, &unspent_output)
+            {
+                Some((unspent_output, ())) => unspent_output,
+                // We're finished with the final address in the column family
+                None => break,
+            };
+
+            // We found the next address, so we're finished with this address
+            if unspent_output.address_location() != address_location {
+                break;
+            }
+
+            addr_unspent_outputs.insert(unspent_output);
+
+            // A potentially invalid key representing the next possible output
+            unspent_output.address_iterator_next();
+        }
+
+        addr_unspent_outputs
+    }
 }
 
 impl DiskWriteBatch {
     /// Prepare a database batch containing `finalized.block`'s:
     /// - transparent address balance changes,
     /// - UTXO changes, and
-    /// TODO:
-    /// - transparent address index changes (add in #3951, #3953),
+    /// - transparent address index changes,
     /// and return it (without actually writing anything).
     ///
+    /// TODO: transparent address transaction index (#3951)
+    ///
     /// # Errors
     ///
     /// - This method doesn't currently return any errors, but it might in future
@@ -116,57 +187,107 @@ impl DiskWriteBatch {
         utxos_spent_by_block: BTreeMap<OutputLocation, transparent::Utxo>,
         mut address_balances: HashMap<transparent::Address, AddressBalanceLocation>,
     ) -> Result<(), BoxError> {
-        let utxo_by_outpoint = db.cf_handle("utxo_by_outpoint").unwrap();
-        let balance_by_transparent_addr = db.cf_handle("balance_by_transparent_addr").unwrap();
+        let utxo_by_out_loc = db.cf_handle("utxo_by_outpoint").unwrap();
+        let utxo_loc_by_transparent_addr_loc =
+            db.cf_handle("utxo_loc_by_transparent_addr_loc").unwrap();
 
         // Index all new transparent outputs, before deleting any we've spent
-        for (output_location, utxo) in new_outputs_by_out_loc {
-            let output = utxo.output;
-            let receiving_address = output.address(self.network());
+        for (new_output_location, utxo) in new_outputs_by_out_loc {
+            let unspent_output = utxo.output;
+            let receiving_address = unspent_output.address(self.network());
 
             // Update the address balance by adding this UTXO's value
             if let Some(receiving_address) = receiving_address {
-                let address_balance = address_balances
+                // TODO: fix up tests that use missing outputs,
+                //       then replace entry() with get_mut().expect()
+
+                // In memory:
+                // - create the balance for the address, if needed.
+                // - create or fetch the link from the address to the AddressLocation
+                //   (the first location of the address in the chain).
+                let address_balance_location = address_balances
                     .entry(receiving_address)
-                    .or_insert_with(|| AddressBalanceLocation::new(output_location))
-                    .balance_mut();
+                    .or_insert_with(|| AddressBalanceLocation::new(new_output_location));
+                let receiving_address_location = address_balance_location.address_location();
 
-                let new_address_balance =
-                    (*address_balance + output.value()).expect("balance overflow already checked");
+                // Update the balance for the address in memory.
+                address_balance_location
+                    .receive_output(&unspent_output)
+                    .expect("balance overflow already checked");
 
-                *address_balance = new_address_balance;
+                // Create a link from the AddressLocation to the new OutputLocation in the database.
+                let address_unspent_output =
+                    AddressUnspentOutput::new(receiving_address_location, new_output_location);
+                self.zs_insert(
+                    &utxo_loc_by_transparent_addr_loc,
+                    address_unspent_output,
+                    (),
+                );
             }
 
-            self.zs_insert(&utxo_by_outpoint, output_location, output);
+            // Use the OutputLocation to store a copy of the new Output in the database.
+            // (For performance reasons, we don't want to deserialize the whole transaction
+            // to get an output.)
+            self.zs_insert(&utxo_by_out_loc, new_output_location, unspent_output);
         }
 
         // Mark all transparent inputs as spent.
         //
         // Coinbase inputs represent new coins, so there are no UTXOs to mark as spent.
-        for (output_location, utxo) in utxos_spent_by_block {
+        for (spent_output_location, utxo) in utxos_spent_by_block {
             let spent_output = utxo.output;
             let sending_address = spent_output.address(self.network());
 
-            // Update the address balance by subtracting this UTXO's value
+            // Fetch the balance, and the link from the address to the AddressLocation, from memory.
             if let Some(sending_address) = sending_address {
-                let address_balance = address_balances
-                    .entry(sending_address)
-                    .or_insert_with(|| panic!("spent outputs must already have an address balance"))
-                    .balance_mut();
+                let address_balance_location = address_balances
+                    .get_mut(&sending_address)
+                    .expect("spent outputs must already have an address balance");
 
-                let new_address_balance = (*address_balance - spent_output.value())
+                // Update the address balance by subtracting this UTXO's value, in memory.
+                address_balance_location
+                    .spend_output(&spent_output)
                     .expect("balance underflow already checked");
 
-                *address_balance = new_address_balance;
+                // Delete the link from the AddressLocation to the spent OutputLocation in the database.
+                let address_spent_output = AddressUnspentOutput::new(
+                    address_balance_location.address_location(),
+                    spent_output_location,
+                );
+                self.zs_delete(&utxo_loc_by_transparent_addr_loc, address_spent_output);
             }
 
-            self.zs_delete(&utxo_by_outpoint, output_location);
+            // Delete the OutputLocation, and the copy of the spent Output in the database.
+            self.zs_delete(&utxo_by_out_loc, spent_output_location);
         }
 
-        // Write the new address balances to the database
-        for (address, address_balance) in address_balances.into_iter() {
+        self.prepare_transparent_balances_batch(db, address_balances)?;
+
+        Ok(())
+    }
+
+    /// Prepare a database batch containing `finalized.block`'s:
+    /// - transparent address balance changes,
+    /// and return it (without actually writing anything).
+    ///
+    /// # Errors
+    ///
+    /// - This method doesn't currently return any errors, but it might in future
+    pub fn prepare_transparent_balances_batch(
+        &mut self,
+        db: &DiskDb,
+        address_balances: HashMap<transparent::Address, AddressBalanceLocation>,
+    ) -> Result<(), BoxError> {
+        let balance_by_transparent_addr = db.cf_handle("balance_by_transparent_addr").unwrap();
+
+        // Update all the changed address balances in the database.
+        for (address, address_balance_location) in address_balances.into_iter() {
             // Some of these balances are new, and some are updates
-            self.zs_insert(&balance_by_transparent_addr, address, address_balance);
+            self.zs_insert(
+                &balance_by_transparent_addr,
+                address,
+                address_balance_location,
+            );
         }
 
         Ok(())