From 65b94f7e50a2e12696d20fa4456047dea9f136b2 Mon Sep 17 00:00:00 2001
From: teor
Date: Tue, 19 Apr 2022 23:34:53 +1000
Subject: [PATCH] 9. feat(state): add a query function for transparent UTXOs
 (#4111)

* Add address UTXOs query functions, but without the transaction IDs

* Return transaction IDs along with address UTXOs

* Add a convenience type for address UTXOs

* Add output addresses to the convenience method

* Fix query documentation

* Rename the chain transaction IDs method
---
 .../finalized_state/zebra_db/transparent.rs   |  24 +-
 .../src/service/non_finalized_state/chain.rs  |  69 ++++-
 zebra-state/src/service/read.rs               | 239 +++++++++++++++++-
 zebra-state/src/service/read/utxo.rs          |  65 +++++
 4 files changed, 392 insertions(+), 5 deletions(-)
 create mode 100644 zebra-state/src/service/read/utxo.rs

diff --git a/zebra-state/src/service/finalized_state/zebra_db/transparent.rs b/zebra-state/src/service/finalized_state/zebra_db/transparent.rs
index 93b76618..c11b8f94 100644
--- a/zebra-state/src/service/finalized_state/zebra_db/transparent.rs
+++ b/zebra-state/src/service/finalized_state/zebra_db/transparent.rs
@@ -103,7 +103,6 @@ impl ZebraDb {
 
     /// Returns the unspent transparent outputs for a [`transparent::Address`],
     /// if they are in the finalized state.
-    #[allow(dead_code)]
     pub fn address_utxos(
         &self,
         address: &transparent::Address,
@@ -270,6 +269,29 @@ impl ZebraDb {
             "unexpected amount overflow: value balances are valid, so partial sum should be valid",
         )
     }
+
+    /// Returns the UTXOs for `addresses` in the finalized chain.
+    ///
+    /// If none of the addresses has finalized UTXOs, returns an empty list.
+    ///
+    /// # Correctness
+    ///
+    /// Callers should apply the non-finalized UTXO changes for `addresses` to the returned UTXOs.
+    ///
+    /// The UTXOs will only be correct if the non-finalized chain matches or overlaps with
+    /// the finalized state.
+    ///
+    /// Specifically, a block in the partial chain must be a child block of the finalized tip.
+    /// (But the child block does not have to be the partial chain root.)
+    pub fn partial_finalized_transparent_utxos(
+        &self,
+        addresses: &HashSet<transparent::Address>,
+    ) -> BTreeMap<OutputLocation, transparent::Output> {
+        addresses
+            .iter()
+            .flat_map(|address| self.address_utxos(address))
+            .collect()
+    }
 }
 
 impl DiskWriteBatch {
diff --git a/zebra-state/src/service/non_finalized_state/chain.rs b/zebra-state/src/service/non_finalized_state/chain.rs
index aff43ec9..be7992d6 100644
--- a/zebra-state/src/service/non_finalized_state/chain.rs
+++ b/zebra-state/src/service/non_finalized_state/chain.rs
@@ -3,7 +3,7 @@
 
 use std::{
     cmp::Ordering,
-    collections::{BTreeMap, HashMap, HashSet},
+    collections::{BTreeMap, BTreeSet, HashMap, HashSet},
     ops::Deref,
     sync::Arc,
 };
@@ -27,7 +27,8 @@ use zebra_chain::{
 };
 
 use crate::{
-    service::check, ContextuallyValidBlock, HashOrHeight, TransactionLocation, ValidateContextError,
+    service::check, ContextuallyValidBlock, HashOrHeight, OutputLocation, TransactionLocation,
+    ValidateContextError,
 };
 
 use self::index::TransparentTransfers;
@@ -537,6 +538,70 @@ impl Chain {
         )
     }
 
+    /// Returns the transparent UTXO changes for `addresses` in this non-finalized chain.
+    ///
+    /// If the UTXOs don't change for any of the addresses, returns empty lists.
+    ///
+    /// # Correctness
+    ///
+    /// Callers should apply these non-finalized UTXO changes to the finalized state UTXOs.
+    ///
+    /// The UTXOs will only be correct if the non-finalized chain matches or overlaps with
+    /// the finalized state.
+    ///
+    /// Specifically, a block in the partial chain must be a child block of the finalized tip.
+    /// (But the child block does not have to be the partial chain root.)
+    pub fn partial_transparent_utxo_changes(
+        &self,
+        addresses: &HashSet<transparent::Address>,
+    ) -> (
+        BTreeMap<OutputLocation, transparent::Output>,
+        BTreeSet<OutputLocation>,
+    ) {
+        let created_utxos = self
+            .partial_transparent_indexes(addresses)
+            .flat_map(|transfers| transfers.created_utxos())
+            .map(|(out_loc, output)| (*out_loc, output.clone()))
+            .collect();
+
+        let spent_utxos = self
+            .partial_transparent_indexes(addresses)
+            .flat_map(|transfers| transfers.spent_utxos())
+            .cloned()
+            .collect();
+
+        (created_utxos, spent_utxos)
+    }
+
+    /// Returns the [`transaction::Hash`]es used by `addresses` to receive or spend funds.
+    ///
+    /// If none of the addresses receive or spend funds in this partial chain, returns an empty list.
+    ///
+    /// # Correctness
+    ///
+    /// Callers should combine these non-finalized transactions with the finalized state transactions.
+    ///
+    /// The transaction IDs will only be correct if the non-finalized chain matches or overlaps with
+    /// the finalized state.
+    ///
+    /// Specifically, a block in the partial chain must be a child block of the finalized tip.
+    /// (But the child block does not have to be the partial chain root.)
+    ///
+    /// This condition does not apply if there is only one address.
+    /// Since address transactions are only appended by blocks,
+    /// and the finalized state query reads them in order,
+    /// it is impossible to get inconsistent transactions for a single address.
+    pub fn partial_transparent_tx_ids(
+        &self,
+        addresses: &HashSet<transparent::Address>,
+    ) -> BTreeMap<TransactionLocation, transaction::Hash> {
+        self.partial_transparent_indexes(addresses)
+            .flat_map(|transfers| transfers.tx_ids(&self.tx_by_hash))
+            .collect()
+    }
+
+    // Cloning
+
     /// Clone the Chain but not the history and note commitment trees, using
     /// the specified trees instead.
     ///
diff --git a/zebra-state/src/service/read.rs b/zebra-state/src/service/read.rs
index b7fe623e..4ab7cf50 100644
--- a/zebra-state/src/service/read.rs
+++ b/zebra-state/src/service/read.rs
@@ -4,23 +4,32 @@
 //! to read from the best [`Chain`] in the [`NonFinalizedState`],
 //! and the database in the [`FinalizedState`].
 
-use std::{collections::HashSet, sync::Arc};
+use std::{
+    collections::{BTreeMap, BTreeSet, HashSet},
+    ops::RangeInclusive,
+    sync::Arc,
+};
 
 use zebra_chain::{
     amount::{self, Amount, NegativeAllowed, NonNegative},
     block::{self, Block, Height},
+    parameters::Network,
     transaction::{self, Transaction},
     transparent,
 };
 
 use crate::{
     service::{finalized_state::ZebraDb, non_finalized_state::Chain},
-    BoxError, HashOrHeight,
+    BoxError, HashOrHeight, OutputLocation, TransactionLocation,
 };
 
+pub mod utxo;
+
 #[cfg(test)]
 mod tests;
 
+pub use utxo::AddressUtxos;
+
 /// If the transparent address index queries are interrupted by a new finalized block,
 /// retry this many times.
 ///
@@ -203,3 +212,229 @@ fn apply_balance_change(
 
     balance?.constrain()
 }
+
+/// Returns the unspent transparent outputs (UTXOs) for the supplied [`transparent::Address`]es,
+/// in chain order; and the transaction IDs for the transactions containing those UTXOs.
+///
+/// If the addresses do not exist in the non-finalized `chain` or finalized `db`,
+/// returns an empty list.
+#[allow(dead_code)]
+pub(crate) fn transparent_utxos<C>(
+    network: Network,
+    chain: Option<C>,
+    db: &ZebraDb,
+    addresses: HashSet<transparent::Address>,
+) -> Result<AddressUtxos, BoxError>
+where
+    C: AsRef<Chain>,
+{
+    let mut utxo_error = None;
+
+    // Retry the finalized UTXO query if it was interrupted by a finalizing block,
+    // and the non-finalized chain doesn't overlap the changed heights.
+    for _ in 0..=FINALIZED_ADDRESS_INDEX_RETRIES {
+        let (finalized_utxos, finalized_tip_range) = finalized_transparent_utxos(db, &addresses);
+
+        // Apply the non-finalized UTXO changes.
+        let chain_utxo_changes =
+            chain_transparent_utxo_changes(chain.as_ref(), &addresses, finalized_tip_range);
+
+        // If the UTXOs are valid, return them, otherwise, retry or return an error.
+        match chain_utxo_changes {
+            Ok(chain_utxo_changes) => {
+                let utxos = apply_utxo_changes(finalized_utxos, chain_utxo_changes);
+                let tx_ids = lookup_tx_ids_for_utxos(chain, db, &addresses, &utxos);
+
+                return Ok(AddressUtxos::new(network, utxos, tx_ids));
+            }
+
+            Err(error) => utxo_error = Some(Err(error)),
+        }
+    }
+
+    utxo_error.expect("unexpected missing error: attempts should set error or return")
+}
+
+/// Returns the unspent transparent outputs (UTXOs) for `addresses` in the finalized chain,
+/// and the finalized tip heights the UTXOs were queried at.
+///
+/// If the addresses do not exist in the finalized `db`, returns an empty list.
+//
+// TODO: turn the return type into a struct?
+fn finalized_transparent_utxos(
+    db: &ZebraDb,
+    addresses: &HashSet<transparent::Address>,
+) -> (
+    BTreeMap<OutputLocation, transparent::Output>,
+    Option<RangeInclusive<Height>>,
+) {
+    // # Correctness
+    //
+    // The StateService can commit additional blocks while we are querying address UTXOs.
+
+    // Check if the finalized state changed while we were querying it
+    let start_finalized_tip = db.finalized_tip_height();
+
+    let finalized_utxos = db.partial_finalized_transparent_utxos(addresses);
+
+    let end_finalized_tip = db.finalized_tip_height();
+
+    let finalized_tip_range = if let (Some(start_finalized_tip), Some(end_finalized_tip)) =
+        (start_finalized_tip, end_finalized_tip)
+    {
+        Some(start_finalized_tip..=end_finalized_tip)
+    } else {
+        // State is empty
+        None
+    };
+
+    (finalized_utxos, finalized_tip_range)
+}
+
+/// Returns the UTXO changes for `addresses` in the non-finalized chain,
+/// matching or overlapping the UTXOs for the `finalized_tip_range`.
+///
+/// If the addresses do not exist in the non-finalized `chain`, returns an empty list.
+//
+// TODO: turn the return type into a struct?
+fn chain_transparent_utxo_changes<C>(
+    chain: Option<C>,
+    addresses: &HashSet<transparent::Address>,
+    finalized_tip_range: Option<RangeInclusive<Height>>,
+) -> Result<
+    (
+        BTreeMap<OutputLocation, transparent::Output>,
+        BTreeSet<OutputLocation>,
+    ),
+    BoxError,
+>
+where
+    C: AsRef<Chain>,
+{
+    let finalized_tip_range = match finalized_tip_range {
+        Some(finalized_tip_range) => finalized_tip_range,
+        None => {
+            assert!(
+                chain.is_none(),
+                "unexpected non-finalized chain when finalized state is empty"
+            );
+
+            // Empty chains don't contain any changes.
+            return Ok(Default::default());
+        }
+    };
+
+    // # Correctness
+    //
+    // The StateService commits blocks to the finalized state before updating the latest chain,
+    // and it can commit additional blocks after we've cloned this `chain` variable.
+    //
+    // But we can compensate for deleted UTXOs by applying the overlapping non-finalized UTXO changes.
+
+    // Check if the finalized and non-finalized states match or overlap
+    let required_min_chain_root = finalized_tip_range.start().0 + 1;
+    let mut required_chain_overlap = required_min_chain_root..=finalized_tip_range.end().0;
+
+    if chain.is_none() {
+        if required_chain_overlap.is_empty() {
+            // The non-finalized chain is empty, and we don't need it.
+            return Ok(Default::default());
+        } else {
+            // We can't compensate for inconsistent database queries,
+            // because the non-finalized chain is empty.
+            return Err("unable to get UTXOs: state was committing a block, and non-finalized chain is empty".into());
+        }
+    }
+
+    let chain = chain.unwrap();
+    let chain = chain.as_ref();
+
+    let chain_root = chain.non_finalized_root_height().0;
+    let chain_tip = chain.non_finalized_tip_height().0;
+
+    assert!(
+        chain_root <= required_min_chain_root,
+        "unexpected chain gap: the best chain is updated after its previous root is finalized"
+    );
+
+    // If we've already committed this entire chain (its tip is below the finalized tip), ignore its UTXO changes.
+    // This is more likely if the non-finalized state is just getting started.
+    if chain_tip < *required_chain_overlap.end() {
+        if required_chain_overlap.is_empty() {
+            // The non-finalized chain has been committed, and we don't need it.
+            return Ok(Default::default());
+        } else {
+            // We can't compensate for inconsistent database queries,
+            // because the non-finalized chain is below the inconsistent query range.
+            return Err("unable to get UTXOs: state was committing a block, and non-finalized chain has been committed".into());
+        }
+    }
+
+    // Correctness: some finalized UTXOs might have duplicate creates or spends,
+    // but we've just checked they can be corrected by applying the non-finalized UTXO changes.
+    assert!(
+        required_chain_overlap.all(|height| chain.blocks.contains_key(&Height(height))),
+        "UTXO query inconsistency: chain must contain required overlap blocks",
+    );
+
+    Ok(chain.partial_transparent_utxo_changes(addresses))
+}
+
+/// Combines the supplied finalized and non-finalized UTXOs,
+/// removes the spent UTXOs, and returns the result.
+fn apply_utxo_changes(
+    finalized_utxos: BTreeMap<OutputLocation, transparent::Output>,
+    (created_chain_utxos, spent_chain_utxos): (
+        BTreeMap<OutputLocation, transparent::Output>,
+        BTreeSet<OutputLocation>,
+    ),
+) -> BTreeMap<OutputLocation, transparent::Output> {
+    // Correctness: combine the created UTXOs, then remove spent UTXOs,
+    // to compensate for overlapping finalized and non-finalized blocks.
+    finalized_utxos
+        .into_iter()
+        .chain(created_chain_utxos.into_iter())
+        .filter(|(utxo_location, _output)| !spent_chain_utxos.contains(utxo_location))
+        .collect()
+}
+
+/// Returns the [`transaction::Hash`]es containing the supplied UTXOs,
+/// from the non-finalized `chain` and finalized `db`.
+///
+/// # Panics
+///
+/// If any UTXO is not in the supplied state.
+fn lookup_tx_ids_for_utxos<C>(
+    chain: Option<C>,
+    db: &ZebraDb,
+    addresses: &HashSet<transparent::Address>,
+    utxos: &BTreeMap<OutputLocation, transparent::Output>,
+) -> BTreeMap<TransactionLocation, transaction::Hash>
+where
+    C: AsRef<Chain>,
+{
+    // Get the unique set of transaction locations
+    let transaction_locations: BTreeSet<TransactionLocation> = utxos
+        .keys()
+        .map(|output_location| output_location.transaction_location())
+        .collect();
+
+    let chain_tx_ids = chain
+        .as_ref()
+        .map(|chain| chain.as_ref().partial_transparent_tx_ids(addresses))
+        .unwrap_or_default();
+
+    // First try the in-memory chain, then the disk database
+    transaction_locations
+        .iter()
+        .map(|tx_loc| {
+            (
+                *tx_loc,
+                chain_tx_ids.get(tx_loc).cloned().unwrap_or_else(|| {
+                    db.transaction_hash(*tx_loc)
+                        .expect("unexpected inconsistent UTXO indexes")
+                }),
+            )
+        })
+        .collect()
+}
diff --git a/zebra-state/src/service/read/utxo.rs b/zebra-state/src/service/read/utxo.rs
new file mode 100644
index 00000000..dc1038fe
--- /dev/null
+++ b/zebra-state/src/service/read/utxo.rs
@@ -0,0 +1,65 @@
+//! Convenience wrappers for transparent address index UTXO queries.
+
+use std::collections::BTreeMap;
+
+use zebra_chain::{parameters::Network, transaction, transparent};
+
+use crate::{OutputLocation, TransactionLocation};
+
+/// A convenience wrapper that efficiently stores unspent transparent outputs,
+/// and the corresponding transaction IDs.
+#[derive(Clone, Debug, Default, Eq, PartialEq, Hash)]
+pub struct AddressUtxos {
+    /// A set of unspent transparent outputs.
+    utxos: BTreeMap<OutputLocation, transparent::Output>,
+
+    /// The transaction IDs for each [`OutputLocation`] in `utxos`.
+    tx_ids: BTreeMap<TransactionLocation, transaction::Hash>,
+
+    /// The configured network for this state.
+    network: Network,
+}
+
+impl AddressUtxos {
+    /// Creates a new set of address UTXOs.
+    pub fn new(
+        network: Network,
+        utxos: BTreeMap<OutputLocation, transparent::Output>,
+        tx_ids: BTreeMap<TransactionLocation, transaction::Hash>,
+    ) -> Self {
+        Self {
+            utxos,
+            tx_ids,
+            network,
+        }
+    }
+
+    /// Returns an iterator that provides, for each unspent output: the address it was sent to,
+    /// its transaction hash, its location in the chain, and the output itself (in that order).
+    ///
+    /// The UTXOs are returned in chain order, across all addresses.
+    #[allow(dead_code)]
+    pub fn utxos(
+        &self,
+    ) -> impl Iterator<
+        Item = (
+            transparent::Address,
+            &transaction::Hash,
+            &OutputLocation,
+            &transparent::Output,
+        ),
+    > {
+        self.utxos.iter().map(|(out_loc, output)| {
+            (
+                output
+                    .address(self.network)
+                    .expect("address indexes only contain outputs with addresses"),
+                self.tx_ids
+                    .get(&out_loc.transaction_location())
+                    .expect("address indexes are consistent"),
+                out_loc,
+                output,
+            )
+        })
+    }
+}