7. feat(db): Add a transparent address transaction index (#4038)

* feat(db): add transaction location index

* Apply suggestions from code review

Co-authored-by: teor <teor@riseup.net>

* add address_tx_ids(); also index spends from addresses

Co-authored-by: teor <teor@riseup.net>
This commit is contained in:
Conrado Gouvea 2022-04-13 20:48:35 -03:00 committed by GitHub
parent 7b7d22aabc
commit 53a42999ef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 382 additions and 12 deletions

View File

@ -18,7 +18,7 @@ pub use zebra_chain::transparent::MIN_TRANSPARENT_COINBASE_MATURITY;
pub const MAX_BLOCK_REORG_HEIGHT: u32 = MIN_TRANSPARENT_COINBASE_MATURITY - 1;
/// The database format version, incremented each time the database format changes.
pub const DATABASE_FORMAT_VERSION: u32 = 21;
pub const DATABASE_FORMAT_VERSION: u32 = 22;
/// The maximum number of blocks to check for NU5 transactions,
/// before we assume we are on a pre-NU5 legacy chain.

View File

@ -386,6 +386,10 @@ impl DiskDb {
"utxo_loc_by_transparent_addr_loc",
db_options.clone(),
),
rocksdb::ColumnFamilyDescriptor::new(
"tx_loc_by_transparent_addr_loc",
db_options.clone(),
),
// Sprout
rocksdb::ColumnFamilyDescriptor::new("sprout_nullifiers", db_options.clone()),
rocksdb::ColumnFamilyDescriptor::new("sprout_anchors", db_options.clone()),

View File

@ -67,7 +67,7 @@ pub const TRANSACTION_LOCATION_DISK_BYTES: usize = HEIGHT_DISK_BYTES + TX_INDEX_
any(test, feature = "proptest-impl"),
derive(Arbitrary, Serialize, Deserialize)
)]
pub struct TransactionIndex(u16);
pub struct TransactionIndex(pub(super) u16);
impl TransactionIndex {
/// Creates a transaction index from the inner type.

View File

@ -16,7 +16,8 @@ use crate::service::finalized_state::{
disk_format::{
block::MAX_ON_DISK_HEIGHT,
transparent::{
AddressBalanceLocation, AddressLocation, AddressUnspentOutput, OutputLocation,
AddressBalanceLocation, AddressLocation, AddressTransaction, AddressUnspentOutput,
OutputLocation,
},
IntoDisk, TransactionLocation,
},
@ -191,6 +192,20 @@ fn roundtrip_address_unspent_output() {
);
}
/// Property test for arbitrary [`AddressTransaction`] values.
///
/// Both heights are clamped into `Height(0)..=MAX_ON_DISK_HEIGHT`
/// before the value is passed to `assert_value_properties`.
#[test]
fn roundtrip_address_transaction() {
    zebra_test::init();

    proptest!(|(mut val in any::<AddressTransaction>())| {
        // Limit the height stored in the address location.
        let address_height = val
            .address_location()
            .height()
            .clamp(Height(0), MAX_ON_DISK_HEIGHT);
        *val.address_location_mut().height_mut() = address_height;

        // Limit the height stored in the transaction location.
        let transaction_height = val
            .transaction_location()
            .height
            .clamp(Height(0), MAX_ON_DISK_HEIGHT);
        val.transaction_location_mut().height = transaction_height;

        assert_value_properties(val)
    });
}
#[test]
fn roundtrip_amount() {
zebra_test::init();

View File

@ -22,6 +22,7 @@ expression: cf_names
"tip_chain_value_pool",
"tx_by_hash",
"tx_by_loc",
"tx_loc_by_transparent_addr_loc",
"utxo_by_outpoint",
"utxo_loc_by_transparent_addr_loc",
]

View File

@ -12,6 +12,7 @@ expression: empty_column_families
"sprout_anchors: no entries",
"sprout_nullifiers: no entries",
"tip_chain_value_pool: no entries",
"tx_loc_by_transparent_addr_loc: no entries",
"utxo_by_outpoint: no entries",
"utxo_loc_by_transparent_addr_loc: no entries",
]

View File

@ -21,6 +21,7 @@ expression: empty_column_families
"tip_chain_value_pool: no entries",
"tx_by_hash: no entries",
"tx_by_loc: no entries",
"tx_loc_by_transparent_addr_loc: no entries",
"utxo_by_outpoint: no entries",
"utxo_loc_by_transparent_addr_loc: no entries",
]

View File

@ -12,6 +12,7 @@ expression: empty_column_families
"sprout_anchors: no entries",
"sprout_nullifiers: no entries",
"tip_chain_value_pool: no entries",
"tx_loc_by_transparent_addr_loc: no entries",
"utxo_by_outpoint: no entries",
"utxo_loc_by_transparent_addr_loc: no entries",
]

View File

@ -0,0 +1,10 @@
---
source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs
expression: cf_data
---
[
KV(
k: "00000100000000010000010000",
v: "",
),
]

View File

@ -0,0 +1,14 @@
---
source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs
expression: cf_data
---
[
KV(
k: "00000100000000010000010000",
v: "",
),
KV(
k: "00000100000000010000020000",
v: "",
),
]

View File

@ -0,0 +1,10 @@
---
source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs
expression: cf_data
---
[
KV(
k: "00000100000000010000010000",
v: "",
),
]

View File

@ -0,0 +1,14 @@
---
source: zebra-state/src/service/finalized_state/disk_format/tests/snapshot.rs
expression: cf_data
---
[
KV(
k: "00000100000000010000010000",
v: "",
),
KV(
k: "00000100000000010000020000",
v: "",
),
]

View File

@ -361,6 +361,96 @@ impl AddressUnspentOutput {
}
}
/// A single transaction linked to a [`transparent::Address`].
///
/// We store both the address location key and transaction location value
/// in the RocksDB column family key. This improves insert and delete performance.
///
/// This requires 8 extra bytes for each transaction location,
/// because we repeat the key for each value.
/// But RocksDB compression reduces the duplicate data size on disk.
///
/// The derived ordering compares `address_location` first,
/// then `transaction_location`, because that is the field declaration order.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd)]
#[cfg_attr(
any(test, feature = "proptest-impl"),
derive(Arbitrary, Serialize, Deserialize)
)]
pub struct AddressTransaction {
/// The location of the first [`transparent::Output`] sent to the address.
/// This identifies the address that the transaction is linked to.
address_location: AddressLocation,
/// The location of the transaction linked to the address.
transaction_location: TransactionLocation,
}
impl AddressTransaction {
    /// Builds an [`AddressTransaction`] that links `address_location`
    /// to `transaction_location`.
    pub fn new(
        address_location: AddressLocation,
        transaction_location: TransactionLocation,
    ) -> AddressTransaction {
        Self {
            address_location,
            transaction_location,
        }
    }

    /// Returns the [`AddressTransaction`] that iteration for `address_location` starts from.
    /// Used to look up the first transaction with [`ReadDisk::zs_next_key_value_from`].
    ///
    /// The returned transaction location sorts before every transaction location in the index.
    /// It is always invalid, due to the genesis consensus rules. That is fine, because
    /// [`ReadDisk::zs_next_key_value_from`] fetches the next existing (valid) entry.
    pub fn address_iterator_start(address_location: AddressLocation) -> AddressTransaction {
        // The lowest possible transaction location: the first transaction at height zero.
        let lowest_transaction_location = TransactionLocation::from_usize(Height(0), 0);

        Self::new(address_location, lowest_transaction_location)
    }

    /// Advances the transaction location to the next possible transaction for this address.
    /// Used to look up the next transaction with [`ReadDisk::zs_next_key_value_from`].
    ///
    /// The advanced location may not exist in the index. That is fine, because
    /// [`ReadDisk::zs_next_key_value_from`] fetches the next existing (valid) entry.
    pub fn address_iterator_next(&mut self) {
        // Starting the next lookup from the following index finds the next transaction,
        // even if it is in a later block.
        //
        // Consensus: the block size limit is 2MB, which is much lower than the index range.
        let tx_index = &mut self.transaction_location.index;
        tx_index.0 += 1;
    }

    /// The location of the first [`transparent::Output`] sent to the address
    /// that this transaction is linked to.
    ///
    /// This can be used to look up the address.
    pub fn address_location(&self) -> AddressLocation {
        self.address_location
    }

    /// The location of this transaction.
    pub fn transaction_location(&self) -> TransactionLocation {
        self.transaction_location
    }

    /// Gives tests mutable access to the address location.
    #[cfg(any(test, feature = "proptest-impl"))]
    #[allow(dead_code)]
    pub fn address_location_mut(&mut self) -> &mut AddressLocation {
        &mut self.address_location
    }

    /// Gives tests mutable access to the transaction location.
    #[cfg(any(test, feature = "proptest-impl"))]
    #[allow(dead_code)]
    pub fn transaction_location_mut(&mut self) -> &mut TransactionLocation {
        &mut self.transaction_location
    }
}
// Transparent trait impls
/// Returns a byte representing the [`transparent::Address`] variant.
@ -547,3 +637,34 @@ impl FromDisk for AddressUnspentOutput {
AddressUnspentOutput::new(address_location, unspent_output_location)
}
}
/// Serializes an [`AddressTransaction`] as the address location bytes,
/// immediately followed by the transaction location bytes.
impl IntoDisk for AddressTransaction {
    type Bytes = [u8; OUTPUT_LOCATION_DISK_BYTES + TRANSACTION_LOCATION_DISK_BYTES];

    /// Returns the fixed-size disk representation of this value.
    ///
    /// The first `OUTPUT_LOCATION_DISK_BYTES` bytes are the address location,
    /// the remaining `TRANSACTION_LOCATION_DISK_BYTES` bytes are the transaction location.
    fn as_bytes(&self) -> Self::Bytes {
        let address_location_bytes: [u8; OUTPUT_LOCATION_DISK_BYTES] =
            self.address_location().as_bytes();
        let transaction_location_bytes: [u8; TRANSACTION_LOCATION_DISK_BYTES] =
            self.transaction_location().as_bytes();

        // Fill the output array in place, instead of collecting into a temporary `Vec`
        // and converting it back into an array.
        let mut bytes = [0; OUTPUT_LOCATION_DISK_BYTES + TRANSACTION_LOCATION_DISK_BYTES];
        let (address_part, transaction_part) = bytes.split_at_mut(OUTPUT_LOCATION_DISK_BYTES);

        // The slice lengths always match the source arrays:
        // both are fixed by the same constants in the types above.
        address_part.copy_from_slice(&address_location_bytes);
        transaction_part.copy_from_slice(&transaction_location_bytes);

        bytes
    }
}
/// Deserializes an [`AddressTransaction`] from the address location bytes,
/// followed by the transaction location bytes.
impl FromDisk for AddressTransaction {
    /// Splits `disk_bytes` after the first `OUTPUT_LOCATION_DISK_BYTES` bytes,
    /// and parses each part with its own [`FromDisk`] implementation.
    ///
    /// # Panics
    ///
    /// If `disk_bytes` is shorter than `OUTPUT_LOCATION_DISK_BYTES`.
    fn from_bytes(disk_bytes: impl AsRef<[u8]>) -> Self {
        let raw_bytes = disk_bytes.as_ref();
        let (address_part, transaction_part) = raw_bytes.split_at(OUTPUT_LOCATION_DISK_BYTES);

        Self::new(
            AddressLocation::from_bytes(address_part),
            TransactionLocation::from_bytes(transaction_part),
        )
    }
}

View File

@ -432,15 +432,15 @@ fn snapshot_block_and_transaction_data(state: &FinalizedState) {
/// Snapshot transparent address data, using `cargo insta` and RON serialization.
fn snapshot_transparent_address_data(state: &FinalizedState, height: u32) {
// TODO: transactions for each address (#3951)
let balance_by_transparent_addr = state.cf_handle("balance_by_transparent_addr").unwrap();
let utxo_loc_by_transparent_addr_loc =
state.cf_handle("utxo_loc_by_transparent_addr_loc").unwrap();
let tx_loc_by_transparent_addr_loc = state.cf_handle("tx_loc_by_transparent_addr_loc").unwrap();
let mut stored_address_balances = Vec::new();
let mut stored_address_utxo_locations = Vec::new();
let mut stored_address_utxos = Vec::new();
let mut stored_address_transaction_locations = Vec::new();
// Correctness: Multi-key iteration causes hangs in concurrent code, but seems ok in tests.
let addresses =
@ -451,6 +451,12 @@ fn snapshot_transparent_address_data(state: &FinalizedState, height: u32) {
rocksdb::IteratorMode::Start,
)
.count();
let transaction_address_location_count = state
.full_iterator_cf(
&tx_loc_by_transparent_addr_loc,
rocksdb::IteratorMode::Start,
)
.count();
let addresses: Vec<transparent::Address> = addresses
.map(|(key, _value)| transparent::Address::from_bytes(key))
@ -463,6 +469,7 @@ fn snapshot_transparent_address_data(state: &FinalizedState, height: u32) {
if height == 0 {
assert_eq!(addresses.len(), 0);
assert_eq!(utxo_address_location_count, 0);
assert_eq!(transaction_address_location_count, 0);
return;
}
@ -487,9 +494,17 @@ fn snapshot_transparent_address_data(state: &FinalizedState, height: u32) {
stored_utxos.push(utxo);
}
let mut stored_transaction_locations = Vec::new();
for transaction_location in state.address_transaction_locations(stored_address_location) {
assert_eq!(
transaction_location.address_location(),
stored_address_location
);
stored_transaction_locations.push(transaction_location.transaction_location());
}
// Check that the lists are in chain order
//
// TODO: check that the transaction list is in chain order (#3951)
assert!(
is_sorted(&stored_utxo_locations),
"unsorted: {:?}\n\
@ -497,11 +512,19 @@ fn snapshot_transparent_address_data(state: &FinalizedState, height: u32) {
stored_utxo_locations,
address,
);
assert!(
is_sorted(&stored_transaction_locations),
"unsorted: {:?}\n\
for address: {:?}",
stored_transaction_locations,
address,
);
// The default raw data serialization is very verbose, so we hex-encode the bytes.
stored_address_balances.push((address.to_string(), stored_address_balance_location));
stored_address_utxo_locations.push((stored_address_location, stored_utxo_locations));
stored_address_utxos.push((address, stored_utxos));
stored_address_transaction_locations.push((address, stored_transaction_locations));
}
// We want to snapshot the order in the database,
@ -511,6 +534,10 @@ fn snapshot_transparent_address_data(state: &FinalizedState, height: u32) {
// TODO: change these names to address_utxo_locations and address_utxos
insta::assert_ron_snapshot!("address_utxos", stored_address_utxo_locations);
insta::assert_ron_snapshot!("address_utxo_data", stored_address_utxos);
insta::assert_ron_snapshot!(
"address_transaction_locations",
stored_address_transaction_locations
);
}
/// Return true if `list` is sorted in ascending order.

View File

@ -0,0 +1,12 @@
---
source: zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs
expression: stored_address_transaction_locations
---
[
("t3Vz22vK5z2LcKEdg16Yv4FFneEL1zg9ojd", [
TransactionLocation(
height: Height(1),
index: TransactionIndex(0),
),
]),
]

View File

@ -0,0 +1,16 @@
---
source: zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs
expression: stored_address_transaction_locations
---
[
("t3Vz22vK5z2LcKEdg16Yv4FFneEL1zg9ojd", [
TransactionLocation(
height: Height(1),
index: TransactionIndex(0),
),
TransactionLocation(
height: Height(2),
index: TransactionIndex(0),
),
]),
]

View File

@ -0,0 +1,12 @@
---
source: zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs
expression: stored_address_transaction_locations
---
[
("t2UNzUUx8mWBCRYPRezvA363EYXyEpHokyi", [
TransactionLocation(
height: Height(1),
index: TransactionIndex(0),
),
]),
]

View File

@ -0,0 +1,16 @@
---
source: zebra-state/src/service/finalized_state/zebra_db/block/tests/snapshot.rs
expression: stored_address_transaction_locations
---
[
("t2UNzUUx8mWBCRYPRezvA363EYXyEpHokyi", [
TransactionLocation(
height: Height(1),
index: TransactionIndex(0),
),
TransactionLocation(
height: Height(2),
index: TransactionIndex(0),
),
]),
]

View File

@ -15,14 +15,18 @@ use std::collections::{BTreeMap, BTreeSet, HashMap};
use zebra_chain::{
amount::{Amount, NonNegative},
transparent,
transaction, transparent,
};
use crate::{
service::finalized_state::{
disk_db::{DiskDb, DiskWriteBatch, ReadDisk, WriteDisk},
disk_format::transparent::{
AddressBalanceLocation, AddressLocation, AddressUnspentOutput, OutputLocation,
disk_format::{
transparent::{
AddressBalanceLocation, AddressLocation, AddressTransaction, AddressUnspentOutput,
OutputLocation,
},
TransactionLocation,
},
zebra_db::ZebraDb,
},
@ -166,6 +170,80 @@ impl ZebraDb {
addr_unspent_outputs
}
/// Returns the transaction hash for a [`TransactionLocation`],
/// looked up in the `hash_by_tx_loc` column family.
///
/// # Panics
///
/// If the `hash_by_tx_loc` column family handle is not available.
pub fn tx_id_by_location(&self, tx_location: TransactionLocation) -> Option<transaction::Hash> {
    let tx_hash_cf = self.db.cf_handle("hash_by_tx_loc").unwrap();

    self.db.zs_get(&tx_hash_cf, &tx_location)
}
/// Returns the [`transaction::Hash`]es that created or spent outputs for a [`transparent::Address`],
/// in chain order, if they are in the finalized state.
///
/// The hashes are keyed by their [`TransactionLocation`].
/// The map is empty if the address has no stored location.
///
/// # Panics
///
/// If an indexed transaction location has no stored transaction hash.
#[allow(dead_code)]
pub fn address_tx_ids(
    &self,
    address: &transparent::Address,
) -> BTreeMap<TransactionLocation, transaction::Hash> {
    let mut tx_ids = BTreeMap::new();

    // An address without a location has no indexed transactions.
    let address_location = match self.address_location(address) {
        Some(found_location) => found_location,
        None => return tx_ids,
    };

    for address_transaction in self.address_transaction_locations(address_location) {
        let tx_location = address_transaction.transaction_location();
        let tx_id = self
            .tx_id_by_location(tx_location)
            .expect("transactions whose locations are stored must exist");

        tx_ids.insert(tx_location, tx_id);
    }

    tx_ids
}
/// Returns the locations of any transactions that sent or received from a [`transparent::Address`],
/// if they are in the finalized state.
///
/// Entries are read one at a time from the `tx_loc_by_transparent_addr_loc` column family,
/// starting at the lowest possible key for `address_location`.
/// Reading stops at the first entry for a different address, or at the end of the column family.
///
/// # Panics
///
/// If the `tx_loc_by_transparent_addr_loc` column family handle is not available.
#[allow(dead_code)]
pub fn address_transaction_locations(
    &self,
    address_location: AddressLocation,
) -> BTreeSet<AddressTransaction> {
    let tx_loc_cf = self.db.cf_handle("tx_loc_by_transparent_addr_loc").unwrap();

    // Manually collect every transaction location stored for this address.
    let mut addr_transactions = BTreeSet::new();

    // The lookup key: initially an invalid key representing the minimum possible transaction.
    let mut lookup_key = AddressTransaction::address_iterator_start(address_location);

    loop {
        // The first stored entry at or after the lookup key, for this address or a later one.
        let next_entry: Option<(AddressTransaction, ())> =
            self.db.zs_next_key_value_from(&tx_loc_cf, &lookup_key);

        lookup_key = match next_entry {
            Some((entry, ())) if entry.address_location() == address_location => entry,
            // Either the entry belongs to the next address,
            // or we ran past the final address in the column family.
            _ => break,
        };

        addr_transactions.insert(lookup_key);

        // A potentially invalid key representing the next possible transaction.
        lookup_key.address_iterator_next();
    }

    addr_transactions
}
}
impl DiskWriteBatch {
@ -175,8 +253,6 @@ impl DiskWriteBatch {
/// - transparent address index changes,
/// and return it (without actually writing anything).
///
/// TODO: transparent address transaction index (#3951)
///
/// # Errors
///
/// - This method doesn't currently return any errors, but it might in future
@ -190,6 +266,8 @@ impl DiskWriteBatch {
let utxo_by_out_loc = db.cf_handle("utxo_by_outpoint").unwrap();
let utxo_loc_by_transparent_addr_loc =
db.cf_handle("utxo_loc_by_transparent_addr_loc").unwrap();
let tx_loc_by_transparent_addr_loc =
db.cf_handle("tx_loc_by_transparent_addr_loc").unwrap();
// Index all new transparent outputs, before deleting any we've spent
for (new_output_location, utxo) in new_outputs_by_out_loc {
@ -223,6 +301,14 @@ impl DiskWriteBatch {
address_unspent_output,
(),
);
// Create a link from the AddressLocation to the new TransactionLocation in the database.
// Unlike the OutputLocation link, this will never be deleted.
let address_transaction = AddressTransaction::new(
receiving_address_location,
new_output_location.transaction_location(),
);
self.zs_insert(&tx_loc_by_transparent_addr_loc, address_transaction, ());
}
// Use the OutputLocation to store a copy of the new Output in the database.
@ -248,6 +334,7 @@ impl DiskWriteBatch {
address_balance_location
.spend_output(&spent_output)
.expect("balance underflow already checked");
let sending_address_location = address_balance_location.address_location();
// Delete the link from the AddressLocation to the spent OutputLocation in the database.
let address_spent_output = AddressUnspentOutput::new(
@ -255,6 +342,14 @@ impl DiskWriteBatch {
spent_output_location,
);
self.zs_delete(&utxo_loc_by_transparent_addr_loc, address_spent_output);
// Create a link from the AddressLocation to the spent TransactionLocation in the database.
// Unlike the OutputLocation link, this will never be deleted.
let address_transaction = AddressTransaction::new(
sending_address_location,
spent_output_location.transaction_location(),
);
self.zs_insert(&tx_loc_by_transparent_addr_loc, address_transaction, ());
}
// Delete the OutputLocation, and the copy of the spent Output in the database.