diff --git a/Cargo.lock b/Cargo.lock index 8b9bc6a7..f2ae4fae 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2729,8 +2729,15 @@ dependencies = [ name = "zebra-utils" version = "0.1.0" dependencies = [ + "abscissa_core", "color-eyre", + "hex", + "serde_json", "structopt", + "tracing-error", + "tracing-subscriber", + "zebra-chain", + "zebra-consensus", ] [[package]] diff --git a/zebra-consensus/src/checkpoint.rs b/zebra-consensus/src/checkpoint.rs index c324948a..e16458c2 100644 --- a/zebra-consensus/src/checkpoint.rs +++ b/zebra-consensus/src/checkpoint.rs @@ -77,6 +77,10 @@ type QueuedBlockList = Vec; /// usage by committing blocks to the disk state. (Or dropping invalid blocks.) pub const MAX_QUEUED_BLOCKS_PER_HEIGHT: usize = 4; +/// We limit the maximum number of blocks in each checkpoint. Each block uses a +/// constant amount of memory for the supporting data structures and futures. +pub const MAX_CHECKPOINT_HEIGHT_GAP: usize = 2_000; + /// A checkpointing block verifier. /// /// Verifies blocks using a supplied list of checkpoints. There must be at diff --git a/zebra-utils/Cargo.toml b/zebra-utils/Cargo.toml index f5b60b0c..a1bf7ef6 100644 --- a/zebra-utils/Cargo.toml +++ b/zebra-utils/Cargo.toml @@ -6,5 +6,13 @@ version = "3.0.0-alpha.0" edition = "2018" [dependencies] +abscissa_core = "0.5" structopt = "0.3.15" color-eyre = "0.5.0" +hex = "0.4" +serde_json = "1.0" +tracing-error = { version = "0.1.2", features = ["traced-error"] } +tracing-subscriber = { version = "0.2.8", features = ["tracing-log"] } + +zebra-chain = { path = "../zebra-chain" } +zebra-consensus = { path = "../zebra-consensus" } diff --git a/zebra-utils/calculate-checkpoints.sh b/zebra-utils/calculate-checkpoints.sh deleted file mode 100755 index b58d6386..00000000 --- a/zebra-utils/calculate-checkpoints.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -# Prints Zebra checkpoints, based on a list of block heights, sizes, ans hashes. 
-# -# Reads lines containing a block height, block byte size, and block header hash -# from stdin. Writes each checkpoint to stdout, as a line with space-separated -# fields. -# -# The block header hash is read in Bitcoin order, but written out in Zebra's -# internal byte order. -# -# Usage: get-height-size-hash.sh | calculate-checkpoints.sh -# get-height-size-hash.sh -testnet | calculate-checkpoints.sh -# -# calculate-checkpoints.sh ignores any command-line arguments. -# -# TODO: rewrite as a stand-alone Rust command-line tool. - -# zebra-consensus accepts an ordered list of checkpoints, starting with the -# genesis block. Checkpoint heights can be chosen arbitrarily. - -# We limit the memory usage for each checkpoint, based on the cumulative size of -# the serialized blocks in the chain. Deserialized blocks are larger, because -# they contain pointers and non-compact integers. But they should be within a -# constant factor of the serialized size. -MAX_CHECKPOINT_BYTE_COUNT=$((256*1024*1024)) - -# We limit the maximum number of blocks in each checkpoint. Each block uses a -# constant amount of memory for the supporting data structures and futures. -# -# TODO: In the Rust implementation, set this gap to half the sync service's -# LOOKAHEAD_LIMIT. -MAX_CHECKPOINT_HEIGHT_GAP=2000 - -cumulative_bytes=0 -height_gap=0 -while read -r height size hash; do - cumulative_bytes=$((cumulative_bytes + size)) - height_gap=$((height_gap + 1)) - - # Checkpoints can be slightly larger the maximum byte count. That's ok, - # because the memory usage is only approximate. (This is a bash-specific - # optimisation, to avoid keeping a copy of the previous height and hash. - # Since exact sizes don't matter, we can use the same check in the Rust - # implementation. Or choose a simpler alternative.) - if [ "$height" -eq 0 ] || \ - [ "$cumulative_bytes" -ge "$MAX_CHECKPOINT_BYTE_COUNT" ] || \ - [ "$height_gap" -ge "$MAX_CHECKPOINT_HEIGHT_GAP" ]; then - - # Reverse the byte order of hash. 
- # - # We reverse the hash after selecting the checkpoints, because launching - # a zebrad subprocess is expensive. (This is a bash-specific - # optimisation, the Rust implementation should reverse hashes as it loads - # them.) - hash=$(zebrad revhex "$hash") - - echo "$height $hash" - - cumulative_bytes=0 - height_gap=0 - fi -done diff --git a/zebra-utils/get-height-size-hash.sh b/zebra-utils/get-height-size-hash.sh deleted file mode 100755 index 1f604480..00000000 --- a/zebra-utils/get-height-size-hash.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -set -euo pipefail - -# Print the block height, size, and hash for each block. -# -# For each block in the best chain, gets the block height, block byte size, and -# block header hash using zcash RPC via zcash-cli. Writes each block's info to -# stdout, as a line with space-separated fields. -# -# The block header hash is written out in Bitcoin order, which is different from -# Zebra's internal byte order, as an optimisation. (calculate-checkpoints.sh -# converts hashes to Zebra's internal order after choosing checkpoints.) -# -# Usage: get-height-size-hash.sh | calculate-checkpoints.sh -# get-height-size-hash.sh -testnet | calculate-checkpoints.sh -# -# get-height-size-hash.sh passes its arguments through to zcash-cli. -# -# Requires zcash-cli, jq, and zebrad in your path. zcash-cli must be able to -# access a working, synced zcashd instance. -# -# TODO: rewrite as a stand-alone Rust command-line tool. - -block_count=$(zcash-cli "$@" getblockcount) - -# Checkpoints must be on the main chain, so we skip blocks that are within the -# zcashd reorg limit. -BLOCK_REORG_LIMIT=100 -block_count=$((block_count - BLOCK_REORG_LIMIT)) - -i=0 -while [ "$i" -lt "$block_count" ]; do - # Unfortunately, there is no simple RPC for height, size, and hash. - # So we use the expensive block RPC, and extract fields using jq. - # - # We don't byte-reverse the hash here, because launching a zebrad subprocess - # is expensive. 
(This is a bash-specific optimisation, the Rust - # implementation should reverse hashes as it loads them.) - zcash-cli "$@" getblock "$i" | \ - jq -r '"\(.height) \(.size) \(.hash)"' - i=$((i + 1)) -done diff --git a/zebra-utils/src/bin/zebra-checkpoints/args.rs b/zebra-utils/src/bin/zebra-checkpoints/args.rs index d15c60e1..e71b9710 100644 --- a/zebra-utils/src/bin/zebra-checkpoints/args.rs +++ b/zebra-utils/src/bin/zebra-checkpoints/args.rs @@ -2,9 +2,9 @@ use structopt::StructOpt; #[derive(Debug, StructOpt)] pub struct Args { - /// Use the test network - #[structopt(short, long)] - pub testnet: bool, + /// Path to zcash-cli command + #[structopt(default_value = "zcash-cli", short, long)] + pub cli: String, /// Passthrough args for `zcash-cli` #[structopt(last = true)] diff --git a/zebra-utils/src/bin/zebra-checkpoints/main.rs b/zebra-utils/src/bin/zebra-checkpoints/main.rs index cf7573af..fa7ba457 100644 --- a/zebra-utils/src/bin/zebra-checkpoints/main.rs +++ b/zebra-utils/src/bin/zebra-checkpoints/main.rs @@ -1,33 +1,127 @@ +//! Prints Zebra checkpoints as "height hash" output lines. +//! +//! Get all the blocks up to network current tip and print the ones that are +//! checkpoints according to rules. +//! +//! For usage please refer to the program help: `zebra-checkpoints --help` +//! +//! zebra-consensus accepts an ordered list of checkpoints, starting with the +//! genesis block. Checkpoint heights can be chosen arbitrarily. + #![allow(clippy::try_err)] -use color_eyre::eyre::{eyre, Result}; +use color_eyre::eyre::Result; +use serde_json::Value; +use std::process::Stdio; use structopt::StructOpt; +use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; + +use zebra_chain::block::BlockHeaderHash; +use zebra_chain::types::BlockHeight; mod args; +/// We limit the memory usage for each checkpoint, based on the cumulative size of +/// the serialized blocks in the chain. 
Deserialized blocks are larger, because +/// they contain pointers and non-compact integers. But they should be within a +/// constant factor of the serialized size. +const MAX_CHECKPOINT_BYTE_COUNT: u64 = 256 * 1024 * 1024; + +/// Checkpoints must be on the main chain, so we skip blocks that are within the +/// zcashd reorg limit. +const BLOCK_REORG_LIMIT: BlockHeight = BlockHeight(100); + +/// Appends the user-supplied passthrough arguments (`args.zcli_args`) to `cmd`, if there are any, and returns the command. +fn passthrough(mut cmd: std::process::Command, args: &args::Args) -> std::process::Command { + if !args.zcli_args.is_empty() { + cmd.args(&args.zcli_args); + } + cmd +} + fn main() -> Result<()> { - // todo add tracing setup + init_tracing(); color_eyre::install()?; + // parse the command line, then build the command named by `args.cli` with any passthrough arguments let args = args::Args::from_args(); + let mut cmd = std::process::Command::new(&args.cli); + cmd = passthrough(cmd, &args); - let mut cmd = std::process::Command::new("zcash-cli"); + // running totals since the last printed checkpoint + let mut cumulative_bytes: u64 = 0; + let mut height_gap: BlockHeight = BlockHeight(0); - if args.testnet { - cmd.arg("-testnet"); - } + // get the current block count. NOTE(review): spawn() followed by output() on the same command runs it twice; the spawned child is only killed, its output is never read. + cmd.arg("getblockcount"); + let mut subprocess = cmd.stdout(Stdio::piped()).spawn().unwrap(); + let output = cmd.output().unwrap(); + subprocess.kill()?; + let mut requested_height: BlockHeight = String::from_utf8_lossy(&output.stdout) + .trim() + .parse() + .unwrap(); + requested_height = BlockHeight( + requested_height + .0 + .checked_sub(BLOCK_REORG_LIMIT.0) + .expect("zcashd has some mature blocks: wait for zcashd to sync more blocks"), + ); - cmd.args(args.zcli_args.into_iter()); + // loop through every height from 0 up to, but not including, the block count minus BLOCK_REORG_LIMIT + for x in 0..requested_height.0 { + // unfortunately we need to create a process for each block + let mut cmd = std::process::Command::new(&args.cli); + cmd = passthrough(cmd, &args); - let mut child = cmd.spawn()?; + // get block data. NOTE(review): as above, spawn() plus output() runs `getblock` twice for every height. + cmd.args(&["getblock", &x.to_string()]); + let mut subprocess = cmd.stdout(Stdio::piped()).spawn().unwrap(); + let output = cmd.output().unwrap(); + 
let block_raw = String::from_utf8_lossy(&output.stdout); - // handle communicating with this child process via it's stdin and stdout handles + // parse the command's stdout as JSON + let v: Value = serde_json::from_str(block_raw.trim())?; - let exit_status = child.wait()?; + // extract hash (hex byte-reversed before parsing), height and size. NOTE(review): the `as u32` cast can truncate before the MAX assertion runs. + let hash: BlockHeaderHash = v["hash"] + .as_str() + .map(zebra_chain::utils::byte_reverse_hex) + .unwrap() + .parse() + .unwrap(); + let height = BlockHeight(v["height"].as_u64().unwrap() as u32); + assert!(height <= BlockHeight::MAX); + assert_eq!(x, height.0); + let size = v["size"].as_u64().unwrap(); + assert!(size <= zebra_chain::block::MAX_BLOCK_BYTES); - if !exit_status.success() { - Err(eyre!("throw a more informative error here, might wanna shove stdin / stdout in here as custom sections"))?; + // wait for the extra spawned child to exit (it is waited on, not killed). NOTE(review): its piped stdout is never read, so this may block on large output; confirm. + subprocess.wait()?; + + // add this block to the running totals + cumulative_bytes += size; + height_gap = BlockHeight(height_gap.0 + 1); + + // a block is a checkpoint if it is the genesis block, or the byte limit or the height gap limit has been reached + if height == BlockHeight(0) + || cumulative_bytes >= MAX_CHECKPOINT_BYTE_COUNT + || height_gap.0 >= zebra_consensus::checkpoint::MAX_CHECKPOINT_HEIGHT_GAP as u32 + { + // print the checkpoint to stdout as "height hash", with the hash hex-encoded + println!("{} {}", height.0, &hex::encode(hash.0),); + + // start counting again from this checkpoint + cumulative_bytes = 0; + height_gap = BlockHeight(0); + } } Ok(()) } + +fn init_tracing() { + tracing_subscriber::Registry::default() + .with(tracing_error::ErrorLayer::default()) + .init(); +} diff --git a/zebrad/src/commands/start/sync.rs b/zebrad/src/commands/start/sync.rs index c531f43d..cd7415d2 100644 --- a/zebrad/src/commands/start/sync.rs +++ b/zebrad/src/commands/start/sync.rs @@ -20,7 +20,7 @@ const FANOUT: usize = checkpoint::MAX_QUEUED_BLOCKS_PER_HEIGHT; /// Controls how far ahead of the chain tip the syncer tries to download before /// waiting for queued verifications to complete. Set to twice the maximum /// checkpoint distance. 
-const LOOKAHEAD_LIMIT: usize = 2 * 2_000; +pub const LOOKAHEAD_LIMIT: usize = checkpoint::MAX_CHECKPOINT_HEIGHT_GAP * 2; #[derive(Debug)] pub struct Syncer