diff --git a/zebra-chain/src/transaction/serialize.rs b/zebra-chain/src/transaction/serialize.rs
index 41c55080..9ff3ce8f 100644
--- a/zebra-chain/src/transaction/serialize.rs
+++ b/zebra-chain/src/transaction/serialize.rs
@@ -15,6 +15,14 @@ use super::*;
 const OVERWINTER_VERSION_GROUP_ID: u32 = 0x03C4_8270;
 const SAPLING_VERSION_GROUP_ID: u32 = 0x892F_2085;
 
+/// The complete coinbase data of the genesis block, which does not begin with a block height.
+const GENESIS_COINBASE_DATA: [u8; 77] = [
+    4, 255, 255, 7, 31, 1, 4, 69, 90, 99, 97, 115, 104, 48, 98, 57, 99, 52, 101, 101, 102, 56, 98,
+    55, 99, 99, 52, 49, 55, 101, 101, 53, 48, 48, 49, 101, 51, 53, 48, 48, 57, 56, 52, 98, 54, 102,
+    101, 97, 51, 53, 54, 56, 51, 97, 55, 99, 97, 99, 49, 52, 49, 97, 48, 52, 51, 99, 52, 50, 48,
+    54, 52, 56, 51, 53, 100, 51, 52,
+];
+
 impl ZcashSerialize for OutPoint {
     fn zcash_serialize<W: io::Write>(&self, mut writer: W) -> Result<(), io::Error> {
         writer.write_all(&self.hash.0[..])?;
@@ -32,6 +40,111 @@ impl ZcashDeserialize for OutPoint {
     }
 }
 
+// Coinbase inputs include block heights (BIP34). These are not encoded
+// directly, but as a Bitcoin script that pushes the block height to the stack
+// when executed. The script data is otherwise unused. Because we want to
+// *parse* transactions into an internal representation where illegal states are
+// unrepresentable, we need just enough parsing of Bitcoin scripts to parse the
+// coinbase height and split off the rest of the (inert) coinbase data.
+//
+// The pushed height is a Bitcoin script number: little-endian and signed, with
+// the sign held in the top bit of the last byte. A height whose last byte would
+// have that bit set is pushed with an extra zero byte, so the encoding widens at
+// 2**7, 2**15 and 2**23 rather than at 2**8, 2**16 and 2**24. Serialization writes
+// the height back in front of the remaining data, so only the one encoding that
+// it produces for a height is accepted when parsing.
+
+/// Splits coinbase `data` into the block height it begins with and the remaining bytes.
+///
+/// The genesis coinbase data carries no height; it is recognised as a whole and
+/// returned unchanged with height 0.
+///
+/// # Errors
+///
+/// Returns `SerializationError::Parse` if `data` does not begin with a height
+/// push, if the pushed height is `500_000_000` or more, or if it is not encoded
+/// in the width that `coinbase_height_len` gives for it.
+fn parse_coinbase_height(mut data: Vec<u8>) -> Result<(BlockHeight, Vec<u8>), SerializationError> {
+    // The genesis block does not encode the block height by mistake; special case it.
+    // The first five bytes are [4, 255, 255, 7, 31]; read as a height push they would be
+    // the little-endian encoding of 520_617_983. That is above the maximum `BlockHeight`
+    // of 500_000_000 forced by `LockTime`, so the special case cannot shadow a valid
+    // height push.
+    if data[..] == GENESIS_COINBASE_DATA[..] {
+        return Ok((BlockHeight(0), data));
+    }
+
+    // Decode the pushed height and the number of bytes its encoding occupies.
+    let (h, encoded_len) = match (data.first(), data.len()) {
+        // Heights 1 through 16 inclusive are encoded with the OP_N opcodes.
+        (Some(op_n @ 0x51..=0x60), _) => ((op_n - 0x50) as u32, 1),
+        // Opcodes `0x01` through `0x04` push that many little-endian bytes.
+        // The four-byte form is included for completeness.
+        (Some(0x01), len) if len >= 2 => (u32::from(data[1]), 2),
+        (Some(0x02), len) if len >= 3 => (u32::from_le_bytes([data[1], data[2], 0, 0]), 3),
+        (Some(0x03), len) if len >= 4 => (u32::from_le_bytes([data[1], data[2], data[3], 0]), 4),
+        (Some(0x04), len) if len >= 5 => {
+            (u32::from_le_bytes([data[1], data[2], data[3], data[4]]), 5)
+        }
+        _ => {
+            return Err(SerializationError::Parse(
+                "Could not parse BIP34 height in coinbase data",
+            ))
+        }
+    };
+
+    // Check the range first: `coinbase_height_len` panics on heights it cannot encode.
+    // Requiring the canonical width rejects encodings that serialization would not reproduce.
+    if h < 500_000_000 && coinbase_height_len(BlockHeight(h)) == encoded_len {
+        Ok((BlockHeight(h), data.split_off(encoded_len)))
+    } else {
+        Err(SerializationError::Parse("Invalid block height"))
+    }
+}
+
+/// Returns the number of bytes that `write_coinbase_height` writes for `height`.
+///
+/// # Panics
+///
+/// Panics if `height` is `500_000_000` or more.
+fn coinbase_height_len(height: BlockHeight) -> usize {
+    match height.0 {
+        // Genesis block does not include height.
+        0 => 0,
+        // A single OP_N opcode.
+        1..=16 => 1,
+        // A push opcode followed by a 1, 2, 3 or 4 byte script number. The upper
+        // bounds stop one bit short of the payload width because the top bit of
+        // the last byte is the sign bit.
+        17..=127 => 2,
+        128..=32_767 => 3,
+        32_768..=8_388_607 => 4,
+        8_388_608..=499_999_999 => 5,
+        _ => panic!("Invalid coinbase height"),
+    }
+}
+
+/// Writes the BIP34 encoding of `height` to `w`; writes nothing for height 0.
+///
+/// # Panics
+///
+/// Panics if `height` is `500_000_000` or more.
+fn write_coinbase_height<W: io::Write>(height: BlockHeight, mut w: W) -> Result<(), io::Error> {
+    match coinbase_height_len(height) {
+        // Genesis block does not include height.
+        0 => Ok(()),
+        // Heights 1 through 16 are a single OP_N opcode.
+        1 => w.write_u8(0x50 + (height.0 as u8)),
+        // Otherwise the opcode is the payload length, and the payload is that
+        // many low-order bytes of the height in little-endian order.
+        encoded_len => {
+            let payload_len = encoded_len - 1;
+            w.write_u8(payload_len as u8)?;
+            w.write_all(&height.0.to_le_bytes()[..payload_len])
+        }
+    }
+}
+
 impl ZcashSerialize for TransparentInput {
     fn zcash_serialize<W: io::Write>(&self, mut writer: W) -> Result<(), io::Error> {
         match self {
@@ -44,11 +157,18 @@ impl ZcashSerialize for TransparentInput {
                 script.zcash_serialize(&mut writer)?;
                 writer.write_u32::<LittleEndian>(*sequence)?;
             }
-            TransparentInput::Coinbase { data, sequence } => {
+            TransparentInput::Coinbase {
+                height,
+                data,
+                sequence,
+            } => {
                 writer.write_all(&[0; 32][..])?;
                 writer.write_u32::<LittleEndian>(0xffff_ffff)?;
-                assert!(data.len() <= 100);
-                writer.write_compactsize(data.len() as u64)?;
+                let height_len = coinbase_height_len(*height);
+                let total_len = height_len + data.len();
+                assert!(total_len <= 100);
+                writer.write_compactsize(total_len as u64)?;
+                write_coinbase_height(*height, &mut writer)?;
                 writer.write_all(&data[..])?;
                 writer.write_u32::<LittleEndian>(*sequence)?;
             }
@@ -72,8 +192,13 @@ impl ZcashDeserialize for TransparentInput {
             }
             let mut data = Vec::with_capacity(len as usize);
             (&mut reader).take(len).read_to_end(&mut data)?;
+            let (height, data) = parse_coinbase_height(data)?;
             let sequence = reader.read_u32::<LittleEndian>()?;
-            Ok(TransparentInput::Coinbase { data, sequence })
+            Ok(TransparentInput::Coinbase {
+                height,
+                data,
+                sequence,
+            })
         } else {
             Ok(TransparentInput::PrevOut {
                 outpoint: OutPoint {
diff --git a/zebra-chain/src/transaction/tests.rs b/zebra-chain/src/transaction/tests.rs
index ced6ad73..3ae35a50 100644
--- a/zebra-chain/src/transaction/tests.rs
+++ b/zebra-chain/src/transaction/tests.rs
@@ -131,8 +131,14 @@ impl Arbitrary for TransparentInput {
                     }
                 })
                 .boxed(),
-            (vec(any::<u8>(), 0..100), any::<u32>())
-                .prop_map(|(data, sequence)| { TransparentInput::Coinbase { data, sequence } })
+            (any::<BlockHeight>(), vec(any::<u8>(), 0..95), any::<u32>())
+                .prop_map(|(height, data, sequence)| {
+                    TransparentInput::Coinbase {
+                        height,
+                        data,
+                        sequence,
+                    }
+                })
                 .boxed(),
         ]
         .boxed()
diff --git a/zebra-chain/src/transaction/transparent.rs b/zebra-chain/src/transaction/transparent.rs
index 22aa4faf..211f32e7 100644
--- a/zebra-chain/src/transaction/transparent.rs
+++ b/zebra-chain/src/transaction/transparent.rs
@@ -3,7 +3,7 @@
 #[cfg(test)]
 use proptest_derive::Arbitrary;
 
-use crate::types::Script;
+use crate::types::{BlockHeight, Script};
 
 use super::TransactionHash;
 
@@ -35,7 +35,10 @@ pub enum TransparentInput {
     },
     /// New coins created by the block reward.
     Coinbase {
-        /// 100 bytes of arbitrary data.
+        /// The height of this block.
+        height: BlockHeight,
+        /// Approximately 100 bytes of data (95 to be safe).
+        /// XXX refine this type.
         data: Vec<u8>,
         /// The sequence number for the output.
         sequence: u32,
diff --git a/zebra-chain/src/types.rs b/zebra-chain/src/types.rs
index bd15a73e..6865585e 100644
--- a/zebra-chain/src/types.rs
+++ b/zebra-chain/src/types.rs
@@ -39,10 +39,24 @@ impl fmt::Debug for Sha256dChecksum {
 }
 
 /// A u32 which represents a block height value.
+///
+/// # Invariants
+///
+/// Users should not construct block heights greater than or equal to `500_000_000`.
 #[derive(Copy, Clone, Debug, Eq, PartialEq)]
-#[cfg_attr(test, derive(Arbitrary))]
 pub struct BlockHeight(pub u32);
 
+#[cfg(test)]
+impl Arbitrary for BlockHeight {
+    type Parameters = ();
+
+    fn arbitrary_with(_args: ()) -> Self::Strategy {
+        (0u32..500_000_000_u32).prop_map(|h| BlockHeight(h)).boxed()
+    }
+
+    type Strategy = BoxedStrategy<Self>;
+}
+
 /// A Bitcoin-style `locktime`, representing either a block height or an epoch
 /// time.
 ///