From 86f23f7960aabb2e3ed114a15fe2f1b0bc066ca9 Mon Sep 17 00:00:00 2001 From: teor Date: Tue, 15 Jun 2021 08:29:17 +1000 Subject: [PATCH] Security: only apply the outbound connection rate-limit to actual connections (#2278) * Only advance the outbound connection timer when it returns an address Previously, we were advancing the timer even when we returned `None`. This created large wait times when there were no eligible peers. * Refactor to avoid overlapping sleep timers * Add a maximum next peer delay test Also refactor peer numbers into constants. * Make the number of proptests overridable by the standard env var Also cleanup the test constants. * Test that skipping peer connections also skips their rate limits * Allow an extra second after each sleep on loaded machines macOS VMs seem to need this extra time to pass their tests. * Restart test time bounds from the current time This change avoids test failures due to cumulative errors. Also use a single call to `Instant::now` for each test round. And print the times when the tests fail. * Stop generating invalid outbound peers in proptests The candidate set proptests will fail if enough generated peers are invalid for outbound connections. --- .../peer_set/candidate_set/tests/prop.txt | 8 ++ zebra-network/src/meta_addr/arbitrary.rs | 28 ++++- zebra-network/src/peer_set/candidate_set.rs | 12 +- .../src/peer_set/candidate_set/tests/prop.rs | 104 +++++++++++++++--- 4 files changed, 129 insertions(+), 23 deletions(-) create mode 100644 zebra-network/proptest-regressions/peer_set/candidate_set/tests/prop.txt diff --git a/zebra-network/proptest-regressions/peer_set/candidate_set/tests/prop.txt b/zebra-network/proptest-regressions/peer_set/candidate_set/tests/prop.txt new file mode 100644 index 00000000..a8d239db --- /dev/null +++ b/zebra-network/proptest-regressions/peer_set/candidate_set/tests/prop.txt @@ -0,0 +1,8 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc e27ea62986a1ace4e16dde4b99d8ae8557a6b1e92e3f9a7c6aba0d7b179eb817 # shrinks to peers = [MetaAddr { addr: 0.0.0.0:0, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 0.0.0.0:0, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 0.0.0.0:1151, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 129.65.108.101:9019, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 95.68.199.21:46887, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 167.206.253.131:43927, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 160.138.155.239:63430, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: [6a4e:a543:8c93:e074:8a2a:5cc4:96f8:ce95]:51071, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623572152, calendar: 2021-06-13T08:15:52Z }, last_connection_state: NeverAttemptedAlternate }], initial_candidates = 0, extra_candidates = 2 +cc 788fe2c47a4af559e9a921764d1ce8fcfd0743361c19e6e7a06adc093bc19078 # shrinks to peers = [MetaAddr { addr: [::e9:8050:dbf7:5665]:27188, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 106.27.232.177:51742, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 127.0.0.1:30827, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 0.0.0.0:21948, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 0.0.0.0:38186, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: [4db2:96e6:38c5:fd62:e964:9338:4ce8:bfa0]:32686, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 0.0.0.0:22952, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }, MetaAddr { addr: 114.164.73.233:10203, services: NODE_NETWORK, last_seen: DateTime32 { timestamp: 1623573029, calendar: 2021-06-13T08:30:29Z }, last_connection_state: NeverAttemptedAlternate }], initial_candidates = 3, extra_candidates = 3 diff --git a/zebra-network/src/meta_addr/arbitrary.rs b/zebra-network/src/meta_addr/arbitrary.rs index 8c85c95a..34cb9f8c 100644 --- a/zebra-network/src/meta_addr/arbitrary.rs +++ b/zebra-network/src/meta_addr/arbitrary.rs @@ -5,6 +5,8 @@ use super::{MetaAddr, PeerAddrState, PeerServices}; use zebra_chain::serialization::{arbitrary::canonical_socket_addr, DateTime32}; impl MetaAddr { + /// Create a strategy that generates [`MetaAddr`]s in the + /// [`PeerAddrState::NeverAttemptedGossiped`] state. pub fn gossiped_strategy() -> BoxedStrategy { ( canonical_socket_addr(), @@ -17,9 +19,31 @@ impl MetaAddr { .boxed() } - pub fn alternate_node_strategy() -> BoxedStrategy { + /// Create a strategy that generates [`MetaAddr`]s in the + /// [`PeerAddrState::NeverAttemptedAlternate`] state. + pub fn alternate_strategy() -> BoxedStrategy { + (canonical_socket_addr(), any::()) + .prop_map(|(socket_addr, services)| MetaAddr::new_alternate(&socket_addr, &services)) + .boxed() + } + + /// Create a strategy that generates [`MetaAddr`]s which are ready for outbound connections. + /// + /// TODO: Generate all [`PeerAddrState`] variants, and give them ready fields. + /// (After PR #2276 merges.) + pub fn ready_outbound_strategy() -> BoxedStrategy { canonical_socket_addr() - .prop_map(|address| MetaAddr::new_alternate(&address, &PeerServices::NODE_NETWORK)) + .prop_filter_map("failed MetaAddr::is_valid_for_outbound", |address| { + // Alternate nodes use the current time, so they're always ready + // + // TODO: create a "Zebra supported services" constant + let meta_addr = MetaAddr::new_alternate(&address, &PeerServices::NODE_NETWORK); + if meta_addr.is_valid_for_outbound() { + Some(meta_addr) + } else { + None + } + }) .boxed() } } diff --git a/zebra-network/src/peer_set/candidate_set.rs b/zebra-network/src/peer_set/candidate_set.rs index 2133f259..333a38d5 100644 --- a/zebra-network/src/peer_set/candidate_set.rs +++ b/zebra-network/src/peer_set/candidate_set.rs @@ -1,7 +1,7 @@ use std::{cmp::min, mem, sync::Arc, time::Duration}; use futures::stream::{FuturesUnordered, StreamExt}; -use tokio::time::{sleep, sleep_until, timeout, Instant, Sleep}; +use tokio::time::{sleep, timeout, Instant, Sleep}; use tower::{Service, ServiceExt}; use zebra_chain::serialization::DateTime32; @@ -290,10 +290,6 @@ where /// new peer connections are initiated at least /// [`MIN_PEER_CONNECTION_INTERVAL`][constants::MIN_PEER_CONNECTION_INTERVAL] apart. pub async fn next(&mut self) -> Option { - let current_deadline = self.wait_next_handshake.deadline().max(Instant::now()); - let mut sleep = sleep_until(current_deadline + constants::MIN_PEER_CONNECTION_INTERVAL); - mem::swap(&mut self.wait_next_handshake, &mut sleep); - // # Correctness // // In this critical section, we hold the address mutex, blocking the @@ -316,8 +312,10 @@ where reconnect }; - // SECURITY: rate-limit new candidate connections - sleep.await; + // SECURITY: rate-limit new outbound peer connections + (&mut self.wait_next_handshake).await; + let mut sleep = sleep(constants::MIN_PEER_CONNECTION_INTERVAL); + mem::swap(&mut self.wait_next_handshake, &mut sleep); Some(reconnect) } diff --git a/zebra-network/src/peer_set/candidate_set/tests/prop.rs b/zebra-network/src/peer_set/candidate_set/tests/prop.rs index f68d4168..2ba34739 100644 --- a/zebra-network/src/peer_set/candidate_set/tests/prop.rs +++ b/zebra-network/src/peer_set/candidate_set/tests/prop.rs @@ -1,8 +1,10 @@ use std::{ + env, sync::{Arc, Mutex}, time::{Duration, Instant}, }; +use futures::FutureExt; use proptest::{collection::vec, prelude::*}; use tokio::{ runtime::Runtime, @@ -18,11 +20,25 @@ use crate::{ Request, Response, }; +/// The maximum number of candidates for a "next peer" test. +const MAX_TEST_CANDIDATES: u32 = 4; + +/// The number of random test addresses for each test. +const TEST_ADDRESSES: usize = 2 * MAX_TEST_CANDIDATES as usize; + +/// The default number of proptest cases for each test that includes sleeps. +const DEFAULT_SLEEP_TEST_PROPTEST_CASES: u32 = 16; + +/// The largest extra delay we allow after every test sleep. +/// +/// This makes the tests more reliable on machines with high CPU load. +const MAX_SLEEP_EXTRA_DELAY: Duration = Duration::from_secs(1); + proptest! { /// Test that validated gossiped peers never have a `last_seen` time that's in the future. #[test] fn no_last_seen_times_are_in_the_future( - gossiped_peers in vec(MetaAddr::gossiped_strategy(), 1..10), + gossiped_peers in vec(MetaAddr::gossiped_strategy(), 1..TEST_ADDRESSES), last_seen_limit in any::(), ) { zebra_test::init(); @@ -33,17 +49,50 @@ proptest! { prop_assert![peer.get_last_seen() <= last_seen_limit]; } } + + /// Test that the outbound peer connection rate limit is only applied when + /// a peer address is actually returned. + /// + /// TODO: after PR #2275 merges, add a similar proptest, + /// using a "not ready for attempt" peer generation strategy + #[test] + fn skipping_outbound_peer_connection_skips_rate_limit(next_peer_attempts in 0..TEST_ADDRESSES) { + zebra_test::init(); + + let runtime = Runtime::new().expect("Failed to create Tokio runtime"); + let _guard = runtime.enter(); + + let peer_service = tower::service_fn(|_| async { + unreachable!("Mock peer service is never used"); + }); + + // Since the address book is empty, there won't be any available peers + let address_book = AddressBook::new(&Config::default(), Span::none()); + + let mut candidate_set = CandidateSet::new(Arc::new(Mutex::new(address_book)), peer_service); + + // Make sure that the rate-limit is never triggered, even after multiple calls + for _ in 0..next_peer_attempts { + // An empty address book immediately returns "no next peer" + assert!(matches!(candidate_set.next().now_or_never(), Some(None))); + } + } } proptest! { - #![proptest_config(ProptestConfig::with_cases(16))] + // These tests contain sleeps, so we use a small number of cases by default. + // Set the PROPTEST_CASES env var to override this default. + #![proptest_config(proptest::test_runner::Config::with_cases(env::var("PROPTEST_CASES") + .ok() + .and_then(|v| v.parse().ok()) + .unwrap_or(DEFAULT_SLEEP_TEST_PROPTEST_CASES)))] /// Test that new outbound peer connections are rate-limited. #[test] fn new_outbound_peer_connections_are_rate_limited( - peers in vec(MetaAddr::alternate_node_strategy(), 10), - initial_candidates in 0..4usize, - extra_candidates in 0..4usize, + peers in vec(MetaAddr::ready_outbound_strategy(), TEST_ADDRESSES), + initial_candidates in 0..MAX_TEST_CANDIDATES, + extra_candidates in 0..MAX_TEST_CANDIDATES, ) { zebra_test::init(); @@ -63,13 +112,18 @@ proptest! { // Check rate limiting for initial peers check_candidates_rate_limiting(&mut candidate_set, initial_candidates).await; // Sleep more than the rate limiting delay - sleep(Duration::from_millis(400)).await; + sleep(MAX_TEST_CANDIDATES * MIN_PEER_CONNECTION_INTERVAL).await; // Check that the next peers are still respecting the rate limiting, without causing a // burst of reconnections check_candidates_rate_limiting(&mut candidate_set, extra_candidates).await; }; - assert!(runtime.block_on(timeout(Duration::from_secs(10), checks)).is_ok()); + // Allow enough time for the maximum number of candidates, + // plus some extra time for test machines with high CPU load. + // (The max delay asserts usually fail before hitting this timeout.) + let max_rate_limit_sleep = 3 * MAX_TEST_CANDIDATES * MIN_PEER_CONNECTION_INTERVAL; + let max_extra_delay = (2 * MAX_TEST_CANDIDATES + 1) * MAX_SLEEP_EXTRA_DELAY; + assert!(runtime.block_on(timeout(max_rate_limit_sleep + max_extra_delay, checks)).is_ok()); } } @@ -77,19 +131,41 @@ proptest! { /// /// # Panics /// -/// Will panic if a reconnection peer is returned too quickly, or if no reconnection peer is -/// returned. -async fn check_candidates_rate_limiting(candidate_set: &mut CandidateSet, candidates: usize) +/// Will panic if: +/// - a connection peer is returned too quickly, +/// - a connection peer is returned too slowly, or +/// - if no reconnection peer is returned at all. +async fn check_candidates_rate_limiting(candidate_set: &mut CandidateSet, candidates: u32) where S: tower::Service, S::Future: Send + 'static, { - let mut minimum_reconnect_instant = Instant::now(); + let mut now = Instant::now(); + let mut minimum_reconnect_instant = now; + // Allow extra time for test machines with high CPU load + let mut maximum_reconnect_instant = now + MAX_SLEEP_EXTRA_DELAY; for _ in 0..candidates { - assert!(candidate_set.next().await.is_some()); - assert!(Instant::now() >= minimum_reconnect_instant); + assert!( + candidate_set.next().await.is_some(), + "there are enough available candidates" + ); - minimum_reconnect_instant += MIN_PEER_CONNECTION_INTERVAL; + now = Instant::now(); + assert!( + now >= minimum_reconnect_instant, + "all candidates should obey the minimum rate-limit: now: {:?} min: {:?}", + now, + minimum_reconnect_instant, + ); + assert!( + now <= maximum_reconnect_instant, + "rate-limited candidates should not be delayed too long: now: {:?} max: {:?}. Hint: is the test machine overloaded?", + now, + maximum_reconnect_instant, + ); + + minimum_reconnect_instant = now + MIN_PEER_CONNECTION_INTERVAL; + maximum_reconnect_instant = now + MIN_PEER_CONNECTION_INTERVAL + MAX_SLEEP_EXTRA_DELAY; } }