fix(panic): Stop panicking when handling inbound connection handshakes (#6984)
* Remove a redundant outbound connector timeout
* Fix panics in inbound connection handshaker
* Refactor to simplify FuturesUnordered types
This commit is contained in:
parent
73ce8fbef0
commit
859353b417
|
|
@ -7,14 +7,13 @@ use std::{
|
||||||
};
|
};
|
||||||
|
|
||||||
use futures::prelude::*;
|
use futures::prelude::*;
|
||||||
use tokio::{net::TcpStream, time::timeout};
|
use tokio::net::TcpStream;
|
||||||
use tower::{Service, ServiceExt};
|
use tower::{Service, ServiceExt};
|
||||||
use tracing_futures::Instrument;
|
use tracing_futures::Instrument;
|
||||||
|
|
||||||
use zebra_chain::chain_tip::{ChainTip, NoChainTip};
|
use zebra_chain::chain_tip::{ChainTip, NoChainTip};
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
constants::HANDSHAKE_TIMEOUT,
|
|
||||||
peer::{Client, ConnectedAddr, Handshake, HandshakeRequest},
|
peer::{Client, ConnectedAddr, Handshake, HandshakeRequest},
|
||||||
peer_set::ConnectionTracker,
|
peer_set::ConnectionTracker,
|
||||||
BoxError, PeerSocketAddr, Request, Response,
|
BoxError, PeerSocketAddr, Request, Response,
|
||||||
|
|
@ -93,8 +92,12 @@ where
|
||||||
let connected_addr = ConnectedAddr::new_outbound_direct(addr);
|
let connected_addr = ConnectedAddr::new_outbound_direct(addr);
|
||||||
let connector_span = info_span!("connector", peer = ?connected_addr);
|
let connector_span = info_span!("connector", peer = ?connected_addr);
|
||||||
|
|
||||||
|
// # Security
|
||||||
|
//
|
||||||
|
// `zebra_network::init()` implements a connection timeout on this future.
|
||||||
|
// Any code outside this future does not have a timeout.
|
||||||
async move {
|
async move {
|
||||||
let tcp_stream = timeout(HANDSHAKE_TIMEOUT, TcpStream::connect(*addr)).await??;
|
let tcp_stream = TcpStream::connect(*addr).await?;
|
||||||
let client = hs
|
let client = hs
|
||||||
.oneshot(HandshakeRequest::<TcpStream> {
|
.oneshot(HandshakeRequest::<TcpStream> {
|
||||||
data_stream: tcp_stream,
|
data_stream: tcp_stream,
|
||||||
|
|
|
||||||
|
|
@ -876,6 +876,10 @@ where
|
||||||
let relay = self.relay;
|
let relay = self.relay;
|
||||||
let minimum_peer_version = self.minimum_peer_version.clone();
|
let minimum_peer_version = self.minimum_peer_version.clone();
|
||||||
|
|
||||||
|
// # Security
|
||||||
|
//
|
||||||
|
// `zebra_network::init()` implements a connection timeout on this future.
|
||||||
|
// Any code outside this future does not have a timeout.
|
||||||
let fut = async move {
|
let fut = async move {
|
||||||
debug!(
|
debug!(
|
||||||
addr = ?connected_addr,
|
addr = ?connected_addr,
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ use std::{
|
||||||
collections::{BTreeMap, HashSet},
|
collections::{BTreeMap, HashSet},
|
||||||
convert::Infallible,
|
convert::Infallible,
|
||||||
net::SocketAddr,
|
net::SocketAddr,
|
||||||
|
pin::Pin,
|
||||||
sync::Arc,
|
sync::Arc,
|
||||||
time::Duration,
|
time::Duration,
|
||||||
};
|
};
|
||||||
|
|
@ -15,13 +16,14 @@ use futures::{
|
||||||
future::{self, FutureExt},
|
future::{self, FutureExt},
|
||||||
sink::SinkExt,
|
sink::SinkExt,
|
||||||
stream::{FuturesUnordered, StreamExt},
|
stream::{FuturesUnordered, StreamExt},
|
||||||
TryFutureExt,
|
Future, TryFutureExt,
|
||||||
};
|
};
|
||||||
use rand::seq::SliceRandom;
|
use rand::seq::SliceRandom;
|
||||||
use tokio::{
|
use tokio::{
|
||||||
net::{TcpListener, TcpStream},
|
net::{TcpListener, TcpStream},
|
||||||
sync::broadcast,
|
sync::broadcast,
|
||||||
time::{sleep, Instant},
|
task::JoinError,
|
||||||
|
time::{error::Elapsed, sleep, Instant},
|
||||||
};
|
};
|
||||||
use tokio_stream::wrappers::IntervalStream;
|
use tokio_stream::wrappers::IntervalStream;
|
||||||
use tower::{
|
use tower::{
|
||||||
|
|
@ -565,7 +567,8 @@ where
|
||||||
"Inbound Connections",
|
"Inbound Connections",
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut handshakes = FuturesUnordered::new();
|
let mut handshakes: FuturesUnordered<Pin<Box<dyn Future<Output = ()> + Send>>> =
|
||||||
|
FuturesUnordered::new();
|
||||||
// Keeping an unresolved future in the pool means the stream never terminates.
|
// Keeping an unresolved future in the pool means the stream never terminates.
|
||||||
handshakes.push(future::pending().boxed());
|
handshakes.push(future::pending().boxed());
|
||||||
|
|
||||||
|
|
@ -575,8 +578,7 @@ where
|
||||||
biased;
|
biased;
|
||||||
next_handshake_res = handshakes.next() => match next_handshake_res {
|
next_handshake_res = handshakes.next() => match next_handshake_res {
|
||||||
// The task has already sent the peer change to the peer set.
|
// The task has already sent the peer change to the peer set.
|
||||||
Some(Ok(_)) => continue,
|
Some(()) => continue,
|
||||||
Some(Err(task_panic)) => panic!("panic in inbound handshake task: {task_panic:?}"),
|
|
||||||
None => unreachable!("handshakes never terminates, because it contains a future that never resolves"),
|
None => unreachable!("handshakes never terminates, because it contains a future that never resolves"),
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|
@ -611,19 +613,37 @@ where
|
||||||
connection_tracker,
|
connection_tracker,
|
||||||
peerset_tx.clone(),
|
peerset_tx.clone(),
|
||||||
)
|
)
|
||||||
.await?;
|
.await?
|
||||||
|
.map(move |res| match res {
|
||||||
|
Ok(()) => (),
|
||||||
|
Err(e @ JoinError { .. }) => {
|
||||||
|
if e.is_panic() {
|
||||||
|
panic!("panic during inbound handshaking: {e:?}");
|
||||||
|
} else {
|
||||||
|
info!(
|
||||||
|
"task error during inbound handshaking: {e:?}, is Zebra shutting down?"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
let handshake_timeout = tokio::time::timeout(
|
||||||
|
// Only trigger this timeout if the inner handshake timeout fails
|
||||||
|
HANDSHAKE_TIMEOUT + Duration::from_millis(500),
|
||||||
|
handshake_task,
|
||||||
|
)
|
||||||
|
.map(move |res| match res {
|
||||||
|
Ok(()) => (),
|
||||||
|
Err(_e @ Elapsed { .. }) => {
|
||||||
|
info!(
|
||||||
|
"timeout in spawned accept_inbound_handshake() task: \
|
||||||
|
inner task should have timeout out already"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
// This timeout helps locate inbound peer connection hangs, see #6763 for details.
|
// This timeout helps locate inbound peer connection hangs, see #6763 for details.
|
||||||
handshakes.push(Box::pin(
|
handshakes.push(Box::pin(handshake_timeout));
|
||||||
tokio::time::timeout(
|
|
||||||
// Only trigger this timeout if the inner handshake timeout fails
|
|
||||||
HANDSHAKE_TIMEOUT + Duration::from_millis(500),
|
|
||||||
handshake_task,
|
|
||||||
)
|
|
||||||
.inspect_err(|_elapsed| {
|
|
||||||
info!("timeout in spawned accept_inbound_handshake() task")
|
|
||||||
}),
|
|
||||||
));
|
|
||||||
|
|
||||||
// Rate-limit inbound connection handshakes.
|
// Rate-limit inbound connection handshakes.
|
||||||
// But sleep longer after a successful connection,
|
// But sleep longer after a successful connection,
|
||||||
|
|
@ -798,7 +818,9 @@ where
|
||||||
let candidates = Arc::new(futures::lock::Mutex::new(candidates));
|
let candidates = Arc::new(futures::lock::Mutex::new(candidates));
|
||||||
|
|
||||||
// This contains both crawl and handshake tasks.
|
// This contains both crawl and handshake tasks.
|
||||||
let mut handshakes = FuturesUnordered::new();
|
let mut handshakes: FuturesUnordered<
|
||||||
|
Pin<Box<dyn Future<Output = Result<CrawlerAction, BoxError>> + Send>>,
|
||||||
|
> = FuturesUnordered::new();
|
||||||
// <FuturesUnordered as Stream> returns None when empty.
|
// <FuturesUnordered as Stream> returns None when empty.
|
||||||
// Keeping an unresolved future in the pool means the stream never terminates.
|
// Keeping an unresolved future in the pool means the stream never terminates.
|
||||||
handshakes.push(future::pending().boxed());
|
handshakes.push(future::pending().boxed());
|
||||||
|
|
@ -905,8 +927,14 @@ where
|
||||||
})
|
})
|
||||||
.map(move |res| match res {
|
.map(move |res| match res {
|
||||||
Ok(crawler_action) => crawler_action,
|
Ok(crawler_action) => crawler_action,
|
||||||
Err(e) => {
|
Err(e @ JoinError {..}) => {
|
||||||
panic!("panic during handshaking: {e:?}");
|
if e.is_panic() {
|
||||||
|
panic!("panic during outbound handshake: {e:?}");
|
||||||
|
} else {
|
||||||
|
info!("task error during outbound handshake: {e:?}, is Zebra shutting down?")
|
||||||
|
}
|
||||||
|
// Just fake it
|
||||||
|
Ok(HandshakeFinished)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.in_current_span();
|
.in_current_span();
|
||||||
|
|
@ -929,8 +957,14 @@ where
|
||||||
})
|
})
|
||||||
.map(move |res| match res {
|
.map(move |res| match res {
|
||||||
Ok(crawler_action) => crawler_action,
|
Ok(crawler_action) => crawler_action,
|
||||||
Err(e) => {
|
Err(e @ JoinError {..}) => {
|
||||||
panic!("panic during TimerCrawl: {tick:?} {e:?}");
|
if e.is_panic() {
|
||||||
|
panic!("panic during outbound TimerCrawl: {tick:?} {e:?}");
|
||||||
|
} else {
|
||||||
|
info!("task error during outbound TimerCrawl: {e:?}, is Zebra shutting down?")
|
||||||
|
}
|
||||||
|
// Just fake it
|
||||||
|
Ok(TimerCrawlFinished)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.in_current_span();
|
.in_current_span();
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue