From a6c1cd3c35e7b26ea349f79248d5fdaa3fb66c09 Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 15 Jan 2021 16:26:34 +1000 Subject: [PATCH] Stop panicking when fail_with is called twice on a connection We can't rule out the connection state changing between the state checks and any eventual failures, particularly in the presence of async code. So we turn this panic into a warning. --- zebra-network/src/peer/connection.rs | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/zebra-network/src/peer/connection.rs b/zebra-network/src/peer/connection.rs index d834ce87..f94090d2 100644 --- a/zebra-network/src/peer/connection.rs +++ b/zebra-network/src/peer/connection.rs @@ -511,8 +511,32 @@ where .0 .lock() .expect("mutex should be unpoisoned"); - if guard.is_some() { - panic!("called fail_with on already-failed connection state"); + if let Some(original_error) = guard.clone() { + // A failed connection might experience further errors if we: + // 1. concurrently process two different messages + // 2. check for a failed state for the second message + // 3. fail the connection due to the first message + // 4. fail the connection due to the second message + // + // It's not clear: + // * if this is actually a bug, + // * how we can modify Zebra to avoid it. + // + // This warning can also happen due to these bugs: + // * we mark a connection as failed without using fail_with + // * we call fail_with without checking for a failed connection + // state + // + // See the original bug #1510, the initial fix #1531, and the later + // bug #1599. + warn!(?original_error, + new_error = ?e, + connection_state = ?self.state, + client_receiver = ?self.client_rx, + "calling fail_with on already-failed connection state: ignoring new error"); + // we don't need to clean up the connection, the original call to + // fail_with does that + return; } else { *guard = Some(e); }