fix: Improve sync diagnostics

This commit is contained in:
teor 2020-08-07 18:04:33 +10:00
parent 65c3c503d5
commit 6aeefcee8b
1 changed files with 40 additions and 21 deletions

View File

@ -79,11 +79,7 @@ where
loop {
self.obtain_tips().await?;
metrics::gauge!(
"sync.prospective_tips.len",
self.prospective_tips.len() as i64
);
metrics::gauge!("sync.pending_blocks.len", self.pending_blocks.len() as i64);
self.update_metrics();
// ObtainTips Step 6
//
@ -92,16 +88,7 @@ where
tracing::debug!("extending prospective tips");
self.extend_tips().await?;
metrics::gauge!(
"sync.prospective_tips.len",
self.prospective_tips.len() as i64
);
metrics::gauge!("sync.pending_blocks.len", self.pending_blocks.len() as i64);
tracing::debug!(
pending.len = self.pending_blocks.len(),
limit = LOOKAHEAD_LIMIT
);
self.update_metrics();
// Check whether we need to wait for existing block download tasks to finish
while self.pending_blocks.len() > LOOKAHEAD_LIMIT {
@ -131,6 +118,10 @@ where
Err(e) => tracing::error!(?e, "potentially transient error"),
};
}
// We just added a bunch of failures, update the metrics now,
// because we might be about to reset or delay.
self.update_metrics();
}
delay_for(Duration::from_secs(15)).await;
@ -162,7 +153,7 @@ where
})
.map_err(|e| eyre!(e))?;
tracing::info!(?block_locator, "trying to obtain new chain tips");
tracing::debug!(?block_locator, "trying to obtain new chain tips");
// ObtainTips Step 2
//
@ -233,12 +224,15 @@ where
// Combine the last elements of each list into a set; this is the
// set of prospective tips.
if !download_set.contains(&new_tip) {
tracing::debug!(?new_tip, "adding new prospective tip");
tracing::debug!(hashes.len = ?hashes.len(), ?new_tip, "adding new prospective tip");
self.prospective_tips.insert(new_tip);
} else {
tracing::debug!(?new_tip, "discarding tip already queued for download");
}
// ObtainTips Step 5.1
//
// Combine all elements of each list into a set
let prev_download_len = download_set.len();
download_set.extend(unknown_hashes);
let new_download_len = download_set.len();
@ -255,10 +249,11 @@ where
}
}
// ObtainTips Step 5
tracing::debug!(?self.prospective_tips, "ObtainTips: downloading blocks for tips");
// ObtainTips Step 5.2
//
// Combine all elements of each list into a set, and queue
// download and verification of those blocks.
// queue download and verification of those blocks.
self.request_blocks(download_set.into_iter().collect())
.await?;
@ -323,10 +318,18 @@ where
//
// Combine the last elements of the remaining responses into
// a set, and add this set to the set of prospective tips.
tracing::debug!(?new_tip, hashes.len = ?hashes.len());
tracing::debug!(hashes.len = ?hashes.len(), ?new_tip, "ExtendTips: extending to new tip");
let _ = self.prospective_tips.insert(new_tip);
let prev_download_len = download_set.len();
download_set.extend(hashes);
let new_download_len = download_set.len();
tracing::debug!(
prev_download_len,
new_download_len,
new_hashes = new_download_len - prev_download_len,
"ExtendTips: added hashes to download set"
);
}
Ok(_) => unreachable!("network returned wrong response"),
// We ignore this error because we made multiple fanout requests.
@ -341,6 +344,7 @@ where
// returned tips
self.prospective_tips
.retain(|tip| !download_set.contains(tip));
tracing::debug!(?self.prospective_tips, "ExtendTips: downloading blocks for tips");
// ExtendTips Step 5
//
@ -359,6 +363,7 @@ where
/// Queue a download for the genesis block, if it isn't currently known to
/// our node.
#[instrument(skip(self))]
async fn request_genesis(&mut self) -> Result<(), Report> {
// Due to Bitcoin protocol limitations, we can't request the genesis
// block using our standard tip-following algorithm:
@ -425,6 +430,20 @@ where
Ok(())
}
/// Update metrics gauges, and create a trace containing metrics.
fn update_metrics(&self) {
metrics::gauge!(
"sync.prospective_tips.len",
self.prospective_tips.len() as i64
);
metrics::gauge!("sync.pending_blocks.len", self.pending_blocks.len() as i64);
tracing::info!(
tips.len = self.prospective_tips.len(),
pending.len = self.pending_blocks.len(),
pending.limit = LOOKAHEAD_LIMIT,
);
}
}
type Error = Box<dyn std::error::Error + Send + Sync + 'static>;