fix(git): correct pool panic detection and add failure diagnostic logs

- Fix match pattern: `Ok(Ok(Err(_)))` must be treated as NOT a panic,
  since execute_task_body returns Err(()) on task failure (not a panic).
  Previously the Err path incorrectly set panicked=true, which could
  cause pool to appear unhealthy.
- Add "task failed" log before retry/discard decision so real error
  is visible in logs (previously only last_error on exhausted-retries
  path was logged, which required hitting 5 retries to see the cause).
- Convert 3 remaining pool/mod.rs shorthand logs to format!() pattern.
This commit is contained in:
ZhenYi 2026-04-16 22:21:30 +08:00
parent bbf2d75fba
commit 8a0d2885f7

View File

@ -153,14 +153,14 @@ impl GitHookPool {
.await
{
Ok(Ok(Ok(()))) => false, // spawn_blocking Ok, catch_unwind Ok, body Ok
Ok(Ok(Err(_))) => true, // spawn_blocking Ok, catch_unwind Ok, body Err(()) — never hit
Ok(Err(_)) => true, // spawn_blocking Ok, catch_unwind Err = panic
Err(_) => true, // spawn_blocking Err = thread aborted
Ok(Ok(Err(_))) => false, // spawn_blocking Ok, catch_unwind Ok, body Err(()) — error handled inside
Ok(Err(_)) => true, // spawn_blocking Ok, catch_unwind Err = panic
Err(_) => true, // spawn_blocking Err = thread aborted
};
drop(permit);
counter_clone.fetch_sub(1, Ordering::Relaxed);
if panicked {
slog::error!(logger_clone, "task panicked";);
slog::error!(logger_clone, "{}", format!("task panicked"));
}
});
@ -177,13 +177,10 @@ impl GitHookPool {
queue_key: String,
work_key: String,
) -> Result<(), ()> {
slog::info!(self.logger, "task started";
"task_id" => &task.id,
"task_type" => %task.task_type,
"repo_id" => &task.repo_id,
"worker_id" => &self.config.worker_id,
"retry" => task.retry_count
);
slog::info!(self.logger, "{}", format!(
"task started task_id={} task_type={} repo_id={} worker_id={} retry={}",
task.id, task.task_type, task.repo_id, self.config.worker_id, task.retry_count
));
self.log_stream
.info(
@ -211,17 +208,19 @@ impl GitHookPool {
.await;
}
Err(e) => {
// Log the actual error so we can diagnose why tasks fail immediately.
slog::warn!(self.logger, "{}", format!(
"task failed task_id={} task_type={} repo_id={} retry={} error={}",
task.id, task.task_type, task.repo_id, task.retry_count, e
));
// Check retry count and decide whether to requeue or discard.
const MAX_RETRIES: u32 = 5;
if task.retry_count >= MAX_RETRIES {
// Max retries exceeded — discard the task to prevent infinite loop.
slog::warn!(self.logger, "task exhausted retries, discarding";
"task_id" => &task.id,
"task_type" => %task.task_type,
"repo_id" => &task.repo_id,
"retry_count" => task.retry_count,
"last_error" => %e
);
slog::warn!(self.logger, "{}", format!(
"task exhausted retries, discarding task_id={} task_type={} repo_id={} retry_count={} last_error={}",
task.id, task.task_type, task.repo_id, task.retry_count, e
));
if let Err(e) = consumer.ack(&work_key, &task_json).await {
slog::warn!(self.logger, "failed to ack discarded task: {}", e);
}