From 5c39e6f2fb174b98032b4edaedd80b9da6979f7b Mon Sep 17 00:00:00 2001 From: QTom Date: Thu, 12 Mar 2026 18:58:03 +0800 Subject: [PATCH] =?UTF-8?q?fix(ops=5Falert):=20wg.Add=20=E7=AB=9E=E6=80=81?= =?UTF-8?q?=E4=BF=AE=E5=A4=8D=20+=20leader=20lock=20release=20context=20?= =?UTF-8?q?=E6=B3=84=E6=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. Start() 中 wg.Add(1) 从 run() goroutine 内部移到 go s.run() 之前, 防止 Stop().wg.Wait() 在 Add 之前返回导致孤儿 goroutine。 2. tryAcquireLeaderLock 返回的 release 闭包改用独立的 context.Background()+5s 超时,避免捕获的 evaluateOnce ctx 在 defer 执行时已过期导致锁释放失败(最长阻塞 90s TTL)。 --- backend/internal/service/ops_alert_evaluator_service.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/backend/internal/service/ops_alert_evaluator_service.go b/backend/internal/service/ops_alert_evaluator_service.go index 88883180..11c5d5ce 100644 --- a/backend/internal/service/ops_alert_evaluator_service.go +++ b/backend/internal/service/ops_alert_evaluator_service.go @@ -88,6 +88,7 @@ func (s *OpsAlertEvaluatorService) Start() { if s.stopCh == nil { s.stopCh = make(chan struct{}) } + s.wg.Add(1) go s.run() }) } @@ -105,7 +106,6 @@ func (s *OpsAlertEvaluatorService) Stop() { } func (s *OpsAlertEvaluatorService) run() { - s.wg.Add(1) defer s.wg.Done() // Start immediately to produce early feedback in ops dashboard. @@ -848,7 +848,9 @@ func (s *OpsAlertEvaluatorService) tryAcquireLeaderLock(ctx context.Context, loc return nil, false } return func() { - _, _ = opsAlertEvaluatorReleaseScript.Run(ctx, s.redisClient, []string{key}, s.instanceID).Result() + releaseCtx, releaseCancel := context.WithTimeout(context.Background(), 5*time.Second) + defer releaseCancel() + _, _ = opsAlertEvaluatorReleaseScript.Run(releaseCtx, s.redisClient, []string{key}, s.instanceID).Result() }, true }