fix(ops_alert): wg.Add 竞态修复 + leader lock release context 泄漏
1. Start() 中 wg.Add(1) 从 run() goroutine 内部移到 go s.run() 之前, 防止 Stop().wg.Wait() 在 Add 之前返回导致孤儿 goroutine。 2. tryAcquireLeaderLock 返回的 release 闭包改用独立的 context.Background()+5s 超时,避免捕获的 evaluateOnce ctx 在 defer 执行时已过期导致锁释放失败(最长阻塞 90s TTL)。
This commit is contained in:
@@ -88,6 +88,7 @@ func (s *OpsAlertEvaluatorService) Start() {
|
||||
if s.stopCh == nil {
|
||||
s.stopCh = make(chan struct{})
|
||||
}
|
||||
s.wg.Add(1)
|
||||
go s.run()
|
||||
})
|
||||
}
|
||||
@@ -105,7 +106,6 @@ func (s *OpsAlertEvaluatorService) Stop() {
|
||||
}
|
||||
|
||||
func (s *OpsAlertEvaluatorService) run() {
|
||||
s.wg.Add(1)
|
||||
defer s.wg.Done()
|
||||
|
||||
// Start immediately to produce early feedback in ops dashboard.
|
||||
@@ -848,7 +848,9 @@ func (s *OpsAlertEvaluatorService) tryAcquireLeaderLock(ctx context.Context, loc
|
||||
return nil, false
|
||||
}
|
||||
return func() {
|
||||
_, _ = opsAlertEvaluatorReleaseScript.Run(ctx, s.redisClient, []string{key}, s.instanceID).Result()
|
||||
releaseCtx, releaseCancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer releaseCancel()
|
||||
_, _ = opsAlertEvaluatorReleaseScript.Run(releaseCtx, s.redisClient, []string{key}, s.instanceID).Result()
|
||||
}, true
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user