feat(运维监控): 增强监控功能和健康评分系统

后端改进:
- 新增健康评分计算服务(ops_health_score.go)
- 添加分布式锁支持(ops_advisory_lock.go)
- 优化指标采集和聚合逻辑
- 新增运维指标采集间隔配置(60-3600秒)
- 移除未使用的WebSocket查询token认证中间件
- 改进清理服务和告警评估逻辑

前端改进:
- 简化OpsDashboard组件结构
- 完善国际化文本(中英文)
- 新增运维监控相关API类型定义
- 添加运维指标采集间隔设置界面
- 优化错误详情模态框

测试:
- 添加健康评分单元测试
- 更新API契约测试
This commit is contained in:
IanShaw027
2026-01-10 01:38:47 +08:00
parent 8ae75e7f6e
commit 585257d340
25 changed files with 570 additions and 385 deletions

View File

@@ -300,30 +300,36 @@ func (s *OpsCleanupService) tryAcquireLeaderLock(ctx context.Context) (func(), b
return nil, true
}
if s.redisClient == nil {
s.warnNoRedisOnce.Do(func() {
log.Printf("[OpsCleanup] redis not configured; running without distributed lock")
})
return nil, true
}
key := opsCleanupLeaderLockKeyDefault
ttl := opsCleanupLeaderLockTTLDefault
ok, err := s.redisClient.SetNX(ctx, key, s.instanceID, ttl).Result()
if err != nil {
// Prefer Redis leader lock when available, but avoid stampeding the DB when Redis is flaky by
// falling back to a DB advisory lock.
if s.redisClient != nil {
ok, err := s.redisClient.SetNX(ctx, key, s.instanceID, ttl).Result()
if err == nil {
if !ok {
return nil, false
}
return func() {
_, _ = opsCleanupReleaseScript.Run(ctx, s.redisClient, []string{key}, s.instanceID).Result()
}, true
}
// Redis error: fall back to DB advisory lock.
s.warnNoRedisOnce.Do(func() {
log.Printf("[OpsCleanup] leader lock SetNX failed; running without lock: %v", err)
log.Printf("[OpsCleanup] leader lock SetNX failed; falling back to DB advisory lock: %v", err)
})
} else {
s.warnNoRedisOnce.Do(func() {
log.Printf("[OpsCleanup] redis not configured; using DB advisory lock")
})
return nil, true
}
release, ok := tryAcquireDBAdvisoryLock(ctx, s.db, hashAdvisoryLockID(key))
if !ok {
return nil, false
}
return func() {
_, _ = opsCleanupReleaseScript.Run(ctx, s.redisClient, []string{key}, s.instanceID).Result()
}, true
return release, true
}
func (s *OpsCleanupService) recordHeartbeatSuccess(runAt time.Time, duration time.Duration) {