refactor(ops): 优化健康分数计算逻辑和阈值

- 移除 SLA 组件(与错误率重复)
- 恢复延迟组件,阈值调整为 1s-2s
- 错误率阈值调整为 1%-10%(更宽松)
- 业务健康分数:错误率 50% + 延迟 50%
- 更新所有相关测试用例期望值
This commit is contained in:
IanShaw027
2026-01-14 23:43:12 +08:00
parent 9b10241561
commit d5eab7da3b
2 changed files with 52 additions and 28 deletions

View File

@@ -32,36 +32,38 @@ func computeDashboardHealthScore(now time.Time, overview *OpsDashboardOverview)
}
// computeBusinessHealth calculates business health score (0-100)
// Components: SLA (50%) + Error Rate (30%)
// Components: Error Rate (50%) + Latency (50%)
func computeBusinessHealth(overview *OpsDashboardOverview) float64 {
// SLA score: 99.5% → 100, 95% → 0 (linear)
slaScore := 100.0
slaPct := clampFloat64(overview.SLA*100, 0, 100)
if slaPct < 99.5 {
if slaPct >= 95 {
slaScore = (slaPct - 95) / 4.5 * 100
} else {
slaScore = 0
}
}
// Error rate score: 0.5% → 100, 5% → 0 (linear)
// Error rate score: 1% → 100, 10% → 0 (linear)
// Combines request errors and upstream errors
errorScore := 100.0
errorPct := clampFloat64(overview.ErrorRate*100, 0, 100)
upstreamPct := clampFloat64(overview.UpstreamErrorRate*100, 0, 100)
combinedErrorPct := math.Max(errorPct, upstreamPct) // Use worst case
if combinedErrorPct > 0.5 {
if combinedErrorPct <= 5 {
errorScore = (5 - combinedErrorPct) / 4.5 * 100
if combinedErrorPct > 1.0 {
if combinedErrorPct <= 10.0 {
errorScore = (10.0 - combinedErrorPct) / 9.0 * 100
} else {
errorScore = 0
}
}
// Weighted combination (renormalized after removing duration)
const weightSum = 0.8
return (slaScore*0.5 + errorScore*0.3) / weightSum
// Latency score: 1s → 100, 2s → 0 (linear)
// Uses P99 of duration
latencyScore := 100.0
if overview.Duration.P99 != nil {
p99 := float64(*overview.Duration.P99)
if p99 > 1000 {
if p99 <= 2000 {
latencyScore = (2000 - p99) / 1000 * 100
} else {
latencyScore = 0
}
}
}
// Weighted combination: 50% error rate + 50% latency
return errorScore*0.5 + latencyScore*0.5
}
// computeInfraHealth calculates infrastructure health score (0-100)