refactor(ops): 优化健康分数计算逻辑和阈值

- 移除 SLA 组件(与错误率重复)
- 恢复延迟组件,阈值调整为 1s-2s
- 错误率阈值调整为 1%-10%(更宽松)
- 业务健康分数:错误率 50% + 延迟 50%
- 更新所有相关测试用例期望值
This commit is contained in:
IanShaw027
2026-01-14 23:43:12 +08:00
parent 9b10241561
commit d5eab7da3b
2 changed files with 52 additions and 28 deletions

View File

@@ -32,36 +32,38 @@ func computeDashboardHealthScore(now time.Time, overview *OpsDashboardOverview)
} }
// computeBusinessHealth calculates business health score (0-100) // computeBusinessHealth calculates business health score (0-100)
// Components: SLA (50%) + Error Rate (30%) // Components: Error Rate (50%) + Latency (50%)
func computeBusinessHealth(overview *OpsDashboardOverview) float64 { func computeBusinessHealth(overview *OpsDashboardOverview) float64 {
// SLA score: 99.5% → 100, 95% → 0 (linear) // Error rate score: 1% → 100, 10% → 0 (linear)
slaScore := 100.0
slaPct := clampFloat64(overview.SLA*100, 0, 100)
if slaPct < 99.5 {
if slaPct >= 95 {
slaScore = (slaPct - 95) / 4.5 * 100
} else {
slaScore = 0
}
}
// Error rate score: 0.5% → 100, 5% → 0 (linear)
// Combines request errors and upstream errors // Combines request errors and upstream errors
errorScore := 100.0 errorScore := 100.0
errorPct := clampFloat64(overview.ErrorRate*100, 0, 100) errorPct := clampFloat64(overview.ErrorRate*100, 0, 100)
upstreamPct := clampFloat64(overview.UpstreamErrorRate*100, 0, 100) upstreamPct := clampFloat64(overview.UpstreamErrorRate*100, 0, 100)
combinedErrorPct := math.Max(errorPct, upstreamPct) // Use worst case combinedErrorPct := math.Max(errorPct, upstreamPct) // Use worst case
if combinedErrorPct > 0.5 { if combinedErrorPct > 1.0 {
if combinedErrorPct <= 5 { if combinedErrorPct <= 10.0 {
errorScore = (5 - combinedErrorPct) / 4.5 * 100 errorScore = (10.0 - combinedErrorPct) / 9.0 * 100
} else { } else {
errorScore = 0 errorScore = 0
} }
} }
// Weighted combination (renormalized after removing duration) // Latency score: 1s → 100, 2s → 0 (linear)
const weightSum = 0.8 // Uses P99 of duration
return (slaScore*0.5 + errorScore*0.3) / weightSum latencyScore := 100.0
if overview.Duration.P99 != nil {
p99 := float64(*overview.Duration.P99)
if p99 > 1000 {
if p99 <= 2000 {
latencyScore = (2000 - p99) / 1000 * 100
} else {
latencyScore = 0
}
}
}
// Weighted combination: 50% error rate + 50% latency
return errorScore*0.5 + latencyScore*0.5
} }
// computeInfraHealth calculates infrastructure health score (0-100) // computeInfraHealth calculates infrastructure health score (0-100)

View File

@@ -127,8 +127,8 @@ func TestComputeDashboardHealthScore_Comprehensive(t *testing.T) {
MemoryUsagePercent: float64Ptr(75), MemoryUsagePercent: float64Ptr(75),
}, },
}, },
wantMin: 57, wantMin: 61,
wantMax: 58, wantMax: 62,
}, },
{ {
name: "DB failure", name: "DB failure",
@@ -277,20 +277,42 @@ func TestComputeBusinessHealth(t *testing.T) {
UpstreamErrorRate: 0, UpstreamErrorRate: 0,
Duration: OpsPercentiles{P99: intPtr(500)}, Duration: OpsPercentiles{P99: intPtr(500)},
}, },
wantMin: 37, wantMin: 100,
wantMax: 38, wantMax: 100,
}, },
{ {
name: "error rate boundary 0.5%", name: "error rate boundary 1%",
overview: &OpsDashboardOverview{ overview: &OpsDashboardOverview{
SLA: 0.995, SLA: 0.99,
ErrorRate: 0.005, ErrorRate: 0.01,
UpstreamErrorRate: 0, UpstreamErrorRate: 0,
Duration: OpsPercentiles{P99: intPtr(500)}, Duration: OpsPercentiles{P99: intPtr(500)},
}, },
wantMin: 95, wantMin: 100,
wantMax: 100, wantMax: 100,
}, },
{
name: "error rate 5%",
overview: &OpsDashboardOverview{
SLA: 0.95,
ErrorRate: 0.05,
UpstreamErrorRate: 0,
Duration: OpsPercentiles{P99: intPtr(500)},
},
wantMin: 77,
wantMax: 78,
},
{
name: "latency boundary 2s",
overview: &OpsDashboardOverview{
SLA: 0.99,
ErrorRate: 0,
UpstreamErrorRate: 0,
Duration: OpsPercentiles{P99: intPtr(2000)},
},
wantMin: 50,
wantMax: 50,
},
{ {
name: "upstream error dominates", name: "upstream error dominates",
overview: &OpsDashboardOverview{ overview: &OpsDashboardOverview{
@@ -299,7 +321,7 @@ func TestComputeBusinessHealth(t *testing.T) {
UpstreamErrorRate: 0.03, UpstreamErrorRate: 0.03,
Duration: OpsPercentiles{P99: intPtr(500)}, Duration: OpsPercentiles{P99: intPtr(500)},
}, },
wantMin: 75, wantMin: 88,
wantMax: 90, wantMax: 90,
}, },
} }