From d5eab7da3bc1137c8b3f15681bc94d98aa2f8c39 Mon Sep 17 00:00:00 2001 From: IanShaw027 <131567472+IanShaw027@users.noreply.github.com> Date: Wed, 14 Jan 2026 23:43:12 +0800 Subject: [PATCH] =?UTF-8?q?refactor(ops):=20=E4=BC=98=E5=8C=96=E5=81=A5?= =?UTF-8?q?=E5=BA=B7=E5=88=86=E6=95=B0=E8=AE=A1=E7=AE=97=E9=80=BB=E8=BE=91?= =?UTF-8?q?=E5=92=8C=E9=98=88=E5=80=BC?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 移除 SLA 组件(与错误率重复) - 恢复延迟组件,阈值调整为 1s-2s - 错误率阈值调整为 1%-10%(更宽松) - 业务健康分数:错误率 50% + 延迟 50% - 更新所有相关测试用例期望值 --- backend/internal/service/ops_health_score.go | 40 ++++++++++--------- .../internal/service/ops_health_score_test.go | 40 ++++++++++++++----- 2 files changed, 52 insertions(+), 28 deletions(-) diff --git a/backend/internal/service/ops_health_score.go b/backend/internal/service/ops_health_score.go index ea648b8c..0ff1d8cb 100644 --- a/backend/internal/service/ops_health_score.go +++ b/backend/internal/service/ops_health_score.go @@ -32,36 +32,38 @@ func computeDashboardHealthScore(now time.Time, overview *OpsDashboardOverview) } // computeBusinessHealth calculates business health score (0-100) -// Components: SLA (50%) + Error Rate (30%) +// Components: Error Rate (50%) + Latency (50%) func computeBusinessHealth(overview *OpsDashboardOverview) float64 { - // SLA score: 99.5% → 100, 95% → 0 (linear) - slaScore := 100.0 - slaPct := clampFloat64(overview.SLA*100, 0, 100) - if slaPct < 99.5 { - if slaPct >= 95 { - slaScore = (slaPct - 95) / 4.5 * 100 - } else { - slaScore = 0 - } - } - - // Error rate score: 0.5% → 100, 5% → 0 (linear) + // Error rate score: 1% → 100, 10% → 0 (linear) // Combines request errors and upstream errors errorScore := 100.0 errorPct := clampFloat64(overview.ErrorRate*100, 0, 100) upstreamPct := clampFloat64(overview.UpstreamErrorRate*100, 0, 100) combinedErrorPct := math.Max(errorPct, upstreamPct) // Use worst case - if combinedErrorPct > 0.5 { - if combinedErrorPct <= 5 { - errorScore = (5 - combinedErrorPct) / 4.5 * 100 + if combinedErrorPct > 1.0 { + if combinedErrorPct <= 10.0 { + errorScore = (10.0 - combinedErrorPct) / 9.0 * 100 } else { errorScore = 0 } } - // Weighted combination (renormalized after removing duration) - const weightSum = 0.8 - return (slaScore*0.5 + errorScore*0.3) / weightSum + // Latency score: 1s → 100, 2s → 0 (linear) + // Uses P99 of duration + latencyScore := 100.0 + if overview.Duration.P99 != nil { + p99 := float64(*overview.Duration.P99) + if p99 > 1000 { + if p99 <= 2000 { + latencyScore = (2000 - p99) / 1000 * 100 + } else { + latencyScore = 0 + } + } + } + + // Weighted combination: 50% error rate + 50% latency + return errorScore*0.5 + latencyScore*0.5 } // computeInfraHealth calculates infrastructure health score (0-100) diff --git a/backend/internal/service/ops_health_score_test.go b/backend/internal/service/ops_health_score_test.go index 5db4b3ec..791238ac 100644 --- a/backend/internal/service/ops_health_score_test.go +++ b/backend/internal/service/ops_health_score_test.go @@ -127,8 +127,8 @@ func TestComputeDashboardHealthScore_Comprehensive(t *testing.T) { MemoryUsagePercent: float64Ptr(75), }, }, - wantMin: 57, - wantMax: 58, + wantMin: 61, + wantMax: 62, }, { name: "DB failure", @@ -277,20 +277,42 @@ func TestComputeBusinessHealth(t *testing.T) { UpstreamErrorRate: 0, Duration: OpsPercentiles{P99: intPtr(500)}, }, - wantMin: 37, - wantMax: 38, + wantMin: 100, + wantMax: 100, }, { - name: "error rate boundary 0.5%", + name: "error rate boundary 1%", overview: &OpsDashboardOverview{ - SLA: 0.995, - ErrorRate: 0.005, + SLA: 0.99, + ErrorRate: 0.01, UpstreamErrorRate: 0, Duration: OpsPercentiles{P99: intPtr(500)}, }, - wantMin: 95, + wantMin: 100, wantMax: 100, }, + { + name: "error rate 5%", + overview: &OpsDashboardOverview{ + SLA: 0.95, + ErrorRate: 0.05, + UpstreamErrorRate: 0, + Duration: OpsPercentiles{P99: intPtr(500)}, + }, + wantMin: 77, + wantMax: 78, + }, + { + name: "latency boundary 2s", + overview: &OpsDashboardOverview{ + SLA: 0.99, + ErrorRate: 0, + UpstreamErrorRate: 0, + Duration: OpsPercentiles{P99: intPtr(2000)}, + }, + wantMin: 50, + wantMax: 50, + }, { name: "upstream error dominates", overview: &OpsDashboardOverview{ @@ -299,7 +321,7 @@ func TestComputeBusinessHealth(t *testing.T) { UpstreamErrorRate: 0.03, Duration: OpsPercentiles{P99: intPtr(500)}, }, - wantMin: 75, + wantMin: 88, wantMax: 90, }, }