From d5eab7da3bc1137c8b3f15681bc94d98aa2f8c39 Mon Sep 17 00:00:00 2001
From: IanShaw027 <131567472+IanShaw027@users.noreply.github.com>
Date: Wed, 14 Jan 2026 23:43:12 +0800
Subject: [PATCH] =?UTF-8?q?refactor(ops):=20=E4=BC=98=E5=8C=96=E5=81=A5?=
 =?UTF-8?q?=E5=BA=B7=E5=88=86=E6=95=B0=E8=AE=A1=E7=AE=97=E9=80=BB=E8=BE=91?=
 =?UTF-8?q?=E5=92=8C=E9=98=88=E5=80=BC?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 移除 SLA 组件（与错误率重复）
- 恢复延迟组件，阈值调整为 1s-2s
- 错误率阈值调整为 1%-10%（更宽松）
- 业务健康分数：错误率 50% + 延迟 50%
- 更新所有相关测试用例期望值
---
 backend/internal/service/ops_health_score.go  | 40 ++++++++++---------
 .../internal/service/ops_health_score_test.go | 40 ++++++++++++++-----
 2 files changed, 52 insertions(+), 28 deletions(-)

diff --git a/backend/internal/service/ops_health_score.go b/backend/internal/service/ops_health_score.go
index ea648b8c..0ff1d8cb 100644
--- a/backend/internal/service/ops_health_score.go
+++ b/backend/internal/service/ops_health_score.go
@@ -32,36 +32,38 @@ func computeDashboardHealthScore(now time.Time, overview *OpsDashboardOverview)
 }
 
 // computeBusinessHealth calculates business health score (0-100)
-// Components: SLA (50%) + Error Rate (30%)
+// Components: Error Rate (50%) + Latency (50%)
 func computeBusinessHealth(overview *OpsDashboardOverview) float64 {
-	// SLA score: 99.5% → 100, 95% → 0 (linear)
-	slaScore := 100.0
-	slaPct := clampFloat64(overview.SLA*100, 0, 100)
-	if slaPct < 99.5 {
-		if slaPct >= 95 {
-			slaScore = (slaPct - 95) / 4.5 * 100
-		} else {
-			slaScore = 0
-		}
-	}
-
-	// Error rate score: 0.5% → 100, 5% → 0 (linear)
+	// Error rate score: 1% → 100, 10% → 0 (linear)
 	// Combines request errors and upstream errors
 	errorScore := 100.0
 	errorPct := clampFloat64(overview.ErrorRate*100, 0, 100)
 	upstreamPct := clampFloat64(overview.UpstreamErrorRate*100, 0, 100)
 	combinedErrorPct := math.Max(errorPct, upstreamPct) // Use worst case
-	if combinedErrorPct > 0.5 {
-		if combinedErrorPct <= 5 {
-			errorScore = (5 - combinedErrorPct) / 4.5 * 100
+	if combinedErrorPct > 1.0 {
+		if combinedErrorPct <= 10.0 {
+			errorScore = (10.0 - combinedErrorPct) / 9.0 * 100
 		} else {
 			errorScore = 0
 		}
 	}
 
-	// Weighted combination (renormalized after removing duration)
-	const weightSum = 0.8
-	return (slaScore*0.5 + errorScore*0.3) / weightSum
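+	// Latency score: P99 <= 1000 ms (1s) → 100, P99 >= 2000 ms (2s) → 0 (linear in between)
+	// Uses overview.Duration.P99, compared in milliseconds; a nil P99 leaves the score at 100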
+	latencyScore := 100.0
+	if overview.Duration.P99 != nil {
+		p99 := float64(*overview.Duration.P99)
+		if p99 > 1000 {
+			if p99 <= 2000 {
+				latencyScore = (2000 - p99) / 1000 * 100
+			} else {
+				latencyScore = 0
+			}
+		}
+	}
+
+	// Weighted combination: 50% error rate + 50% latency
+	return errorScore*0.5 + latencyScore*0.5
 }
 
 // computeInfraHealth calculates infrastructure health score (0-100)
diff --git a/backend/internal/service/ops_health_score_test.go b/backend/internal/service/ops_health_score_test.go
index 5db4b3ec..791238ac 100644
--- a/backend/internal/service/ops_health_score_test.go
+++ b/backend/internal/service/ops_health_score_test.go
@@ -127,8 +127,8 @@ func TestComputeDashboardHealthScore_Comprehensive(t *testing.T) {
 					MemoryUsagePercent: float64Ptr(75),
 				},
 			},
-			wantMin: 57,
-			wantMax: 58,
+			wantMin: 61,
+			wantMax: 62,
 		},
 		{
 			name: "DB failure",
@@ -277,20 +277,42 @@ func TestComputeBusinessHealth(t *testing.T) {
 				UpstreamErrorRate: 0,
 				Duration:          OpsPercentiles{P99: intPtr(500)},
 			},
-			wantMin: 37,
-			wantMax: 38,
+			wantMin: 100,
+			wantMax: 100,
 		},
 		{
-			name: "error rate boundary 0.5%",
+			name: "error rate boundary 1%",
 			overview: &OpsDashboardOverview{
-				SLA:               0.995,
-				ErrorRate:         0.005,
+				SLA:               0.99,
+				ErrorRate:         0.01,
 				UpstreamErrorRate: 0,
 				Duration:          OpsPercentiles{P99: intPtr(500)},
 			},
-			wantMin: 95,
+			wantMin: 100,
 			wantMax: 100,
 		},
+		{
+			name: "error rate 5%",
+			overview: &OpsDashboardOverview{
+				SLA:               0.95,
+				ErrorRate:         0.05,
+				UpstreamErrorRate: 0,
+				Duration:          OpsPercentiles{P99: intPtr(500)},
+			},
+			wantMin: 77,
+			wantMax: 78,
+		},
+		{
+			name: "latency boundary 2s",
+			overview: &OpsDashboardOverview{
+				SLA:               0.99,
+				ErrorRate:         0,
+				UpstreamErrorRate: 0,
+				Duration:          OpsPercentiles{P99: intPtr(2000)},
+			},
+			wantMin: 50,
+			wantMax: 50,
+		},
 		{
 			name: "upstream error dominates",
 			overview: &OpsDashboardOverview{
@@ -299,7 +321,7 @@ func TestComputeBusinessHealth(t *testing.T) {
 				UpstreamErrorRate: 0.03,
 				Duration:          OpsPercentiles{P99: intPtr(500)},
 			},
-			wantMin: 75,
+			wantMin: 88,
 			wantMax: 90,
 		},
 	}