From 182683814b1846a7de57857801e802198ddc545c Mon Sep 17 00:00:00 2001
From: IanShaw027 <131567472+IanShaw027@users.noreply.github.com>
Date: Wed, 14 Jan 2026 10:52:56 +0800
Subject: [PATCH] =?UTF-8?q?refactor(ops):=20=E7=A7=BB=E9=99=A4duration?=
 =?UTF-8?q?=E7=9B=B8=E5=85=B3=E5=91=8A=E8=AD=A6=E6=8C=87=E6=A0=87=EF=BC=8C?=
 =?UTF-8?q?=E7=AE=80=E5=8C=96=E7=9B=91=E6=8E=A7=E9=85=8D=E7=BD=AE?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

主要改动：
- 移除 p95_latency_ms 和 p99_latency_ms 告警指标类型
- 移除配置中的 latency_p99_ms_max 阈值设置
- 简化健康分数计算（移除latency权重，重新归一化SLA和错误率）
- 移除duration相关的诊断规则和阈值检查
- 统一术语：延迟 → 请求时长
- 保留duration数据展示，但不再用于告警判断
- 聚焦TTFT作为主要的响应速度告警指标

影响范围：
- Backend: handler, service, models, tests
- Frontend: API types, i18n, components
---
 .../handler/admin/ops_alerts_handler.go       |  2 -
 .../service/ops_alert_evaluator_service.go    | 10 ---
 backend/internal/service/ops_health_score.go  | 21 ++----
 .../internal/service/ops_health_score_test.go | 11 ---
 backend/internal/service/ops_settings.go      |  5 --
 .../internal/service/ops_settings_models.go   |  1 -
 frontend/src/api/admin/ops.ts                 |  3 -
 frontend/src/i18n/locales/en.ts               | 27 +++----
 frontend/src/i18n/locales/zh.ts               | 37 ++++------
 .../ops/components/OpsAlertRulesCard.vue      | 18 -----
 .../ops/components/OpsDashboardHeader.vue     | 71 ++++++-------------
 .../ops/components/OpsErrorDetailsModal.vue   | 66 +++++++++--------
 .../ops/components/OpsRuntimeSettingsCard.vue | 19 +----
 .../ops/components/OpsSettingsDialog.vue      | 28 ++------
 14 files changed, 92 insertions(+), 227 deletions(-)

diff --git a/backend/internal/handler/admin/ops_alerts_handler.go b/backend/internal/handler/admin/ops_alerts_handler.go
index 8dce68c8..c9da19c7 100644
--- a/backend/internal/handler/admin/ops_alerts_handler.go
+++ b/backend/internal/handler/admin/ops_alerts_handler.go
@@ -20,8 +20,6 @@ var validOpsAlertMetricTypes = []string{
 	"success_rate",
 	"error_rate",
 	"upstream_error_rate",
-	"p95_latency_ms",
-	"p99_latency_ms",
 	"cpu_usage_percent",
 	"memory_usage_percent",
 	"concurrency_queue_depth",
diff --git a/backend/internal/service/ops_alert_evaluator_service.go b/backend/internal/service/ops_alert_evaluator_service.go
index a0c93772..2b619f4d 100644
--- a/backend/internal/service/ops_alert_evaluator_service.go
+++ b/backend/internal/service/ops_alert_evaluator_service.go
@@ -523,16 +523,6 @@ func (s *OpsAlertEvaluatorService) computeRuleMetric(
 			return 0, false
 		}
 		return overview.UpstreamErrorRate * 100, true
-	case "p95_latency_ms":
-		if overview.Duration.P95 == nil {
-			return 0, false
-		}
-		return float64(*overview.Duration.P95), true
-	case "p99_latency_ms":
-		if overview.Duration.P99 == nil {
-			return 0, false
-		}
-		return float64(*overview.Duration.P99), true
 	default:
 		return 0, false
 	}
diff --git a/backend/internal/service/ops_health_score.go b/backend/internal/service/ops_health_score.go
index feb0d843..ea648b8c 100644
--- a/backend/internal/service/ops_health_score.go
+++ b/backend/internal/service/ops_health_score.go
@@ -32,7 +32,7 @@ func computeDashboardHealthScore(now time.Time, overview *OpsDashboardOverview)
 }
 
 // computeBusinessHealth calculates business health score (0-100)
-// Components: SLA (50%) + Error Rate (30%) + Latency (20%)
+// Components: SLA (62.5%) + Error Rate (37.5%), i.e. the former 50/30 weights renormalized to sum to 100%
 func computeBusinessHealth(overview *OpsDashboardOverview) float64 {
 	// SLA score: 99.5% → 100, 95% → 0 (linear)
 	slaScore := 100.0
@@ -59,22 +59,9 @@ func computeBusinessHealth(overview *OpsDashboardOverview) float64 {
 		}
 	}
 
-	// Latency score: 1s → 100, 10s → 0 (linear)
-	// Uses P99 of duration (TTFT is less critical for overall health)
-	latencyScore := 100.0
-	if overview.Duration.P99 != nil {
-		p99 := float64(*overview.Duration.P99)
-		if p99 > 1000 {
-			if p99 <= 10000 {
-				latencyScore = (10000 - p99) / 9000 * 100
-			} else {
-				latencyScore = 0
-			}
-		}
-	}
-
-	// Weighted combination
-	return slaScore*0.5 + errorScore*0.3 + latencyScore*0.2
+	// Weighted combination (renormalized after removing duration)
+	const weightSum = 0.8
+	return (slaScore*0.5 + errorScore*0.3) / weightSum
 }
 
 // computeInfraHealth calculates infrastructure health score (0-100)
diff --git a/backend/internal/service/ops_health_score_test.go b/backend/internal/service/ops_health_score_test.go
index 849ba146..859bdd2e 100644
--- a/backend/internal/service/ops_health_score_test.go
+++ b/backend/internal/service/ops_health_score_test.go
@@ -291,17 +291,6 @@ func TestComputeBusinessHealth(t *testing.T) {
 			wantMin: 95,
 			wantMax: 100,
 		},
-		{
-			name: "latency boundary 1000ms",
-			overview: &OpsDashboardOverview{
-				SLA:               0.995,
-				ErrorRate:         0,
-				UpstreamErrorRate: 0,
-				Duration:          OpsPercentiles{P99: intPtr(1000)},
-			},
-			wantMin: 95,
-			wantMax: 100,
-		},
 		{
 			name: "upstream error dominates",
 			overview: &OpsDashboardOverview{
diff --git a/backend/internal/service/ops_settings.go b/backend/internal/service/ops_settings.go
index 53c78fed..6c2f6551 100644
--- a/backend/internal/service/ops_settings.go
+++ b/backend/internal/service/ops_settings.go
@@ -482,13 +482,11 @@ const SettingKeyOpsMetricThresholds = "ops_metric_thresholds"
 
 func defaultOpsMetricThresholds() *OpsMetricThresholds {
 	slaMin := 99.5
-	latencyMax := 2000.0
 	ttftMax := 500.0
 	reqErrMax := 5.0
 	upstreamErrMax := 5.0
 	return &OpsMetricThresholds{
 		SLAPercentMin:               &slaMin,
-		LatencyP99MsMax:             &latencyMax,
 		TTFTp99MsMax:                &ttftMax,
 		RequestErrorRatePercentMax:  &reqErrMax,
 		UpstreamErrorRatePercentMax: &upstreamErrMax,
@@ -538,9 +536,6 @@ func (s *OpsService) UpdateMetricThresholds(ctx context.Context, cfg *OpsMetricT
 	if cfg.SLAPercentMin != nil && (*cfg.SLAPercentMin < 0 || *cfg.SLAPercentMin > 100) {
 		return nil, errors.New("sla_percent_min must be between 0 and 100")
 	}
-	if cfg.LatencyP99MsMax != nil && *cfg.LatencyP99MsMax < 0 {
-		return nil, errors.New("latency_p99_ms_max must be >= 0")
-	}
 	if cfg.TTFTp99MsMax != nil && *cfg.TTFTp99MsMax < 0 {
 		return nil, errors.New("ttft_p99_ms_max must be >= 0")
 	}
diff --git a/backend/internal/service/ops_settings_models.go b/backend/internal/service/ops_settings_models.go
index 229488a1..9ff83ccb 100644
--- a/backend/internal/service/ops_settings_models.go
+++ b/backend/internal/service/ops_settings_models.go
@@ -63,7 +63,6 @@ type OpsAlertSilencingSettings struct {
 
 type OpsMetricThresholds struct {
 	SLAPercentMin               *float64 `json:"sla_percent_min,omitempty"`                 // SLA低于此值变红
-	LatencyP99MsMax             *float64 `json:"latency_p99_ms_max,omitempty"`              // 延迟P99高于此值变红
 	TTFTp99MsMax                *float64 `json:"ttft_p99_ms_max,omitempty"`                 // TTFT P99高于此值变红
 	RequestErrorRatePercentMax  *float64 `json:"request_error_rate_percent_max,omitempty"`  // 请求错误率高于此值变红
 	UpstreamErrorRatePercentMax *float64 `json:"upstream_error_rate_percent_max,omitempty"` // 上游错误率高于此值变红
diff --git a/frontend/src/api/admin/ops.ts b/frontend/src/api/admin/ops.ts
index 24ac7ad3..dfe4f2ca 100644
--- a/frontend/src/api/admin/ops.ts
+++ b/frontend/src/api/admin/ops.ts
@@ -653,8 +653,6 @@ export type MetricType =
   | 'success_rate'
   | 'error_rate'
   | 'upstream_error_rate'
-  | 'p95_latency_ms'
-  | 'p99_latency_ms'
   | 'cpu_usage_percent'
   | 'memory_usage_percent'
   | 'concurrency_queue_depth'
@@ -729,7 +727,6 @@ export interface EmailNotificationConfig {
 
 export interface OpsMetricThresholds {
   sla_percent_min?: number | null                 // SLA低于此值变红
-  latency_p99_ms_max?: number | null              // 延迟 P99 高于此值变红
   ttft_p99_ms_max?: number | null                 // TTFT P99高于此值变红
   request_error_rate_percent_max?: number | null  // 请求错误率高于此值变红
   upstream_error_rate_percent_max?: number | null // 上游错误率高于此值变红
diff --git a/frontend/src/i18n/locales/en.ts b/frontend/src/i18n/locales/en.ts
index 3c6d8f84..7d7776b4 100644
--- a/frontend/src/i18n/locales/en.ts
+++ b/frontend/src/i18n/locales/en.ts
@@ -1887,7 +1887,7 @@ export default {
       totalRequests: 'Total Requests',
       avgQps: 'Avg QPS',
       avgTps: 'Avg TPS',
-      avgLatency: 'Avg Latency',
+      avgLatency: 'Avg Request Duration',
       avgTtft: 'Avg TTFT',
       exceptions: 'Exceptions',
       requestErrors: 'Request Errors',
@@ -1899,7 +1899,7 @@ export default {
       errors: 'Errors',
       errorRate: 'error_rate:',
       upstreamRate: 'upstream_rate:',
-      latencyDuration: 'Latency (duration_ms)',
+      latencyDuration: 'Request Duration (ms)',
       ttftLabel: 'TTFT (first_token_ms)',
       p50: 'p50:',
       p90: 'p90:',
@@ -1919,7 +1919,7 @@ export default {
       failedToLoadData: 'Failed to load ops data.',
       failedToLoadOverview: 'Failed to load overview',
       failedToLoadThroughputTrend: 'Failed to load throughput trend',
-      failedToLoadLatencyHistogram: 'Failed to load latency histogram',
+      failedToLoadLatencyHistogram: 'Failed to load request duration histogram',
       failedToLoadErrorTrend: 'Failed to load error trend',
       failedToLoadErrorDistribution: 'Failed to load error distribution',
       failedToLoadErrorDetail: 'Failed to load error detail',
@@ -1927,7 +1927,7 @@ export default {
       tpsK: 'TPS (K)',
       top: 'Top:',
       throughputTrend: 'Throughput Trend',
-      latencyHistogram: 'Latency Histogram',
+      latencyHistogram: 'Request Duration Histogram',
       errorTrend: 'Error Trend',
       errorDistribution: 'Error Distribution',
       // Health Score & Diagnosis
@@ -1973,14 +1973,7 @@ export default {
         memoryHigh: 'Memory usage elevated ({usage}%)',
         memoryHighImpact: 'Memory pressure is high, needs attention',
         memoryHighAction: 'Monitor memory trends, check for memory leaks',
-        // Latency diagnostics
-        latencyCritical: 'Response latency critically high ({latency}ms)',
-        latencyCriticalImpact: 'User experience extremely poor, many requests timing out',
-        latencyCriticalAction: 'Check slow queries, database indexes, network latency, and upstream services',
-        latencyHigh: 'Response latency elevated ({latency}ms)',
-        latencyHighImpact: 'User experience degraded, needs optimization',
-        latencyHighAction: 'Analyze slow request logs, optimize database queries and business logic',
-        ttftHigh: 'Time to first byte elevated ({ttft}ms)',
+        ttftHigh: 'Time to first token elevated ({ttft}ms)',
         ttftHighImpact: 'User perceived latency increased',
         ttftHighAction: 'Optimize request processing flow, reduce pre-processing time',
         // Error rate diagnostics
@@ -2020,7 +2013,7 @@ export default {
         context: 'Context',
         status: 'Status',
         message: 'Message',
-        latency: 'Latency',
+        latency: 'Request Duration',
         action: 'Action',
         noErrors: 'No errors in this window.',
         grp: 'GRP:',
@@ -2049,7 +2042,7 @@ export default {
         basicInfo: 'Basic Info',
         platform: 'Platform',
         model: 'Model',
-        latency: 'Latency',
+        latency: 'Request Duration',
         ttft: 'TTFT',
         businessLimited: 'Business Limited',
         requestPath: 'Request Path',
@@ -2398,8 +2391,6 @@ export default {
         metricThresholdsHint: 'Configure alert thresholds for metrics, values exceeding thresholds will be displayed in red',
         slaMinPercent: 'SLA Minimum Percentage',
         slaMinPercentHint: 'SLA below this value will be displayed in red (default: 99.5%)',
-        latencyP99MaxMs: 'Latency P99 Maximum (ms)',
-        latencyP99MaxMsHint: 'Latency P99 above this value will be displayed in red (default: 2000ms)',
         ttftP99MaxMs: 'TTFT P99 Maximum (ms)',
         ttftP99MaxMsHint: 'TTFT P99 above this value will be displayed in red (default: 500ms)',
         requestErrorRateMaxPercent: 'Request Error Rate Maximum (%)',
@@ -2458,7 +2449,7 @@ export default {
       tooltips: {
         totalRequests: 'Total number of requests (including both successful and failed requests) in the selected time window.',
         throughputTrend: 'Requests/QPS + Tokens/TPS in the selected window.',
-        latencyHistogram: 'Latency distribution (duration_ms) for successful requests.',
+        latencyHistogram: 'Request duration distribution (ms) for successful requests.',
         errorTrend: 'Error counts over time (SLA scope excludes business limits; upstream excludes 429/529).',
         errorDistribution: 'Error distribution by status code.',
         goroutines:
@@ -2473,7 +2464,7 @@ export default {
         sla: 'Service Level Agreement success rate, excluding business limits (e.g., insufficient balance, quota exceeded).',
         errors: 'Error statistics, including total errors, error rate, and upstream error rate.',
         upstreamErrors: 'Upstream error statistics, excluding rate limit errors (429/529).',
-        latency: 'Request latency statistics, including p50, p90, p95, p99 percentiles.',
+        latency: 'Request duration statistics, including p50, p90, p95, p99 percentiles.',
         ttft: 'Time To First Token, measuring the speed of first byte return in streaming responses.',
         health: 'System health score (0-100), considering SLA, error rate, and resource usage.'
       },
diff --git a/frontend/src/i18n/locales/zh.ts b/frontend/src/i18n/locales/zh.ts
index 57f5e0cc..d955cc1b 100644
--- a/frontend/src/i18n/locales/zh.ts
+++ b/frontend/src/i18n/locales/zh.ts
@@ -2031,7 +2031,7 @@ export default {
       totalRequests: '总请求',
       avgQps: '平均 QPS',
       avgTps: '平均 TPS',
-      avgLatency: '平均延迟',
+      avgLatency: '平均请求时长',
       avgTtft: '平均首字延迟',
       exceptions: '异常数',
       requestErrors: '请求错误',
@@ -2043,7 +2043,7 @@ export default {
       errors: '错误',
       errorRate: '错误率：',
       upstreamRate: '上游错误率：',
-      latencyDuration: '延迟（毫秒）',
+      latencyDuration: '请求时长（毫秒）',
       ttftLabel: '首字延迟（毫秒）',
       p50: 'p50',
       p90: 'p90',
@@ -2063,7 +2063,7 @@ export default {
       failedToLoadData: '加载运维数据失败',
       failedToLoadOverview: '加载概览数据失败',
       failedToLoadThroughputTrend: '加载吞吐趋势失败',
-      failedToLoadLatencyHistogram: '加载延迟分布失败',
+      failedToLoadLatencyHistogram: '加载请求时长分布失败',
       failedToLoadErrorTrend: '加载错误趋势失败',
       failedToLoadErrorDistribution: '加载错误分布失败',
       failedToLoadErrorDetail: '加载错误详情失败',
@@ -2071,7 +2071,7 @@ export default {
       tpsK: 'TPS（千）',
       top: '最高：',
       throughputTrend: '吞吐趋势',
-      latencyHistogram: '延迟分布',
+      latencyHistogram: '请求时长分布',
       errorTrend: '错误趋势',
       errorDistribution: '错误分布',
       // Health Score & Diagnosis
@@ -2117,15 +2117,8 @@ export default {
         memoryHigh: '内存使用率偏高 ({usage}%)',
         memoryHighImpact: '内存压力较大，需要关注',
         memoryHighAction: '监控内存趋势，检查是否有内存泄漏',
-        // Latency diagnostics
-        latencyCritical: '响应延迟严重过高 ({latency}ms)',
-        latencyCriticalImpact: '用户体验极差，大量请求超时',
-        latencyCriticalAction: '检查慢查询、数据库索引、网络延迟和上游服务',
-        latencyHigh: '响应延迟偏高 ({latency}ms)',
-        latencyHighImpact: '用户体验下降，需要优化',
-        latencyHighAction: '分析慢请求日志，优化数据库查询和业务逻辑',
         ttftHigh: '首字节时间偏高 ({ttft}ms)',
-        ttftHighImpact: '用户感知延迟增加',
+        ttftHighImpact: '用户感知时长增加',
         ttftHighAction: '优化请求处理流程，减少前置逻辑耗时',
         // Error rate diagnostics
         upstreamCritical: '上游错误率严重偏高 ({rate}%)',
@@ -2143,13 +2136,13 @@ export default {
         // SLA diagnostics
         slaCritical: 'SLA 严重低于目标 ({sla}%)',
         slaCriticalImpact: '用户体验严重受损',
-        slaCriticalAction: '紧急排查错误和延迟问题，考虑限流保护',
+        slaCriticalAction: '紧急排查错误原因，必要时采取限流保护',
         slaLow: 'SLA 低于目标 ({sla}%)',
         slaLowImpact: '需要关注服务质量',
         slaLowAction: '分析SLA下降原因，优化系统性能',
         // Health score diagnostics
         healthCritical: '综合健康评分过低 ({score})',
-        healthCriticalImpact: '多个指标可能同时异常，建议优先排查错误与延迟',
+        healthCriticalImpact: '多个指标可能同时异常，建议优先排查错误与资源使用情况',
         healthCriticalAction: '全面检查系统状态，优先处理critical级别问题',
         healthLow: '综合健康评分偏低 ({score})',
         healthLowImpact: '可能存在轻度波动，建议关注 SLA 与错误率',
@@ -2164,7 +2157,7 @@ export default {
         context: '上下文',
         status: '状态码',
         message: '消息',
-        latency: '延迟',
+        latency: '请求时长',
         action: '操作',
         noErrors: '该窗口内暂无错误。',
         grp: 'GRP：',
@@ -2193,7 +2186,7 @@ export default {
         basicInfo: '基本信息',
         platform: '平台',
         model: '模型',
-        latency: '延迟',
+        latency: '请求时长',
         ttft: 'TTFT',
         businessLimited: '业务限制',
         requestPath: '请求路径',
@@ -2351,8 +2344,8 @@ export default {
           successRate: '成功率 (%)',
           errorRate: '错误率 (%)',
           upstreamErrorRate: '上游错误率 (%)',
-          p95: 'P95 延迟 (ms)',
-          p99: 'P99 延迟 (ms)',
+          p95: 'P95 请求时长 (ms)',
+          p99: 'P99 请求时长 (ms)',
           cpu: 'CPU 使用率 (%)',
           memory: '内存使用率 (%)',
           queueDepth: '并发排队深度',
@@ -2542,8 +2535,6 @@ export default {
         metricThresholdsHint: '配置各项指标的告警阈值，超出阈值时将以红色显示',
         slaMinPercent: 'SLA最低百分比',
         slaMinPercentHint: 'SLA低于此值时显示为红色（默认：99.5%）',
-        latencyP99MaxMs: '延迟P99最大值（毫秒）',
-        latencyP99MaxMsHint: '延迟P99高于此值时显示为红色（默认：2000ms）',
         ttftP99MaxMs: 'TTFT P99最大值（毫秒）',
         ttftP99MaxMsHint: 'TTFT P99高于此值时显示为红色（默认：500ms）',
         requestErrorRateMaxPercent: '请求错误率最大值（%）',
@@ -2602,12 +2593,12 @@ export default {
       tooltips: {
         totalRequests: '当前时间窗口内的总请求数和Token消耗量。',
         throughputTrend: '当前窗口内的请求/QPS 与 token/TPS 趋势。',
-        latencyHistogram: '成功请求的延迟分布（毫秒）。',
+        latencyHistogram: '成功请求的请求时长分布（毫秒）。',
         errorTrend: '错误趋势（SLA 口径排除业务限制；上游错误率排除 429/529）。',
         errorDistribution: '按状态码统计的错误分布。',
         upstreamErrors: '上游服务返回的错误，包括API提供商的错误响应（排除429/529限流错误）。',
         goroutines:
-          'Go 运行时的协程数量（轻量级线程）。没有绝对“安全值”，建议以历史基线为准。经验参考：<2000 常见；2000-8000 需关注；>8000 且伴随队列/延迟上升时，优先排查阻塞/泄漏。',
+          'Go 运行时的协程数量（轻量级线程）。没有绝对"安全值"，建议以历史基线为准。经验参考：<2000 常见；2000-8000 需关注；>8000 且伴随队列上升时，优先排查阻塞/泄漏。',
         cpu: 'CPU 使用率，显示系统处理器的负载情况。',
         memory: '内存使用率，包括已使用和总可用内存。',
         db: '数据库连接池状态，包括活跃连接、空闲连接和等待连接数。',
@@ -2617,7 +2608,7 @@ export default {
         tokens: '当前时间窗口内处理的总Token数量。',
         sla: '服务等级协议达成率，排除业务限制（如余额不足、配额超限）的成功请求占比。',
         errors: '错误统计，包括总错误数、错误率和上游错误率。',
-        latency: '请求延迟统计，包括 p50、p90、p95、p99 等百分位数。',
+        latency: '请求时长统计，包括 p50、p90、p95、p99 等百分位数。',
         ttft: '首Token延迟（Time To First Token），衡量流式响应的首字节返回速度。',
         health: '系统健康评分（0-100），综合考虑 SLA、错误率和资源使用情况。'
       },
diff --git a/frontend/src/views/admin/ops/components/OpsAlertRulesCard.vue b/frontend/src/views/admin/ops/components/OpsAlertRulesCard.vue
index 2cf097c0..627303c0 100644
--- a/frontend/src/views/admin/ops/components/OpsAlertRulesCard.vue
+++ b/frontend/src/views/admin/ops/components/OpsAlertRulesCard.vue
@@ -140,24 +140,6 @@ const metricDefinitions = computed(() => {
       recommendedThreshold: 1,
       unit: '%'
     },
-    {
-      type: 'p95_latency_ms',
-      group: 'system',
-      label: t('admin.ops.alertRules.metrics.p95'),
-      description: t('admin.ops.alertRules.metricDescriptions.p95'),
-      recommendedOperator: '>',
-      recommendedThreshold: 1000,
-      unit: 'ms'
-    },
-    {
-      type: 'p99_latency_ms',
-      group: 'system',
-      label: t('admin.ops.alertRules.metrics.p99'),
-      description: t('admin.ops.alertRules.metricDescriptions.p99'),
-      recommendedOperator: '>',
-      recommendedThreshold: 2000,
-      unit: 'ms'
-    },
     {
       type: 'cpu_usage_percent',
       group: 'system',
diff --git a/frontend/src/views/admin/ops/components/OpsDashboardHeader.vue b/frontend/src/views/admin/ops/components/OpsDashboardHeader.vue
index 96dc9c8a..f92c6c50 100644
--- a/frontend/src/views/admin/ops/components/OpsDashboardHeader.vue
+++ b/frontend/src/views/admin/ops/components/OpsDashboardHeader.vue
@@ -169,8 +169,8 @@ const updatedAtLabel = computed(() => {
   return props.lastUpdated.toLocaleTimeString()
 })
 
-// --- Color coding for latency/TTFT ---
-function getLatencyColor(ms: number | null | undefined): string {
+// --- Color coding for TTFT ---
+function getTTFTColor(ms: number | null | undefined): string {
   if (ms == null) return 'text-gray-900 dark:text-white'
   if (ms < 500) return 'text-green-600 dark:text-green-400'
   if (ms < 1000) return 'text-yellow-600 dark:text-yellow-400'
@@ -186,13 +186,6 @@ function isSLABelowThreshold(slaPercent: number | null): boolean {
   return slaPercent < threshold
 }
 
-function isLatencyAboveThreshold(latencyP99Ms: number | null): boolean {
-  if (latencyP99Ms == null) return false
-  const threshold = props.thresholds?.latency_p99_ms_max
-  if (threshold == null) return false
-  return latencyP99Ms > threshold
-}
-
 function isTTFTAboveThreshold(ttftP99Ms: number | null): boolean {
   if (ttftP99Ms == null) return false
   const threshold = props.thresholds?.ttft_p99_ms_max
@@ -482,24 +475,6 @@ const diagnosisReport = computed<DiagnosisItem[]>(() => {
     }
   }
 
-  // Latency diagnostics
-  const durationP99 = ov.duration?.p99_ms ?? 0
-  if (durationP99 > 2000) {
-    report.push({
-      type: 'critical',
-      message: t('admin.ops.diagnosis.latencyCritical', { latency: durationP99.toFixed(0) }),
-      impact: t('admin.ops.diagnosis.latencyCriticalImpact'),
-      action: t('admin.ops.diagnosis.latencyCriticalAction')
-    })
-  } else if (durationP99 > 1000) {
-    report.push({
-      type: 'warning',
-      message: t('admin.ops.diagnosis.latencyHigh', { latency: durationP99.toFixed(0) }),
-      impact: t('admin.ops.diagnosis.latencyHighImpact'),
-      action: t('admin.ops.diagnosis.latencyHighAction')
-    })
-  }
-
   const ttftP99 = ov.ttft?.p99_ms ?? 0
   if (ttftP99 > 500) {
     report.push({
@@ -1181,7 +1156,7 @@ function handleToolbarRefresh() {
       <!-- Right: 6 cards (3 cols x 2 rows) -->
       <div class="grid h-full grid-cols-1 content-center gap-4 sm:grid-cols-2 lg:col-span-7 lg:grid-cols-3">
         <!-- Card 1: Requests -->
-        <div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900">
+        <div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900" style="order: 1;">
           <div class="flex items-center justify-between">
             <div class="flex items-center gap-1">
               <span class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.requestsTitle') }}</span>
@@ -1217,7 +1192,7 @@ function handleToolbarRefresh() {
         </div>
 
         <!-- Card 2: SLA -->
-        <div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900">
+        <div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900" style="order: 2;">
           <div class="flex items-center justify-between">
             <div class="flex items-center gap-2">
               <span class="text-[10px] font-bold uppercase text-gray-400">SLA</span>
@@ -1247,8 +1222,8 @@ function handleToolbarRefresh() {
           </div>
         </div>
 
-        <!-- Card 3: Latency (Duration) -->
-        <div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900">
+        <!-- Card 4: Request Duration -->
+        <div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900" style="order: 4;">
           <div class="flex items-center justify-between">
             <div class="flex items-center gap-1">
               <span class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.latencyDuration') }}</span>
@@ -1264,7 +1239,7 @@ function handleToolbarRefresh() {
             </button>
           </div>
           <div class="mt-2 flex items-baseline gap-2">
-            <div class="text-3xl font-black" :class="isLatencyAboveThreshold(durationP99Ms) ? 'text-red-600 dark:text-red-400' : getLatencyColor(durationP99Ms)">
+            <div class="text-3xl font-black text-gray-900 dark:text-white">
               {{ durationP99Ms ?? '-' }}
             </div>
             <span class="text-xs font-bold text-gray-400">ms (P99)</span>
@@ -1272,34 +1247,34 @@ function handleToolbarRefresh() {
           <div class="mt-3 flex flex-wrap gap-x-3 gap-y-1 text-xs">
             <div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
               <span class="text-gray-500">P95:</span>
-              <span class="font-bold" :class="getLatencyColor(durationP95Ms)">{{ durationP95Ms ?? '-' }}</span>
+              <span class="font-bold text-gray-900 dark:text-white">{{ durationP95Ms ?? '-' }}</span>
               <span class="text-gray-400">ms</span>
             </div>
             <div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
               <span class="text-gray-500">P90:</span>
-              <span class="font-bold" :class="getLatencyColor(durationP90Ms)">{{ durationP90Ms ?? '-' }}</span>
+              <span class="font-bold text-gray-900 dark:text-white">{{ durationP90Ms ?? '-' }}</span>
               <span class="text-gray-400">ms</span>
             </div>
             <div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
               <span class="text-gray-500">P50:</span>
-              <span class="font-bold" :class="getLatencyColor(durationP50Ms)">{{ durationP50Ms ?? '-' }}</span>
+              <span class="font-bold text-gray-900 dark:text-white">{{ durationP50Ms ?? '-' }}</span>
               <span class="text-gray-400">ms</span>
             </div>
             <div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
               <span class="text-gray-500">Avg:</span>
-              <span class="font-bold" :class="getLatencyColor(durationAvgMs)">{{ durationAvgMs ?? '-' }}</span>
+              <span class="font-bold text-gray-900 dark:text-white">{{ durationAvgMs ?? '-' }}</span>
               <span class="text-gray-400">ms</span>
             </div>
             <div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
               <span class="text-gray-500">Max:</span>
-              <span class="font-bold" :class="getLatencyColor(durationMaxMs)">{{ durationMaxMs ?? '-' }}</span>
+              <span class="font-bold text-gray-900 dark:text-white">{{ durationMaxMs ?? '-' }}</span>
               <span class="text-gray-400">ms</span>
             </div>
           </div>
         </div>
 
-        <!-- Card 4: TTFT -->
-        <div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900">
+        <!-- Card 5: TTFT -->
+        <div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900" style="order: 5;">
           <div class="flex items-center justify-between">
             <div class="flex items-center gap-1">
               <span class="text-[10px] font-bold uppercase text-gray-400">TTFT</span>
@@ -1315,7 +1290,7 @@ function handleToolbarRefresh() {
             </button>
           </div>
           <div class="mt-2 flex items-baseline gap-2">
-            <div class="text-3xl font-black" :class="isTTFTAboveThreshold(ttftP99Ms) ? 'text-red-600 dark:text-red-400' : getLatencyColor(ttftP99Ms)">
+            <div class="text-3xl font-black" :class="isTTFTAboveThreshold(ttftP99Ms) ? 'text-red-600 dark:text-red-400' : getTTFTColor(ttftP99Ms)">
               {{ ttftP99Ms ?? '-' }}
             </div>
             <span class="text-xs font-bold text-gray-400">ms (P99)</span>
@@ -1323,34 +1298,34 @@ function handleToolbarRefresh() {
           <div class="mt-3 flex flex-wrap gap-x-3 gap-y-1 text-xs">
             <div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
               <span class="text-gray-500">P95:</span>
-              <span class="font-bold" :class="getLatencyColor(ttftP95Ms)">{{ ttftP95Ms ?? '-' }}</span>
+              <span class="font-bold" :class="getTTFTColor(ttftP95Ms)">{{ ttftP95Ms ?? '-' }}</span>
               <span class="text-gray-400">ms</span>
             </div>
             <div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
               <span class="text-gray-500">P90:</span>
-              <span class="font-bold" :class="getLatencyColor(ttftP90Ms)">{{ ttftP90Ms ?? '-' }}</span>
+              <span class="font-bold" :class="getTTFTColor(ttftP90Ms)">{{ ttftP90Ms ?? '-' }}</span>
               <span class="text-gray-400">ms</span>
             </div>
             <div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
               <span class="text-gray-500">P50:</span>
-              <span class="font-bold" :class="getLatencyColor(ttftP50Ms)">{{ ttftP50Ms ?? '-' }}</span>
+              <span class="font-bold" :class="getTTFTColor(ttftP50Ms)">{{ ttftP50Ms ?? '-' }}</span>
               <span class="text-gray-400">ms</span>
             </div>
             <div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
               <span class="text-gray-500">Avg:</span>
-              <span class="font-bold" :class="getLatencyColor(ttftAvgMs)">{{ ttftAvgMs ?? '-' }}</span>
+              <span class="font-bold" :class="getTTFTColor(ttftAvgMs)">{{ ttftAvgMs ?? '-' }}</span>
               <span class="text-gray-400">ms</span>
             </div>
             <div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
               <span class="text-gray-500">Max:</span>
-              <span class="font-bold" :class="getLatencyColor(ttftMaxMs)">{{ ttftMaxMs ?? '-' }}</span>
+              <span class="font-bold" :class="getTTFTColor(ttftMaxMs)">{{ ttftMaxMs ?? '-' }}</span>
               <span class="text-gray-400">ms</span>
             </div>
           </div>
         </div>
 
-        <!-- Card 5: Request Errors -->
-        <div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900">
+        <!-- Card 3: Request Errors -->
+        <div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900" style="order: 3;">
           <div class="flex items-center justify-between">
             <div class="flex items-center gap-1">
               <span class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.requestErrors') }}</span>
@@ -1376,7 +1351,7 @@ function handleToolbarRefresh() {
         </div>
 
         <!-- Card 6: Upstream Errors -->
-        <div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900">
+        <div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900" style="order: 6;">
           <div class="flex items-center justify-between">
             <div class="flex items-center gap-1">
               <span class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.upstreamErrors') }}</span>
diff --git a/frontend/src/views/admin/ops/components/OpsErrorDetailsModal.vue b/frontend/src/views/admin/ops/components/OpsErrorDetailsModal.vue
index 244b3e13..1d9859d4 100644
--- a/frontend/src/views/admin/ops/components/OpsErrorDetailsModal.vue
+++ b/frontend/src/views/admin/ops/components/OpsErrorDetailsModal.vue
@@ -205,12 +205,13 @@ watch(
     <div class="flex h-full min-h-0 flex-col">
       <!-- Filters -->
       <div class="mb-4 flex-shrink-0 border-b border-gray-200 pb-4 dark:border-dark-700">
-        <div class="grid grid-cols-1 gap-4 lg:grid-cols-14">
-          <div class="lg:col-span-4">
-            <div class="relative group">
-              <div class="pointer-events-none absolute inset-y-0 left-0 flex items-center pl-3.5">
+        <div class="flex flex-col gap-2">
+          <!-- 第一行: 搜索框 -->
+          <div class="flex items-center gap-2">
+            <div class="relative flex-1 group">
+              <div class="pointer-events-none absolute inset-y-0 left-0 flex items-center pl-3">
                 <svg
-                  class="h-4 w-4 text-gray-400 transition-colors group-focus-within:text-blue-500"
+                  class="h-3.5 w-3.5 text-gray-400 transition-colors group-focus-within:text-blue-500"
                   fill="none"
                   viewBox="0 0 24 24"
                   stroke="currentColor"
@@ -221,42 +222,45 @@ watch(
               <input
                 v-model="q"
                 type="text"
-                class="w-full rounded-2xl border-gray-200 bg-gray-50/50 py-2 pl-10 pr-4 text-sm font-medium text-gray-700 transition-all focus:border-blue-500 focus:bg-white focus:ring-4 focus:ring-blue-500/10 dark:border-dark-700 dark:bg-dark-900 dark:text-gray-300 dark:focus:bg-dark-800"
+                class="w-full rounded-lg border-gray-200 bg-gray-50/50 py-1.5 pl-9 pr-3 text-xs font-medium text-gray-700 transition-all focus:border-blue-500 focus:bg-white focus:ring-2 focus:ring-blue-500/10 dark:border-dark-700 dark:bg-dark-900 dark:text-gray-300 dark:focus:bg-dark-800"
                 :placeholder="t('admin.ops.errorDetails.searchPlaceholder')"
               />
             </div>
           </div>
 
-          <div class="lg:col-span-2">
-            <Select :model-value="statusCode" :options="statusCodeSelectOptions" class="w-full" @update:model-value="statusCode = $event as any" />
-          </div>
+          <!-- 第二行: 筛选选项 -->
+          <div class="grid grid-cols-2 gap-2 sm:grid-cols-3 lg:grid-cols-6">
+            <div class="col-span-1">
+              <Select :model-value="statusCode" :options="statusCodeSelectOptions" size="sm" @update:model-value="statusCode = $event as any" />
+            </div>
 
-          <div class="lg:col-span-2">
-            <Select :model-value="phase" :options="phaseSelectOptions" class="w-full" @update:model-value="phase = String($event ?? '')" />
-          </div>
+            <div class="col-span-1">
+              <Select :model-value="phase" :options="phaseSelectOptions" size="sm" @update:model-value="phase = String($event ?? '')" />
+            </div>
 
-          <div class="lg:col-span-2">
-            <Select :model-value="errorOwner" :options="ownerSelectOptions" class="w-full" @update:model-value="errorOwner = String($event ?? '')" />
-          </div>
+            <div class="col-span-1">
+              <Select :model-value="errorOwner" :options="ownerSelectOptions" size="sm" @update:model-value="errorOwner = String($event ?? '')" />
+            </div>
 
-          <div class="lg:col-span-2">
-            <Select :model-value="resolvedStatus" :options="resolvedSelectOptions" class="w-full" @update:model-value="resolvedStatus = String($event ?? 'unresolved')" />
-          </div>
+            <div class="col-span-1">
+              <Select :model-value="resolvedStatus" :options="resolvedSelectOptions" size="sm" @update:model-value="resolvedStatus = String($event ?? 'unresolved')" />
+            </div>
 
-          <div class="lg:col-span-1">
-            <input
-              v-model="accountIdInput"
-              type="text"
-              inputmode="numeric"
-              class="input w-full text-sm"
-              :placeholder="t('admin.ops.errorDetails.accountIdPlaceholder')"
-            />
-          </div>
+            <div class="col-span-1">
+              <input
+                v-model="accountIdInput"
+                type="text"
+                inputmode="numeric"
+                class="w-full rounded-lg border-gray-200 bg-gray-50/50 py-1.5 px-3 text-xs font-medium text-gray-700 transition-all focus:border-blue-500 focus:bg-white focus:ring-2 focus:ring-blue-500/10 dark:border-dark-700 dark:bg-dark-900 dark:text-gray-300 dark:focus:bg-dark-800"
+                :placeholder="t('admin.ops.errorDetails.accountIdPlaceholder')"
+              />
+            </div>
 
-          <div class="lg:col-span-1 flex items-center justify-end">
-            <button type="button" class="btn btn-secondary btn-sm" @click="resetFilters">
-              {{ t('common.reset') }}
-            </button>
+            <div class="col-span-1 flex items-center justify-end">
+              <button type="button" class="rounded-lg bg-gray-100 px-3 py-1.5 text-xs font-semibold text-gray-700 transition-colors hover:bg-gray-200 dark:bg-dark-700 dark:text-gray-300 dark:hover:bg-dark-600" @click="resetFilters">
+                {{ t('common.reset') }}
+              </button>
+            </div>
           </div>
         </div>
       </div>
diff --git a/frontend/src/views/admin/ops/components/OpsRuntimeSettingsCard.vue b/frontend/src/views/admin/ops/components/OpsRuntimeSettingsCard.vue
index 1dcab4b3..d64ae390 100644
--- a/frontend/src/views/admin/ops/components/OpsRuntimeSettingsCard.vue
+++ b/frontend/src/views/admin/ops/components/OpsRuntimeSettingsCard.vue
@@ -53,11 +53,6 @@ function validateRuntimeSettings(settings: OpsAlertRuntimeSettings): ValidationR
         errors.push('SLA 最低值必须在 0-100 之间')
       }
     }
-    if (thresholds.latency_p99_ms_max != null) {
-      if (!Number.isFinite(thresholds.latency_p99_ms_max) || thresholds.latency_p99_ms_max < 0) {
-        errors.push('延迟 P99 最大值必须大于或等于 0')
-      }
-    }
     if (thresholds.ttft_p99_ms_max != null) {
       if (!Number.isFinite(thresholds.ttft_p99_ms_max) || thresholds.ttft_p99_ms_max < 0) {
         errors.push('TTFT P99 最大值必须大于或等于 0')
@@ -163,7 +158,6 @@ function openAlertEditor() {
     if (!draftAlert.value.thresholds) {
       draftAlert.value.thresholds = {
         sla_percent_min: 99.5,
-        latency_p99_ms_max: 2000,
         ttft_p99_ms_max: 500,
         request_error_rate_percent_max: 5,
         upstream_error_rate_percent_max: 5
@@ -353,18 +347,5 @@ onMounted(() => {
             <p class="mt-1 text-xs text-gray-500 dark:text-gray-400">SLA 低于此值时将显示为红色</p>
           </div>
 
-          <div>
-            <div class="mb-1 text-xs font-medium text-gray-600 dark:text-gray-300">延迟 P99 最大值 (ms)</div>
-            <input
-              v-model.number="draftAlert.thresholds.latency_p99_ms_max"
-              type="number"
-              min="0"
-              step="100"
-              class="input"
-              placeholder="2000"
-            />
-            <p class="mt-1 text-xs text-gray-500 dark:text-gray-400">延迟 P99 高于此值时将显示为红色</p>
-          </div>
-
           <div>
             <div class="mb-1 text-xs font-medium text-gray-600 dark:text-gray-300">TTFT P99 最大值 (ms)</div>
diff --git a/frontend/src/views/admin/ops/components/OpsSettingsDialog.vue b/frontend/src/views/admin/ops/components/OpsSettingsDialog.vue
index 1f64f253..c8291313 100644
--- a/frontend/src/views/admin/ops/components/OpsSettingsDialog.vue
+++ b/frontend/src/views/admin/ops/components/OpsSettingsDialog.vue
@@ -32,7 +32,6 @@ const advancedSettings = ref<OpsAdvancedSettings | null>(null)
 // 指标阈值配置
 const metricThresholds = ref<OpsMetricThresholds>({
   sla_percent_min: 99.5,
-  latency_p99_ms_max: 2000,
   ttft_p99_ms_max: 500,
   request_error_rate_percent_max: 5,
   upstream_error_rate_percent_max: 5
@@ -53,13 +52,12 @@ async function loadAllSettings() {
     advancedSettings.value = advanced
     // 如果后端返回了阈值，使用后端的值；否则保持默认值
     if (thresholds && Object.keys(thresholds).length > 0) {
-      metricThresholds.value = {
-        sla_percent_min: thresholds.sla_percent_min ?? 99.5,
-        latency_p99_ms_max: thresholds.latency_p99_ms_max ?? 2000,
-        ttft_p99_ms_max: thresholds.ttft_p99_ms_max ?? 500,
-        request_error_rate_percent_max: thresholds.request_error_rate_percent_max ?? 5,
-        upstream_error_rate_percent_max: thresholds.upstream_error_rate_percent_max ?? 5
-      }
+        metricThresholds.value = {
+          sla_percent_min: thresholds.sla_percent_min ?? 99.5,
+          ttft_p99_ms_max: thresholds.ttft_p99_ms_max ?? 500,
+          request_error_rate_percent_max: thresholds.request_error_rate_percent_max ?? 5,
+          upstream_error_rate_percent_max: thresholds.upstream_error_rate_percent_max ?? 5
+        }
     }
   } catch (err: any) {
     console.error('[OpsSettingsDialog] Failed to load settings', err)
@@ -161,9 +159,6 @@ const validation = computed(() => {
   if (metricThresholds.value.sla_percent_min != null && (metricThresholds.value.sla_percent_min < 0 || metricThresholds.value.sla_percent_min > 100)) {
     errors.push('SLA最低百分比必须在0-100之间')
   }
-  if (metricThresholds.value.latency_p99_ms_max != null && metricThresholds.value.latency_p99_ms_max < 0) {
-    errors.push('延迟P99最大值必须大于等于0')
-  }
   if (metricThresholds.value.ttft_p99_ms_max != null && metricThresholds.value.ttft_p99_ms_max < 0) {
     errors.push('TTFT P99最大值必须大于等于0')
   }
@@ -362,17 +357,5 @@ async function saveAllSettings() {
             <p class="mt-1 text-xs text-gray-500">{{ t('admin.ops.settings.slaMinPercentHint') }}</p>
           </div>
 
-          <div>
-            <label class="input-label">{{ t('admin.ops.settings.latencyP99MaxMs') }}</label>
-            <input
-              v-model.number="metricThresholds.latency_p99_ms_max"
-              type="number"
-              min="0"
-              step="100"
-              class="input"
-            />
-            <p class="mt-1 text-xs text-gray-500">{{ t('admin.ops.settings.latencyP99MaxMsHint') }}</p>
-          </div>
-
           <div>
             <label class="input-label">{{ t('admin.ops.settings.ttftP99MaxMs') }}</label>