refactor(ops): 移除duration相关告警指标，简化监控配置

主要改动：
- 移除 p95_latency_ms 和 p99_latency_ms 告警指标类型
- 移除配置中的 latency_p99_ms_max 阈值设置
- 简化健康分数计算（移除latency权重，重新归一化SLA和错误率）
- 移除duration相关的诊断规则和阈值检查
- 统一术语：延迟 → 请求时长
- 保留duration数据展示，但不再用于告警判断
- 聚焦TTFT作为主要的响应速度告警指标

影响范围：
- Backend: handler, service, models, tests
- Frontend: API types, i18n, components
2026-01-14 10:52:56 +08:00
parent 33f58d583d
commit 182683814b
14 changed files with 92 additions and 227 deletions
--- a/frontend/src/i18n/locales/en.ts
+++ b/frontend/src/i18n/locales/en.ts
@@ -1887,7 +1887,7 @@ export default {
      totalRequests: 'Total Requests',
      avgQps: 'Avg QPS',
      avgTps: 'Avg TPS',
-      avgLatency: 'Avg Latency',
+      avgLatency: 'Avg Request Duration',
      avgTtft: 'Avg TTFT',
      exceptions: 'Exceptions',
      requestErrors: 'Request Errors',
@@ -1899,7 +1899,7 @@ export default {
      errors: 'Errors',
      errorRate: 'error_rate:',
      upstreamRate: 'upstream_rate:',
-      latencyDuration: 'Latency (duration_ms)',
+      latencyDuration: 'Request Duration (ms)',
      ttftLabel: 'TTFT (first_token_ms)',
      p50: 'p50:',
      p90: 'p90:',
@@ -1919,7 +1919,7 @@ export default {
      failedToLoadData: 'Failed to load ops data.',
      failedToLoadOverview: 'Failed to load overview',
      failedToLoadThroughputTrend: 'Failed to load throughput trend',
-      failedToLoadLatencyHistogram: 'Failed to load latency histogram',
+      failedToLoadLatencyHistogram: 'Failed to load request duration histogram',
      failedToLoadErrorTrend: 'Failed to load error trend',
      failedToLoadErrorDistribution: 'Failed to load error distribution',
      failedToLoadErrorDetail: 'Failed to load error detail',
@@ -1927,7 +1927,7 @@ export default {
      tpsK: 'TPS (K)',
      top: 'Top:',
      throughputTrend: 'Throughput Trend',
-      latencyHistogram: 'Latency Histogram',
+      latencyHistogram: 'Request Duration Histogram',
      errorTrend: 'Error Trend',
      errorDistribution: 'Error Distribution',
      // Health Score & Diagnosis
@@ -1973,14 +1973,7 @@ export default {
        memoryHigh: 'Memory usage elevated ({usage}%)',
        memoryHighImpact: 'Memory pressure is high, needs attention',
        memoryHighAction: 'Monitor memory trends, check for memory leaks',
-        // Latency diagnostics
-        latencyCritical: 'Response latency critically high ({latency}ms)',
-        latencyCriticalImpact: 'User experience extremely poor, many requests timing out',
-        latencyCriticalAction: 'Check slow queries, database indexes, network latency, and upstream services',
-        latencyHigh: 'Response latency elevated ({latency}ms)',
-        latencyHighImpact: 'User experience degraded, needs optimization',
-        latencyHighAction: 'Analyze slow request logs, optimize database queries and business logic',
-        ttftHigh: 'Time to first byte elevated ({ttft}ms)',
+        ttftHigh: 'Time to first token elevated ({ttft}ms)',
        ttftHighImpact: 'User perceived latency increased',
        ttftHighAction: 'Optimize request processing flow, reduce pre-processing time',
        // Error rate diagnostics
@@ -2020,7 +2013,7 @@ export default {
        context: 'Context',
        status: 'Status',
        message: 'Message',
-        latency: 'Latency',
+        latency: 'Request Duration',
        action: 'Action',
        noErrors: 'No errors in this window.',
        grp: 'GRP:',
@@ -2049,7 +2042,7 @@ export default {
        basicInfo: 'Basic Info',
        platform: 'Platform',
        model: 'Model',
-        latency: 'Latency',
+        latency: 'Request Duration',
        ttft: 'TTFT',
        businessLimited: 'Business Limited',
        requestPath: 'Request Path',
@@ -2398,8 +2391,6 @@ export default {
        metricThresholdsHint: 'Configure alert thresholds for metrics, values exceeding thresholds will be displayed in red',
        slaMinPercent: 'SLA Minimum Percentage',
        slaMinPercentHint: 'SLA below this value will be displayed in red (default: 99.5%)',
-        latencyP99MaxMs: 'Latency P99 Maximum (ms)',
-        latencyP99MaxMsHint: 'Latency P99 above this value will be displayed in red (default: 2000ms)',
        ttftP99MaxMs: 'TTFT P99 Maximum (ms)',
        ttftP99MaxMsHint: 'TTFT P99 above this value will be displayed in red (default: 500ms)',
        requestErrorRateMaxPercent: 'Request Error Rate Maximum (%)',
@@ -2458,7 +2449,7 @@ export default {
      tooltips: {
        totalRequests: 'Total number of requests (including both successful and failed requests) in the selected time window.',
        throughputTrend: 'Requests/QPS + Tokens/TPS in the selected window.',
-        latencyHistogram: 'Latency distribution (duration_ms) for successful requests.',
+        latencyHistogram: 'Request duration distribution (ms) for successful requests.',
        errorTrend: 'Error counts over time (SLA scope excludes business limits; upstream excludes 429/529).',
        errorDistribution: 'Error distribution by status code.',
        goroutines:
@@ -2473,7 +2464,7 @@ export default {
        sla: 'Service Level Agreement success rate, excluding business limits (e.g., insufficient balance, quota exceeded).',
        errors: 'Error statistics, including total errors, error rate, and upstream error rate.',
        upstreamErrors: 'Upstream error statistics, excluding rate limit errors (429/529).',
-        latency: 'Request latency statistics, including p50, p90, p95, p99 percentiles.',
+        latency: 'Request duration statistics, including p50, p90, p95, p99 percentiles.',
        ttft: 'Time To First Token, measuring the speed of first byte return in streaming responses.',
        health: 'System health score (0-100), considering SLA, error rate, and resource usage.'
      },