refactor(ops): 移除duration相关告警指标，简化监控配置

主要改动：
- 移除 p95_latency_ms 和 p99_latency_ms 告警指标类型
- 移除配置中的 latency_p99_ms_max 阈值设置
- 简化健康分数计算（移除 latency 权重，重新归一化 SLA 和错误率）
- 移除 duration 相关的诊断规则和阈值检查
- 统一术语：延迟 → 请求时长
- 保留 duration 数据展示，但不再用于告警判断
- 聚焦 TTFT 作为主要的响应速度告警指标

影响范围：
- Backend: handler, service, models, tests
- Frontend: API types, i18n, components
2026-01-14 10:52:56 +08:00
parent 33f58d583d
commit 182683814b
14 changed files with 92 additions and 227 deletions
--- a/frontend/src/i18n/locales/en.ts
+++ b/frontend/src/i18n/locales/en.ts
@@ -1887,7 +1887,7 @@ export default {
      totalRequests: 'Total Requests',
      avgQps: 'Avg QPS',
      avgTps: 'Avg TPS',
-      avgLatency: 'Avg Latency',
+      avgLatency: 'Avg Request Duration',
      avgTtft: 'Avg TTFT',
      exceptions: 'Exceptions',
      requestErrors: 'Request Errors',
@@ -1899,7 +1899,7 @@ export default {
      errors: 'Errors',
      errorRate: 'error_rate:',
      upstreamRate: 'upstream_rate:',
-      latencyDuration: 'Latency (duration_ms)',
+      latencyDuration: 'Request Duration (ms)',
      ttftLabel: 'TTFT (first_token_ms)',
      p50: 'p50:',
      p90: 'p90:',
@@ -1919,7 +1919,7 @@ export default {
      failedToLoadData: 'Failed to load ops data.',
      failedToLoadOverview: 'Failed to load overview',
      failedToLoadThroughputTrend: 'Failed to load throughput trend',
-      failedToLoadLatencyHistogram: 'Failed to load latency histogram',
+      failedToLoadLatencyHistogram: 'Failed to load request duration histogram',
      failedToLoadErrorTrend: 'Failed to load error trend',
      failedToLoadErrorDistribution: 'Failed to load error distribution',
      failedToLoadErrorDetail: 'Failed to load error detail',
@@ -1927,7 +1927,7 @@ export default {
      tpsK: 'TPS (K)',
      top: 'Top:',
      throughputTrend: 'Throughput Trend',
-      latencyHistogram: 'Latency Histogram',
+      latencyHistogram: 'Request Duration Histogram',
      errorTrend: 'Error Trend',
      errorDistribution: 'Error Distribution',
      // Health Score & Diagnosis
@@ -1973,14 +1973,7 @@ export default {
        memoryHigh: 'Memory usage elevated ({usage}%)',
        memoryHighImpact: 'Memory pressure is high, needs attention',
        memoryHighAction: 'Monitor memory trends, check for memory leaks',
-        // Latency diagnostics
-        latencyCritical: 'Response latency critically high ({latency}ms)',
-        latencyCriticalImpact: 'User experience extremely poor, many requests timing out',
-        latencyCriticalAction: 'Check slow queries, database indexes, network latency, and upstream services',
-        latencyHigh: 'Response latency elevated ({latency}ms)',
-        latencyHighImpact: 'User experience degraded, needs optimization',
-        latencyHighAction: 'Analyze slow request logs, optimize database queries and business logic',
-        ttftHigh: 'Time to first byte elevated ({ttft}ms)',
+        ttftHigh: 'Time to first token elevated ({ttft}ms)',
        ttftHighImpact: 'User perceived latency increased',
        ttftHighAction: 'Optimize request processing flow, reduce pre-processing time',
        // Error rate diagnostics
@@ -2020,7 +2013,7 @@ export default {
        context: 'Context',
        status: 'Status',
        message: 'Message',
-        latency: 'Latency',
+        latency: 'Request Duration',
        action: 'Action',
        noErrors: 'No errors in this window.',
        grp: 'GRP:',
@@ -2049,7 +2042,7 @@ export default {
        basicInfo: 'Basic Info',
        platform: 'Platform',
        model: 'Model',
-        latency: 'Latency',
+        latency: 'Request Duration',
        ttft: 'TTFT',
        businessLimited: 'Business Limited',
        requestPath: 'Request Path',
@@ -2398,8 +2391,6 @@ export default {
        metricThresholdsHint: 'Configure alert thresholds for metrics, values exceeding thresholds will be displayed in red',
        slaMinPercent: 'SLA Minimum Percentage',
        slaMinPercentHint: 'SLA below this value will be displayed in red (default: 99.5%)',
-        latencyP99MaxMs: 'Latency P99 Maximum (ms)',
-        latencyP99MaxMsHint: 'Latency P99 above this value will be displayed in red (default: 2000ms)',
        ttftP99MaxMs: 'TTFT P99 Maximum (ms)',
        ttftP99MaxMsHint: 'TTFT P99 above this value will be displayed in red (default: 500ms)',
        requestErrorRateMaxPercent: 'Request Error Rate Maximum (%)',
@@ -2458,7 +2449,7 @@ export default {
      tooltips: {
        totalRequests: 'Total number of requests (including both successful and failed requests) in the selected time window.',
        throughputTrend: 'Requests/QPS + Tokens/TPS in the selected window.',
-        latencyHistogram: 'Latency distribution (duration_ms) for successful requests.',
+        latencyHistogram: 'Request duration distribution (ms) for successful requests.',
        errorTrend: 'Error counts over time (SLA scope excludes business limits; upstream excludes 429/529).',
        errorDistribution: 'Error distribution by status code.',
        goroutines:
@@ -2473,7 +2464,7 @@ export default {
        sla: 'Service Level Agreement success rate, excluding business limits (e.g., insufficient balance, quota exceeded).',
        errors: 'Error statistics, including total errors, error rate, and upstream error rate.',
        upstreamErrors: 'Upstream error statistics, excluding rate limit errors (429/529).',
-        latency: 'Request latency statistics, including p50, p90, p95, p99 percentiles.',
+        latency: 'Request duration statistics, including p50, p90, p95, p99 percentiles.',
        ttft: 'Time To First Token, measuring the speed of first byte return in streaming responses.',
        health: 'System health score (0-100), considering SLA, error rate, and resource usage.'
      },
--- a/frontend/src/i18n/locales/zh.ts
+++ b/frontend/src/i18n/locales/zh.ts
@@ -2031,7 +2031,7 @@ export default {
      totalRequests: '总请求',
      avgQps: '平均 QPS',
      avgTps: '平均 TPS',
-      avgLatency: '平均延迟',
+      avgLatency: '平均请求时长',
      avgTtft: '平均首字延迟',
      exceptions: '异常数',
      requestErrors: '请求错误',
@@ -2043,7 +2043,7 @@ export default {
      errors: '错误',
      errorRate: '错误率：',
      upstreamRate: '上游错误率：',
-      latencyDuration: '延迟（毫秒）',
+      latencyDuration: '请求时长（毫秒）',
      ttftLabel: '首字延迟（毫秒）',
      p50: 'p50',
      p90: 'p90',
@@ -2063,7 +2063,7 @@ export default {
      failedToLoadData: '加载运维数据失败',
      failedToLoadOverview: '加载概览数据失败',
      failedToLoadThroughputTrend: '加载吞吐趋势失败',
-      failedToLoadLatencyHistogram: '加载延迟分布失败',
+      failedToLoadLatencyHistogram: '加载请求时长分布失败',
      failedToLoadErrorTrend: '加载错误趋势失败',
      failedToLoadErrorDistribution: '加载错误分布失败',
      failedToLoadErrorDetail: '加载错误详情失败',
@@ -2071,7 +2071,7 @@ export default {
      tpsK: 'TPS（千）',
      top: '最高：',
      throughputTrend: '吞吐趋势',
-      latencyHistogram: '延迟分布',
+      latencyHistogram: '请求时长分布',
      errorTrend: '错误趋势',
      errorDistribution: '错误分布',
      // Health Score & Diagnosis
@@ -2117,15 +2117,8 @@ export default {
        memoryHigh: '内存使用率偏高 ({usage}%)',
        memoryHighImpact: '内存压力较大，需要关注',
        memoryHighAction: '监控内存趋势，检查是否有内存泄漏',
-        // Latency diagnostics
-        latencyCritical: '响应延迟严重过高 ({latency}ms)',
-        latencyCriticalImpact: '用户体验极差，大量请求超时',
-        latencyCriticalAction: '检查慢查询、数据库索引、网络延迟和上游服务',
-        latencyHigh: '响应延迟偏高 ({latency}ms)',
-        latencyHighImpact: '用户体验下降，需要优化',
-        latencyHighAction: '分析慢请求日志，优化数据库查询和业务逻辑',
        ttftHigh: '首字节时间偏高 ({ttft}ms)',
-        ttftHighImpact: '用户感知延迟增加',
+        ttftHighImpact: '用户感知时长增加',
        ttftHighAction: '优化请求处理流程，减少前置逻辑耗时',
        // Error rate diagnostics
        upstreamCritical: '上游错误率严重偏高 ({rate}%)',
@@ -2143,13 +2136,13 @@ export default {
        // SLA diagnostics
        slaCritical: 'SLA 严重低于目标 ({sla}%)',
        slaCriticalImpact: '用户体验严重受损',
-        slaCriticalAction: '紧急排查错误和延迟问题，考虑限流保护',
+        slaCriticalAction: '紧急排查错误原因，必要时采取限流保护',
        slaLow: 'SLA 低于目标 ({sla}%)',
        slaLowImpact: '需要关注服务质量',
        slaLowAction: '分析SLA下降原因，优化系统性能',
        // Health score diagnostics
        healthCritical: '综合健康评分过低 ({score})',
-        healthCriticalImpact: '多个指标可能同时异常，建议优先排查错误与延迟',
+        healthCriticalImpact: '多个指标可能同时异常，建议优先排查错误与资源使用情况',
        healthCriticalAction: '全面检查系统状态，优先处理critical级别问题',
        healthLow: '综合健康评分偏低 ({score})',
        healthLowImpact: '可能存在轻度波动，建议关注 SLA 与错误率',
@@ -2164,7 +2157,7 @@ export default {
        context: '上下文',
        status: '状态码',
        message: '消息',
-        latency: '延迟',
+        latency: '请求时长',
        action: '操作',
        noErrors: '该窗口内暂无错误。',
        grp: 'GRP：',
@@ -2193,7 +2186,7 @@ export default {
        basicInfo: '基本信息',
        platform: '平台',
        model: '模型',
-        latency: '延迟',
+        latency: '请求时长',
        ttft: 'TTFT',
        businessLimited: '业务限制',
        requestPath: '请求路径',
@@ -2351,8 +2344,8 @@ export default {
          successRate: '成功率 (%)',
          errorRate: '错误率 (%)',
          upstreamErrorRate: '上游错误率 (%)',
-          p95: 'P95 延迟 (ms)',
-          p99: 'P99 延迟 (ms)',
+          p95: 'P95 请求时长 (ms)',
+          p99: 'P99 请求时长 (ms)',
          cpu: 'CPU 使用率 (%)',
          memory: '内存使用率 (%)',
          queueDepth: '并发排队深度',
@@ -2542,8 +2535,6 @@ export default {
        metricThresholdsHint: '配置各项指标的告警阈值，超出阈值时将以红色显示',
        slaMinPercent: 'SLA最低百分比',
        slaMinPercentHint: 'SLA低于此值时显示为红色（默认：99.5%）',
-        latencyP99MaxMs: '延迟P99最大值（毫秒）',
-        latencyP99MaxMsHint: '延迟P99高于此值时显示为红色（默认：2000ms）',
        ttftP99MaxMs: 'TTFT P99最大值（毫秒）',
        ttftP99MaxMsHint: 'TTFT P99高于此值时显示为红色（默认：500ms）',
        requestErrorRateMaxPercent: '请求错误率最大值（%）',
@@ -2602,12 +2593,12 @@ export default {
      tooltips: {
        totalRequests: '当前时间窗口内的总请求数和Token消耗量。',
        throughputTrend: '当前窗口内的请求/QPS 与 token/TPS 趋势。',
-        latencyHistogram: '成功请求的延迟分布（毫秒）。',
+        latencyHistogram: '成功请求的请求时长分布（毫秒）。',
        errorTrend: '错误趋势（SLA 口径排除业务限制；上游错误率排除 429/529）。',
        errorDistribution: '按状态码统计的错误分布。',
        upstreamErrors: '上游服务返回的错误，包括API提供商的错误响应（排除429/529限流错误）。',
        goroutines:
-          'Go 运行时的协程数量（轻量级线程）。没有绝对“安全值”，建议以历史基线为准。经验参考：<2000 常见；2000-8000 需关注；>8000 且伴随队列/延迟上升时，优先排查阻塞/泄漏。',
+          'Go 运行时的协程数量（轻量级线程）。没有绝对"安全值"，建议以历史基线为准。经验参考：<2000 常见；2000-8000 需关注；>8000 且伴随队列上升时，优先排查阻塞/泄漏。',
        cpu: 'CPU 使用率，显示系统处理器的负载情况。',
        memory: '内存使用率，包括已使用和总可用内存。',
        db: '数据库连接池状态，包括活跃连接、空闲连接和等待连接数。',
@@ -2617,7 +2608,7 @@ export default {
        tokens: '当前时间窗口内处理的总Token数量。',
        sla: '服务等级协议达成率，排除业务限制（如余额不足、配额超限）的成功请求占比。',
        errors: '错误统计，包括总错误数、错误率和上游错误率。',
-        latency: '请求延迟统计，包括 p50、p90、p95、p99 等百分位数。',
+        latency: '请求时长统计，包括 p50、p90、p95、p99 等百分位数。',
        ttft: '首Token延迟（Time To First Token），衡量流式响应的首字节返回速度。',
        health: '系统健康评分（0-100），综合考虑 SLA、错误率和资源使用情况。'
      },